Skip to content

Commit 9a07763

Browse files
committed
fix
1 parent 9da8dd0 commit 9a07763

File tree

5 files changed

+47
-43
lines changed

5 files changed

+47
-43
lines changed

Cargo.lock

Lines changed: 19 additions & 17 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/audio-utils/src/lib.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,16 +30,15 @@ pub trait AudioFormatExt: AsyncSource {
3030
pub fn i16_to_f32_samples(samples: &[i16]) -> Vec<f32> {
3131
samples
3232
.iter()
33-
.map(|&sample| sample as f32 / std::i16::MAX as f32)
33+
.map(|&sample| sample as f32 / 32768.0)
3434
.collect()
3535
}
3636

3737
pub fn f32_to_i16_samples(samples: &[f32]) -> Vec<i16> {
3838
samples
3939
.iter()
4040
.map(|&sample| {
41-
let scaled =
42-
(sample * std::i16::MAX as f32).clamp(std::i16::MIN as f32, std::i16::MAX as f32);
41+
let scaled = (sample * 32768.0).clamp(-32768.0, 32768.0);
4342
scaled as i16
4443
})
4544
.collect()

crates/pyannote/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ reqwest = { workspace = true, features = ["json"], optional = true }
1414
url = { workspace = true, optional = true }
1515

1616
hypr-onnx = { workspace = true, optional = true }
17-
knf-rs = { git = "https://github.com/thewh1teagle/pyannote-rs", rev = "e3abad6", package = "knf-rs", optional = true }
17+
knf-rs = { version = "0.2.9", optional = true }
1818

1919
anyhow = { workspace = true }
2020
thiserror = { workspace = true }

plugins/local-stt/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ hypr-chunker = { workspace = true }
2929
hypr-db-user = { workspace = true }
3030
hypr-file = { workspace = true }
3131
hypr-listener-interface = { workspace = true }
32-
hypr-pyannote = { workspace = true, features = ["local"] }
32+
hypr-pyannote = { workspace = true, features = [] }
3333
hypr-whisper = { workspace = true, features = ["local"] }
3434
hypr-ws-utils = { workspace = true }
3535

plugins/local-stt/src/ext.rs

Lines changed: 24 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -176,31 +176,34 @@ impl<R: Runtime, T: Manager<R>> LocalSttPluginExt<R> for T {
176176
.dynamic_prompt("")
177177
.build();
178178

179-
let mut segmenter = hypr_pyannote::local::segmentation::Segmenter::new(16000).unwrap();
179+
// TODO
180+
// https://github.com/thewh1teagle/pyannote-rs/issues/13
181+
182+
// let mut segmenter = hypr_pyannote::local::segmentation::Segmenter::new(16000).unwrap();
180183
let segments = segmenter.process(&samples, 16000).unwrap();
181184

182185
let mut words = Vec::new();
183186

184-
for segment in segments {
185-
let audio_f32 = hypr_audio_utils::i16_to_f32_samples(&segment.samples);
186-
187-
let whisper_segments = model.transcribe(&audio_f32).unwrap();
188-
189-
for whisper_segment in whisper_segments {
190-
let start_sec: f64 = segment.start + (whisper_segment.start() as f64);
191-
let end_sec: f64 = segment.start + (whisper_segment.end() as f64);
192-
let start_ms = (start_sec * 1000.0) as u64;
193-
let end_ms = (end_sec * 1000.0) as u64;
194-
195-
words.push(Word {
196-
text: whisper_segment.text().to_string(),
197-
speaker: None,
198-
confidence: Some(whisper_segment.confidence()),
199-
start_ms: Some(start_ms),
200-
end_ms: Some(end_ms),
201-
});
202-
}
203-
}
187+
// for segment in segments {
188+
// let audio_f32 = hypr_audio_utils::i16_to_f32_samples(&segment.samples);
189+
190+
// let whisper_segments = model.transcribe(&audio_f32).unwrap();
191+
192+
// for whisper_segment in whisper_segments {
193+
// let start_sec: f64 = segment.start + (whisper_segment.start() as f64);
194+
// let end_sec: f64 = segment.start + (whisper_segment.end() as f64);
195+
// let start_ms = (start_sec * 1000.0) as u64;
196+
// let end_ms = (end_sec * 1000.0) as u64;
197+
198+
// words.push(Word {
199+
// text: whisper_segment.text().to_string(),
200+
// speaker: None,
201+
// confidence: Some(whisper_segment.confidence()),
202+
// start_ms: Some(start_ms),
203+
// end_ms: Some(end_ms),
204+
// });
205+
// }
206+
// }
204207

205208
Ok(words)
206209
}

0 commit comments

Comments
 (0)