fastrepl
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 1 addition & 2 deletions
diff --git a/crates/agc/Cargo.toml b/crates/agc/Cargo.toml
Lines changed: 1 addition & 2 deletions
diff --git a/crates/agc/src/lib.rs b/crates/agc/src/lib.rs
Lines changed: 29 additions & 67 deletions
diff --git a/crates/vad-ext/src/continuous2.rs b/crates/vad-ext/src/continuous2.rs
Lines changed: 17 additions & 118 deletions
diff --git a/crates/vad-ext/src/lib.rs b/crates/vad-ext/src/lib.rs
Lines changed: 2 additions & 0 deletions
@@ -10,7 +10,6 @@ rodio = { workspace = true }
 
 [dependencies]
 hypr-audio-utils = { workspace = true }
-hypr-vad3 = { workspace = true }
-hypr-vvad = { workspace = true }
+hypr-vad-ext = { workspace = true }
 
 dagc = "0.1.1"
@@ -1,84 +1,52 @@
 use dagc::MonoAgc;
-use hypr_audio_utils::f32_to_i16_samples;
-use hypr_vvad::VoiceActivityDetector;
+use hypr_vad_ext::{StreamingVad, VadConfig};
 
 pub struct VadAgc {
     agc: MonoAgc,
-    vad: VoiceActivityDetector,
-    frame_size: usize,
-    vad_tail: Vec<f32>,
-    last_is_speech: bool,
+    vad: Option<StreamingVad>,
+    vad_cfg: VadConfig,
+    mask_non_speech: bool,
 }
 
 impl VadAgc {
     pub fn new(desired_output_rms: f32, distortion_factor: f32) -> Self {
         Self {
             agc: MonoAgc::new(desired_output_rms, distortion_factor).expect("failed_to_create_agc"),
-            vad: VoiceActivityDetector::new(),
-            frame_size: 0,
-            vad_tail: Vec::new(),
-            last_is_speech: true,
+            vad: None,
+            vad_cfg: VadConfig::default(),
+            mask_non_speech: false,
         }
     }
 
+    pub fn with_masking(mut self, mask_non_speech: bool) -> Self {
+        self.mask_non_speech = mask_non_speech;
+        self
+    }
+
+    pub fn with_vad_config(mut self, cfg: VadConfig) -> Self {
+        self.vad_cfg = cfg;
+        self
+    }
+
     pub fn process(&mut self, samples: &mut [f32]) {
         if samples.is_empty() {
             return;
         }
 
-        if self.frame_size == 0 {
-            self.frame_size = hypr_vad3::choose_optimal_frame_size(samples.len());
-        }
-        let frame_size = self.frame_size;
-
-        let mut pos = 0;
+        let vad = self
+            .vad
+            .get_or_insert_with(|| StreamingVad::with_config(samples.len(), self.vad_cfg.clone()));
 
-        if !self.vad_tail.is_empty() {
-            let needed = frame_size - self.vad_tail.len();
-            let to_take = needed.min(samples.len());
+        let agc = &mut self.agc;
+        let mask_non_speech = self.mask_non_speech;
 
-            let mut frame_f32 = std::mem::take(&mut self.vad_tail);
-            frame_f32.reserve(frame_size - frame_f32.len());
-            frame_f32.extend_from_slice(&samples[..to_take]);
-
-            if frame_f32.len() == frame_size {
-                let i16_samples = f32_to_i16_samples(&frame_f32);
-                let is_speech = self.vad.predict_16khz(&i16_samples).unwrap_or(true);
-                self.last_is_speech = is_speech;
-
-                self.agc.freeze_gain(!is_speech);
-                self.agc.process(&mut samples[..to_take]);
-
-                pos = to_take;
-            } else {
-                self.vad_tail = frame_f32;
-
-                self.agc.freeze_gain(!self.last_is_speech);
-                self.agc.process(samples);
-                return;
+        vad.process_in_place(samples, |frame, is_speech| {
+            agc.freeze_gain(!is_speech);
+            if !is_speech && mask_non_speech {
+                frame.fill(0.0);
             }
-        }
-
-        while samples.len() - pos >= frame_size {
-            let frame = &mut samples[pos..pos + frame_size];
-
-            let i16_samples = f32_to_i16_samples(frame);
-            let is_speech = self.vad.predict_16khz(&i16_samples).unwrap_or(true);
-            self.last_is_speech = is_speech;
-
-            self.agc.freeze_gain(!is_speech);
-            self.agc.process(frame);
-
-            pos += frame_size;
-        }
-
-        if pos < samples.len() {
-            self.vad_tail.clear();
-            self.vad_tail.extend_from_slice(&samples[pos..]);
-
-            self.agc.freeze_gain(!self.last_is_speech);
-            self.agc.process(&mut samples[pos..]);
-        }
+            agc.process(frame);
+        });
     }
 
     pub fn gain(&self) -> f32 {
@@ -88,13 +56,7 @@ impl VadAgc {
 
 impl Default for VadAgc {
     fn default() -> Self {
-        Self {
-            agc: MonoAgc::new(0.03, 0.0001).expect("failed_to_create_agc"),
-            vad: VoiceActivityDetector::new(),
-            frame_size: 0,
-            vad_tail: Vec::new(),
-            last_is_speech: true,
-        }
+        Self::new(0.03, 0.0001)
     }
 }
 
 
@@ -4,34 +4,31 @@ use std::{
 };
 
 use futures_util::Stream;
-use hypr_audio_utils::f32_to_i16_samples;
-use hypr_vvad::VoiceActivityDetector;
+
+use crate::{StreamingVad, VadConfig};
 
 pub struct ContinuousVadMaskStream<S> {
     inner: S,
-    vad: VoiceActivityDetector,
-    hangover_frames: usize,
-    trailing_non_speech: usize,
-    in_speech: bool,
-    scratch_frame: Vec<f32>,
-    amplitude_floor: f32,
+    vad: Option<StreamingVad>,
+    cfg: VadConfig,
 }
 
 impl<S> ContinuousVadMaskStream<S> {
     pub fn new(inner: S) -> Self {
         Self {
             inner,
-            vad: VoiceActivityDetector::new(),
-            hangover_frames: 3,
-            trailing_non_speech: 0,
-            in_speech: true,
-            scratch_frame: Vec::new(),
-            amplitude_floor: 0.001,
+            vad: None,
+            cfg: VadConfig::default(),
         }
     }
 
     pub fn with_hangover_frames(mut self, frames: usize) -> Self {
-        self.hangover_frames = frames;
+        self.cfg.hangover_frames = frames;
+        self
+    }
+
+    pub fn with_amplitude_floor(mut self, floor: f32) -> Self {
+        self.cfg.amplitude_floor = floor;
         self
     }
 
@@ -40,68 +37,15 @@ impl<S> ContinuousVadMaskStream<S> {
             return;
         }
 
-        let frame_size = hypr_vad3::choose_optimal_frame_size(chunk.len());
-        debug_assert!(frame_size > 0, "VAD frame size must be > 0");
-
-        for frame in chunk.chunks_mut(frame_size) {
-            self.process_frame(frame, frame_size);
-        }
-    }
-
-    fn smooth_vad_decision(&mut self, raw_is_speech: bool) -> bool {
-        if raw_is_speech {
-            self.in_speech = true;
-            self.trailing_non_speech = 0;
-            true
-        } else if self.in_speech && self.trailing_non_speech < self.hangover_frames {
-            self.trailing_non_speech += 1;
-            true
-        } else {
-            self.in_speech = false;
-            self.trailing_non_speech = 0;
-            false
-        }
-    }
-
-    fn process_frame(&mut self, frame: &mut [f32], frame_size: usize) {
-        if frame.is_empty() {
-            return;
-        }
+        let vad = self
+            .vad
+            .get_or_insert_with(|| StreamingVad::with_config(chunk.len(), self.cfg.clone()));
 
-        let rms = Self::calculate_rms(frame);
-        if rms < self.amplitude_floor {
-            let is_speech = self.smooth_vad_decision(false);
+        vad.process_in_place(chunk, |frame, is_speech| {
             if !is_speech {
                 frame.fill(0.0);
             }
-            return;
-        }
-
-        let raw_is_speech = if frame.len() == frame_size {
-            let i16_samples = f32_to_i16_samples(frame);
-            self.vad.predict_16khz(&i16_samples).unwrap_or(true)
-        } else {
-            self.scratch_frame.clear();
-            self.scratch_frame.extend_from_slice(frame);
-            self.scratch_frame.resize(frame_size, 0.0);
-
-            let i16_samples = f32_to_i16_samples(&self.scratch_frame);
-            self.vad.predict_16khz(&i16_samples).unwrap_or(true)
-        };
-
-        let is_speech = self.smooth_vad_decision(raw_is_speech);
-
-        if !is_speech {
-            frame.fill(0.0);
-        }
-    }
-
-    fn calculate_rms(samples: &[f32]) -> f32 {
-        if samples.is_empty() {
-            return 0.0;
-        }
-        let sum_sq: f32 = samples.iter().map(|&s| s * s).sum();
-        (sum_sq / samples.len() as f32).sqrt()
+        });
     }
 }
 
@@ -194,8 +138,6 @@ mod tests {
             }
         }
 
-        // We should not *introduce* any non-zero samples, and the vast majority
-        // of silence should stay zero.
         let non_zero_count = masked_samples.iter().filter(|&&s| s != 0.0).count();
         assert!(
             non_zero_count < 100,
@@ -204,48 +146,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn test_hangover_logic() {
-        // Use an empty inner stream; we only care about the internal state machine.
-        let mut vad_stream = ContinuousVadMaskStream::new(stream::empty::<Result<Vec<f32>, ()>>());
-        vad_stream.hangover_frames = 3;
-
-        // Initial state is conservative: in_speech = true
-        assert!(vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 0);
-
-        // Simulate raw VAD decisions: T, F, F, F, F
-        // First: raw speech
-        assert!(vad_stream.smooth_vad_decision(true));
-        assert!(vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 0);
-
-        // First false: still treated as speech (hangover 1/3)
-        assert!(vad_stream.smooth_vad_decision(false));
-        assert!(vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 1);
-
-        // Second false: still speech (hangover 2/3)
-        assert!(vad_stream.smooth_vad_decision(false));
-        assert!(vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 2);
-
-        // Third false: still speech (hangover 3/3)
-        assert!(vad_stream.smooth_vad_decision(false));
-        assert!(vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 3);
-
-        // Fourth false: now we finally flip to non-speech
-        assert!(!vad_stream.smooth_vad_decision(false));
-        assert!(!vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 0);
-
-        // More false: stays non-speech
-        assert!(!vad_stream.smooth_vad_decision(false));
-        assert!(!vad_stream.in_speech);
-        assert_eq!(vad_stream.trailing_non_speech, 0);
-    }
-
     #[tokio::test]
     async fn test_different_chunk_sizes() {
         let input_audio = rodio::Decoder::new(std::io::BufReader::new(
@@ -316,7 +216,6 @@ mod tests {
 
     #[test]
     fn test_frame_size_selection() {
-        // Sanity-check assumptions about the VAD helper we're using.
         assert_eq!(hypr_vad3::choose_optimal_frame_size(160), 160);
         assert_eq!(hypr_vad3::choose_optimal_frame_size(320), 320);
         assert_eq!(hypr_vad3::choose_optimal_frame_size(480), 480);
 
@@ -1,10 +1,12 @@
 mod continuous;
 mod continuous2;
 mod error;
+mod streaming;
 
 pub use continuous::*;
 pub use continuous2::*;
 pub use error::*;
+pub use streaming::*;
 
 #[cfg(test)]
 pub mod tests {