//! Audio capture implementation using cpal.

use std::sync::Arc;

use anyhow::Result;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use cpal::{Device, SampleFormat, Stream, StreamConfig};
use crossbeam_channel::Sender;
use parking_lot::Mutex;
/// Number of most-recent mono samples retained for waveform visualization.
const WAVEFORM_SIZE: usize = 2048;
/// Audio capture stream.
pub struct CaptureStream {
stream: Stream,
config: StreamConfig,
}
impl CaptureStream {
/// List available input devices.
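    ///
    /// # Example
    ///
    /// A minimal sketch (fenced as `ignore` because the import path is a
    /// placeholder for wherever this module lives):
    ///
    /// ```ignore
    /// use your_crate::audio::CaptureStream; // hypothetical path
    ///
    /// for name in CaptureStream::list_devices() {
    ///     println!("input device: {name}");
    /// }
    /// ```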
pub fn list_devices() -> Vec<String> {
let host = cpal::default_host();
host.input_devices()
.map(|devices| {
devices
.filter_map(|d| d.name().ok())
.collect()
})
.unwrap_or_default()
}
/// Create a new capture stream.
///
    /// - `capture_buffer`: buffer for transcription; appended to only while `is_recording` is true
    /// - `waveform_buffer`: ring buffer for visualization; always updated
    /// - `is_recording`: flag controlling whether samples are captured for transcription
    /// - `level_tx`: channel for sending RMS audio levels to the visualization
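    ///
    /// # Example
    ///
    /// A minimal wiring sketch (fenced as `ignore`: the import path is a
    /// placeholder and a real run needs audio hardware):
    ///
    /// ```ignore
    /// use std::sync::Arc;
    ///
    /// use parking_lot::Mutex;
    /// use your_crate::audio::CaptureStream; // hypothetical path
    ///
    /// let capture = Arc::new(Mutex::new(Vec::new()));
    /// let waveform = Arc::new(Mutex::new(Vec::new()));
    /// let recording = Arc::new(Mutex::new(false));
    /// // Bounded so a stalled consumer can't queue unbounded level updates
    /// let (level_tx, level_rx) = crossbeam_channel::bounded(64);
    ///
    /// let stream = CaptureStream::new(capture, waveform, recording.clone(), level_tx)
    ///     .expect("failed to build capture stream");
    /// stream.start().expect("failed to start stream");
    /// *recording.lock() = true; // begin filling the capture buffer
    /// // level_rx can now drive a level meter on the UI side
    /// ```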
pub fn new(
capture_buffer: Arc<Mutex<Vec<f32>>>,
waveform_buffer: Arc<Mutex<Vec<f32>>>,
is_recording: Arc<Mutex<bool>>,
level_tx: Sender<f32>,
) -> Result<Self> {
let host = cpal::default_host();
// List all devices for debugging
tracing::info!("Available input devices:");
if let Ok(devices) = host.input_devices() {
for device in devices {
if let Ok(name) = device.name() {
tracing::info!(" - {}", name);
}
}
}
// Try to find a hardware microphone first (bypassing virtual audio devices)
// Prefer "Arctis" or "Microphone" but avoid "Sonar" virtual devices
let device = host.input_devices()
.ok()
.and_then(|mut devices| {
devices.find(|d| {
if let Ok(name) = d.name() {
let lower = name.to_lowercase();
// Prefer hardware devices, skip virtual ones
(lower.contains("arctis") || lower.contains("microphone"))
&& !lower.contains("sonar")
&& !lower.contains("virtual")
} else {
false
}
})
})
.or_else(|| host.default_input_device())
.ok_or_else(|| anyhow::anyhow!("No input device available"))?;
let config = device.default_input_config()?;
let sample_format = config.sample_format();
let config: StreamConfig = config.into();
tracing::info!(
"Using input device: {} ({:?} @ {}Hz, {} channels)",
device.name().unwrap_or_default(),
sample_format,
config.sample_rate.0,
config.channels
);
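        // Dispatch on the runtime sample format to a monomorphized builder;
        // the FromSample bound converts each native sample type to f32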
let stream = match sample_format {
SampleFormat::F32 => build_stream::<f32>(&device, &config, capture_buffer, waveform_buffer, is_recording, level_tx)?,
SampleFormat::I16 => build_stream::<i16>(&device, &config, capture_buffer, waveform_buffer, is_recording, level_tx)?,
SampleFormat::U16 => build_stream::<u16>(&device, &config, capture_buffer, waveform_buffer, is_recording, level_tx)?,
_ => return Err(anyhow::anyhow!("Unsupported sample format: {:?}", sample_format)),
};
Ok(Self { stream, config })
}
/// Start the capture stream.
pub fn start(&self) -> Result<()> {
self.stream.play()?;
Ok(())
}
/// Pause the capture stream.
#[allow(dead_code)]
pub fn pause(&self) -> Result<()> {
self.stream.pause()?;
Ok(())
}
/// Get the sample rate.
pub fn sample_rate(&self) -> u32 {
self.config.sample_rate.0
}
}
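
/// Build the typed input stream: downmixes each frame to mono `f32`, feeds the
/// waveform ring buffer (and, while recording, the capture buffer), and sends
/// one RMS level per callback over `level_tx`.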
fn build_stream<T>(
device: &Device,
config: &StreamConfig,
capture_buffer: Arc<Mutex<Vec<f32>>>,
waveform_buffer: Arc<Mutex<Vec<f32>>>,
is_recording: Arc<Mutex<bool>>,
level_tx: Sender<f32>,
) -> Result<Stream>
where
T: cpal::Sample + cpal::SizedSample,
f32: cpal::FromSample<T>,
{
let err_fn = |err| tracing::error!("Audio stream error: {}", err);
let channels = config.channels as usize;
let stream = device.build_input_stream(
config,
move |data: &[T], _: &cpal::InputCallbackInfo| {
// Convert to mono f32 and calculate RMS
let mut sum_sq = 0.0f32;
let mut mono_samples = Vec::with_capacity(data.len() / channels);
            // Average each full frame down to mono; chunks_exact skips a
            // trailing partial frame so the average always divides by a
            // full channel count
            for frame in data.chunks_exact(channels) {
let mono: f32 = frame
.iter()
.map(|s| <f32 as cpal::FromSample<T>>::from_sample_(*s))
.sum::<f32>()
/ channels as f32;
mono_samples.push(mono);
sum_sq += mono * mono;
}
            // Always update the waveform buffer, trimming from the front so it
            // holds the most recent WAVEFORM_SIZE mono samples
            {
                let mut waveform = waveform_buffer.lock();
                waveform.extend_from_slice(&mono_samples);
                let overflow = waveform.len().saturating_sub(WAVEFORM_SIZE);
                if overflow > 0 {
                    // One drain per callback instead of a per-sample remove(0)
                    waveform.drain(..overflow);
                }
            }
// Only fill capture buffer when recording
if *is_recording.lock() {
let mut capture = capture_buffer.lock();
capture.extend_from_slice(&mono_samples);
}
            // Compute the RMS level for this callback, amplified and clamped
            // so quiet input remains visible in the visualization
            let rms = (sum_sq / mono_samples.len().max(1) as f32).sqrt();
            let level = (rms * 10.0).min(1.0);
let _ = level_tx.try_send(level);
},
err_fn,
None,
)?;
Ok(stream)
}
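
// A small sanity check of the ring-buffer trimming math used in the audio
// callback above; the callback needs real hardware, so the same logic is
// re-stated here as a sketch rather than exercised end to end.
#[cfg(test)]
mod tests {
    use super::WAVEFORM_SIZE;

    #[test]
    fn waveform_trim_keeps_most_recent_samples() {
        // Start with a full buffer, then simulate one callback's worth of data
        let mut waveform: Vec<f32> = (0..WAVEFORM_SIZE).map(|i| i as f32).collect();
        let incoming: Vec<f32> = (0..100).map(|i| -(i as f32)).collect();

        waveform.extend_from_slice(&incoming);
        let overflow = waveform.len().saturating_sub(WAVEFORM_SIZE);
        if overflow > 0 {
            waveform.drain(..overflow);
        }

        assert_eq!(waveform.len(), WAVEFORM_SIZE);
        // Oldest surviving sample is original index 100; newest is -99.0
        assert_eq!(waveform[0], 100.0);
        assert_eq!(waveform[WAVEFORM_SIZE - 1], -99.0);
    }
}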