//! Voice Integration - Transcription and TTS via liquid-rust
//!
//! Provides voice capabilities for the dashboard:
//! - Speech-to-text transcription with salience analysis
//! - Speaker recognition via Phoenix Protocol
//! - Text-to-speech with multiple voice personas
//!
//! Requires the `voice` feature flag and liquid-rust models.
//! Until liquid-rust is integrated, the handlers below return HTTP 501 "not implemented" stubs.
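//!
//! A minimal sketch (illustrative, not compiled here) of how these handlers
//! might be mounted on an axum `Router`; the module path `crate::voice` is an
//! assumption, and the routes mirror the paths documented on each handler below.
//!
//! ```rust,ignore
//! use axum::{routing::post, Router};
//! use crate::voice::{register_speaker, speak, transcribe};
//!
//! let app: Router = Router::new()
//!     .route("/api/voice/transcribe", post(transcribe))
//!     .route("/api/voice/register", post(register_speaker))
//!     .route("/api/voice/speak", post(speak));
//! ```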

use axum::{
    extract::Multipart,
    http::StatusCode,
    Json,
};
use serde::{Deserialize, Serialize};

/// Transcription result with salience and speaker info
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TranscriptionResult {
    /// Transcribed text
    pub text: String,
    /// Salience score (0.0 to 1.0): how important or urgent the utterance is
    pub salience: f32,
    /// Identified speaker (if registered in Phoenix DB)
    pub speaker: Option<String>,
    /// Speaker identification confidence
    pub speaker_confidence: Option<f32>,
    /// Emotional profile
    pub emotion: Option<EmotionProfile>,
}

/// Emotional profile from voice analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmotionProfile {
    /// Valence: positive (1.0) to negative (-1.0)
    pub valence: f32,
    /// Arousal: excited (1.0) to calm (0.0)
    pub arousal: f32,
    /// Voice stability (0.0 to 1.0)
    pub stability: f32,
}

/// TTS request
#[derive(Debug, Deserialize)]
pub struct SpeakRequest {
    /// Text to speak
    pub text: String,
    /// Voice persona to use
    #[serde(default = "default_voice")]
    pub voice: String,
}

fn default_voice() -> String {
    "aye".to_string()
}

/// Speaker registration request
#[derive(Debug, Deserialize)]
pub struct RegisterSpeakerRequest {
    /// Label for the speaker (e.g., "Hue")
    pub label: String,
}

// =============================================================================
// Stub API Handlers (until liquid-rust is integrated)
// =============================================================================

/// Transcribe uploaded audio
///
/// POST /api/voice/transcribe
/// Content-Type: multipart/form-data
///
/// Returns: TranscriptionResult with text, salience, and optional speaker ID
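///
/// Once liquid-rust is integrated, the multipart payload would likely be
/// consumed roughly as follows (a sketch only; `run_transcription` is a
/// hypothetical helper, not part of this module):
///
/// ```rust,ignore
/// while let Some(field) = multipart.next_field().await? {
///     if field.name() == Some("audio") {
///         // Collect the uploaded audio bytes and hand them to the model.
///         let audio = field.bytes().await?;
///         return Ok(Json(run_transcription(&audio)?));
///     }
/// }
/// ```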
pub async fn transcribe(
    mut _multipart: Multipart,
) -> Result<Json<TranscriptionResult>, (StatusCode, String)> {
    // TODO: Implement once liquid-rust is integrated.
    // For now, return a 501 Not Implemented error.
    Err((
        StatusCode::NOT_IMPLEMENTED,
        "Voice transcription requires liquid-rust integration. \
         See docs/plans/2025-11-11-realtime-collaborative-dashboard-design.md"
            .to_string(),
    ))
}

/// Register a speaker for Phoenix Protocol recognition
///
/// POST /api/voice/register
/// Content-Type: multipart/form-data
/// Fields: label (text), audio (file)
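///
/// A sketch of how the fields might be read once implemented (illustrative
/// only; the Phoenix DB call is a placeholder):
///
/// ```rust,ignore
/// let (mut label, mut audio) = (None, None);
/// while let Some(field) = multipart.next_field().await? {
///     let name = field.name().map(str::to_owned);
///     match name.as_deref() {
///         Some("label") => label = Some(field.text().await?),
///         Some("audio") => audio = Some(field.bytes().await?),
///         _ => {}
///     }
/// }
/// // phoenix_db.register(label, audio) would go here (hypothetical API).
/// ```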
pub async fn register_speaker(
    mut _multipart: Multipart,
) -> Result<Json<serde_json::Value>, (StatusCode, String)> {
    Err((
        StatusCode::NOT_IMPLEMENTED,
        "Speaker registration requires liquid-rust integration.".to_string(),
    ))
}

/// Generate speech from text using TTS
///
/// POST /api/voice/speak
/// Content-Type: application/json
/// Body: { "text": "Hello", "voice": "aye" }
///
/// Available voices: aye, omnimom, claude, alert, sky, adam, bella, nicole, michael
pub async fn speak(
    Json(_req): Json<SpeakRequest>,
) -> Result<impl axum::response::IntoResponse, (StatusCode, String)> {
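    // The turbofish pins a concrete Ok type (a one-element header array plus a
    // byte body) so the `impl IntoResponse` return type can be inferred even
    // though only the Err arm is ever built. A real implementation would
    // presumably return something like `([(CONTENT_TYPE, "audio/wav")], wav_bytes)`.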
    Err::<([(axum::http::header::HeaderName, &str); 1], Vec<u8>), _>((
        StatusCode::NOT_IMPLEMENTED,
        "TTS requires liquid-rust integration.".to_string(),
    ))
}

// =============================================================================
// Future: Full implementation when liquid-rust is integrated
// =============================================================================
//
// When liquid-rust is ready, this module will provide:
//
// 1. VoiceEngine struct holding:
//    - LfmModel for transcription
//    - PhoenixSpeakerDB for speaker recognition
//    - TtsEngine for text-to-speech
//
// 2. Real implementations of:
//    - transcribe() -> Decode audio, run inference, analyze salience
//    - register_speaker() -> Add voice to Phoenix DB
//    - speak() -> Generate WAV audio from text
//
// 3. Integration with dashboard state:
//    - Voice hints sent via WebSocket
//    - Salience metrics displayed in UI
//    - Speaker identification in activity log
//
// See ../liquid-rust/examples/aye_ears.rs for a reference implementation.
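
// A small unit-test sketch exercising the `serde(default)` fallback on
// `SpeakRequest`: when the "voice" field is omitted, deserialization falls
// back to the "aye" persona.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn speak_request_defaults_to_aye_voice() {
        let req: SpeakRequest = serde_json::from_str(r#"{ "text": "Hello" }"#)
            .expect("valid SpeakRequest JSON");
        assert_eq!(req.text, "Hello");
        assert_eq!(req.voice, "aye");
    }
}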