Skip to main content
Glama

Voice Mode

by mbailey
devices.py (10.9 kB)
"""Audio device management tools.""" import logging from typing import Optional import sounddevice as sd from voice_mode.server import mcp from voice_mode.shared import startup_initialization logger = logging.getLogger("voicemode") @mcp.tool() async def check_audio_devices() -> str: """List available audio input and output devices""" try: devices = sd.query_devices() input_devices = [] output_devices = [] for i, device in enumerate(devices): if device['max_input_channels'] > 0: input_devices.append(f" [{i}] {device['name']} ({device['max_input_channels']} channels)") if device['max_output_channels'] > 0: output_devices.append(f" [{i}] {device['name']} ({device['max_output_channels']} channels)") default_input = sd.query_devices(kind='input') default_output = sd.query_devices(kind='output') result = [] result.append("Audio Devices:") result.append(f"\nDefault Input: [{default_input['index']}] {default_input['name']}") result.append(f"Default Output: [{default_output['index']}] {default_output['name']}") result.append("\n\nInput Devices:") result.extend(input_devices) result.append("\n\nOutput Devices:") result.extend(output_devices) return "\n".join(result) except Exception as e: logger.error(f"Error listing audio devices: {e}") return f"Error listing audio devices: {str(e)}" @mcp.tool() async def voice_status() -> str: """Check the status of all voice services including TTS, STT, LiveKit, and audio devices. IMPORTANT: Only use this tool for debugging when voice services fail. The system has automatic failover, so try using services directly first. This tool is for troubleshooting only. Provides a unified view of the voice infrastructure configuration and health. 
""" from voice_mode.provider_discovery import provider_registry from voice_mode.config import TTS_BASE_URLS, STT_BASE_URLS try: # Ensure registry is initialized await provider_registry.initialize() status_lines = ["Voice Service Status:"] status_lines.append("=" * 50) # TTS Endpoints status_lines.append("\nTTS Endpoints:") for url in TTS_BASE_URLS: endpoint_info = provider_registry.registry["tts"].get(url) if endpoint_info: # Check if there's an error if endpoint_info.last_error: status_lines.append(f" ❌ {url}") status_lines.append(f" Error: {endpoint_info.last_error}") else: status_lines.append(f" ✅ {url}") status_lines.append(f" Models: {', '.join(endpoint_info.models[:3]) if endpoint_info.models else 'none'}") status_lines.append(f" Voices: {len(endpoint_info.voices)} available") else: status_lines.append(f" ⚪ {url} (not discovered)") # STT Endpoints status_lines.append("\nSTT Endpoints:") for url in STT_BASE_URLS: endpoint_info = provider_registry.registry["stt"].get(url) if endpoint_info: # Check if there's an error if endpoint_info.last_error: status_lines.append(f" ❌ {url}") status_lines.append(f" Error: {endpoint_info.last_error}") else: status_lines.append(f" ✅ {url}") status_lines.append(f" Models: {', '.join(endpoint_info.models) if endpoint_info.models else 'none'}") else: status_lines.append(f" ⚪ {url} (not discovered)") # Configuration from voice_mode.config import ( TTS_VOICES, TTS_MODELS, PREFER_LOCAL, AUTO_START_KOKORO, AUDIO_FEEDBACK_ENABLED, LIVEKIT_URL ) status_lines.append("\nConfiguration:") status_lines.append(f" Preferred Voices: {', '.join(TTS_VOICES[:3])}{'...' 
if len(TTS_VOICES) > 3 else ''}") status_lines.append(f" Preferred Models: {', '.join(TTS_MODELS)}") status_lines.append(f" Prefer Local: {PREFER_LOCAL}") status_lines.append(f" Auto-start Kokoro: {AUTO_START_KOKORO}") status_lines.append(f" Audio Feedback: {'Enabled' if AUDIO_FEEDBACK_ENABLED else 'Disabled'}") status_lines.append(f" LiveKit URL: {LIVEKIT_URL}") # Audio devices try: default_input = sd.query_devices(kind='input') default_output = sd.query_devices(kind='output') status_lines.append("\nAudio Devices:") status_lines.append(f" Input: {default_input['name']}") status_lines.append(f" Output: {default_output['name']}") except: status_lines.append("\nAudio Devices: Unable to query") return "\n".join(status_lines) except Exception as e: logger.error(f"Error getting voice status: {e}") return f"Error getting voice status: {str(e)}" @mcp.tool() async def list_tts_voices(provider: Optional[str] = None) -> str: """List available TTS voices for different providers. Args: provider: Optional provider name ('openai' or 'kokoro'). If not specified, lists all available voices. Returns: A formatted list of available voices by provider. """ await startup_initialization() results = [] results.append("🔊 AVAILABLE TTS VOICES") results.append("=" * 40) # Determine which providers to check providers_to_check = [] if provider: if provider.lower() not in ['openai', 'kokoro']: return f"Error: Unknown provider '{provider}'. 
Valid options: 'openai', 'kokoro'" providers_to_check = [provider.lower()] else: providers_to_check = ['openai', 'kokoro'] # OpenAI voices if 'openai' in providers_to_check: results.append("\n📢 OpenAI Voices") results.append("-" * 40) # Standard voices (work with all models) results.append("\n**Standard Voices** (tts-1, tts-1-hd):") standard_voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'] for voice in standard_voices: results.append(f" • {voice}") # Enhanced voices for gpt-4o-mini-tts results.append("\n**Enhanced Voices** (gpt-4o-mini-tts):") enhanced_voices = ['alloy', 'echo', 'shimmer'] for voice in enhanced_voices: results.append(f" • {voice} - supports emotional expression") results.append("\n**Voice Characteristics**:") voice_descriptions = { 'alloy': 'Natural and conversational (default)', 'echo': 'Smooth and conversational', 'fable': 'British accent, authoritative', 'onyx': 'Deep and authoritative', 'nova': 'Warm and friendly', 'shimmer': 'Expressive and engaging' } for voice, desc in voice_descriptions.items(): results.append(f" • {voice}: {desc}") # Kokoro voices if 'kokoro' in providers_to_check: results.append("\n\n🎭 Kokoro Voices (67 voices available)") results.append("-" * 40) results.append("\n**Voice Naming Convention**:") results.append(" • af_ = American Female") results.append(" • am_ = American Male") results.append(" • bf_ = British Female") results.append(" • bm_ = British Male") results.append(" • ef_ = Spanish Female") results.append(" • em_ = Spanish Male") results.append(" • ff_ = French Female") results.append(" • hf_ = Hindi Female") results.append(" • hm_ = Hindi Male") results.append(" • if_ = Italian Female") results.append(" • im_ = Italian Male") results.append(" • jf_ = Japanese Female") results.append(" • jm_ = Japanese Male") results.append(" • pf_ = Portuguese Female") results.append(" • pm_ = Portuguese Male") results.append(" • zf_ = Chinese Female") results.append(" • zm_ = Chinese Male") 
results.append("\n**Recommended Voices**:") kokoro_recommended = { 'af_sky': 'American Female - Natural and expressive (default)', 'af_sarah': 'American Female - Warm and friendly', 'am_adam': 'American Male - Clear and professional', 'bf_emma': 'British Female - Sophisticated accent', 'bm_george': 'British Male - Distinguished accent', 'ef_dora': 'Spanish Female - Clear and expressive', 'em_alex': 'Spanish Male - Natural voice' } for voice, desc in kokoro_recommended.items(): results.append(f" • {voice}: {desc}") results.append("\n**All Available Voices by Language**:") # American voices results.append("\n American Female (18 voices):") results.append(" af_alloy, af_aoede, af_bella, af_heart, af_jadzia, af_jessica,") results.append(" af_kore, af_nicole, af_nova, af_river, af_sarah, af_sky,") results.append(" af_v0, af_v0bella, af_v0irulan, af_v0nicole, af_v0sarah, af_v0sky") results.append("\n American Male (12 voices):") results.append(" am_adam, am_echo, am_eric, am_fenrir, am_liam, am_michael,") results.append(" am_onyx, am_puck, am_santa, am_v0adam, am_v0gurney, am_v0michael") # British voices results.append("\n British Female (5 voices):") results.append(" bf_alice, bf_emma, bf_lily, bf_v0emma, bf_v0isabella") results.append("\n British Male (6 voices):") results.append(" bm_daniel, bm_fable, bm_george, bm_lewis, bm_v0george, bm_v0lewis") # Other languages results.append("\n Spanish (3 voices): ef_dora, em_alex, em_santa") results.append(" French (1 voice): ff_siwis") results.append(" Hindi (4 voices): hf_alpha, hf_beta, hm_omega, hm_psi") results.append(" Italian (2 voices): if_sara, im_nicola") results.append(" Japanese (5 voices): jf_alpha, jf_gongitsune, jf_nezumi, jf_tebukuro, jm_kumo") results.append(" Portuguese (3 voices): pf_dora, pm_alex, pm_santa") results.append(" Chinese (8 voices): zf_xiaobei, zf_xiaoni, zf_xiaoxiao, zf_xiaoyi,") results.append(" zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang") return "\n".join(results)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mbailey/voicemode'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.