Voice Mode

voicemode
voice_mode

config.py•51.5 KiB

""" Configuration and shared utilities for Voicemode Server. This module contains all configuration constants, global state, initialization functions, and shared utilities used across the voicemode server. """ import os import logging import asyncio import subprocess from pathlib import Path from typing import Dict, Optional from datetime import datetime # ==================== ENVIRONMENT CONFIGURATION ==================== def find_voicemode_env_files() -> list[Path]: """ Find .voicemode.env files by walking up the directory tree. Looks for (in order of priority - closest to current directory wins): 1. .voicemode.env in current or parent directories 2. .voicemode/voicemode.env in current or parent directories 3. ~/.voicemode/voicemode.env in user home (global config) Returns: List of Path objects in loading order (global first, then project-specific) """ config_files = [] # First add global config (lowest priority - loaded first) global_config = Path.home() / ".voicemode" / "voicemode.env" # Backwards compatibility: check for old filename if not global_config.exists(): old_global = Path.home() / ".voicemode" / ".voicemode.env" if old_global.exists(): global_config = old_global if global_config.exists(): config_files.append(global_config) # Then walk up directory tree for project-specific configs (higher priority) current_dir = Path.cwd() project_configs = [] while current_dir != current_dir.parent: # Check for standalone .voicemode.env first standalone_file = current_dir / ".voicemode.env" if standalone_file.exists(): project_configs.append(standalone_file) break # Stop at first found (closest wins) # Then check .voicemode/voicemode.env dir_file = current_dir / ".voicemode" / "voicemode.env" # Skip if this is the global config file (already added) if dir_file.exists() and dir_file != global_config: project_configs.append(dir_file) break # Stop at first found (closest wins) current_dir = current_dir.parent # Add project configs (they were collected closest-first, so add as-is) config_files.extend(project_configs) return config_files def load_voicemode_env(): """Load configuration from voicemode.env files, with cascading from global to project-specific.""" config_files = find_voicemode_env_files() # If no config files found, create default global config if not config_files: default_path = Path.home() / ".voicemode" / "voicemode.env" default_path.parent.mkdir(parents=True, exist_ok=True) default_config = '''# Voice Mode Configuration File # This file is automatically generated and can be customized # Environment variables always take precedence over this file ############# # Core Configuration ############# # Base directory for all voicemode data (default: ~/.voicemode) # VOICEMODE_BASE_DIR=~/.voicemode # Models directory (default: ~/.voicemode/models) # VOICEMODE_MODELS_DIR=~/.voicemode/models # Enable debug mode (true/false) # VOICEMODE_DEBUG=false # Enable VAD debug logging (true/false) # VOICEMODE_VAD_DEBUG=false # Save all audio and transcriptions (true/false) # VOICEMODE_SAVE_ALL=false # Save audio files (true/false) # VOICEMODE_SAVE_AUDIO=false # Save transcription files (true/false) # VOICEMODE_SAVE_TRANSCRIPTIONS=false # Skip TTS for faster text-only responses (true/false) # VOICEMODE_SKIP_TTS=false # Metrics output level in converse results (minimal/summary/verbose) # - minimal: Just the response text, no timing (saves tokens) # - summary: Response + compact timing string (default) # - verbose: Response + detailed metrics breakdown # VOICEMODE_METRICS_LEVEL=summary # Enable audio feedback chimes (true/false) # VOICEMODE_AUDIO_FEEDBACK=true # Enable sound fonts for tool use hooks (true/false, default: true) # VOICEMODE_SOUNDFONTS_ENABLED=true ############# # Tool Loading Configuration ############# # Control which MCP tools are loaded to reduce token usage # Whitelist mode - only load specified tools (most efficient) # VOICEMODE_TOOLS_ENABLED=converse,service # Blacklist mode - load all tools except specified ones # VOICEMODE_TOOLS_DISABLED=pronunciation_add,pronunciation_remove # Examples: # Minimal (just voice conversation): VOICEMODE_TOOLS_ENABLED=converse # Voice + config: VOICEMODE_TOOLS_ENABLED=converse,service,config_get,config_set # Load all tools: VOICEMODE_TOOLS_DISABLED= # All except pronunciation: VOICEMODE_TOOLS_DISABLED=pronunciation_add,pronunciation_remove,pronunciation_list # Default: converse,service (basic voice interaction and service management) ############# # Provider Configuration ############# # Comma-separated list of TTS endpoints # VOICEMODE_TTS_BASE_URLS=http://127.0.0.1:8880/v1,https://api.openai.com/v1 # Comma-separated list of STT endpoints # VOICEMODE_STT_BASE_URLS=http://127.0.0.1:2022/v1,https://api.openai.com/v1 # STT prompt for vocabulary biasing - helps Whisper recognize names and technical terms # Use when specific words are consistently misrecognized # Example: VOICEMODE_STT_PROMPT=tmux, Tali, kubectl, VoiceMode # VOICEMODE_STT_PROMPT= # Comma-separated list of preferred voices # VOICEMODE_VOICES=af_sky,alloy # Comma-separated list of preferred models # VOICEMODE_TTS_MODELS=tts-1,tts-1-hd,gpt-4o-mini-tts # Prefer local providers over cloud (true/false) # VOICEMODE_PREFER_LOCAL=true # Always attempt local providers (true/false) # VOICEMODE_ALWAYS_TRY_LOCAL=true # Auto-start Kokoro service (true/false) # VOICEMODE_AUTO_START_KOKORO=false ############# # Whisper Configuration ############# # Whisper model to use (tiny, base, small, medium, large, large-v2, large-v3) # VOICEMODE_WHISPER_MODEL=base # Whisper server port (default: 2022) # VOICEMODE_WHISPER_PORT=2022 # Number of threads for Whisper processing (auto-detected if not set) # VOICEMODE_WHISPER_THREADS= # Language for transcription (auto, en, es, fr, de, it, pt, ru, zh, ja, ko, etc.) # VOICEMODE_WHISPER_LANGUAGE=auto # Path to Whisper models # VOICEMODE_WHISPER_MODEL_PATH=~/.voicemode/services/whisper/models ############# # Kokoro Configuration ############# # Kokoro server port (default: 8880) # VOICEMODE_KOKORO_PORT=8880 # Directory for Kokoro models # VOICEMODE_KOKORO_MODELS_DIR=~/.voicemode/models/kokoro # Directory for Kokoro cache # VOICEMODE_KOKORO_CACHE_DIR=~/.voicemode/cache/kokoro # Default Kokoro voice # VOICEMODE_KOKORO_DEFAULT_VOICE=af_sky # Max requests before Kokoro worker restarts (mitigates memory leak) # See: https://github.com/hexgrad/kokoro/issues/152 # VOICEMODE_KOKORO_MAX_REQUESTS=200 ############# # Recording & Voice Activity Detection ############# # Default maximum listening duration in seconds (default: 120) # VOICEMODE_DEFAULT_LISTEN_DURATION=120.0 # Disable silence detection for noisy environments (true/false) # VOICEMODE_DISABLE_SILENCE_DETECTION=false # VAD aggressiveness level 0-3, higher = more strict (default: 3) # VOICEMODE_VAD_AGGRESSIVENESS=3 # Silence threshold in milliseconds before stopping (default: 1000) # VOICEMODE_SILENCE_THRESHOLD_MS=1000 # Minimum recording duration in seconds (default: 0.5) # VOICEMODE_MIN_RECORDING_DURATION=0.5 # Initial silence grace period before VAD starts (default: 1.0) # VOICEMODE_INITIAL_SILENCE_GRACE_PERIOD=1.0 # Audio feedback chime timing # Silence before chime in seconds - helps Bluetooth devices wake up (default: 0.1) # VOICEMODE_CHIME_LEADING_SILENCE=0.1 # Silence after chime in seconds - prevents cutoff (default: 0.2) # VOICEMODE_CHIME_TRAILING_SILENCE=0.2 ############# # Audio Format Configuration ############# # Global audio format: pcm, opus, mp3, wav, flac, aac (default: pcm) # VOICEMODE_AUDIO_FORMAT=pcm # TTS-specific format override (default: pcm for optimal streaming) # VOICEMODE_TTS_AUDIO_FORMAT=pcm # STT-specific format override (default: mp3 if global format is pcm, otherwise uses global format) # VOICEMODE_STT_AUDIO_FORMAT=mp3 # Format-specific quality settings # VOICEMODE_OPUS_BITRATE=32000 # VOICEMODE_MP3_BITRATE=64k # VOICEMODE_AAC_BITRATE=64k ############# # Streaming Configuration ############# # Enable streaming playback for lower latency (true/false, default: true) # VOICEMODE_STREAMING_ENABLED=true # Download chunk size in bytes (default: 4096) # VOICEMODE_STREAM_CHUNK_SIZE=4096 # Initial buffer before playback starts in milliseconds (default: 150) # VOICEMODE_STREAM_BUFFER_MS=150 # Maximum buffer size in seconds (default: 2.0) # VOICEMODE_STREAM_MAX_BUFFER=2.0 ############# # Event Logging ############# # Enable comprehensive event logging (true/false, default: true) # VOICEMODE_EVENT_LOG_ENABLED=true # Event log directory (default: ~/.voicemode/logs/events) # VOICEMODE_EVENT_LOG_DIR=~/.voicemode/logs/events # Log rotation policy (currently only 'daily' supported) # VOICEMODE_EVENT_LOG_ROTATION=daily ############# # Pronunciation System ############# # Enable pronunciation middleware (true/false, default: true) # VOICEMODE_PRONUNCIATION_ENABLED=true # Default pronunciation rules - common technical terms # Format: DIRECTION pattern replacement # description # See docs for full format details VOICEMODE_PRONOUNCE=" TTS \\bJSON\\b jason # JSON as jason TTS \\bYAML\\b yammel # YAML as yammel TTS \\bAPI\\b A P I # API as individual letters " # Add custom rules with VOICEMODE_PRONOUNCE_* variables # VOICEMODE_PRONOUNCE_CUSTOM= # Log pronunciation substitutions for debugging (true/false, default: false) # VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS=false ############# # Think Out Loud Mode (Experimental) ############# # Enable multi-voice thinking mode (true/false, default: false) # VOICEMODE_THINK_OUT_LOUD=false # Voice persona mappings for thinking roles (role:voice pairs, comma-separated) # VOICEMODE_THINKING_VOICES=analytical:am_adam,creative:af_sarah,critical:af_bella,synthesis:af_nova # Thinking presentation style: sequential, debate, or chorus (default: sequential) # VOICEMODE_THINKING_STYLE=sequential # Announce which voice is speaking (true/false, default: true) # VOICEMODE_THINKING_ANNOUNCE_VOICE=true ############# # Service Management ############# # Auto-enable services after installation (true/false, default: true) # VOICEMODE_SERVICE_AUTO_ENABLE=true ############# # HTTP Serve Configuration ############# # Host/IP address to bind the server to (default: 127.0.0.1) # VOICEMODE_SERVE_HOST=127.0.0.1 # Port to bind the server to (default: 8765) # VOICEMODE_SERVE_PORT=8765 # Transport protocol: streamable-http or sse (default: streamable-http) # VOICEMODE_SERVE_TRANSPORT=streamable-http # Security: Allow connections from local/private IP ranges (default: true) # VOICEMODE_SERVE_ALLOW_LOCAL=true # Security: Allow connections from Anthropic IP ranges for Claude Cowork (default: false) # VOICEMODE_SERVE_ALLOW_ANTHROPIC=false # Security: Allow connections from Tailscale IP range 100.64.0.0/10 (default: false) # VOICEMODE_SERVE_ALLOW_TAILSCALE=false # Security: Additional allowed CIDR ranges (comma-separated) # VOICEMODE_SERVE_ALLOWED_IPS= # Authentication: URL secret path segment (e.g., /secret-path/mcp) # VOICEMODE_SERVE_SECRET= # Authentication: Bearer token for Authorization header # VOICEMODE_SERVE_TOKEN= ############# # Advanced Configuration ############# # Download progress style: auto, rich, simple (default: auto) # VOICEMODE_PROGRESS_STYLE=auto ############# # Credential Storage ############# # Where to store OAuth credentials (keyring or plaintext) # keyring uses the OS keychain (macOS Keychain, Linux Secret Service) # plaintext stores in ~/.voicemode/credentials (chmod 600) # VOICEMODE_CREDENTIAL_STORE=keyring ############# # API Keys (set these in your environment for security) ############# # OpenAI API key for cloud TTS/STT # OPENAI_API_KEY=your-key-here ''' with open(default_path, 'w') as f: f.write(default_config) os.chmod(default_path, 0o600) # Secure permissions config_files = [default_path] # Load configuration from all files in order (global first, project-specific last) for config_path in config_files: if config_path.exists(): with open(config_path, 'r') as f: lines = f.readlines() i = 0 while i < len(lines): line = lines[i].strip() # Skip comments and empty lines if not line or line.startswith('#'): i += 1 continue # Parse KEY=VALUE format if '=' in line: key, value = line.split('=', 1) key = key.strip() value = value.strip() # Handle multiline quoted values if value and value[0] in ('"', "'"): quote_char = value[0] # Check if the quote is closed on the same line if len(value) > 1 and value[-1] == quote_char: # Single line quoted value - strip quotes value = value[1:-1] else: # Multiline quoted value - collect lines until closing quote value_parts = [value[1:]] # Start after opening quote i += 1 while i < len(lines): next_line = lines[i].rstrip('\n') if next_line.endswith(quote_char): # Found closing quote value_parts.append(next_line[:-1]) break else: value_parts.append(next_line) i += 1 value = '\n'.join(value_parts) # Only set if not already in environment (env vars take precedence) if key and key not in os.environ: os.environ[key] = value i += 1 # Load configuration file before other configuration load_voicemode_env() # Helper function to parse boolean environment variables def env_bool(env_var: str, default: bool = False) -> bool: """Parse boolean from environment variable.""" value = os.getenv(env_var, "").lower() return value in ("true", "1", "yes", "on") if value else default # Helper function to expand paths with tilde def expand_path(path_str: str) -> Path: """Expand tilde and environment variables in path strings.""" # First expand any environment variables expanded = os.path.expandvars(path_str) # Then expand tilde expanded = os.path.expanduser(expanded) return Path(expanded) # Base directory for all voicemode data BASE_DIR = expand_path(os.getenv("VOICEMODE_BASE_DIR", str(Path.home() / ".voicemode"))) # Unified directory structure AUDIO_DIR = BASE_DIR / "audio" TRANSCRIPTIONS_DIR = BASE_DIR / "transcriptions" LOGS_DIR = BASE_DIR / "logs" # CONFIG_DIR = BASE_DIR / "config" # Removed - config stored in .voicemode.env file instead MODELS_DIR = expand_path(os.getenv("VOICEMODE_MODELS_DIR", str(BASE_DIR / "models"))) # Debug configuration DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes", "on") TRACE_DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() == "trace" VAD_DEBUG = os.getenv("VOICEMODE_VAD_DEBUG", "").lower() in ("true", "1", "yes", "on") DEBUG_DIR = LOGS_DIR / "debug" # Debug files now go under logs # Master save-all configuration SAVE_ALL = os.getenv("VOICEMODE_SAVE_ALL", "").lower() in ("true", "1", "yes", "on") # Audio saving configuration # Enable if SAVE_ALL is true, DEBUG is true, or individually enabled SAVE_AUDIO = SAVE_ALL or DEBUG or os.getenv("VOICEMODE_SAVE_AUDIO", "").lower() in ("true", "1", "yes", "on") SAVE_TRANSCRIPTIONS = SAVE_ALL or DEBUG or os.getenv("VOICEMODE_SAVE_TRANSCRIPTIONS", "").lower() in ("true", "1", "yes", "on") # Audio feedback configuration AUDIO_FEEDBACK_ENABLED = os.getenv("VOICEMODE_AUDIO_FEEDBACK", "true").lower() in ("true", "1", "yes", "on") # Skip TTS configuration (skip text-to-speech for faster responses) SKIP_TTS = os.getenv("VOICEMODE_SKIP_TTS", "false").lower() in ("true", "1", "yes", "on") # TTS speed configuration (0.25-4.0, default None uses provider default) TTS_SPEED = float(os.getenv("VOICEMODE_TTS_SPEED")) if os.getenv("VOICEMODE_TTS_SPEED") else None # Metrics output level configuration (minimal/summary/verbose) # - minimal: Just the response text, no timing # - summary: Response + compact timing string (default) # - verbose: Response + detailed metrics breakdown _metrics_level = os.getenv("VOICEMODE_METRICS_LEVEL", "summary").lower() METRICS_LEVEL = _metrics_level if _metrics_level in ("minimal", "summary", "verbose") else "summary" # Local provider preference configuration PREFER_LOCAL = os.getenv("VOICEMODE_PREFER_LOCAL", "true").lower() in ("true", "1", "yes", "on") # Always try local providers (don't mark them as permanently unavailable) ALWAYS_TRY_LOCAL = os.getenv("VOICEMODE_ALWAYS_TRY_LOCAL", "true").lower() in ("true", "1", "yes", "on") # Use simple failover without health checks # Simple failover is now the only mode - configuration removed # Auto-start configuration AUTO_START_KOKORO = os.getenv("VOICEMODE_AUTO_START_KOKORO", "").lower() in ("true", "1", "yes", "on") # ==================== CONCH CONFIGURATION ==================== # The conch is a coordination mechanism for multi-agent voice conversations # Only the agent holding the conch may speak # Enable/disable the conch system entirely CONCH_ENABLED = os.getenv("VOICEMODE_CONCH_ENABLED", "true").lower() in ("true", "1", "yes", "on") # Maximum time (seconds) to wait for conch when wait_for_conch=true CONCH_TIMEOUT = float(os.getenv("VOICEMODE_CONCH_TIMEOUT", "60")) # How often (seconds) to check if conch is free when waiting CONCH_CHECK_INTERVAL = float(os.getenv("VOICEMODE_CONCH_CHECK_INTERVAL", "0.5")) # Maximum age (seconds) before a lock is considered stale and can be forcibly released # This prevents stuck locks from blocking all voice interactions indefinitely # Should be longer than your typical conversation turn (listen + TTS + buffer) # Default 300s (5 min) covers 2 min listen + long TTS. Set to 0 to disable. CONCH_LOCK_EXPIRY = float(os.getenv("VOICEMODE_CONCH_LOCK_EXPIRY", "300")) # ==================== SERVICE CONFIGURATION ==================== # OpenAI configuration OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # Helper function to parse comma-separated lists def parse_comma_list(env_var: str, fallback: str) -> list: """Parse comma-separated list from environment variable.""" value = os.getenv(env_var, fallback) return [item.strip() for item in value.split(",") if item.strip()] # New provider endpoint lists configuration TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1") STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1") TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy") TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts") # STT prompt for vocabulary biasing (helps with specialized terminology) # See: https://platform.openai.com/docs/guides/speech-to-text#prompting STT_PROMPT = os.getenv("VOICEMODE_STT_PROMPT", "") # Voice preferences cache _cached_voice_preferences: Optional[list] = None _voice_preferences_loaded = False def get_voice_preferences() -> list[str]: """ Get voice preferences from configuration. Uses the VOICEMODE_VOICES configuration which is loaded from: 1. Environment variables (highest priority) 2. Project-specific .voicemode.env files 3. Global ~/.voicemode/voicemode.env file 4. Built-in defaults Returns: List of voice names in preference order """ global _cached_voice_preferences, _voice_preferences_loaded # Return cached preferences if already loaded if _voice_preferences_loaded: return _cached_voice_preferences or [] _voice_preferences_loaded = True # Get voices from TTS_VOICES configuration _cached_voice_preferences = TTS_VOICES.copy() logger.info(f"Voice preferences loaded: {_cached_voice_preferences}") return _cached_voice_preferences def clear_voice_preferences_cache(): """Clear the voice preferences cache, forcing a reload on next access.""" global _cached_voice_preferences, _voice_preferences_loaded _cached_voice_preferences = None _voice_preferences_loaded = False logger.debug("Voice preferences cache cleared") def reload_configuration(): """Reload configuration from files and clear all caches.""" # Clear voice preferences cache clear_voice_preferences_cache() # Reload environment configuration load_voicemode_env() # Update global configuration variables global TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, STT_BASE_URLS TTS_BASE_URLS = parse_comma_list("VOICEMODE_TTS_BASE_URLS", "http://127.0.0.1:8880/v1,https://api.openai.com/v1") STT_BASE_URLS = parse_comma_list("VOICEMODE_STT_BASE_URLS", "http://127.0.0.1:2022/v1,https://api.openai.com/v1") TTS_VOICES = parse_comma_list("VOICEMODE_VOICES", "af_sky,alloy") TTS_MODELS = parse_comma_list("VOICEMODE_TTS_MODELS", "tts-1,tts-1-hd,gpt-4o-mini-tts") logger.info("Configuration reloaded successfully") # Legacy variables have been removed - use the new list-based configuration: # - VOICEMODE_TTS_BASE_URLS (comma-separated list) # - VOICEMODE_STT_BASE_URLS (comma-separated list) # - VOICEMODE_VOICES (comma-separated list) # - VOICEMODE_TTS_MODELS (comma-separated list) # ==================== WHISPER CONFIGURATION ==================== # Default Whisper model for installation and runtime DEFAULT_WHISPER_MODEL = "base" # Whisper-specific configuration WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", DEFAULT_WHISPER_MODEL) WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022")) WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto") WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(Path.home() / ".voicemode" / "services" / "whisper" / "models"))) # ==================== KOKORO CONFIGURATION ==================== # Kokoro-specific configuration KOKORO_PORT = int(os.getenv("VOICEMODE_KOKORO_PORT", "8880")) KOKORO_MODELS_DIR = expand_path(os.getenv("VOICEMODE_KOKORO_MODELS_DIR", str(BASE_DIR / "models" / "kokoro"))) KOKORO_CACHE_DIR = expand_path(os.getenv("VOICEMODE_KOKORO_CACHE_DIR", str(BASE_DIR / "cache" / "kokoro"))) KOKORO_DEFAULT_VOICE = os.getenv("VOICEMODE_KOKORO_DEFAULT_VOICE", "af_sky") KOKORO_MAX_REQUESTS = int(os.getenv("VOICEMODE_KOKORO_MAX_REQUESTS", "200")) # ==================== SERVICE MANAGEMENT CONFIGURATION ==================== # Auto-enable services after installation SERVICE_AUTO_ENABLE = env_bool("VOICEMODE_SERVICE_AUTO_ENABLE", True) # ==================== CONNECT CONFIGURATION ==================== CONNECT_ENABLED = env_bool("VOICEMODE_CONNECT_ENABLED", False) CONNECT_WS_URL = os.getenv("VOICEMODE_CONNECT_WS_URL", "wss://voicemode.dev/ws") CONNECT_USERS = [u.strip() for u in os.getenv("VOICEMODE_CONNECT_USERS", "").split(",") if u.strip()] AGENT_NAME = os.getenv("VOICEMODE_AGENT_NAME", "") HOST_ALIAS = os.getenv("VOICEMODE_HOST_ALIAS", "") # Derived: effective hostname for addressing import socket as _socket CONNECT_HOST = HOST_ALIAS or _socket.gethostname().split('.')[0] # Backward compatibility: support old env var names during migration if not CONNECT_ENABLED and env_bool("VOICEMODE_CONNECT_AUTO", False): CONNECT_ENABLED = True # ==================== SOUND FONTS CONFIGURATION ==================== # Sound fonts are enabled by default for audio feedback during tool calls # Set VOICEMODE_SOUNDFONTS_ENABLED=false to disable SOUNDFONTS_ENABLED = env_bool("VOICEMODE_SOUNDFONTS_ENABLED", True) # ==================== AUDIO CONFIGURATION ==================== # Audio parameters SAMPLE_RATE = 24000 # Standard TTS sample rate for both OpenAI and Kokoro CHANNELS = 1 # ==================== SILENCE DETECTION CONFIGURATION ==================== # Disable silence detection (useful for noisy environments) # Silence detection is enabled by default DISABLE_SILENCE_DETECTION = os.getenv("VOICEMODE_DISABLE_SILENCE_DETECTION", "false").lower() in ("true", "1", "yes", "on") # VAD (Voice Activity Detection) configuration VAD_AGGRESSIVENESS = int(os.getenv("VOICEMODE_VAD_AGGRESSIVENESS", "3")) # 0-3, higher = more aggressive SILENCE_THRESHOLD_MS = int(os.getenv("VOICEMODE_SILENCE_THRESHOLD_MS", "1000")) # Stop after 1000ms (1 second) of silence MIN_RECORDING_DURATION = float(os.getenv("VOICEMODE_MIN_RECORDING_DURATION", "0.5")) # Minimum 0.5s recording VAD_CHUNK_DURATION_MS = 30 # VAD frame size (must be 10, 20, or 30ms) INITIAL_SILENCE_GRACE_PERIOD = float(os.getenv("VOICEMODE_INITIAL_SILENCE_GRACE_PERIOD", "1")) # No initial silence grace period by default # Default listen duration for converse tool DEFAULT_LISTEN_DURATION = float(os.getenv("VOICEMODE_DEFAULT_LISTEN_DURATION", "120.0")) # Default 120s listening time # Repeat phrase detection for audio replay REPEAT_PHRASES = parse_comma_list("VOICEMODE_REPEAT_PHRASES", "repeat,say that again,pardon,what,come again") # Wait phrase detection for pausing conversation WAIT_PHRASES = parse_comma_list("VOICEMODE_WAIT_PHRASES", "wait") # Wait duration in seconds when wait phrase is detected WAIT_DURATION = float(os.getenv("VOICEMODE_WAIT_DURATION", "60.0")) # Default 60s (1 minute) # Audio feedback chime configuration # Leading silence before chimes to allow Bluetooth devices to wake up CHIME_LEADING_SILENCE = float(os.getenv("VOICEMODE_CHIME_LEADING_SILENCE", "0.1")) # Default 0.1s - minimal delay for Bluetooth # Trailing silence after chimes to prevent cutoff CHIME_TRAILING_SILENCE = float(os.getenv("VOICEMODE_CHIME_TRAILING_SILENCE", "0.2")) # Default 0.2s - reduced for responsiveness # Audio format configuration AUDIO_FORMAT = os.getenv("VOICEMODE_AUDIO_FORMAT", "pcm").lower() TTS_AUDIO_FORMAT = os.getenv("VOICEMODE_TTS_AUDIO_FORMAT", "pcm").lower() # Default to PCM for optimal streaming # STT upload format - compressed for bandwidth efficiency # Supported: mp3, wav, flac, m4a, ogg (must be supported by STT provider) # Default: mp3 (32kbps, ~90% bandwidth reduction vs WAV) STT_AUDIO_FORMAT = os.getenv("VOICEMODE_STT_AUDIO_FORMAT", "mp3" if AUDIO_FORMAT == "pcm" else AUDIO_FORMAT).lower() # STT save format - format for saved recordings when SAVE_AUDIO is enabled # Supported: wav, mp3, flac (wav recommended for full quality archival) # Default: wav (uncompressed, full quality) STT_SAVE_FORMAT = os.getenv("VOICEMODE_STT_SAVE_FORMAT", "wav").lower() # STT compression mode - controls when audio is compressed before upload # Options: # auto - Compress for remote endpoints, skip for local (default) # Saves ~200-800ms transcode time for local endpoints where # bandwidth isn't a bottleneck. Remote uploads benefit from # smaller file sizes (MP3 is ~90% smaller than WAV). # always - Always compress regardless of endpoint type # never - Never compress, always send WAV (highest quality, larger files) STT_COMPRESS = os.getenv("VOICEMODE_STT_COMPRESS", "auto").lower() # Validate STT_COMPRESS value if STT_COMPRESS not in ("auto", "always", "never"): STT_COMPRESS = "auto" # Supported audio formats SUPPORTED_AUDIO_FORMATS = ["pcm", "opus", "mp3", "wav", "flac", "aac"] SUPPORTED_SAVE_FORMATS = ["wav", "mp3", "flac"] # Formats suitable for saving recordings # Validate formats (validation messages will be logged after logger is initialized) if AUDIO_FORMAT not in SUPPORTED_AUDIO_FORMATS: _invalid_audio_format = AUDIO_FORMAT AUDIO_FORMAT = "pcm" if TTS_AUDIO_FORMAT not in SUPPORTED_AUDIO_FORMATS: _invalid_tts_format = TTS_AUDIO_FORMAT TTS_AUDIO_FORMAT = AUDIO_FORMAT if STT_AUDIO_FORMAT not in SUPPORTED_AUDIO_FORMATS: _invalid_stt_format = STT_AUDIO_FORMAT STT_AUDIO_FORMAT = AUDIO_FORMAT if STT_SAVE_FORMAT not in SUPPORTED_SAVE_FORMATS: _invalid_stt_save_format = STT_SAVE_FORMAT STT_SAVE_FORMAT = "wav" # Format-specific quality settings OPUS_BITRATE = int(os.getenv("VOICEMODE_OPUS_BITRATE", "32000")) # Default 32kbps for voice MP3_BITRATE = os.getenv("VOICEMODE_MP3_BITRATE", "32k") # Default 32kbps (optimal for speech per Whisper research) AAC_BITRATE = os.getenv("VOICEMODE_AAC_BITRATE", "64k") # Default 64kbps # ==================== STREAMING CONFIGURATION ==================== # Streaming playback configuration STREAMING_ENABLED = os.getenv("VOICEMODE_STREAMING_ENABLED", "true").lower() in ("true", "1", "yes", "on") STREAM_CHUNK_SIZE = int(os.getenv("VOICEMODE_STREAM_CHUNK_SIZE", "4096")) # Download chunk size STREAM_BUFFER_MS = int(os.getenv("VOICEMODE_STREAM_BUFFER_MS", "150")) # Initial buffer before playback STREAM_MAX_BUFFER = float(os.getenv("VOICEMODE_STREAM_MAX_BUFFER", "2.0")) # Max buffer in seconds # ==================== EVENT LOGGING CONFIGURATION ==================== # Event logging configuration # Event logs are enabled by default, or if SAVE_ALL is true EVENT_LOG_ENABLED = SAVE_ALL or os.getenv("VOICEMODE_EVENT_LOG_ENABLED", "true").lower() in ("true", "1", "yes", "on") EVENT_LOG_DIR = os.getenv("VOICEMODE_EVENT_LOG_DIR", str(LOGS_DIR / "events")) EVENT_LOG_ROTATION = os.getenv("VOICEMODE_EVENT_LOG_ROTATION", "daily") # Currently only daily is supported # ==================== GLOBAL STATE ==================== # Service management service_processes: Dict[str, subprocess.Popen] = {} # Concurrency control for audio operations # This prevents multiple audio operations from interfering with stdio audio_operation_lock = asyncio.Lock() # Flag to track if startup initialization has run _startup_initialized = False # ==================== LOGGING CONFIGURATION ==================== def setup_logging() -> logging.Logger: """Configure logging for the voice-mode server. Returns: Logger instance configured for voice-mode """ log_level = logging.DEBUG if DEBUG else logging.INFO logging.basicConfig( level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger("voicemode") # Trace logging setup if TRACE_DEBUG: import sys from datetime import datetime # Create debug log directory debug_log_dir = Path.home() / ".voicemode" / "logs" / "debug" debug_log_dir.mkdir(parents=True, exist_ok=True) # Create dated debug log file debug_log_file = debug_log_dir / f"voicemode_debug_{datetime.now().strftime('%Y-%m-%d')}.log" # Set up file handler for debug logs debug_handler = logging.FileHandler(debug_log_file, mode='a') debug_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) # Enable debug logging for httpx and openai httpx_logger = logging.getLogger("httpx") httpx_logger.setLevel(logging.DEBUG) httpx_logger.addHandler(debug_handler) openai_logger = logging.getLogger("openai") openai_logger.setLevel(logging.DEBUG) openai_logger.addHandler(debug_handler) # Also add to main logger logger.addHandler(debug_handler) logger.info(f"Trace debug logging enabled, writing to {debug_log_file}") # Legacy trace file support trace_file = Path.home() / "voicemode_trace.log" trace_logger = logging.getLogger("voicemode.trace") trace_handler = logging.FileHandler(trace_file, mode='a') trace_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s')) trace_logger.addHandler(trace_handler) trace_logger.setLevel(logging.DEBUG) def trace_calls(frame, event, arg): if event == 'call': code = frame.f_code if 'voicemode' in code.co_filename or 'voice_mode' in code.co_filename: trace_logger.debug(f"Called {code.co_filename}:{frame.f_lineno} {code.co_name}") elif event == 'exception': trace_logger.debug(f"Exception: {arg}") return trace_calls sys.settrace(trace_calls) logger.info(f"Trace debugging enabled, writing to: {trace_file}") # Also log to file in debug mode if DEBUG: debug_log_file = Path.home() / "voicemode_debug.log" file_handler = logging.FileHandler(debug_log_file, mode='a') file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) logger.addHandler(file_handler) logger.info(f"Debug logging to file: {debug_log_file}") # Suppress verbose binary data in HTTP logs if DEBUG: # Keep our debug logs but reduce HTTP client verbosity logging.getLogger("openai._base_client").setLevel(logging.INFO) logging.getLogger("httpcore").setLevel(logging.INFO) logging.getLogger("httpx").setLevel(logging.INFO) return logger # ==================== DIRECTORY INITIALIZATION ==================== def initialize_directories(): """Create necessary directories for voicemode data storage.""" # Create base directory BASE_DIR.mkdir(exist_ok=True) # Create all subdirectories AUDIO_DIR.mkdir(exist_ok=True) TRANSCRIPTIONS_DIR.mkdir(exist_ok=True) LOGS_DIR.mkdir(exist_ok=True) # CONFIG_DIR.mkdir(exist_ok=True) # Removed - config stored in .voicemode.env file instead # Create subdirectories for logs if DEBUG: DEBUG_DIR.mkdir(parents=True, exist_ok=True) # Create events log directory if EVENT_LOG_ENABLED: Path(EVENT_LOG_DIR).mkdir(parents=True, exist_ok=True) # Initialize sound fonts if not present initialize_soundfonts() # ==================== SOUND FONTS INITIALIZATION ==================== def initialize_soundfonts(): """Install package sound fonts and set up soundfonts directory structure. Directory structure: ~/.voicemode/soundfonts/ voicemode/ - Package-managed soundfonts (synced from package) current -> voicemode - Relative symlink to active soundfont .version - Package version that last synced soundfonts Users can create custom soundfont directories and point 'current' to them. The 'voicemode' directory is synced only when the package version changes. """ from voice_mode.__version__ import __version__ soundfonts_dir = BASE_DIR / "soundfonts" package_soundfont_dir = soundfonts_dir / "voicemode" current_symlink = soundfonts_dir / "current" version_file = soundfonts_dir / ".version" # Migration: rename old 'default' directory to 'voicemode' old_default_dir = soundfonts_dir / "default" if old_default_dir.exists() and not package_soundfont_dir.exists(): try: old_default_dir.rename(package_soundfont_dir) # Update symlink if it pointed to default if current_symlink.is_symlink(): link_target = str(current_symlink.readlink()) if "default" in link_target: current_symlink.unlink() # Use relative symlink current_symlink.symlink_to("voicemode") except OSError: pass # Migration failed, will recreate below # Fix absolute symlinks: convert to relative if current_symlink.is_symlink(): try: link_target = str(current_symlink.readlink()) # If it's an absolute path pointing to voicemode, make it relative if link_target.startswith("/") and link_target.endswith("/voicemode"): current_symlink.unlink() current_symlink.symlink_to("voicemode") except OSError: pass # Check if sync is needed (version mismatch or missing) needs_sync = True if version_file.exists(): try: installed_version = version_file.read_text().strip() if installed_version == __version__: needs_sync = False except (IOError, OSError): pass # Can't read version, sync needed if not needs_sync: return # Skip sync, soundfonts already up to date try: # Create soundfonts directory soundfonts_dir.mkdir(exist_ok=True) # Sync package soundfonts to 'voicemode' directory incrementally # Only update files that are missing or different try: # For Python 3.9+ from importlib.resources import files package_soundfonts = files("voice_mode.data.soundfonts.default") if package_soundfonts.is_dir(): # Files/dirs to skip (Python package artifacts) skip_names = {"__init__.py", "__pycache__"} def sync_tree(src, dst, depth=0): """Sync directory tree, only updating changed files. Skips __init__.py and __pycache__ (Python package artifacts). Limits recursion depth to prevent runaway loops. """ if depth > 10: # Reasonable max depth for soundfonts return dst.mkdir(exist_ok=True) for item in src.iterdir(): # Skip Python package artifacts if item.name in skip_names: continue if item.is_file(): target = dst / item.name # Skip if destination is a symlink (could cause issues) if target.is_symlink(): continue new_content = item.read_bytes() # Only write if file doesn't exist or content differs if not target.exists(): target.write_bytes(new_content) else: try: existing_content = target.read_bytes() if existing_content != new_content: target.write_bytes(new_content) except (IOError, OSError): # Can't read existing, overwrite target.write_bytes(new_content) elif item.is_dir(): target_dir = dst / item.name # Skip if destination is a symlink (could be cycle) if target_dir.exists() and target_dir.is_symlink(): continue sync_tree(item, target_dir, depth + 1) # Sync tree structure incrementally sync_tree(package_soundfonts, package_soundfont_dir) # Update version file after successful sync version_file.write_text(__version__) except ImportError: # Fallback for older Python versions import pkg_resources package_soundfont_dir.mkdir(exist_ok=True) # List all resources in the soundfonts directory resource_dir = "data/soundfonts/default" if pkg_resources.resource_exists("voice_mode", resource_dir): # This is a bit more complex with pkg_resources # We'll need to manually copy the structure pass # Create relative symlink to current soundfont (points to voicemode) # Only create if it doesn't exist - user may have customized it if package_soundfont_dir.exists() and not current_symlink.exists(): try: # Use relative path, not absolute current_symlink.symlink_to("voicemode") except OSError: # Symlinks might not work on all systems (e.g., Windows without admin) pass except Exception as e: # Don't fail initialization if soundfonts can't be installed # They're optional and disabled by default if DEBUG: import logging logging.getLogger("voicemode").debug(f"Could not initialize soundfonts: {e}") # ==================== UTILITY FUNCTIONS ==================== def get_debug_filename(prefix: str, extension: str) -> str: """Generate a timestamped filename for debug files. Args: prefix: Prefix for the filename (e.g., 'stt-input', 'tts-output') extension: File extension (e.g., 'wav', 'mp3') Returns: Timestamped filename string """ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3] return f"{prefix}_{timestamp}.{extension}" def get_project_path() -> str: """Get the current project path (git root or current working directory).""" try: # Try to get git root result = subprocess.run( ["git", "rev-parse", "--show-toplevel"], capture_output=True, text=True, cwd=os.getcwd() ) if result.returncode == 0: return result.stdout.strip() except Exception: pass # Fall back to current working directory return os.getcwd() def save_transcription(text: str, prefix: str = "transcript", metadata: Optional[Dict] = None) -> Optional[Path]: """Save a transcription to the transcriptions directory. Args: text: The transcription text to save prefix: Prefix for the filename (e.g., 'stt', 'conversation') metadata: Optional metadata to include at the top of the file Returns: Path to the saved file or None if saving is disabled """ if not SAVE_TRANSCRIPTIONS: return None try: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3] filename = f"{prefix}_{timestamp}.txt" filepath = TRANSCRIPTIONS_DIR / filename content = [] # Create metadata with project path if metadata is None: metadata = {} metadata["project_path"] = get_project_path() # Add metadata header content.append("--- METADATA ---") for key, value in metadata.items(): content.append(f"{key}: {value}") content.append("--- TRANSCRIPT ---") content.append("") content.append(text) filepath.write_text("\n".join(content), encoding="utf-8") logger.debug(f"Transcription saved to: {filepath}") return filepath except Exception as e: logger.error(f"Failed to save transcription: {e}") return None # ==================== SOUNDDEVICE WORKAROUND ==================== def disable_sounddevice_stderr_redirect(): """Comprehensively disable sounddevice's stderr redirection. This prevents sounddevice from redirecting stderr to /dev/null which can interfere with audio playback in MCP server context. """ try: import sounddevice as sd import sys import atexit # Method 1: Override _ignore_stderr in various locations if hasattr(sd, '_sounddevice'): if hasattr(sd._sounddevice, '_ignore_stderr'): sd._sounddevice._ignore_stderr = lambda: None if hasattr(sd, '_ignore_stderr'): sd._ignore_stderr = lambda: None # Method 2: Override _check_error if it exists if hasattr(sd, '_check'): original_check = sd._check def safe_check(*args, **kwargs): # Prevent any stderr manipulation return original_check(*args, **kwargs) sd._check = safe_check # Method 3: Protect file descriptors original_stderr = sys.stderr # Create a hook to prevent stderr replacement def protect_stderr(): if sys.stderr != original_stderr: sys.stderr = original_stderr # Install protection atexit.register(protect_stderr) except Exception as e: # Log but continue - audio might still work if DEBUG: # Can't use logger here as it's not initialized yet print(f"DEBUG: Could not fully disable sounddevice stderr redirect: {e}", file=sys.stderr) # ==================== HTTP CLIENT CONFIGURATION ==================== # HTTP client configuration for OpenAI clients HTTP_CLIENT_CONFIG = { 'timeout': { 'total': 30.0, 'connect': 5.0 }, 'limits': { 'max_keepalive_connections': 5, 'max_connections': 10 } } # ==================== INITIALIZATION ==================== # Initialize directories on module import initialize_directories() # Apply sounddevice workaround on module import disable_sounddevice_stderr_redirect() # Set up logger logger = setup_logging() # Log any format validation warnings if 'AUDIO_FORMAT' in locals() and '_invalid_audio_format' in locals(): logger.warning(f"Unsupported audio format '{_invalid_audio_format}', falling back to 'pcm'") if 'TTS_AUDIO_FORMAT' in locals() and '_invalid_tts_format' in locals(): logger.warning(f"Unsupported TTS audio format '{_invalid_tts_format}', falling back to '{AUDIO_FORMAT}'") if 'STT_AUDIO_FORMAT' in locals() and '_invalid_stt_format' in locals(): logger.warning(f"Unsupported STT audio format '{_invalid_stt_format}', falling back to '{AUDIO_FORMAT}'") # ==================== AUDIO FORMAT UTILITIES ==================== def get_provider_supported_formats(provider: str, operation: str = "tts") -> list: """Get list of audio formats supported by a provider. Args: provider: Provider name (e.g., 'openai', 'kokoro', 'whisper-local') operation: 'tts' or 'stt' Returns: List of supported format strings """ # Provider format capabilities # Based on API documentation and testing provider_formats = { # TTS providers "openai": { "tts": ["opus", "mp3", "aac", "flac", "wav", "pcm"], "stt": ["mp3", "opus", "wav", "flac", "m4a", "webm"] }, "kokoro": { "tts": ["mp3", "opus", "flac", "wav", "pcm"], # AAC is not currently supported "stt": [] # Kokoro is TTS only }, # STT providers "whisper-local": { "tts": [], # Whisper is STT only "stt": ["wav", "mp3", "opus", "flac", "m4a"] }, "openai-whisper": { "tts": [], # Whisper is STT only "stt": ["mp3", "opus", "wav", "flac", "m4a", "webm"] } } provider_info = provider_formats.get(provider, {}) return provider_info.get(operation, []) def validate_audio_format(format: str, provider: str, operation: str = "tts") -> str: """Validate and potentially adjust audio format based on provider capabilities. Args: format: Requested audio format provider: Provider name operation: 'tts' or 'stt' Returns: Valid format for the provider (may differ from requested) """ supported = get_provider_supported_formats(provider, operation) if not supported: logger.warning(f"Provider '{provider}' does not support {operation} operation") return format if format in supported: return format # Fallback logic - prefer common formats fallback_order = ["opus", "mp3", "wav"] for fallback in fallback_order: if fallback in supported: logger.info(f"Format '{format}' not supported by {provider}, using '{fallback}' instead") return fallback # Last resort - use first supported format first_supported = supported[0] logger.warning(f"Using {provider}'s first supported format: {first_supported}") return first_supported def get_audio_loader_for_format(format: str): """Get the appropriate AudioSegment loader for a format. Args: format: Audio format string Returns: AudioSegment method reference or None """ from pydub import AudioSegment format_loaders = { "mp3": AudioSegment.from_mp3, "wav": AudioSegment.from_wav, "opus": AudioSegment.from_ogg, # Opus uses OGG container "flac": AudioSegment.from_file if not hasattr(AudioSegment, 'from_flac') else AudioSegment.from_flac, "aac": AudioSegment.from_file, # Generic loader for AAC "m4a": AudioSegment.from_file, # Generic loader for M4A "webm": AudioSegment.from_file, # Generic loader for WebM "ogg": AudioSegment.from_ogg, "pcm": AudioSegment.from_raw # Requires additional parameters } return format_loaders.get(format) def get_format_export_params(format: str) -> dict: """Get export parameters for a specific audio format. Args: format: Audio format string Returns: Dict with export parameters for pydub """ params = { "format": format } if format == "mp3": params["bitrate"] = MP3_BITRATE elif format == "opus": # Opus in OGG container params["format"] = "opus" # pydub uses 'opus' for OGG/Opus params["parameters"] = ["-b:a", str(OPUS_BITRATE)] elif format == "aac": params["bitrate"] = AAC_BITRATE elif format == "flac": # FLAC is lossless, no bitrate setting pass elif format == "wav": # WAV is uncompressed, no bitrate setting pass return params # ==================== SERVE COMMAND CONFIGURATION ==================== # Host/IP address to bind the server to (default: 127.0.0.1) SERVE_HOST = os.getenv("VOICEMODE_SERVE_HOST", "127.0.0.1") # Port to bind the server to (default: 8765) SERVE_PORT = int(os.getenv("VOICEMODE_SERVE_PORT", "8765")) # Allow connections from local/private IP ranges (default: true) SERVE_ALLOW_LOCAL = env_bool("VOICEMODE_SERVE_ALLOW_LOCAL", True) # Allow connections from Anthropic IP ranges for Claude Cowork (default: false) SERVE_ALLOW_ANTHROPIC = env_bool("VOICEMODE_SERVE_ALLOW_ANTHROPIC", False) # Allow connections from Tailscale IP range 100.64.0.0/10 (default: false) SERVE_ALLOW_TAILSCALE = env_bool("VOICEMODE_SERVE_ALLOW_TAILSCALE", False) # Additional allowed CIDR ranges (comma-separated, default: empty) SERVE_ALLOWED_IPS = os.getenv("VOICEMODE_SERVE_ALLOWED_IPS", "") # URL secret path segment for authentication (default: empty/disabled) SERVE_SECRET = os.getenv("VOICEMODE_SERVE_SECRET", "") # Bearer token for authentication (default: empty/disabled) SERVE_TOKEN = os.getenv("VOICEMODE_SERVE_TOKEN", "") # Transport protocol (streamable-http or sse) SERVE_TRANSPORT = os.getenv("VOICEMODE_SERVE_TRANSPORT", "streamable-http") # ==================== THINK OUT LOUD CONFIGURATION ==================== # Enable Think Out Loud mode THINK_OUT_LOUD_ENABLED = env_bool("VOICEMODE_THINK_OUT_LOUD", False) # Voice persona mappings for thinking roles (role:voice pairs) # Default: analytical:am_adam,creative:af_sarah,critical:af_bella,synthesis:af_nova THINKING_VOICES_STR = os.getenv( "VOICEMODE_THINKING_VOICES", "analytical:am_adam,creative:af_sarah,critical:af_bella,synthesis:af_nova" ) # Parse thinking voices into a dictionary THINKING_VOICES = {} for pair in THINKING_VOICES_STR.split(","): if ":" in pair: role, voice = pair.strip().split(":", 1) THINKING_VOICES[role.strip()] = voice.strip() # Thinking presentation style: sequential, debate, or chorus THINKING_STYLE = os.getenv("VOICEMODE_THINKING_STYLE", "sequential") # Whether to announce which voice is speaking THINKING_ANNOUNCE_VOICE = env_bool("VOICEMODE_THINKING_ANNOUNCE_VOICE", True)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mbailey/voicemode'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

config.py•51.5 KiB