#!/usr/bin/env python3
"""
MCP Server for LocalVoiceMode
-----------------------------
Integrates voice mode with any AI assistant - runs invisibly in background.
Tools:
- speak: Speak text aloud (TTS)
- listen: Listen for speech (STT)
- converse: Speak and listen for response
- start_voice: Start voice chat (runs headlessly)
- stop_voice: Stop voice chat
- voice_status: Check if voice mode is running
- list_voices: List available character skills
"""
import os
import sys
import json
import signal
import subprocess
import re
from pathlib import Path
from typing import Optional
# Ensure we're in the right directory
SCRIPT_DIR = Path(__file__).parent.absolute()
os.chdir(SCRIPT_DIR)
sys.path.insert(0, str(SCRIPT_DIR))
from mcp.server.fastmcp import FastMCP
# Initialize the MCP server. The `instructions` text is surfaced to any
# connected AI assistant so it knows which tools exist and how to drive
# voice mode; it is runtime data consumed by clients, not a comment.
mcp = FastMCP(
    "localvoicemode",
    instructions="""LocalVoiceMode - hands-free voice chat integration.
Commands:
- speak: Speak text aloud (TTS)
- listen: Listen for speech (STT)
- converse: Speak and listen for response
- start_voice: Begin voice conversation (runs in background)
- stop_voice: End voice conversation
- voice_status: Check if voice is active
- list_voices: See available characters
- provider_status: Show available LLM providers
Supports auto-detection of:
- LM Studio (local, ports 1234/1235/1236/8080)
- OpenRouter (if OPENROUTER_API_KEY is set)
- OpenAI (if OPENAI_API_KEY is set)
Voice commands while running:
- Say "stop" or "goodbye" to end
- Say "change voice" to switch characters
""",
)
# Handle to the background voice-chat subprocess launched by start_voice();
# None when no session is active. Read/reset by stop_voice() and voice_status().
_voice_process: Optional[subprocess.Popen] = None
def _get_api_settings() -> tuple[str, Optional[str], Optional[str]]:
    """Auto-detect an LLM endpoint and return ``(api_url, api_key, model)``.

    Detection order:
      1. LM Studio / any OpenAI-compatible local server, probed on common
         localhost ports (no API key required).
      2. OpenRouter, if OPENROUTER_API_KEY is set.
      3. OpenAI, if OPENAI_API_KEY is set.
      4. Fallback: default LM Studio URL with no key and no model.

    Either of ``api_key`` / ``model`` may be None when not required/unknown.
    """
    import httpx

    # First: probe local servers. Short timeout keeps startup snappy when
    # nothing is listening.
    for port in [1234, 1235, 1236, 8080, 5000]:
        try:
            resp = httpx.get(f"http://localhost:{port}/v1/models", timeout=0.5)
            if resp.status_code == 200:
                api_url = f"http://localhost:{port}/v1"
                data = resp.json()
                model = None
                # Use the first advertised model id, if the server lists any.
                if data.get("data"):
                    model = data["data"][0].get("id")
                return api_url, None, model
        except (httpx.HTTPError, ValueError):
            # Fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Catch only transport errors
            # (httpx.HTTPError covers connect/timeout) and malformed JSON
            # (json.JSONDecodeError is a ValueError subclass).
            continue
    # Second: OpenRouter (hosted, key-based).
    openrouter_key = os.environ.get("OPENROUTER_API_KEY")
    if openrouter_key:
        return "https://openrouter.ai/api/v1", openrouter_key, "deepseek/deepseek-v3.2"
    # Third: OpenAI (hosted, key-based).
    openai_key = os.environ.get("OPENAI_API_KEY")
    if openai_key:
        return "https://api.openai.com/v1", openai_key, "gpt-4o"
    # Last resort: assume a local server will appear on the default port.
    return "http://localhost:1234/v1", None, None
@mcp.tool()
def list_voices() -> str:
    """List available voice characters/skills."""
    try:
        from voice_client import SkillLoader, Config
        config = Config()
        loader = SkillLoader(config.skills_dir, config.voice_refs_dir)
        entries = loader.list_skills()
        if not entries:
            return "No voice characters found."
        # One bullet per character: bold name, id in backticks, then a
        # truncated description line.
        body = "".join(
            f"• **{entry['name']}** (`{entry['id']}`)\n"
            f" {entry['description'][:80]}\n\n"
            for entry in entries
        )
        return "Available voice characters:\n\n" + body
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
def start_voice(skill: str = "assistant") -> str:
    """Start voice chat mode.
    Runs invisibly in the background. Speak naturally and get voice responses.
    """
    global _voice_process
    # Guard: a live (not-yet-exited) child means a session is already active.
    if _voice_process is not None and _voice_process.poll() is None:
        return "Voice mode is already running. Say 'stop' to end, or use stop_voice()."
    api_url, api_key, model = _get_api_settings()
    # Locate the venv interpreter: Windows layout first, then POSIX.
    interpreter = SCRIPT_DIR / ".venv" / "Scripts" / "python.exe"
    if not interpreter.exists():
        interpreter = SCRIPT_DIR / ".venv" / "bin" / "python"
    if not interpreter.exists():
        return "Error: Virtual environment not found. Run setup first."
    command = [
        str(interpreter),
        str(SCRIPT_DIR / "voice_client.py"),
        "--headless",
        "--skill",
        skill,
        "--api-url",
        api_url,
    ]
    if api_key:
        command += ["--api-key", api_key]
    if model:
        command += ["--model", model]
    try:
        # On Windows, suppress any console window for the child process so
        # it truly runs invisibly.
        startupinfo = None
        creationflags = 0
        if sys.platform == "win32":
            startupinfo = subprocess.STARTUPINFO()
            startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = subprocess.SW_HIDE
            creationflags = subprocess.CREATE_NO_WINDOW
        _voice_process = subprocess.Popen(
            command,
            cwd=str(SCRIPT_DIR),
            startupinfo=startupinfo,
            creationflags=creationflags,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception as e:
        return f"Failed to start voice mode: {e}"
    return f"Voice mode started with **{skill}**."
@mcp.tool()
def stop_voice() -> str:
    """Stop voice chat mode.

    Signals the background voice process to shut down — SIGINT on POSIX so
    the client can clean up gracefully, terminate() on Windows where SIGINT
    cannot be delivered to a detached child — then reaps it.
    """
    global _voice_process
    if _voice_process is None:
        return "Voice mode is not running."
    try:
        if sys.platform == "win32":
            _voice_process.terminate()
        else:
            _voice_process.send_signal(signal.SIGINT)
        # Fix: previously the handle was dropped without wait(), leaving a
        # zombie process on POSIX. Reap it, escalating to kill() if the
        # graceful signal is ignored.
        try:
            _voice_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            _voice_process.kill()
            _voice_process.wait(timeout=5)
        _voice_process = None
        return "Voice mode stopped."
    except Exception as e:
        # Best-effort: never raise out of the tool; drop the handle so a
        # new session can be started.
        _voice_process = None
        return f"Error stopping voice mode: {e}"
@mcp.tool()
def voice_status() -> str:
    """Report whether voice mode is running and list the voice commands.

    While a session is active, four modes are available (full_voice,
    tts_only, stt_only, silent); they are switched by saying the commands
    listed in the returned text ('stop listening', 'stop talking',
    'full voice', 'go silent').
    """
    global _voice_process
    proc = _voice_process
    if proc is None:
        return "Voice mode is **not running**.\n\nUse `start_voice()` to begin."
    # A non-None poll() means the child exited on its own; clear the handle.
    if proc.poll() is not None:
        code = proc.returncode
        _voice_process = None
        return f"Voice mode **ended** (exit code: {code}).\n\nUse `start_voice()` to begin again."
    # Session is live — include the endpoint currently in use.
    api_url, _, model = _get_api_settings()
    return f"""Voice mode is **running**.
**Voice Commands:**
- Say **"stop listening"** - TTS only mode (no mic)
- Say **"stop talking"** - STT only mode (no voice)
- Say **"full voice"** - Resume both
- Say **"go silent"** - Pause all
- Say **"stop"** or **"goodbye"** - End session
API: {api_url}
Model: {model or 'default'}"""
@mcp.tool()
def set_voice_mode(mode: str) -> str:
    """Set voice mode for mid-session control.

    Because the voice session runs in a separate subprocess, this tool does
    not switch modes directly; it returns the spoken commands that do
    (full_voice, tts_only, stt_only, silent — aliases 'full', 'tts', 'stt',
    'mute').

    Args:
        mode: Target mode (full_voice, tts_only, stt_only, silent)
    Returns:
        JSON status with mode guidance and voice commands
    """
    global _voice_process
    proc = _voice_process
    running = proc is not None and proc.poll() is None
    if not running:
        error_payload = {
            "error": "Voice mode is not running. Use start_voice() first.",
            "requested_mode": mode,
        }
        return json.dumps(error_payload)
    # The subprocess only accepts spoken commands; point the user at them.
    guidance = {
        "note": "Voice mode runs in a separate process. Use voice commands instead:",
        "commands": {
            "tts_only": "Say 'stop listening'",
            "stt_only": "Say 'stop talking' or 'be quiet'",
            "full_voice": "Say 'full voice' or 'unmute'",
            "silent": "Say 'go silent'",
        },
        "requested_mode": mode,
    }
    return json.dumps(guidance)
@mcp.tool()
def provider_status() -> str:
    """Show available LLM providers."""
    try:
        from voice_client import ProviderManager
        detected = ProviderManager.detect_all()
        if not detected:
            return "No LLM providers available."
        # Check mark for reachable providers, cross for unreachable ones.
        rows = [
            f"- [{'✓' if provider.available else '✗'}] **{provider.name}** ({provider.model or 'unknown'})"
            for provider in detected
        ]
        return "**Available LLM Providers:**\n\n" + "\n".join(rows) + "\n"
    except Exception as e:
        return f"Error: {e}"
# Lazily-created singleton engine instances shared by the speak/listen
# tools; populated on first use by _get_tts_engine()/_get_asr_engine().
_tts_engine = None
_asr_engine = None
def _get_tts_engine():
    """Return the process-wide TTS engine, creating it lazily on first use."""
    global _tts_engine
    if _tts_engine is not None:
        return _tts_engine
    # Deferred import: avoid loading the TTS stack until actually needed.
    from voice_client import TTSEngine
    _tts_engine = TTSEngine()
    return _tts_engine
def _get_asr_engine():
    """Return the process-wide ASR engine, creating it lazily on first use."""
    global _asr_engine
    if _asr_engine is not None:
        return _asr_engine
    # Deferred import: avoid loading the ASR stack until actually needed.
    from voice_client import ASREngine
    _asr_engine = ASREngine()
    return _asr_engine
@mcp.tool()
def speak(text: str, voice: str = "default") -> str:
    """Speak text aloud using Pocket TTS.
    Args:
        text: The text to speak aloud
        voice: Voice to use - 'default' or a custom voice
    """
    try:
        engine = _get_tts_engine()
        if voice == "default":
            engine.load_voice(voice_name="default")
        else:
            # Prefer a local reference WAV for the custom voice; fall back
            # to loading it by name only.
            reference = SCRIPT_DIR / "voice_references" / f"{voice}.wav"
            if reference.exists():
                engine.load_voice(reference, voice)
            else:
                engine.load_voice(voice_name=voice)
        engine.speak(text)
        # Truncate long text in the confirmation message.
        if len(text) > 50:
            return f"Spoke: {text[:50]}..."
        return f"Spoke: {text}"
    except Exception as e:
        return f"Error in speak: {e}"
@mcp.tool()
def listen(max_duration: float = 30.0) -> str:
    """Listen for speech and transcribe to text.
    Args:
        max_duration: Maximum duration to listen in seconds
    """
    try:
        from voice_client import AudioRecorder
        microphone = AudioRecorder()
        transcriber = _get_asr_engine()
        samples = microphone.record_vad(max_duration=max_duration)
        # Fewer than 1600 samples is treated as silence (~0.1 s at 16 kHz,
        # presumably — TODO confirm the recorder's sample rate).
        if len(samples) < 1600:
            return "[No speech detected]"
        transcript = transcriber.transcribe(samples)
        return transcript if transcript else "[Could not transcribe]"
    except Exception as e:
        return f"Error in listen: {e}"
@mcp.tool()
def converse(message: str, voice: str = "default", wait_for_response: bool = True) -> str:
    """Speak a message and optionally listen for a response.
    Args:
        message: The message to speak
        voice: Voice to use
        wait_for_response: Whether to listen for a response after speaking
    """
    try:
        spoken = speak(message, voice)
        if wait_for_response:
            heard = listen()
            return f"{spoken}\n\nUser response: {heard}"
        return spoken
    except Exception as e:
        return f"Error in converse: {e}"
@mcp.tool()
def service(service_name: str, action: str = "status") -> str:
    """Manage voice services (tts, stt, all).

    Args:
        service_name: Which service to report on - "tts", "stt", or "all".
        action: Accepted for interface compatibility; only "status" is
            currently implemented and the value is not otherwise used.

    Returns:
        One line per selected service stating whether its engine is loaded,
        or an error message for an unknown service name.
    """
    global _tts_engine, _asr_engine
    results = []
    if service_name in ("tts", "all"):
        results.append(f"TTS: {'Ready' if _tts_engine else 'Not loaded'}")
    if service_name in ("stt", "all"):
        results.append(f"STT: {'Ready' if _asr_engine else 'Not loaded'}")
    # Fix: an unrecognized service_name previously returned an empty string.
    if not results:
        return f"Unknown service: {service_name}. Use 'tts', 'stt', or 'all'."
    return "\n".join(results)
# Entry point: serve the MCP tools when this file is executed directly.
if __name__ == "__main__":
    mcp.run()