Speech MCP
by Kvadratni
{
"project": {
"name": "speech-mcp",
"description": "Speech MCP Server with command-line interface and Kokoro TTS support",
"version": "1.0.10",
"type": "goose_extension",
"python_min_version": "3.10"
},
"important": [
"Never commit without user permission",
"Always show changes first for approval",
"Don't run server directly - requires installation",
"Must be installed in Goose to test",
"Always look for logs in ~/.speech-mcp/logs"
],
"components": [
{
"name": "server",
"path": "src/speech_mcp/server.py",
"purpose": "MCP server with FastMCP integration, manages state and UI process"
},
{
"name": "ui",
"path": "src/speech_mcp/ui/",
"purpose": "PyQt UI with audio visualization, voice selection, and TTS"
},
{
"name": "tts_adapters",
"path": "src/speech_mcp/tts_adapters/",
"purpose": "TTS engine adapters with fallback support"
},
{
"name": "config",
"path": "src/speech_mcp/config/",
"purpose": "Configuration system for user preferences"
},
{
"name": "process_manager",
"path": "src/speech_mcp/process_manager.py",
"purpose": "Process lifecycle management with auto-recovery"
}
],
"api": [
{
"name": "start_conversation",
"purpose": "Start UI and begin listening",
"returns": "User speech transcription"
},
{
"name": "reply",
"purpose": "Speak text and listen for response",
"params": ["text: string to speak"],
"returns": "User response transcription"
},
{
"name": "narrate_conversation",
"purpose": "Generate multi-speaker audio files",
"params": [
"script: JSON or markdown script defining the conversation",
"output_path: Path to save the audio file",
"script_format: 'json' or 'markdown' (default: 'json')",
"temp_dir: Optional directory for temporary files"
],
"returns": "Success message with conversation summary"
}
],
"dependencies": {
"main": [
"mcp[cli]>=1.2.0",
"PyQt5>=5.15.0",
"faster-whisper>=0.10.0",
"SpeechRecognition>=3.10.0",
"numpy>=1.24.0",
"pyaudio>=0.2.13",
"pydub>=0.25.1",
"pyttsx3>=2.90",
"psutil>=5.9.0",
"kokoro>=0.8.4",
"soundfile",
"torch",
"misaki[en]",
"ffmpeg>=1.4",
"pydantic>=2.7.2,<3.0.0"
],
"optional": {
"ja": ["misaki[ja]"],
"zh": ["misaki[zh]"],
"all": ["speech-mcp[ja,zh]"]
}
},
"features": {
"ui": "PyQt dark theme with audio visualization and animations",
"tts": "54+ voices via Kokoro with persistence",
"stt": "Local speech recognition with faster-whisper and SpeechRecognition fallback"
},
"install": {
"deeplink": "goose://extension?cmd=uvx&arg=speech-mcp&id=speech_mcp&name=Speech%20Interface&description=Voice%20interaction%20with%20audio%20visualization%20for%20Goose",
"cli": "goose session --with-extension \"uvx speech-mcp\"",
"kokoro": "pip install speech-mcp[kokoro]",
"all": "pip install speech-mcp[all]"
},
"logs": ["~/.speech-mcp/logs/speech-mcp.log", "~/.speech-mcp/logs/speech-mcp-server.log", "~/.speech-mcp/logs/speech-mcp-ui.log"],
"config_file": "~/.config/speech-mcp/config.json"
}