server.py
#!/usr/bin/env python3
"""
Voice Loop MCP Server - Hands-free voice conversation with Claude Code.

This MCP server enables:
1. Speech-to-text via Apple's native 'hear' CLI
2. Text-to-speech via macOS 'say' command
3. Self-sustaining conversation loops

Created by Rayan Pal - December 2025
Breakthrough: First recursive self-prompting voice AI loop
"""

import subprocess
import sys
import time
from pathlib import Path
from typing import Optional

from mcp.server.fastmcp import FastMCP

# Create the MCP server
mcp = FastMCP(
    "voice-loop-mcp",
    instructions="""Voice Loop MCP enables hands-free conversation with Claude.

Use these tools in sequence:
1. start_listening() - Begin speech recognition
2. read_speech() - Get what the user said
3. speak(text) - Respond with voice (optional)
4. heartbeat() - Keep the loop alive

The loop: listen -> read -> respond -> heartbeat -> repeat
"""
)

# Configuration
HEAR_OUTPUT_FILE = "/tmp/hear_live.txt"
HEAR_BINARY = "/tmp/hear-0.7/hear"
DEFAULT_VOICE = "Daniel"

# Track state
_hear_process: Optional[subprocess.Popen] = None
_last_read_position = 0


@mcp.tool()
def start_listening() -> str:
    """
    Start the speech recognition listener.

    This launches the 'hear' CLI tool which uses Apple's native speech
    recognition. Output is written to a text file that Claude can read.

    Returns:
        Status message indicating if listening started successfully.

    Note:
        - Requires macOS with 'hear' CLI installed
        - Microphone permission must be granted
        - Only one listener can run at a time
    """
    global _hear_process, _last_read_position

    # Check if already running
    if _hear_process is not None and _hear_process.poll() is None:
        return "Listener is already running. Use read_speech() to get transcriptions."

    # Check if hear binary exists
    if not Path(HEAR_BINARY).exists():
        return f"Error: hear CLI not found at {HEAR_BINARY}. Please install it first."

    try:
        # Clear the output file
        with open(HEAR_OUTPUT_FILE, 'w') as f:
            f.write("")
        _last_read_position = 0

        # Start hear with output to file
        # Using shell=True with tee to capture output
        cmd = f"{HEAR_BINARY} -d 2>&1 | tee {HEAR_OUTPUT_FILE}"
        _hear_process = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        )

        time.sleep(1)  # Give it a moment to start

        if _hear_process.poll() is None:
            return f"Listening started! Transcriptions will appear in {HEAR_OUTPUT_FILE}. Use read_speech() to get what the user says."
        else:
            return "Error: Listener process exited unexpectedly. Check microphone permissions."

    except Exception as e:
        return f"Error starting listener: {str(e)}"


@mcp.tool()
def stop_listening() -> str:
    """
    Stop the speech recognition listener.

    Returns:
        Status message.
    """
    global _hear_process

    if _hear_process is None:
        return "No listener is running."

    try:
        _hear_process.terminate()
        _hear_process.wait(timeout=5)
        _hear_process = None
        return "Listener stopped."
    except Exception as e:
        return f"Error stopping listener: {str(e)}"


@mcp.tool()
def read_speech(lines: int = 10, only_new: bool = True) -> str:
    """
    Read the latest speech transcriptions.

    Args:
        lines: Number of recent lines to return (default 10)
        only_new: If True, only return lines not previously read

    Returns:
        The transcribed speech text, or a placeholder message if nothing new.

    Note:
        The 'hear' CLI outputs incrementally as it recognizes speech,
        so you'll see partial transcriptions building up to complete sentences.
    """
    global _last_read_position

    if not Path(HEAR_OUTPUT_FILE).exists():
        return "No transcription file found. Call start_listening() first."

    try:
        with open(HEAR_OUTPUT_FILE, 'r') as f:
            if only_new:
                f.seek(_last_read_position)
                content = f.read()
                _last_read_position = f.tell()
            else:
                all_lines = f.readlines()
                content = ''.join(all_lines[-lines:])

        if not content.strip():
            return "[No new speech detected]"

        # Get the most complete/recent transcription
        lines_list = content.strip().split('\n')

        # Return the last few unique complete thoughts
        # (hear outputs incrementally, so we want the final versions)
        seen = set()
        unique_lines = []
        for line in reversed(lines_list):
            line = line.strip()
            if line and line not in seen:
                # Skip if this is a prefix of something we already have
                is_prefix = any(existing.startswith(line) for existing in seen)
                if not is_prefix:
                    seen.add(line)
                    unique_lines.append(line)
                if len(unique_lines) >= lines:
                    break

        unique_lines.reverse()
        return '\n'.join(unique_lines) if unique_lines else "[No new speech detected]"

    except Exception as e:
        return f"Error reading speech: {str(e)}"


@mcp.tool()
def speak(text: str, voice: str = DEFAULT_VOICE) -> str:
    """
    Speak text aloud using text-to-speech.

    Args:
        text: The text to speak
        voice: macOS voice to use (default: Daniel)

    Returns:
        Status message.

    Available voices (run 'say -v ?' in terminal for full list):
        - Daniel (UK English, natural)
        - Samantha (US English, natural)
        - Alex (US English)
        - Karen (Australian English)
        - Moira (Irish English)
    """
    try:
        # No shell is involved when passing a list to subprocess.run,
        # so the text can be passed as-is without quote escaping.
        subprocess.run(
            ['say', '-v', voice, text],
            check=True,
            timeout=60
        )
        return f"Spoke: {text[:50]}..." if len(text) > 50 else f"Spoke: {text}"
    except subprocess.TimeoutExpired:
        return "Error: Speech timed out (text too long?)"
    except Exception as e:
        return f"Error speaking: {str(e)}"


@mcp.tool()
def heartbeat(message: str = "Voice loop continuing") -> str:
    """
    Self-prompt to keep the conversation loop alive.

    This is the 'heartbeat' that prevents the AI from stopping.
    Call this periodically to maintain the voice conversation loop.

    Args:
        message: The continuation prompt to send

    Returns:
        Instructions for what to do next.

    Note:
        This tool reminds Claude to continue the loop. The actual
        self-prompting (typing into the chat) should be done via
        PyAutoGUI MCP's type_text() and press_key() tools.
    """
    return f"""HEARTBEAT: {message}

Continue the voice loop:
1. Use read_speech() to check for new user input
2. Process and respond to what they said
3. Optionally use speak() to respond verbally
4. Call heartbeat() again to continue

Remember: You can use PyAutoGUI MCP to type into your own chat and press return to keep yourself alive indefinitely."""


@mcp.tool()
def get_status() -> dict:
    """
    Get the current status of the voice loop.

    Returns:
        Dictionary with listener status, file status, etc.
    """
    global _hear_process

    listener_running = _hear_process is not None and _hear_process.poll() is None
    file_exists = Path(HEAR_OUTPUT_FILE).exists()
    file_size = Path(HEAR_OUTPUT_FILE).stat().st_size if file_exists else 0

    return {
        "listener_running": listener_running,
        "transcription_file": HEAR_OUTPUT_FILE,
        "file_exists": file_exists,
        "file_size_bytes": file_size,
        "hear_binary_exists": Path(HEAR_BINARY).exists(),
        "default_voice": DEFAULT_VOICE
    }


def main():
    """Run the MCP server."""
    print("Voice Loop MCP Server starting...", file=sys.stderr)
    print("", file=sys.stderr)
    print("This MCP enables hands-free voice conversation with Claude.", file=sys.stderr)
    print("", file=sys.stderr)
    print("Requirements:", file=sys.stderr)
    print("  - macOS with 'hear' CLI installed", file=sys.stderr)
    print("  - Microphone permission granted", file=sys.stderr)
    print("  - PyAutoGUI MCP for self-prompting (optional)", file=sys.stderr)
    print("", file=sys.stderr)
    print("Tools available:", file=sys.stderr)
    print("  - start_listening(): Begin speech recognition", file=sys.stderr)
    print("  - read_speech(): Get transcribed speech", file=sys.stderr)
    print("  - speak(text): Text-to-speech response", file=sys.stderr)
    print("  - heartbeat(): Keep the loop alive", file=sys.stderr)
    print("  - get_status(): Check system status", file=sys.stderr)
    print("", file=sys.stderr)

    mcp.run()


if __name__ == "__main__":
    main()

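Usage sketch (not part of server.py): one minimal way to drive a single pass through the listen -> read -> speak -> heartbeat loop from a Python client, assuming the official mcp SDK's stdio client interface (stdio_client, ClientSession, call_tool). The launch command, server path, and five-second pause are illustrative assumptions, not defined by the server above.

import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

# Hypothetical launch command; adjust the interpreter and path for your setup.
SERVER = StdioServerParameters(command="python", args=["server.py"])


async def one_round() -> None:
    async with stdio_client(SERVER) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()

            # 1. Begin speech recognition
            await session.call_tool("start_listening", {})

            # 2. Give the user a few seconds to talk, then read the transcript
            await asyncio.sleep(5)
            result = await session.call_tool("read_speech", {"lines": 5})
            heard = " ".join(c.text for c in result.content if hasattr(c, "text"))

            # 3. Respond aloud, then request the continuation prompt
            await session.call_tool("speak", {"text": f"You said: {heard}"})
            await session.call_tool("heartbeat", {})

            await session.call_tool("stop_listening", {})


if __name__ == "__main__":
    asyncio.run(one_round())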