#!/usr/bin/env python3
"""
Voice Loop MCP Server - Hands-free voice conversation with Claude Code.
This MCP server enables:
1. Speech-to-text via the 'hear' CLI (which uses Apple's native speech recognition)
2. Text-to-speech via macOS 'say' command
3. Self-sustaining conversation loops
Created by Rayan Pal - December 2025
Breakthrough: First recursive self-prompting voice AI loop
"""
import subprocess
import sys
import os
import time
from pathlib import Path
from typing import Optional
from mcp.server.fastmcp import FastMCP
# Usage guidance handed to FastMCP as the server's `instructions` text.
_SERVER_INSTRUCTIONS = """Voice Loop MCP enables hands-free conversation with Claude.
Use these tools in sequence:
1. start_listening() - Begin speech recognition
2. read_speech() - Get what the user said
3. speak(text) - Respond with voice (optional)
4. heartbeat() - Keep the loop alive
The loop: listen -> read -> respond -> heartbeat -> repeat
"""

# Create the MCP server; the @mcp.tool() functions in this file register on it.
mcp = FastMCP("voice-loop-mcp", instructions=_SERVER_INSTRUCTIONS)
# Configuration
# Text file that receives the listener's output and that read_speech() reads.
HEAR_OUTPUT_FILE = "/tmp/hear_live.txt"
# Path at which the 'hear' executable is expected; start_listening() refuses
# to start when nothing exists here.
HEAR_BINARY = "/tmp/hear-0.7/hear"
# Voice passed to 'say' by speak() when the caller does not choose one.
DEFAULT_VOICE = "Daniel"
# Track state
# Handle of the process launched by start_listening(); None until a listener
# has been started, and set back to None by a successful stop_listening().
_hear_process: Optional[subprocess.Popen] = None
# File position up to which read_speech(only_new=True) has already consumed
# HEAR_OUTPUT_FILE; start_listening() resets it to 0.
_last_read_position = 0
@mcp.tool()
def start_listening() -> str:
    """
    Start the speech recognition listener.

    This launches the 'hear' CLI tool (HEAR_BINARY, with the '-d' flag),
    which uses Apple's native speech recognition. Its stdout and stderr are
    written to HEAR_OUTPUT_FILE, a text file that Claude can read.

    The file is truncated and the read position is reset on every start,
    so transcriptions from a previous session are discarded.

    Returns:
        Status message indicating if listening started successfully.
        Failures are reported as a message starting with "Error"; this
        function does not raise.

    Note:
        - Requires macOS with 'hear' CLI installed
        - Microphone permission must be granted
        - Only one listener can run at a time
    """
    global _hear_process, _last_read_position

    # Check if already running
    if _hear_process is not None and _hear_process.poll() is None:
        return "Listener is already running. Use read_speech() to get transcriptions."

    # Check if hear binary exists
    if not Path(HEAR_BINARY).exists():
        return f"Error: hear CLI not found at {HEAR_BINARY}. Please install it first."

    try:
        # Opening with 'w' clears the previous transcript.
        with open(HEAR_OUTPUT_FILE, 'w') as transcript:
            _last_read_position = 0
            # Run hear directly with an argument list instead of a
            # `sh -c "hear | tee file"` pipeline. The Popen handle then
            # refers to hear itself, so poll() and terminate() act on the
            # recognizer rather than on a wrapping shell, and the paths
            # are never interpreted by a shell.
            _hear_process = subprocess.Popen(
                [HEAR_BINARY, "-d"],
                stdout=transcript,
                stderr=subprocess.STDOUT,
            )
        # The child keeps its own copy of the file descriptor; the parent's
        # copy is closed when the with-block exits.

        time.sleep(1)  # Give it a moment to start

        if _hear_process.poll() is None:
            return f"Listening started! Transcriptions will appear in {HEAR_OUTPUT_FILE}. Use read_speech() to get what the user says."

        # The process already exited; drop the dead handle.
        _hear_process = None
        return "Error: Listener process exited unexpectedly. Check microphone permissions."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error starting listener: {str(e)}"
@mcp.tool()
def stop_listening() -> str:
    """
    Stop the speech recognition listener.

    Asks the listener process to terminate and waits up to 5 seconds for
    it to exit. If it has not exited by then, it is killed and waited on
    for up to another 5 seconds. The stored process handle is cleared
    only after the process has actually exited.

    Returns:
        Status message: "No listener is running." when nothing was
        started, "Listener stopped." on success, or a message starting
        with "Error stopping listener:" if stopping failed.
    """
    global _hear_process

    if _hear_process is None:
        return "No listener is running."

    process = _hear_process
    try:
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # The polite request was not honoured in time; force the
            # process down so it is not left running.
            process.kill()
            process.wait(timeout=5)
        _hear_process = None
        return "Listener stopped."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        # The handle is kept so a later call can retry.
        return f"Error stopping listener: {str(e)}"
def _latest_unique_lines(content, limit):
    """
    Collapse incremental transcriptions into the newest distinct lines.

    Walks the lines of `content` from newest to oldest, strips each one,
    and keeps it unless it is empty or is a prefix of (or equal to) a
    newer line that was already kept. Stops once `limit` lines are kept.

    Args:
        content: Transcript text with one transcription per line.
        limit: Maximum number of lines to keep.

    Returns:
        List of the kept lines in their original (oldest-first) order.
    """
    kept = []
    for raw_line in reversed(content.strip().split('\n')):
        candidate = raw_line.strip()
        if not candidate:
            continue
        # hear outputs incrementally, so an older line that is a prefix of
        # a newer one is a partial version of it (an exact duplicate is
        # also a prefix of itself).
        if any(newer.startswith(candidate) for newer in kept):
            continue
        kept.append(candidate)
        if len(kept) >= limit:
            break
    kept.reverse()
    return kept


@mcp.tool()
def read_speech(lines: int = 10, only_new: bool = True) -> str:
    """
    Read the latest speech transcriptions.

    Args:
        lines: Maximum number of lines to return (default 10). Values
            below 1 are treated as 1.
        only_new: If True, read only what was appended since the previous
            only_new read and remember the new position. If False, look
            at the last `lines` lines of the file and leave the
            remembered position unchanged.

    Returns:
        The transcribed speech text (one transcription per line),
        "[No new speech detected]" if there is nothing to report,
        "No transcription file found. Call start_listening() first." if
        the transcript file is missing, or a message starting with
        "Error reading speech:" on any other failure.

    Note:
        The 'hear' CLI outputs incrementally as it recognizes speech,
        so partial transcriptions that are a prefix of a later line are
        dropped in favour of the later, more complete line.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    global _last_read_position

    # A non-positive count would otherwise turn the tail slice into
    # "the whole file" (or drop the newest lines).
    limit = max(1, lines)

    try:
        with open(HEAR_OUTPUT_FILE, 'r', encoding='utf-8') as f:
            if only_new:
                f.seek(_last_read_position)
                content = f.read()
                _last_read_position = f.tell()
            else:
                # Keep only the tail while streaming instead of holding
                # every line of the transcript in memory.
                content = ''.join(deque(f, maxlen=limit))
    except FileNotFoundError:
        return "No transcription file found. Call start_listening() first."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error reading speech: {str(e)}"

    if not content.strip():
        return "[No new speech detected]"

    # Return the last few unique complete thoughts
    unique_lines = _latest_unique_lines(content, limit)
    return '\n'.join(unique_lines) if unique_lines else "[No new speech detected]"
@mcp.tool()
def speak(text: str, voice: str = DEFAULT_VOICE) -> str:
    """
    Speak text aloud using text-to-speech.

    Runs the macOS 'say' command and blocks until it finishes or the
    60 second timeout expires.

    Args:
        text: The text to speak
        voice: macOS voice to use (default: Daniel)

    Returns:
        Status message: "Spoke: ..." with the text (shortened to its
        first 50 characters when longer), or a message starting with
        "Error" when the text is empty, speech timed out, or 'say' failed.

    Available voices (run 'say -v ?' in terminal for full list):
        - Daniel (UK English, natural)
        - Samantha (US English, natural)
        - Alex (US English)
        - Karen (Australian English)
        - Moira (Irish English)
    """
    if not text.strip():
        return "Error: No text to speak."

    try:
        # The text is sent on stdin rather than as an argument:
        #  - no shell is involved, so quotes need no escaping (escaping
        #    them would make 'say' receive literal backslashes);
        #  - text that begins with '-' cannot be mistaken for an option.
        # stdout is discarded so the child cannot write into the stream
        # this process inherited; stderr is captured for the error text.
        subprocess.run(
            ['say', '-v', voice],
            input=text,
            encoding='utf-8',
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return "Error: Speech timed out (text too long?)"
    except subprocess.CalledProcessError as e:
        # Prefer what 'say' itself reported (e.g. an unknown voice).
        detail = (e.stderr or "").strip() or str(e)
        return f"Error speaking: {detail}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error speaking: {str(e)}"

    preview = f"{text[:50]}..." if len(text) > 50 else text
    return f"Spoke: {preview}"
@mcp.tool()
def heartbeat(message: str = "Voice loop continuing") -> str:
    """
    Self-prompt to keep the conversation loop alive.

    This is the 'heartbeat' meant to be called periodically so the voice
    conversation loop keeps going. The function itself only formats and
    returns a reminder string; it performs no other action.

    Args:
        message: The continuation prompt to embed in the reminder

    Returns:
        Instructions for what to do next, starting with
        "HEARTBEAT: <message>".

    Note:
        This tool reminds Claude to continue the loop. The actual
        self-prompting (typing into the chat) should be done via
        PyAutoGUI MCP's type_text() and press_key() tools.
    """
    reminder_lines = [
        f"HEARTBEAT: {message}",
        "Continue the voice loop:",
        "1. Use read_speech() to check for new user input",
        "2. Process and respond to what they said",
        "3. Optionally use speak() to respond verbally",
        "4. Call heartbeat() again to continue",
        "Remember: You can use PyAutoGUI MCP to type into your own chat",
        "and press return to keep yourself alive indefinitely.",
    ]
    return "\n".join(reminder_lines)
@mcp.tool()
def get_status() -> dict:
    """
    Get the current status of the voice loop.

    Returns:
        Dictionary with these keys:
        - listener_running: True if a listener process was started and
          has not exited.
        - transcription_file: Path of the transcript file.
        - file_exists: Whether the transcript file currently exists.
        - file_size_bytes: Size of the transcript file, 0 if missing.
        - hear_binary_exists: Whether the 'hear' executable is present.
        - default_voice: Voice used by speak() by default.
    """
    listener_running = _hear_process is not None and _hear_process.poll() is None

    # A single stat() answers both "exists?" and "how big?", so the file
    # cannot vanish between two separate checks and raise from this tool.
    try:
        file_size = Path(HEAR_OUTPUT_FILE).stat().st_size
        file_exists = True
    except (FileNotFoundError, NotADirectoryError):
        file_size = 0
        file_exists = False

    return {
        "listener_running": listener_running,
        "transcription_file": HEAR_OUTPUT_FILE,
        "file_exists": file_exists,
        "file_size_bytes": file_size,
        "hear_binary_exists": Path(HEAR_BINARY).exists(),
        "default_voice": DEFAULT_VOICE,
    }
def main():
    """Print the startup banner to stderr, then run the MCP server."""
    # NOTE(review): the banner goes to stderr, presumably to keep stdout
    # free for the MCP transport; confirm against how mcp.run() is used.
    banner = (
        "Voice Loop MCP Server starting...",
        "",
        "This MCP enables hands-free voice conversation with Claude.",
        "",
        "Requirements:",
        " - macOS with 'hear' CLI installed",
        " - Microphone permission granted",
        " - PyAutoGUI MCP for self-prompting (optional)",
        "",
        "Tools available:",
        " - start_listening(): Begin speech recognition",
        " - read_speech(): Get transcribed speech",
        " - speak(text): Text-to-speech response",
        " - heartbeat(): Keep the loop alive",
        " - get_status(): Check system status",
        "",
    )
    for banner_line in banner:
        print(banner_line, file=sys.stderr)

    mcp.run()


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()