Voice-AGI MCP Server

server.py•21.4 kB

#!/usr/bin/env python3 """ Voice-AGI MCP Server Stateful voice agent with AGI integration and tool execution """ import asyncio import sys import os import logging from typing import Dict, Any, Optional, List from datetime import datetime # FastMCP implementation from fastmcp import FastMCP # Local imports from conversation_manager import ConversationManager from voice_pipeline import VoicePipeline from tool_registry import ToolRegistry from intent_detector import IntentDetector # Set up logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', stream=sys.stderr ) logger = logging.getLogger("voice-agi") # Initialize FastMCP app app = FastMCP("voice-agi") # Global state conversation_manager = ConversationManager(max_turns=10, enable_memory=True) voice_pipeline = VoicePipeline(stt_model="base", tts_voice="en-IE-EmilyNeural") tool_registry = ToolRegistry() # Use cloud-first strategy from environment # Configure OLLAMA_URL to point to your inference node (e.g., http://your-node:11434) ollama_url = os.getenv('OLLAMA_URL', 'http://localhost:11434') ollama_model = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b-cloud') intent_detector = IntentDetector(ollama_url=ollama_url, model=ollama_model) logger.info("Voice-AGI MCP Server initialized") # ============================================================================= # Voice-Callable AGI Tools # ============================================================================= @tool_registry.register( intents=[ "search memory", "search my memory", "find in memory", "remember when", "recall when", "what do you remember about", "look up", "find information", "search for" ], description="Search AGI memory for past information", priority=8 ) async def search_agi_memory(query: str) -> Dict[str, Any]: """Search AGI memory and speak results""" try: logger.info(f"Searching AGI memory: {query}") # TODO: Call enhanced-memory MCP # For now, simulate search result = { 'query': query, 'results': [ {'text': f"Found memory about: {query}", 'relevance': 0.85} ], 'count': 1 } # Speak summary summary = f"I found {result['count']} result about {query}" await voice_pipeline.synthesize_speech(summary, play_audio=True) return result except Exception as e: logger.error(f"Error searching memory: {e}") await voice_pipeline.synthesize_speech("Sorry, I couldn't search memory right now") return {'error': str(e)} @tool_registry.register( intents=[ "create goal", "create a goal", "make goal", "make a goal", "new goal", "add goal", "set goal", "set a goal", "create a new goal", "add a new goal", "i want to create a goal" ], description="Create a new AGI goal from voice", priority=9 ) async def create_goal_from_voice(description: str) -> Dict[str, Any]: """Create AGI goal from voice input""" try: logger.info(f"Creating goal: {description}") # TODO: Call agent-runtime MCP # For now, simulate goal creation goal_id = f"goal_{int(datetime.now().timestamp())}" result = { 'goal_id': goal_id, 'name': description[:50], 'description': description, 'status': 'active' } # Speak confirmation await voice_pipeline.synthesize_speech( f"Goal created with ID {goal_id}", play_audio=True ) return result except Exception as e: logger.error(f"Error creating goal: {e}") await voice_pipeline.synthesize_speech("Sorry, I couldn't create the goal") return {'error': str(e)} @tool_registry.register( intents=[ "list tasks", "list my tasks", "show tasks", "show my tasks", "what tasks", "what are my tasks", "pending tasks", "display tasks", "get tasks", "what's on my todo" ], description="List pending AGI tasks", priority=8 ) async def list_pending_tasks(limit: int = 5) -> Dict[str, Any]: """List pending AGI tasks""" try: logger.info("Listing pending tasks") # TODO: Call agent-runtime MCP # For now, simulate tasks tasks = [ {'id': 1, 'title': 'Example task 1', 'status': 'pending'}, {'id': 2, 'title': 'Example task 2', 'status': 'in_progress'} ] # Speak summary if tasks: summary = f"You have {len(tasks)} tasks. " summary += " ".join([f"Task {t['id']}: {t['title']}" for t in tasks[:3]]) else: summary = "You have no pending tasks" await voice_pipeline.synthesize_speech(summary, play_audio=True) return {'tasks': tasks, 'count': len(tasks)} except Exception as e: logger.error(f"Error listing tasks: {e}") await voice_pipeline.synthesize_speech("Sorry, I couldn't list tasks") return {'error': str(e)} @tool_registry.register( intents=[ "consolidate", "consolidate memory", "run consolidation", "memory consolidation", "consolidation cycle", "run memory consolidation", "trigger consolidation" ], description="Trigger memory consolidation cycle", priority=9 ) async def trigger_consolidation() -> Dict[str, Any]: """Run memory consolidation cycle""" try: logger.info("Triggering memory consolidation") # Inform user await voice_pipeline.synthesize_speech( "Starting memory consolidation. This may take a moment.", play_audio=True ) # TODO: Call AGI orchestrator to run consolidation # For now, simulate await asyncio.sleep(2) # Simulate processing result = { 'status': 'completed', 'patterns_found': 5, 'memories_consolidated': 23 } # Speak result await voice_pipeline.synthesize_speech( f"Consolidation complete. Found {result['patterns_found']} patterns.", play_audio=True ) return result except Exception as e: logger.error(f"Error consolidating memory: {e}") await voice_pipeline.synthesize_speech("Sorry, consolidation failed") return {'error': str(e)} @tool_registry.register( intents=[ "research", "research about", "start research", "do research on", "investigate", "study", "learn about", "find out about", "look into", "explore", "research topic" ], description="Start autonomous research on a topic", priority=8 ) async def start_research(topic: str) -> Dict[str, Any]: """Trigger autonomous research""" try: logger.info(f"Starting research: {topic}") await voice_pipeline.synthesize_speech( f"Starting research on {topic}. I'll notify you when complete.", play_audio=True ) # TODO: Call AGI orchestrator to start research result = { 'research_id': f"research_{int(datetime.now().timestamp())}", 'topic': topic, 'status': 'started' } return result except Exception as e: logger.error(f"Error starting research: {e}") await voice_pipeline.synthesize_speech("Sorry, I couldn't start research") return {'error': str(e)} @tool_registry.register( intents=[ "status", "system status", "check status", "how are you", "how is the system", "how's the system", "system health", "are you ok", "are you working", "how are things" ], description="Check AGI system status", priority=7 ) async def check_system_status() -> Dict[str, Any]: """Check AGI system status""" try: logger.info("Checking system status") # TODO: Get actual system status status = { 'system': 'operational', 'active_agents': 12, 'memory_usage': '2.3 GB', 'uptime_hours': 48 } # Speak summary summary = f"System is {status['system']}. {status['active_agents']} agents active." await voice_pipeline.synthesize_speech(summary, play_audio=True) return status except Exception as e: logger.error(f"Error checking status: {e}") await voice_pipeline.synthesize_speech("Sorry, I couldn't check status") return {'error': str(e)} @tool_registry.register( intents=[ "my name is", "my name's", "i am", "i'm", "call me", "remember my name", "remember that i'm", "remember i'm", "you can call me", "please call me", "name is" ], description="Remember user's name", priority=8 ) async def remember_name(name: str) -> Dict[str, Any]: """Remember user's name in conversation context""" try: conversation_manager.update_user_context('name', name) await voice_pipeline.synthesize_speech( f"Got it, I'll remember your name is {name}", play_audio=True ) return {'name': name, 'stored': True} except Exception as e: logger.error(f"Error remembering name: {e}") return {'error': str(e)} @tool_registry.register( intents=[ "what is my name", "what's my name", "who am i", "who am I", "do you remember me", "do you know my name", "what do you call me", "do you know who i am", "remember me" ], description="Recall user's name", priority=8 ) async def recall_name() -> Dict[str, Any]: """Recall user's name from conversation context""" try: name = conversation_manager.get_user_context('name') if name: await voice_pipeline.synthesize_speech( f"Your name is {name}", play_audio=True ) return {'name': name} else: await voice_pipeline.synthesize_speech( "I don't know your name yet. Please tell me!", play_audio=True ) return {'name': None} except Exception as e: logger.error(f"Error recalling name: {e}") return {'error': str(e)} @tool_registry.register( intents=[ "improve", "improve yourself", "self improve", "self-improve", "optimize", "optimize yourself", "make faster", "speed up", "get better", "enhance", "upgrade", "improve performance" ], description="Start self-improvement cycle", priority=9 ) async def start_improvement_cycle(target_metric: str = "overall_performance") -> Dict[str, Any]: """Start recursive self-improvement cycle""" try: logger.info(f"Starting improvement cycle: {target_metric}") await voice_pipeline.synthesize_speech( f"Starting self-improvement cycle for {target_metric}", play_audio=True ) # TODO: Call AGI orchestrator result = { 'cycle_id': f"improve_{int(datetime.now().timestamp())}", 'target_metric': target_metric, 'status': 'started' } return result except Exception as e: logger.error(f"Error starting improvement cycle: {e}") return {'error': str(e)} @tool_registry.register( intents=[ "decompose", "decompose goal", "break down", "break down goal", "break into tasks", "split goal", "split into tasks", "plan goal", "create tasks from goal" ], description="Decompose a goal into tasks", priority=8 ) async def decompose_goal(goal_description: str) -> Dict[str, Any]: """Decompose goal into tasks""" try: logger.info(f"Decomposing goal: {goal_description}") await voice_pipeline.synthesize_speech( "Analyzing and decomposing the goal", play_audio=True ) # TODO: Call agent-runtime MCP decompose_goal result = { 'goal': goal_description, 'tasks': [ {'title': 'Task 1', 'description': 'First step'}, {'title': 'Task 2', 'description': 'Second step'} ] } await voice_pipeline.synthesize_speech( f"Created {len(result['tasks'])} tasks from your goal", play_audio=True ) return result except Exception as e: logger.error(f"Error decomposing goal: {e}") return {'error': str(e)} # ============================================================================= # MCP Tools (Exposed to Claude Code) # ============================================================================= @app.tool() async def voice_chat(text: str, listen_for_response: bool = False) -> Dict[str, Any]: """ Stateful voice conversation with context retention Args: text: User's speech input (or text input) listen_for_response: Whether to listen for voice response after speaking Returns: Assistant response with conversation metadata """ try: logger.info(f"Voice chat input: {text}") # Get conversation context context = conversation_manager.get_context() # Detect intent intent = await intent_detector.detect( text, context=context, available_tools=tool_registry.list_tools() ) logger.info(f"Detected intent: {intent.name} (confidence: {intent.confidence:.2f})") # Check if we should invoke a tool based on intent keywords matched_tool = tool_registry.match_tool(text) logger.debug(f"Matched tool: {matched_tool.name if matched_tool else 'None'}") # Trust the enhanced tool matcher (100% accuracy) even if intent confidence is low # Tool matching uses sophisticated scoring with exact match priority if matched_tool: # Invoke tool using the tool registry tool_result = await tool_registry.invoke(text, context={'intent': intent}) response = f"[Tool executed: {matched_tool.name}]" # Tool already spoke, so we just track the turn conversation_manager.add_turn( user=text, assistant=response, metadata={'tool': matched_tool.name, 'intent': intent.name} ) return { 'response': response, 'tool_used': matched_tool.name, 'tool_result': tool_result, 'conversation_turns': len(conversation_manager.messages) } # No tool invoked - generate conversational response # TODO: Call LLM with context for better responses response = f"I heard: {text}. (Intent: {intent.name})" # Speak response await voice_pipeline.synthesize_speech(response, play_audio=True) # Store turn conversation_manager.add_turn( user=text, assistant=response, metadata={'intent': intent.name} ) # Store in memory await conversation_manager.store_in_memory() # Listen for response if requested next_input = None if listen_for_response: await voice_pipeline.play_beep("on") next_input = await voice_pipeline.listen_and_transcribe(duration=5) return { 'response': response, 'intent': intent.name, 'conversation_turns': len(conversation_manager.messages), 'next_input': next_input } except Exception as e: logger.error(f"Error in voice_chat: {e}") return {'error': str(e)} @app.tool() async def voice_listen(duration: int = 5) -> Dict[str, Any]: """ Listen to microphone and transcribe Args: duration: Recording duration in seconds Returns: Transcribed text """ try: logger.info(f"Listening for {duration} seconds...") # Play beep to indicate listening await voice_pipeline.play_beep("on") # Listen and transcribe text = await voice_pipeline.listen_and_transcribe(duration) if text: logger.info(f"Transcribed: {text}") return {'success': True, 'text': text, 'duration': duration} else: return {'success': False, 'error': 'No speech detected', 'text': None} except Exception as e: logger.error(f"Error in voice_listen: {e}") return {'success': False, 'error': str(e)} @app.tool() async def voice_speak(text: str, wait_for_completion: bool = True) -> Dict[str, Any]: """ Speak text using TTS Args: text: Text to speak wait_for_completion: Wait for audio playback to complete Returns: Status of speech synthesis """ try: logger.info(f"Speaking: {text[:50]}...") audio_file = await voice_pipeline.synthesize_speech( text, play_audio=wait_for_completion ) if audio_file: return {'success': True, 'audio_file': audio_file, 'text_length': len(text)} else: return {'success': False, 'error': 'TTS failed'} except Exception as e: logger.error(f"Error in voice_speak: {e}") return {'success': False, 'error': str(e)} @app.tool() async def voice_conversation_loop(max_turns: int = 10) -> Dict[str, Any]: """ Start interactive voice conversation loop Args: max_turns: Maximum conversation turns Returns: Summary of conversation """ try: logger.info(f"Starting conversation loop (max {max_turns} turns)") # Greet user await voice_pipeline.synthesize_speech( "Hello! I'm your voice-controlled AGI assistant. How can I help?", play_audio=True ) turn_count = 0 while turn_count < max_turns: # Listen await voice_pipeline.play_beep("on") user_input = await voice_pipeline.listen_and_transcribe(duration=5) if not user_input: await voice_pipeline.synthesize_speech("I didn't catch that. Could you repeat?") continue # Check for exit commands if user_input.lower() in ['exit', 'quit', 'stop', 'goodbye', 'bye']: await voice_pipeline.synthesize_speech("Goodbye! Talk to you later.") break # Process via voice_chat result = await voice_chat(user_input, listen_for_response=False) turn_count += 1 return { 'turns': turn_count, 'summary': conversation_manager.get_conversation_summary(), 'stats': conversation_manager.get_stats() } except Exception as e: logger.error(f"Error in conversation loop: {e}") return {'error': str(e)} @app.tool() async def get_conversation_context() -> Dict[str, Any]: """Get current conversation context and statistics""" try: return { 'context': conversation_manager.get_context(), 'summary': conversation_manager.get_conversation_summary(), 'stats': conversation_manager.get_stats(), 'user_context': conversation_manager.user_context } except Exception as e: logger.error(f"Error getting context: {e}") return {'error': str(e)} @app.tool() async def clear_conversation() -> Dict[str, Any]: """Clear conversation context (start fresh)""" try: conversation_manager.clear_context() return {'success': True, 'message': 'Conversation cleared'} except Exception as e: logger.error(f"Error clearing conversation: {e}") return {'success': False, 'error': str(e)} @app.tool() async def list_voice_tools() -> Dict[str, Any]: """List all registered voice-callable tools""" try: tools = tool_registry.list_tools() return { 'tools': tools, 'count': len(tools) } except Exception as e: logger.error(f"Error listing tools: {e}") return {'error': str(e)} @app.tool() async def get_voice_stats() -> Dict[str, Any]: """Get voice pipeline statistics and latency metrics""" try: return { 'latency': voice_pipeline.get_latency_summary(), 'stt_available': voice_pipeline.is_stt_available(), 'tts_available': voice_pipeline.is_tts_available(), 'conversation_stats': conversation_manager.get_stats(), 'registered_tools': tool_registry.get_tool_count() } except Exception as e: logger.error(f"Error getting stats: {e}") return {'error': str(e)} # ============================================================================= # Main # ============================================================================= if __name__ == "__main__": logger.info("=" * 60) logger.info("Voice-AGI MCP Server Starting") logger.info("=" * 60) logger.info(f"STT Available: {voice_pipeline.is_stt_available()}") logger.info(f"TTS Available: {voice_pipeline.is_tts_available()}") logger.info(f"Registered Tools: {tool_registry.get_tool_count()}") logger.info("=" * 60) # Run the FastMCP server app.run()

Loading blob content...

Latest Blog Posts

Don't Use Large Strings as Cache Keys
By punkpeye on January 11, 2026.
markdown
node-js
cache
What are Claude Skills?
By punkpeye on January 10, 2026.
mcp
skills
How to Test MCP Streamable HTTP Endpoints Using cURL
By punkpeye on January 2, 2026.
tutorial
bash

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/marc-shade/voice-agi-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

server.py•21.4 kB