MCP Audio Server

audio_server.py•20.5 KiB

#!/usr/bin/env python3
"""
MCP Audio Server - Provides audio playback capabilities for AI models
"""

import json
import logging
import os
import re
import sys
from io import BytesIO
from typing import Any, Dict, Optional

# Suppress pygame welcome message to avoid interfering with MCP JSON communication.
# This must be set BEFORE pygame is imported.
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'

import pyttsx3
import pygame
from gtts import gTTS

# Configure logging to stderr to avoid interfering with MCP JSON communication on stdout
logging.basicConfig(
    level=logging.INFO,
    stream=sys.stderr,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Defaults advertised in the speak_text tool schema; re-applied on every call
# so an override from one request does not leak into the next one.
_DEFAULT_TTS_RATE = 150
_DEFAULT_TTS_VOLUME = 0.8

# Simple check for Chinese characters (same character range as before).
_CHINESE_CHARS = re.compile(r'[一-鿿]')

# JSON-RPC 2.0 pre-defined error codes.
_JSONRPC_PARSE_ERROR = -32700
_JSONRPC_INVALID_REQUEST = -32600
_JSONRPC_METHOD_NOT_FOUND = -32601
_JSONRPC_INTERNAL_ERROR = -32603


def _clamp_volume(volume: float) -> float:
    """Clamp a volume value into the 0.0-1.0 range."""
    return max(0.0, min(1.0, volume))


def _preview(text: str) -> str:
    """Return the first 50 characters of *text*, with '...' if it was cut."""
    return f"{text[:50]}{'...' if len(text) > 50 else ''}"


class AudioPlayer:
    """Handles audio playback operations.

    Wraps a pyttsx3 engine (text-to-speech) and the pygame mixer (audio file
    and gTTS playback). Every public method returns a dict with a boolean
    "success" key plus either a payload ("message", "voices", "status") or
    an "error" string; none of them raise.
    """

    def __init__(self):
        self.tts_engine = None           # pyttsx3 engine, or None if init failed
        self.pygame_initialized = False  # True once pygame.mixer.init() succeeded
        self.current_sound = None
        self._init_tts()
        self._init_pygame()

    def _init_tts(self):
        """Initialize text-to-speech engine; on failure log and leave it None."""
        try:
            self.tts_engine = pyttsx3.init()
            # Set default properties
            self.tts_engine.setProperty('rate', _DEFAULT_TTS_RATE)      # Speed of speech
            self.tts_engine.setProperty('volume', _DEFAULT_TTS_VOLUME)  # Volume level (0.0 to 1.0)
            logger.info("TTS engine initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize TTS engine: {e}")

    def _init_pygame(self):
        """Initialize pygame mixer for audio file playback; log on failure."""
        try:
            # Suppress pygame welcome message
            os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
            pygame.mixer.init()
            self.pygame_initialized = True
            logger.info("Pygame mixer initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize pygame mixer: {e}")

    def _speak_chinese(self, text: str, volume: Optional[float]) -> Dict[str, Any]:
        """Synthesize *text* with gTTS and play it through the pygame mixer.

        Blocks until playback has finished.
        """
        try:
            if not self.pygame_initialized:
                return {"success": False, "error": "Pygame not initialized for gTTS playback"}

            tts = gTTS(text, lang='zh-cn')
            fp = BytesIO()
            tts.write_to_fp(fp)
            fp.seek(0)

            pygame.mixer.music.load(fp)
            if volume is not None:
                pygame.mixer.music.set_volume(_clamp_volume(volume))
            pygame.mixer.music.play()

            # Wait for playback to finish, polling about ten times per second.
            clock = pygame.time.Clock()
            while pygame.mixer.music.get_busy():
                clock.tick(10)

            return {
                "success": True,
                "message": f"Successfully spoke Chinese text using gTTS: '{_preview(text)}'"
            }
        except Exception as e:
            return {"success": False, "error": f"Failed to speak Chinese text with gTTS: {str(e)}"}

    def speak_text(self, text: str, rate: Optional[int] = None, volume: Optional[float] = None,
                   voice_id: Optional[str] = None) -> Dict[str, Any]:
        """Convert text to speech and play it.

        Text containing Chinese characters is spoken with gTTS through the
        pygame mixer (``rate`` and ``voice_id`` are ignored on that path);
        everything else goes through pyttsx3.

        Args:
            text: The text to speak.
            rate: pyttsx3 speech rate; the default of 150 is used when omitted.
            volume: Volume, clamped to 0.0-1.0; for pyttsx3 the default of 0.8
                is used when omitted.
            voice_id: Optional pyttsx3 voice identifier.

        Returns:
            ``{"success": True, "message": ...}`` or
            ``{"success": False, "error": ...}``.
        """
        if _CHINESE_CHARS.search(text):
            return self._speak_chinese(text, volume)

        # Fallback to pyttsx3 for non-Chinese text
        if not self.tts_engine:
            return {"success": False, "error": "TTS engine not available"}

        try:
            # Set custom voice if provided
            if voice_id:
                self.tts_engine.setProperty('voice', voice_id)

            # Engine properties persist between calls, so always apply either
            # the requested value or the documented default.
            self.tts_engine.setProperty(
                'rate', _DEFAULT_TTS_RATE if rate is None else rate)
            self.tts_engine.setProperty(
                'volume', _DEFAULT_TTS_VOLUME if volume is None else _clamp_volume(volume))

            # Speak the text
            self.tts_engine.say(text)
            self.tts_engine.runAndWait()

            return {
                "success": True,
                "message": f"Successfully spoke text: '{_preview(text)}'"
            }
        except Exception as e:
            return {"success": False, "error": f"Failed to speak text: {str(e)}"}

    def list_voices(self) -> Dict[str, Any]:
        """List available TTS voices as dicts with id, name, lang and gender."""
        if not self.tts_engine:
            return {"success": False, "error": "TTS engine not available"}

        try:
            voice_list = [
                {
                    "id": voice.id,
                    "name": voice.name,
                    "lang": voice.languages,
                    "gender": voice.gender
                }
                for voice in self.tts_engine.getProperty('voices')
            ]
            return {"success": True, "voices": voice_list}
        except Exception as e:
            return {"success": False, "error": f"Failed to list voices: {str(e)}"}

    def play_audio_file(self, file_path: str, volume: Optional[float] = None) -> Dict[str, Any]:
        """Start playing an audio file; returns without waiting for it to end."""
        if not self.pygame_initialized:
            return {"success": False, "error": "Audio system not initialized"}

        try:
            # Check if file exists
            if not os.path.exists(file_path):
                return {"success": False, "error": f"Audio file not found: {file_path}"}

            # Load and play the audio file
            pygame.mixer.music.load(file_path)

            # Set volume if provided
            if volume is not None:
                pygame.mixer.music.set_volume(_clamp_volume(volume))

            pygame.mixer.music.play()

            return {
                "success": True,
                "message": f"Successfully started playing: {os.path.basename(file_path)}"
            }
        except Exception as e:
            return {"success": False, "error": f"Failed to play audio file: {str(e)}"}

    def stop_audio(self) -> Dict[str, Any]:
        """Stop current audio playback (both mixer music and the TTS engine)."""
        try:
            if self.pygame_initialized:
                pygame.mixer.music.stop()

            if self.tts_engine:
                self.tts_engine.stop()

            return {"success": True, "message": "Audio playback stopped"}
        except Exception as e:
            return {"success": False, "error": f"Failed to stop audio: {str(e)}"}

    def get_audio_status(self) -> Dict[str, Any]:
        """Get current audio playback status."""
        try:
            status = {
                "tts_available": self.tts_engine is not None,
                "pygame_available": self.pygame_initialized,
                "music_playing": False
            }

            if self.pygame_initialized:
                status["music_playing"] = pygame.mixer.music.get_busy()

            return {"success": True, "status": status}
        except Exception as e:
            return {"success": False, "error": f"Failed to get audio status: {str(e)}"}


class MCPAudioServer:
    """Simple MCP-compatible Audio Server.

    Exposes the AudioPlayer operations as MCP tools: ``self.tools`` holds the
    tool descriptors returned by ``list_tools`` and ``call_tool`` dispatches a
    tool invocation and wraps the outcome in MCP "content" format.
    """

    def __init__(self):
        self.audio_player = AudioPlayer()
        self.tools = {
            "speak_text": {
                "name": "speak_text",
                "description": "Convert text to speech and play it through the system audio. Supports multiple languages and voice selection (for non-Chinese text).",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "text": {
                            "type": "string",
                            "description": "The text to convert to speech and play"
                        },
                        "rate": {
                            "type": "integer",
                            "description": "Speech rate (words per minute, default: 150)",
                            "minimum": 50,
                            "maximum": 300
                        },
                        "volume": {
                            "type": "number",
                            "description": "Volume level (0.0 to 1.0, default: 0.8)",
                            "minimum": 0.0,
                            "maximum": 1.0
                        },
                        "voice_id": {
                            "type": "string",
                            "description": "The ID of the voice to use (for non-Chinese text). See list_voices() for available IDs."
                        }
                    },
                    "required": ["text"]
                }
            },
            "list_voices": {
                "name": "list_voices",
                "description": "List available text-to-speech voices for non-Chinese languages.",
                "inputSchema": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            },
            "play_audio_file": {
                "name": "play_audio_file",
                "description": "Play an audio file through the system audio",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "file_path": {
                            "type": "string",
                            "description": "Path to the audio file to play"
                        },
                        "volume": {
                            "type": "number",
                            "description": "Volume level (0.0 to 1.0)",
                            "minimum": 0.0,
                            "maximum": 1.0
                        }
                    },
                    "required": ["file_path"]
                }
            },
            "stop_audio": {
                "name": "stop_audio",
                "description": "Stop current audio playback",
                "inputSchema": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            },
            "get_audio_status": {
                "name": "get_audio_status",
                "description": "Get current audio system status and playback information",
                "inputSchema": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }

    def list_tools(self) -> Dict[str, Any]:
        """List available tools"""
        return {
            "tools": list(self.tools.values())
        }

    @staticmethod
    def _text_content(text: str, is_error: bool) -> Dict[str, Any]:
        """Wrap *text* in an MCP tool-result envelope."""
        return {
            "content": [{"type": "text", "text": text}],
            "isError": is_error
        }

    @classmethod
    def _from_result(cls, result: Dict[str, Any], default_message: str) -> Dict[str, Any]:
        """Convert an AudioPlayer result dict into an MCP tool result."""
        if result.get("success"):
            return cls._text_content(result.get("message", default_message), False)
        return cls._text_content(f"Error: {result.get('error', 'Unknown error')}", True)

    def call_tool(self, name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Call a tool with given arguments.

        Args:
            name: Tool name, one of the keys of ``self.tools``.
            arguments: Tool arguments; ``None`` is treated as no arguments.

        Returns:
            An MCP tool result: ``{"content": [{"type": "text", ...}],
            "isError": bool}``. Failures, including unknown tools and
            unexpected exceptions, are reported with ``isError`` True.
        """
        try:
            arguments = arguments or {}

            if name == "speak_text":
                text = arguments.get("text", "")
                if not text:
                    return self._text_content("Error: No text provided", True)
                result = self.audio_player.speak_text(
                    text,
                    arguments.get("rate"),
                    arguments.get("volume"),
                    arguments.get("voice_id"),
                )
                return self._from_result(result, "Speech completed successfully")

            if name == "list_voices":
                result = self.audio_player.list_voices()
                if not result.get("success"):
                    return self._from_result(result, "")
                # Format the output for better readability.
                # Single quotes inside the f-string keep this valid before Python 3.12.
                voice_text = "Available Voices:\n" + "\n".join(
                    f"- ID: {v['id']}\n Name: {v['name']}\n Lang: {v['lang']}\n Gender: {v['gender']}"
                    for v in result.get("voices", [])
                )
                return self._text_content(voice_text, False)

            if name == "play_audio_file":
                file_path = arguments.get("file_path", "")
                if not file_path:
                    return self._text_content("Error: No file path provided", True)
                result = self.audio_player.play_audio_file(file_path, arguments.get("volume"))
                return self._from_result(result, "Audio file played successfully")

            if name == "stop_audio":
                return self._from_result(
                    self.audio_player.stop_audio(), "Audio stopped successfully")

            if name == "get_audio_status":
                result = self.audio_player.get_audio_status()
                if not result.get("success"):
                    return self._from_result(result, "")
                status = result.get("status", {})
                status_text = (
                    "Audio Status:\n"
                    f"- TTS Available: {status.get('tts_available', False)}\n"
                    f"- Pygame Available: {status.get('pygame_available', False)}\n"
                    f"- Music Playing: {status.get('music_playing', False)}"
                )
                return self._text_content(status_text, False)

            return self._text_content(f"Error: Unknown tool '{name}'", True)

        except Exception as e:
            logger.error(f"Error calling tool '{name}': {e}")
            return self._text_content(f"Tool execution error: {str(e)}", True)


def _jsonrpc_error(request_id: Any, code: int, message: str, data: str) -> str:
    """Serialize a JSON-RPC 2.0 error response."""
    return json.dumps({
        "jsonrpc": "2.0",
        "id": request_id,
        "error": {
            "code": code,
            "message": message,
            "data": data
        }
    })


def handle_json_rpc_request(request_data: str) -> str:
    """Handle one JSON-RPC request and return the serialized response.

    Args:
        request_data: A single JSON-RPC 2.0 request as a JSON string.

    Returns:
        The JSON-encoded response, or "" for ``notifications/*`` methods,
        which expect no reply. Failures are reported as JSON-RPC error
        objects: -32700 for malformed JSON, -32600 for a non-object request,
        -32601 for an unknown method and -32603 for anything else.
    """
    try:
        request = json.loads(request_data)
    except (TypeError, ValueError) as e:
        logger.error(f"JSON-RPC error: {e}")
        return _jsonrpc_error(0, _JSONRPC_PARSE_ERROR, "Parse error", str(e))

    if not isinstance(request, dict):
        return _jsonrpc_error(
            0, _JSONRPC_INVALID_REQUEST, "Invalid Request",
            "Request must be a JSON object")

    # Ensure request_id is never None for responses
    request_id = request.get("id")
    if request_id is None:
        request_id = 0

    try:
        method = request.get("method")
        params = request.get("params") or {}  # tolerate an explicit null

        # Handle notifications (no response expected)
        if isinstance(method, str) and method.startswith("notifications/"):
            logger.info(f"Received notification: {method}")
            return ""

        if method == "initialize":
            result = {
                "protocolVersion": "2024-11-05",
                "capabilities": {
                    "tools": {
                        "listChanged": False
                    }
                },
                "serverInfo": {
                    "name": "audio-server",
                    "version": "1.0.0"
                }
            }
        elif method == "tools/list":
            result = server.list_tools()
        elif method == "tools/call":
            # The tool result is already in MCP format and is returned as-is.
            result = server.call_tool(params.get("name"), params.get("arguments") or {})
        elif method == "resources/list":
            # Return empty resources list - this server doesn't provide resources
            result = {"resources": []}
        elif method == "prompts/list":
            # Return empty prompts list - this server doesn't provide prompts
            result = {"prompts": []}
        else:
            # An unknown method is a protocol error, not a successful result.
            return _jsonrpc_error(
                request_id, _JSONRPC_METHOD_NOT_FOUND, "Method not found",
                f"Unknown method: {method}")

        return json.dumps({
            "jsonrpc": "2.0",
            "id": request_id,
            "result": result
        })

    except Exception as e:
        logger.error(f"JSON-RPC error: {e}")
        return _jsonrpc_error(request_id, _JSONRPC_INTERNAL_ERROR, "Internal error", str(e))

def interactive_mode():
    """Run in interactive mode for testing.

    Reads commands from stdin at an ``audio> `` prompt and prints each tool
    result as indented JSON on stderr. The loop ends on ``quit``, Ctrl-C, or
    end of input (Ctrl-D / closed stdin).
    """
    banner = (
        "MCP Audio Server - Interactive Mode",
        "Available commands:",
        " list - List available tools",
        " speak <text> - Speak text",
        " play <file_path> - Play audio file",
        " stop - Stop audio playback",
        " status - Get audio status",
        " quit - Exit",
    )
    for banner_line in banner:
        print(banner_line, file=sys.stderr)
    print(file=sys.stderr)

    def show(result):
        """Pretty-print a result on stderr."""
        print(json.dumps(result, indent=2), file=sys.stderr)

    while True:
        try:
            command = input("audio> ").strip()

            if command == "quit":
                break
            elif command == "list":
                show(server.list_tools())
            elif command.startswith("speak "):
                show(server.call_tool("speak_text", {"text": command[6:]}))
            elif command.startswith("play "):
                show(server.call_tool("play_audio_file", {"file_path": command[5:]}))
            elif command == "stop":
                show(server.call_tool("stop_audio", {}))
            elif command == "status":
                show(server.call_tool("get_audio_status", {}))
            elif command == "":
                continue
            else:
                print(f"Unknown command: {command}", file=sys.stderr)

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: input() raises it again on every
            # subsequent call once stdin is exhausted, so treating it as a
            # generic error would spin forever.
            print("\nExiting...", file=sys.stderr)
            break
        except Exception as e:
            print(f"Error: {e}", file=sys.stderr)


def main():
    """Main function.

    With ``--interactive`` as the first argument, run the interactive test
    shell. Otherwise serve JSON-RPC: read one request per line from stdin
    and write each non-empty response as one line on stdout.
    """
    if len(sys.argv) > 1 and sys.argv[1] == "--interactive":
        interactive_mode()
        return

    # JSON-RPC mode for MCP integration
    logger.info("Starting MCP Audio Server in JSON-RPC mode...")
    logger.info("Send JSON-RPC requests to stdin")

    try:
        # readline() returns "" only at EOF, which ends the loop.
        for line in iter(sys.stdin.readline, ""):
            request_text = line.strip()
            if not request_text:
                # Blank lines are not requests; answering them would put a
                # spurious error response on the protocol channel.
                continue

            response = handle_json_rpc_request(request_text)
            if response:  # Only print non-empty responses
                print(response)
                sys.stdout.flush()

    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        logger.error(f"Server error: {e}")


# Initialize server
server = MCPAudioServer()

if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BatchLion/mcp-audio-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

audio_server.py•20.5 KiB