Kokoro Text to Speech MCP Server

Apache 2.0
  • Apple
#!/usr/bin/env python3 import argparse import asyncio import json import os import sys import time from dotenv import load_dotenv # Load environment variables from .env file load_dotenv() async def send_tts_request(host, port, text, voice, speed, language, filename, upload_to_s3=True): """Send a TTS request to the MCP server""" try: # Connect to the server reader, writer = await asyncio.open_connection(host, port) # Create the request request = { "text": text, "voice": voice, "speed": speed, "lang": language, "upload_to_s3": upload_to_s3 } # Add filename if provided if filename: request["filename"] = filename # Encode the request as JSON request_json = json.dumps(request) # Send the request print(f"Sending request to {host}:{port}...") print(f"Request: {request_json}") writer.write(request_json.encode('utf-8')) await writer.drain() # Read the response print("Waiting for response...") data = await reader.read(4096) response = json.loads(data.decode('utf-8')) # Close the connection writer.close() await writer.wait_closed() # Return the response return response except ConnectionRefusedError: print(f"Error: Could not connect to MCP TTS Server at {host}:{port}") print("Please make sure the server is running.") return None except Exception as e: print(f"Error: {str(e)}") return None def print_response(response): """Print the server response in a human-readable format""" if response is None: return print("\n=== TTS Response ===") if response.get("success", False): print("✅ Success: Audio generated successfully") print(f"📁 Filename: {response.get('filename', 'Unknown')}") print(f"📊 File size: {response.get('file_size', 0)} bytes") if response.get("s3_uploaded", False): print("☁️ S3 Upload: Success") print(f"🔗 S3 URL: {response.get('s3_url', 'Not available')}") elif "s3_uploaded" in response and not response["s3_uploaded"]: print("☁️ S3 Upload: Failed") print(f"❌ Error: {response.get('s3_error', 'Unknown error')}") else: print("☁️ S3 Upload: Not requested") else: print("❌ Error: TTS generation failed") print(f"Error message: {response.get('error', 'Unknown error')}") print("=====================\n") def read_text_file(file_path): """Read text from a file""" try: with open(file_path, 'r', encoding='utf-8') as file: return file.read() except Exception as e: print(f"Error reading file {file_path}: {str(e)}") sys.exit(1) def main(): # Parse command line arguments parser = argparse.ArgumentParser(description="MCP TTS Client") # Server connection - Important: for connecting to a local server, use "localhost" or "127.0.0.1", not "0.0.0.0" parser.add_argument("--host", default=os.environ.get("MCP_CLIENT_HOST", "localhost"), help="MCP server hostname (default: localhost or MCP_CLIENT_HOST env var)") parser.add_argument("--port", type=int, default=int(os.environ.get("MCP_PORT", "9876")), help="MCP server port (default: 9876 or MCP_PORT env var)") # TTS parameters parser.add_argument("--text", default=None, help="Text to synthesize") parser.add_argument("--file", default=None, help="Text file to read content from") parser.add_argument("--voice", default=os.environ.get("TTS_VOICE", "af_heart"), help="Voice to use (default: af_heart or TTS_VOICE env var)") parser.add_argument("--speed", type=float, default=float(os.environ.get("TTS_SPEED", "1.0")), help="Speech speed (default: 1.0 or TTS_SPEED env var)") parser.add_argument("--language", default=os.environ.get("TTS_LANGUAGE", "en-us"), help="Language code (default: en-us or TTS_LANGUAGE env var)") parser.add_argument("--filename", default=None, help="Output filename (default: auto-generated)") # S3 options parser.add_argument("--no-s3", action="store_true", help="Disable S3 upload (enabled by default)") # Debug options parser.add_argument("--raw", action="store_true", help="Print raw JSON response") args = parser.parse_args() # Check for text input if args.text is None and args.file is None: parser.error("Either --text or --file is required") # Read text from file if specified if args.file: text = read_text_file(args.file) else: text = args.text # Run the request response = asyncio.run(send_tts_request( host=args.host, port=args.port, text=text, voice=args.voice, speed=args.speed, language=args.language, filename=args.filename, upload_to_s3=not args.no_s3 )) # Print the response if response: if args.raw: print(json.dumps(response, indent=2)) else: print_response(response) if __name__ == "__main__": main()