Resemble AI Voice Generation MCP Server

resemble_stdio_server.py•11.9 kB

""" Resemble AI Voice Generation MCP Server using StdIO transport. This server integrates with Resemble AI's voice generation API using simple stdio communication for Claude/Cursor integration. """ import os import json import base64 import logging import sys from typing import Dict, List, Optional, Any, Union from pathlib import Path import requests from dotenv import load_dotenv from pydantic import BaseModel # Configure logging - log to file instead of stdout to avoid interfering with stdio communication logging.basicConfig( level=logging.INFO, format='[%(levelname)s] %(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename='resemble_stdio.log', # Log to file instead of stdout filemode='a' ) logger = logging.getLogger("resemble-stdio") # Load environment variables load_dotenv() # Resemble AI API configuration RESEMBLE_API_KEY = os.getenv("RESEMBLE_API_KEY") RESEMBLE_API_BASE_URL = "https://app.resemble.ai/api/v2" OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./output") AUDIO_FORMAT = os.getenv("AUDIO_FORMAT", "mp3") # Create output directory if it doesn't exist Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True) # Validate API key if not RESEMBLE_API_KEY: logger.error("[Setup] Missing RESEMBLE_API_KEY in environment variables") raise ValueError("RESEMBLE_API_KEY environment variable is required") # Models class VoiceModel(BaseModel): """Voice model information.""" uuid: str name: str is_public: bool is_cloned: bool description: Optional[str] = None class ListVoicesResponse(BaseModel): """Response model for list_voices endpoint.""" voices: List[VoiceModel] class GenerateTTSResponse(BaseModel): """Response model for generate_tts endpoint.""" success: bool message: str file_path: Optional[str] = None base64_audio: Optional[str] = None # Client for Resemble AI API class ResembleClient: """Client for interacting with Resemble AI API.""" def __init__(self, api_key: str, base_url: str = RESEMBLE_API_BASE_URL): """Initialize the client with API key and base URL.""" 
self.api_key = api_key self.base_url = base_url self.headers = { "Authorization": f"Token token={api_key}", "Content-Type": "application/json", "Accept": "application/json" } logger.info("[Setup] Initializing Resemble AI client") def get_voices(self) -> Dict[str, Any]: """Get a list of available voices.""" logger.info("[API] Fetching available voices from Resemble AI") try: response = requests.get( f"{self.base_url}/voices", headers=self.headers ) response.raise_for_status() return response.json() except requests.RequestException as e: logger.error(f"[Error] Failed to fetch voices: {str(e)}") raise def generate_audio(self, text: str, voice_uuid: str) -> Dict[str, Any]: """Generate audio from text using specified voice.""" logger.info(f"[API] Generating audio for voice {voice_uuid}") try: payload = { "title": f"generated_{voice_uuid[:8]}", "voice_uuid": voice_uuid, "body": text, "output_format": AUDIO_FORMAT } response = requests.post( f"{self.base_url}/clips", headers=self.headers, json=payload ) response.raise_for_status() return response.json() except requests.RequestException as e: logger.error(f"[Error] Failed to generate audio: {str(e)}") raise def download_audio(self, clip_id: str, output_path: str) -> str: """Download generated audio clip and save to file.""" logger.info(f"[API] Downloading audio clip {clip_id}") try: response = requests.get( f"{self.base_url}/clips/{clip_id}/audio", headers=self.headers ) response.raise_for_status() # Save to file with open(output_path, 'wb') as f: f.write(response.content) logger.info(f"[Success] Saved audio to {output_path}") return output_path except requests.RequestException as e: logger.error(f"[Error] Failed to download audio: {str(e)}") raise # Initialize Resemble AI client resemble_client = ResembleClient(RESEMBLE_API_KEY) # Tool implementations def list_voices() -> Dict[str, Any]: """List available voice models from Resemble AI.""" try: voices_data = resemble_client.get_voices() voices = [] for voice in 
voices_data.get("voices", []): voices.append({ "uuid": voice.get("uuid", ""), "name": voice.get("name", ""), "is_public": voice.get("is_public", False), "is_cloned": voice.get("is_cloned", False), "description": voice.get("description", "") }) return {"voices": voices} except Exception as e: logger.error(f"[Error] Failed to list voices: {str(e)}") raise def generate_tts(text: str, voice_id: str, return_type: str = "file", output_filename: Optional[str] = None) -> Dict[str, Any]: """Generate voice audio from text.""" try: # Validate inputs if not text: raise ValueError("Text is required") if not voice_id: raise ValueError("Voice ID is required") # Generate audio result = resemble_client.generate_audio(text, voice_id) clip_id = result.get("id") if not clip_id: raise ValueError("Failed to generate audio clip") # Set output filename if not output_filename: output_filename = f"generated_{voice_id[:8]}" # Set file path file_path = os.path.join(OUTPUT_DIR, f"{output_filename}.{AUDIO_FORMAT}") # Download audio resemble_client.download_audio(clip_id, file_path) # Return appropriate response based on return_type if return_type == "base64": with open(file_path, "rb") as audio_file: encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8") return { "success": True, "message": "Successfully generated audio", "base64_audio": encoded_audio } else: return { "success": True, "message": "Successfully generated audio", "file_path": file_path } except Exception as e: logger.error(f"[Error] Failed to generate TTS: {str(e)}") raise # Define tool schemas for registration TOOL_SCHEMAS = [ { "name": "list_voices", "description": "List available voice models from Resemble AI.", "parameters": { "type": "object", "properties": { "random_string": { "type": "string", "description": "Dummy parameter for no-parameter tools" } }, "required": ["random_string"] } }, { "name": "generate_tts", "description": "Generate voice audio from text.\n \n Args:\n text: Text to convert to speech\n 
voice_id: ID of the voice to use\n return_type: How to return the audio: 'file' or 'base64'\n output_filename: Filename for the output (without extension)\n ", "parameters": { "type": "object", "properties": { "text": { "type": "string", "title": "Text" }, "voice_id": { "type": "string", "title": "Voice Id" }, "return_type": { "type": "string", "title": "Return Type", "default": "file" }, "output_filename": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "title": "Output Filename", "default": None } }, "required": [ "text", "voice_id" ], "title": "generate_ttsArguments" } } ] def read_message(): """Read a message from stdin.""" line = sys.stdin.readline() if not line: return None try: return json.loads(line) except json.JSONDecodeError as e: logger.error(f"[Error] Failed to parse message: {str(e)}") return None def write_message(message_type, **kwargs): """Write a message to stdout.""" message = {"type": message_type, **kwargs} sys.stdout.write(json.dumps(message) + "\n") sys.stdout.flush() def start_server(): """Start the MCP server using StdIO transport.""" logger.info("Starting Resemble AI MCP Server with StdIO transport") # Send a register message to the client write_message("register", tools=TOOL_SCHEMAS) # Process messages from stdin while True: message = read_message() if message is None: logger.info("End of input, shutting down") break logger.info(f"[Message] Received: {json.dumps(message)[:100]}...") try: message_type = message.get("type", "") if message_type == "invoke": name = message.get("name", "") parameters = message.get("parameters", {}) invoke_id = message.get("invoke_id", "") logger.info(f"[Invoke] Tool: {name}, ID: {invoke_id}") try: if name == "list_voices": result = list_voices() write_message("tool_result", invoke_id=invoke_id, result=result) elif name == "generate_tts": result = generate_tts( text=parameters.get("text", ""), voice_id=parameters.get("voice_id", ""), return_type=parameters.get("return_type", "file"), 
output_filename=parameters.get("output_filename") ) write_message("tool_result", invoke_id=invoke_id, result=result) else: logger.error(f"[Error] Unknown tool: {name}") write_message("tool_error", invoke_id=invoke_id, error=f"Unknown tool: {name}") except Exception as e: logger.error(f"[Error] Failed to execute tool: {str(e)}") write_message("tool_error", invoke_id=invoke_id, error=str(e)) elif message_type == "ping": write_message("pong") else: logger.warning(f"[Warning] Unknown message type: {message_type}") except Exception as e: logger.error(f"[Error] Failed to handle message: {str(e)}") logger.exception(e) if __name__ == "__main__": try: start_server() except Exception as e: logger.error(f"[Error] Server crashed: {str(e)}") logger.exception(e) sys.exit(1)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/obaid/resemble-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.