Skip to main content
Glama

Resemble AI Voice Generation MCP Server

by obaid
resemble_http_server.py (14.4 kB)
"""
Resemble AI Voice Generation HTTP Server

This is a simplified implementation that serves as a fallback when the MCP SDK
is not available or has compatibility issues. It implements the MCP protocol
but without using the MCP SDK directly.

Flow: a client opens ``GET /sse`` and receives a ``register`` message listing
the available tools, then posts ``invoke`` messages to ``POST /messages/``.
Tool results (``tool_result``) and failures (``tool_error``) are delivered back
over the SSE stream.
"""

import os
import json
import base64
import logging
import asyncio
import functools
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Any, Union
from pathlib import Path

import requests
from dotenv import load_dotenv
from pydantic import BaseModel
from fastapi import FastAPI, Request, Response, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from sse_starlette.sse import EventSourceResponse
import uvicorn

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s] %(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger("resemble-http")

# Load environment variables
load_dotenv()

# Resemble AI API configuration
RESEMBLE_API_KEY = os.getenv("RESEMBLE_API_KEY")
RESEMBLE_API_BASE_URL = "https://app.resemble.ai/api/v2"
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./output")
AUDIO_FORMAT = os.getenv("AUDIO_FORMAT", "mp3")

# (connect, read) timeout in seconds for outbound HTTP calls. Without a timeout
# `requests` waits forever on an unresponsive server.
REQUEST_TIMEOUT = (10, 120)

# Create output directory if it doesn't exist
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

# Validate API key
if not RESEMBLE_API_KEY:
    logger.error("[Setup] Missing RESEMBLE_API_KEY in environment variables")
    raise ValueError("RESEMBLE_API_KEY environment variable is required")


# Models
class VoiceModel(BaseModel):
    """Voice model information."""
    uuid: str
    name: str
    is_public: bool
    is_cloned: bool
    description: Optional[str] = None


class ListVoicesResponse(BaseModel):
    """Response model for list_voices endpoint."""
    voices: List[VoiceModel]


class GenerateTTSRequest(BaseModel):
    """Request model for generate_tts endpoint."""
    text: str
    voice_id: str
    return_type: str = "file"  # 'file' or 'base64'
    output_filename: Optional[str] = None


class GenerateTTSResponse(BaseModel):
    """Response model for generate_tts endpoint."""
    success: bool
    message: str
    file_path: Optional[str] = None
    base64_audio: Optional[str] = None


# Client for Resemble AI API
class ResembleClient:
    """Client for interacting with Resemble AI API."""

    def __init__(self, api_key: str, base_url: str = RESEMBLE_API_BASE_URL,
                 timeout=REQUEST_TIMEOUT):
        """Initialize the client with API key and base URL.

        Args:
            api_key: Token sent in the ``Authorization`` header.
            base_url: Root URL that endpoint paths are appended to.
            timeout: Value passed as ``timeout=`` to every ``requests`` call.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Token token={api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        logger.info("[Setup] Initializing Resemble AI client")

    def get_voices(self) -> Dict[str, Any]:
        """Get a list of available voices.

        Returns:
            The decoded JSON body of ``GET {base_url}/voices``.

        Raises:
            requests.RequestException: On connection failure, timeout or a
                non-success HTTP status (logged, then re-raised).
        """
        logger.info("[API] Fetching available voices from Resemble AI")
        try:
            response = requests.get(
                f"{self.base_url}/voices",
                headers=self.headers,
                timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            logger.error(f"[Error] Failed to fetch voices: {str(e)}")
            raise

    def generate_audio(self, text: str, voice_uuid: str) -> Dict[str, Any]:
        """Generate audio from text using specified voice.

        Args:
            text: Text sent as the clip ``body``.
            voice_uuid: Voice identifier; its first 8 characters are also used
                in the clip title.

        Returns:
            The decoded JSON body of ``POST {base_url}/clips``.

        Raises:
            requests.RequestException: On connection failure, timeout or a
                non-success HTTP status (logged, then re-raised).
        """
        logger.info(f"[API] Generating audio for voice {voice_uuid}")
        try:
            payload = {
                "title": f"generated_{voice_uuid[:8]}",
                "voice_uuid": voice_uuid,
                "body": text,
                "output_format": AUDIO_FORMAT
            }
            response = requests.post(
                f"{self.base_url}/clips",
                headers=self.headers,
                json=payload,
                timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            logger.error(f"[Error] Failed to generate audio: {str(e)}")
            raise

    def download_audio(self, clip_id: str, output_path: str) -> str:
        """Download generated audio clip and save to file.

        Args:
            clip_id: Identifier interpolated into ``/clips/{clip_id}/audio``.
            output_path: Destination file; overwritten if it already exists.

        Returns:
            ``output_path``, unchanged.

        Raises:
            requests.RequestException: On connection failure, timeout or a
                non-success HTTP status (logged, then re-raised).
        """
        logger.info(f"[API] Downloading audio clip {clip_id}")
        try:
            response = requests.get(
                f"{self.base_url}/clips/{clip_id}/audio",
                headers=self.headers,
                timeout=self.timeout
            )
            response.raise_for_status()

            # Save to file
            with open(output_path, 'wb') as f:
                f.write(response.content)

            logger.info(f"[Success] Saved audio to {output_path}")
            return output_path
        except requests.RequestException as e:
            logger.error(f"[Error] Failed to download audio: {str(e)}")
            raise


# Initialize Resemble AI client
resemble_client = ResembleClient(RESEMBLE_API_KEY)

# Create FastAPI app
app = FastAPI(title="Resemble AI Voice Generation API")

# Enable CORS
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive; confirm this is intended before exposing the server publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Tool catalogue announced to every client in the "register" message.
REGISTERED_TOOLS = [
    {
        "name": "list_voices",
        "description": "List available voice models from Resemble AI.",
        "parameters": {
            "type": "object",
            "properties": {
                "random_string": {
                    "type": "string",
                    "description": "Dummy parameter for no-parameter tools"
                }
            },
            "required": ["random_string"]
        }
    },
    {
        "name": "generate_tts",
        "description": (
            "Generate voice audio from text.\n \n Args:\n"
            " text: Text to convert to speech\n"
            " voice_id: ID of the voice to use\n"
            " return_type: How to return the audio: 'file' or 'base64'\n"
            " output_filename: Filename for the output (without extension)\n "
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "text": {
                    "type": "string",
                    "title": "Text"
                },
                "voice_id": {
                    "type": "string",
                    "title": "Voice Id"
                },
                "return_type": {
                    "type": "string",
                    "title": "Return Type",
                    "default": "file"
                },
                "output_filename": {
                    "anyOf": [
                        {"type": "string"},
                        {"type": "null"}
                    ],
                    "title": "Output Filename",
                    "default": None
                }
            },
            "required": ["text", "voice_id"],
            "title": "generate_ttsArguments"
        }
    }
]


# SSE connection event manager
class ConnectionManager:
    """Manages SSE connections and message delivery.

    Each connected client owns one ``asyncio.Queue``; broadcasting a message
    means putting it on every queue.
    """

    def __init__(self):
        """Initialize the connection manager."""
        self.active_connections = []

    async def connect(self):
        """Connect a new client and return the connection queue.

        The returned queue already contains the ``register`` message listing
        the available tools.
        """
        queue = asyncio.Queue()
        self.active_connections.append(queue)
        logger.info(f"[SSE] Client connected. Active connections: {len(self.active_connections)}")

        # Send register event with available tools
        await queue.put({
            "type": "register",
            "tools": REGISTERED_TOOLS
        })

        return queue

    def disconnect(self, queue):
        """Disconnect a client. Safe to call more than once for a queue."""
        if queue in self.active_connections:
            self.active_connections.remove(queue)
            logger.info(f"[SSE] Client disconnected. Active connections: {len(self.active_connections)}")

    async def send_message(self, message):
        """Send a message to all connected clients."""
        # Iterate over a snapshot: clients may connect or disconnect while
        # this coroutine is suspended at an await.
        for queue in list(self.active_connections):
            await queue.put(message)


connection_manager = ConnectionManager()


# SSE Endpoint
@app.get("/sse")
async def sse_endpoint(request: Request):
    """Endpoint for SSE connections.

    Streams every message queued for this client until a ``None`` sentinel is
    received or the stream is closed.
    """
    queue = await connection_manager.connect()

    async def event_generator():
        try:
            while True:
                data = await queue.get()
                if data is None:
                    break
                # EventSourceResponse treats a yielded dict as the fields of a
                # single SSE event, so the protocol message has to travel as
                # the JSON-encoded "data" field rather than as the dict itself.
                yield {"data": json.dumps(data)}
        finally:
            # Runs on sentinel, cancellation and generator close alike, so the
            # queue is never left behind in active_connections.
            connection_manager.disconnect(queue)

    return EventSourceResponse(event_generator())


# Message endpoint
@app.post("/messages/")
async def message_endpoint(request: Request, background_tasks: BackgroundTasks):
    """Endpoint for receiving messages from clients.

    The message is acknowledged immediately; it is processed in a background
    task and any outcome is delivered over the SSE stream.
    """
    data = await request.json()
    logger.info(f"[Message] Received: {json.dumps(data)[:100]}...")

    # Process the message
    background_tasks.add_task(handle_message, data)

    return {"status": "ok"}


# Single worker: tool calls run off the event loop but still one at a time,
# so concurrent invocations cannot interleave writes to the same output file.
_TOOL_EXECUTOR = ThreadPoolExecutor(max_workers=1, thread_name_prefix="resemble-tool")


async def _run_blocking(func, **kwargs):
    """Run a blocking tool function on the tool executor and await its result."""
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(_TOOL_EXECUTOR, functools.partial(func, **kwargs))


async def handle_message(data):
    """Handle incoming messages and invoke appropriate tools.

    Only messages whose ``type`` is ``"invoke"`` are acted on. The outcome is
    broadcast as a ``tool_result``, or as a ``tool_error`` when the tool is
    unknown or raises. Exceptions never propagate to the caller.
    """
    invoking = False
    invoke_id = ""
    try:
        message_type = data.get("type", "")
        if message_type != "invoke":
            return

        name = data.get("name", "")
        parameters = data.get("parameters", {}) or {}
        invoke_id = data.get("invoke_id", "")
        invoking = True

        logger.info(f"[Invoke] Tool: {name}, ID: {invoke_id}")

        # Invoke the appropriate tool. The tools perform blocking HTTP and file
        # I/O, so they run in a worker thread to keep SSE streams responsive.
        if name == "list_voices":
            result = await _run_blocking(list_voices)
        elif name == "generate_tts":
            result = await _run_blocking(
                generate_tts,
                text=parameters.get("text", ""),
                voice_id=parameters.get("voice_id", ""),
                return_type=parameters.get("return_type", "file"),
                output_filename=parameters.get("output_filename")
            )
        else:
            logger.error(f"[Error] Unknown tool: {name}")
            await send_tool_error(invoke_id, f"Unknown tool: {name}")
            return

        await send_tool_result(invoke_id, result)
    except Exception as e:
        logger.exception(f"[Error] Failed to handle message: {str(e)}")
        if invoking:
            # Tell the client the invocation failed instead of leaving it
            # waiting for a result that will never arrive.
            await send_tool_error(invoke_id, str(e))


async def send_tool_result(invoke_id, result):
    """Send a tool execution result to connected clients."""
    message = {
        "type": "tool_result",
        "invoke_id": invoke_id,
        "result": result
    }
    await connection_manager.send_message(message)


async def send_tool_error(invoke_id, error_message):
    """Send a tool execution error to connected clients."""
    message = {
        "type": "tool_error",
        "invoke_id": invoke_id,
        "error": error_message
    }
    await connection_manager.send_message(message)


# Tool implementations
def list_voices():
    """List available voice models from Resemble AI.

    Returns:
        ``{"voices": [...]}`` where each entry carries ``uuid``, ``name``,
        ``is_public``, ``is_cloned`` and ``description``; fields missing from
        the API response fall back to ``""`` / ``False``.

    Raises:
        Exception: Whatever the API client raised (logged, then re-raised).
    """
    try:
        voices_data = resemble_client.get_voices()
        voices = [
            {
                "uuid": voice.get("uuid", ""),
                "name": voice.get("name", ""),
                "is_public": voice.get("is_public", False),
                "is_cloned": voice.get("is_cloned", False),
                "description": voice.get("description", "")
            }
            for voice in voices_data.get("voices", [])
        ]
        return {"voices": voices}
    except Exception as e:
        logger.error(f"[Error] Failed to list voices: {str(e)}")
        raise


def _safe_output_stem(candidate):
    """Reduce a requested output name to a bare filename with no directories."""
    # Normalise Windows separators so "..\\x" is handled like "../x".
    stem = os.path.basename(str(candidate).replace("\\", "/"))
    if stem in ("", ".", ".."):
        raise ValueError("Invalid output filename")
    return stem


def generate_tts(text, voice_id, return_type="file", output_filename=None):
    """Generate voice audio from text.

    Args:
        text: Text to convert to speech.
        voice_id: ID of the voice to use.
        return_type: ``"base64"`` to return the audio inline; any other value
            returns the path of the saved file.
        output_filename: Filename for the output (without extension). Only the
            final path component is used, so the file always lands directly in
            ``OUTPUT_DIR``. Defaults to ``generated_<first 8 chars of voice_id>``.

    Returns:
        A dict with ``success`` and ``message`` plus either ``base64_audio``
        or ``file_path``.

    Raises:
        ValueError: If ``text`` or ``voice_id`` is empty, the filename is
            unusable, or the API response carries no clip id.
        Exception: Whatever the API client raised (logged, then re-raised).
    """
    try:
        # Validate inputs
        if not text:
            raise ValueError("Text is required")
        if not voice_id:
            raise ValueError("Voice ID is required")

        # Resolve the output path before spending an API call. Both the
        # caller-supplied name and the voice-id prefix come from the client,
        # so neither may contribute directory components.
        stem = _safe_output_stem(output_filename or f"generated_{voice_id[:8]}")
        file_path = os.path.join(OUTPUT_DIR, f"{stem}.{AUDIO_FORMAT}")

        # Generate audio
        result = resemble_client.generate_audio(text, voice_id)
        # NOTE(review): assumes the clip-creation response exposes the clip
        # identifier under a top-level "id" key - confirm against the API.
        clip_id = result.get("id")
        if not clip_id:
            raise ValueError("Failed to generate audio clip")

        # Download audio
        resemble_client.download_audio(clip_id, file_path)

        # Return appropriate response based on return_type
        if return_type == "base64":
            with open(file_path, "rb") as audio_file:
                encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
            return {
                "success": True,
                "message": "Successfully generated audio",
                "base64_audio": encoded_audio
            }

        return {
            "success": True,
            "message": "Successfully generated audio",
            "file_path": file_path
        }
    except Exception as e:
        logger.error(f"[Error] Failed to generate TTS: {str(e)}")
        raise


def start_server(host="0.0.0.0", port=8083):
    """Start the HTTP server.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    logger.info(f"Starting Resemble AI HTTP Server on {host}:{port}")
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Resemble AI HTTP Server")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind the server to")
    parser.add_argument("--port", type=int, default=8083, help="Port to run the server on")

    args = parser.parse_args()

    start_server(host=args.host, port=args.port)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/obaid/resemble-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.