Resemble AI Voice Generation MCP Server

by obaid
resemble_mcp_server.py (14.4 kB)
""" Resemble AI Voice Generation MCP Server This server integrates with Resemble AI's voice generation API using the official MCP SDK and SSE transport. """ import os import json import base64 import logging import asyncio from typing import Dict, List, Optional, Any, Union from pathlib import Path import requests from dotenv import load_dotenv from pydantic import BaseModel # Try to import MCP SDK components with fallbacks for different versions try: # Try standard imports first (newer versions) from mcp.server.fastmcp import FastMCP from mcp.server.sse import SseServerTransport from mcp.server import Server HAS_MCP_SDK = True except ImportError: try: # Try alternate imports (older versions) from mcp import FastMCP from mcp.sse import SseServerTransport from mcp import Server HAS_MCP_SDK = True except ImportError: logging.error("Failed to import MCP SDK. Make sure it's installed.") logging.error("If you're using a virtual environment, activate it first.") logging.error("Install with: pip install mcp") HAS_MCP_SDK = False from starlette.applications import Starlette from starlette.requests import Request from starlette.routing import Mount, Route import uvicorn # Configure logging logging.basicConfig( level=logging.INFO, format='[%(levelname)s] %(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) logger = logging.getLogger("resemble-mcp") # Load environment variables load_dotenv() # Resemble AI API configuration RESEMBLE_API_KEY = os.getenv("RESEMBLE_API_KEY") RESEMBLE_API_BASE_URL = "https://app.resemble.ai/api/v2" OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./output") AUDIO_FORMAT = os.getenv("AUDIO_FORMAT", "mp3") # Create output directory if it doesn't exist Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True) # Validate API key if not RESEMBLE_API_KEY: logger.error("[Setup] Missing RESEMBLE_API_KEY in environment variables") raise ValueError("RESEMBLE_API_KEY environment variable is required") # Initialize FastMCP server for Resemble AI tools mcp = FastMCP("resemble-ai") class VoiceModel(BaseModel): id: str name: str gender: str = "Unknown" language: str = "Unknown" accent: str = "Unknown" description: str = "" class ListVoicesResponse(BaseModel): voices: List[VoiceModel] class GenerateTTSResponse(BaseModel): success: bool message: str audio_data: Optional[str] = None file_path: Optional[str] = None # Resemble AI API Client class ResembleClient: """Client for interacting with the Resemble AI API.""" def __init__(self, api_key: str, base_url: str): """Initialize the Resemble AI client.""" self.api_key = api_key self.base_url = base_url self.headers = { "Authorization": f"Token {self.api_key}", "Content-Type": "application/json", "Accept": "application/json" } logger.info("[Setup] Initializing Resemble AI client") async def get_voices(self) -> List[Dict[str, Any]]: """Get list of available voices.""" logger.info("[API] Fetching available voices") try: response = requests.get( f"{self.base_url}/voices", headers=self.headers ) response.raise_for_status() data = response.json() voices = data.get('items', []) if isinstance(data, dict) and 'items' in data else data logger.info(f"[API] Successfully retrieved {len(voices)} voices") return voices except Exception as e: logger.error(f"[Error] Failed to fetch voices: {str(e)}") raise async def generate_tts(self, text: str, voice_id: str) -> Dict[str, Any]: """Generate text-to-speech audio.""" logger.info(f"[API] Generating TTS for text (length: {len(text)})") try: # Get project UUID for the API call projects_response = requests.get( 
f"{self.base_url}/projects", headers=self.headers ) projects_response.raise_for_status() projects_data = projects_response.json() # Use the first project if not projects_data.get('items'): raise ValueError("No projects found in your Resemble account") project_uuid = projects_data['items'][0]['uuid'] logger.info(f"[API] Using project UUID: {project_uuid}") # Generate the clip using the sync endpoint payload = { "body": text, "voice_uuid": voice_id, "output_format": AUDIO_FORMAT } response = requests.post( f"{self.base_url}/projects/{project_uuid}/clips/sync", headers=self.headers, json=payload ) response.raise_for_status() result = response.json() logger.info(f"[API] Successfully generated TTS") return result except Exception as e: logger.error(f"[Error] Failed to generate TTS: {str(e)}") raise # Initialize Resemble client resemble_client = ResembleClient(RESEMBLE_API_KEY, RESEMBLE_API_BASE_URL) # Helper function to extract audio URL from any response structure def extract_audio_url(response: Dict[str, Any]) -> Optional[str]: """Extract audio URL from Resemble API response regardless of structure.""" # Try direct access first if "audio_src" in response: return response["audio_src"] # Try item.audio_src (common pattern) if "item" in response and isinstance(response["item"], dict): item = response["item"] if "audio_src" in item: return item["audio_src"] # Try other common URL fields for key in ["url", "audio_url", "clip_url"]: if key in response: return response[key] # Also check in item if "item" in response and isinstance(response["item"], dict): if key in response["item"]: return response["item"][key] # No audio URL found return None @mcp.tool() async def list_voices() -> ListVoicesResponse: """List available voice models from Resemble AI.""" logger.info("[Tool] Executing list_voices") try: voices = await resemble_client.get_voices() # Format voice data for better readability formatted_voices = [] for voice in voices: formatted_voice = VoiceModel( id=voice.get("uuid"), name=voice.get("name"), gender=voice.get("gender", "Unknown"), language=voice.get("language", "Unknown"), accent=voice.get("accent", "Unknown"), description=voice.get("description", "") ) formatted_voices.append(formatted_voice) return ListVoicesResponse(voices=formatted_voices) except Exception as e: logger.error(f"[Error] list_voices failed: {str(e)}") raise @mcp.tool() async def generate_tts( text: str, voice_id: str, return_type: str = "file", output_filename: Optional[str] = None ) -> GenerateTTSResponse: """Generate voice audio from text. Args: text: Text to convert to speech voice_id: ID of the voice to use return_type: How to return the audio: 'file' or 'base64' output_filename: Filename for the output (without extension) """ logger.info(f"[Tool] Executing generate_tts with {len(text)} characters of text") try: # Generate the TTS result = await resemble_client.generate_tts(text, voice_id) # Get the audio URL from the response audio_url = extract_audio_url(result) if not audio_url: return GenerateTTSResponse( success=False, message="No audio URL found in the response. Response structure may have changed." 
) # Download the audio file audio_response = requests.get(audio_url) audio_response.raise_for_status() audio_data = audio_response.content # Handle response based on return_type if return_type == "base64": # Convert to base64 encoded_audio = base64.b64encode(audio_data).decode("utf-8") return GenerateTTSResponse( success=True, message="Audio generated successfully", audio_data=encoded_audio ) else: # Save to file if not output_filename: output_filename = f"resemble_tts_{voice_id.split('-')[0]}" file_path = os.path.join(OUTPUT_DIR, f"{output_filename}.{AUDIO_FORMAT}") with open(file_path, "wb") as f: f.write(audio_data) return GenerateTTSResponse( success=True, message="Audio saved to file successfully", file_path=file_path ) except Exception as e: logger.error(f"[Error] generate_tts failed: {str(e)}") return GenerateTTSResponse( success=False, message=f"Error generating audio: {str(e)}" ) def create_starlette_app(mcp_server, debug=False): """ Create a Starlette application with the MCP endpoints. """ # Get the SSE transport from the server # Access _transport instead of transport based on error message sse = getattr(mcp_server, "_transport", None) if not sse: # Fallback for backward compatibility sse = getattr(mcp_server, "transport", None) if not sse: logger.error("No transport found on MCP server object. Cannot create Starlette app.") raise ValueError("No transport found on MCP server object") # Create an async function to handle SSE connection requests async def handle_sse(request: Request): # Check if the transport has the connect_sse method with the right signature if hasattr(sse, "connect_sse"): # For newer versions of the MCP SDK try: # Try the new API format if hasattr(request, "scope") and hasattr(request, "receive") and hasattr(request, "_send"): async with sse.connect_sse( request.scope, request.receive, request._send, # noqa: SLF001 ) as (read_stream, write_stream): await mcp_server.run( read_stream, write_stream, mcp_server.create_initialization_options(), ) else: # Try the simplified API return await sse.connect_sse(request) except (TypeError, AttributeError) as e: logger.error(f"Error connecting SSE: {str(e)}") raise else: logger.error("Transport does not have connect_sse method") raise ValueError("Transport does not support SSE connections") # Define routes for the Starlette app routes = [ Route("/sse", endpoint=handle_sse), ] # Add the messages route if transport supports it if hasattr(sse, "handle_post_message"): routes.append(Mount("/messages/", routes=[ Route("/", endpoint=sse.handle_post_message, methods=["POST"]) ])) return Starlette(debug=debug, routes=routes) def start_server(host="0.0.0.0", port=8083): """ Start the MCP server with SSE transport. Args: host: Host to bind the server to port: Port to run the server on """ logger.info(f"Starting Resemble AI MCP Server on {host}:{port}") try: # Check if MCP SDK is available if not HAS_MCP_SDK: logger.error("MCP SDK is not available. 
Please install it with: pip install mcp") return # Create the SSE transport sse_transport = SseServerTransport("/messages/") # Create the MCP server # First try the newer API which accepts transport in constructor try: logger.debug("Trying to create Server with transport in constructor") mcp_server = Server(mcp, transport=sse_transport) except TypeError: # Fall back to older API where transport is set after creation logger.debug("Falling back to setting transport after server creation") mcp_server = Server(mcp) # Try to set the transport attribute directly try: # Try setting as public attribute first mcp_server.transport = sse_transport except (AttributeError, TypeError): # Fall back to setting as private attribute try: mcp_server._transport = sse_transport except (AttributeError, TypeError): logger.error("Could not set transport on server object") raise ValueError("Could not set transport on server object") # Inspect the server object to understand its structure transport_attr = getattr(mcp_server, "_transport", None) or getattr(mcp_server, "transport", None) logger.debug(f"Server transport attribute: {transport_attr}") # Create the Starlette app app = create_starlette_app(mcp_server, debug=True) # Run the server uvicorn.run(app, host=host, port=port) except Exception as e: logger.error(f"Failed to start MCP server: {str(e)}") logger.exception(e) raise if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Resemble AI MCP Server") parser.add_argument("--port", type=int, default=8083, help="Port to run the server on") parser.add_argument("--host", default="0.0.0.0", help="Host to bind the server to") args = parser.parse_args() start_server(host=args.host, port=args.port)
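
For context, here is one way a client could exercise the two tools above once the server is running. This is a minimal sketch, not part of the repository: it assumes the MCP Python SDK's SSE client helpers (mcp.ClientSession and mcp.client.sse.sse_client), the default host and port from start_server, and a placeholder voice UUID that you would replace with an id returned by list_voices.

import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main() -> None:
    # Connect to the /sse endpoint exposed by the Starlette app above
    # (assumes the server is listening on the default 0.0.0.0:8083).
    async with sse_client("http://localhost:8083/sse") as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()

            # Discover the tools registered with @mcp.tool()
            tools = await session.list_tools()
            print([tool.name for tool in tools.tools])

            # Call generate_tts; "placeholder-voice-uuid" is illustrative only --
            # substitute an id returned by the list_voices tool.
            result = await session.call_tool(
                "generate_tts",
                arguments={
                    "text": "Hello from the Resemble MCP server.",
                    "voice_id": "placeholder-voice-uuid",
                    "return_type": "file",
                },
            )
            print(result.content)


if __name__ == "__main__":
    asyncio.run(main())

list_voices takes no arguments; with return_type="file" the generate_tts result reports the path under OUTPUT_DIR where the audio was written, while return_type="base64" returns the audio inline.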

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/obaid/resemble-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.