Skip to main content
Glama

Resemble AI Voice Generation MCP Server

by obaid
resemble_ai_sdk_server.py (8.39 kB)
""" Resemble AI Voice Generation MCP Server (SDK Version) This server implements an MCP interface for Resemble AI's voice generation API using the official Resemble SDK. It allows LLMs like Claude Desktop to generate voice content and list available voice models through natural language interactions. Created: 2025-03-06 Updated: 2025-03-06 - Fixed nested response structure handling """ import os import base64 import logging import json from typing import Dict, List, Optional, Any from pathlib import Path import requests from dotenv import load_dotenv from pydantic import BaseModel, Field # Configure logging logging.basicConfig( level=logging.INFO, format='[%(levelname)s] %(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) logger = logging.getLogger("resemble-mcp-sdk") # Load environment variables load_dotenv() # Resemble AI API configuration RESEMBLE_API_KEY = os.getenv("RESEMBLE_API_KEY") OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./output") AUDIO_FORMAT = os.getenv("AUDIO_FORMAT", "mp3") # Create output directory if it doesn't exist Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True) # Validate API key if not RESEMBLE_API_KEY: logger.error("[Setup] Missing RESEMBLE_API_KEY in environment variables") raise ValueError("RESEMBLE_API_KEY environment variable is required") # Import and initialize Resemble SDK try: from resemble import Resemble Resemble.api_key(RESEMBLE_API_KEY) logger.info("[Setup] Successfully initialized Resemble SDK") except ImportError as e: logger.error(f"[Error] Failed to import Resemble SDK: {str(e)}") logger.error("[Error] Make sure to install the SDK with: pip install resemble") raise # Define Pydantic models for request/response validation class GenerateTTSRequest(BaseModel): """Request model for generate_tts tool.""" text: str = Field(..., description="The text to convert to speech") voice_id: str = Field(..., description="The ID of the voice to use") return_type: str = Field("file", description="How to return the audio: 'file' or 
'base64'") output_filename: Optional[str] = Field(None, description="Filename for the output (without extension)") class ListVoicesResponse(BaseModel): """Response model for list_voices tool.""" voices: List[Dict[str, Any]] = Field(..., description="List of available voices") class GenerateTTSResponse(BaseModel): """Response model for generate_tts tool.""" success: bool = Field(..., description="Whether the operation was successful") message: str = Field(..., description="Status message") audio_data: Optional[str] = Field(None, description="Base64-encoded audio data (if return_type is 'base64')") file_path: Optional[str] = Field(None, description="Path to the saved audio file (if return_type is 'file')") # Helper function to extract audio URL from any response structure def extract_audio_url(response: Dict[str, Any]) -> Optional[str]: """Extract audio URL from Resemble API response regardless of structure.""" # Try direct access first if "audio_src" in response: return response["audio_src"] # Try item.audio_src (common pattern) if "item" in response and isinstance(response["item"], dict): item = response["item"] if "audio_src" in item: return item["audio_src"] # Try other common URL fields for key in ["url", "audio_url", "clip_url"]: if key in response: return response[key] # Also check in item if "item" in response and isinstance(response["item"], dict): if key in response["item"]: return response["item"][key] # No audio URL found return None # MCP Tools def list_voices() -> ListVoicesResponse: """List available voice models from Resemble AI. 
Returns: ListVoicesResponse with available voices """ logger.info("[Tool] Executing list_voices") try: # Get all voices (paginated, up to 10 items per page) response = Resemble.v2.voices.all(1, 10) voices = response.get('items', []) logger.info(f"[API] Successfully retrieved {len(voices)} voices") # Format voice data for better readability formatted_voices = [] for voice in voices: formatted_voice = { "id": voice.get("uuid"), "name": voice.get("name"), "gender": voice.get("gender", "Unknown"), "language": voice.get("language", "Unknown"), "accent": voice.get("accent", "Unknown"), "description": voice.get("description", "") } formatted_voices.append(formatted_voice) return ListVoicesResponse(voices=formatted_voices) except Exception as e: logger.error(f"[Error] list_voices failed: {str(e)}") raise def generate_tts(text: str, voice_id: str, return_type: str = "file", output_filename: Optional[str] = None) -> GenerateTTSResponse: """Generate voice audio from text. Args: text: Text to convert to speech voice_id: ID of the voice to use return_type: How to return the audio ('file' or 'base64') output_filename: Filename for the output (without extension) Returns: GenerateTTSResponse with audio data or file path """ logger.info(f"[Tool] Executing generate_tts with {len(text)} characters of text") try: # Get the first project UUID projects = Resemble.v2.projects.all(1, 10) if not projects.get('items'): return GenerateTTSResponse( success=False, message="No projects found in your Resemble account" ) project_uuid = projects['items'][0]['uuid'] logger.info(f"[API] Using project UUID: {project_uuid}") # Generate TTS synchronously result = Resemble.v2.clips.create_sync( project_uuid, voice_id, text, output_format=AUDIO_FORMAT ) # Debug: Log the full response structure logger.info(f"[Debug] TTS Response Keys: {list(result.keys())}") # Extract audio URL using helper function audio_url = extract_audio_url(result) if not audio_url: return GenerateTTSResponse( success=False, 
message="No audio URL found in the response. Response structure may have changed." ) logger.info(f"[Debug] Found audio URL: {audio_url}") # Download the audio file audio_response = requests.get(audio_url) audio_response.raise_for_status() audio_data = audio_response.content # Handle response based on return_type if return_type == "base64": # Convert to base64 encoded_audio = base64.b64encode(audio_data).decode("utf-8") return GenerateTTSResponse( success=True, message="Audio generated successfully", audio_data=encoded_audio ) else: # Save to file if not output_filename: output_filename = f"resemble_tts_{voice_id.split('-')[0]}" file_path = os.path.join(OUTPUT_DIR, f"{output_filename}.{AUDIO_FORMAT}") with open(file_path, "wb") as f: f.write(audio_data) return GenerateTTSResponse( success=True, message="Audio saved to file successfully", file_path=file_path ) except Exception as e: logger.error(f"[Error] generate_tts failed: {str(e)}") return GenerateTTSResponse( success=False, message=f"Error generating audio: {str(e)}" ) # Do not run the MCP server when imported, but register the necessary tools if __name__ == "__main__": # Import MCP Server late to avoid circular imports from mcp.server import Server # Create MCP Server server = Server() # Register tools server.tool(list_voices) server.tool(generate_tts) # Start the server logger.info("[Setup] Starting MCP Server for Resemble AI (SDK Version)") server.start()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/obaid/resemble-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.