# resemble_mcp_server.py (14.4 kB)
"""
Resemble AI Voice Generation MCP Server
This server integrates with Resemble AI's voice generation API
using the official MCP SDK and SSE transport.
"""
import os
import json
import base64
import logging
import asyncio
from typing import Dict, List, Optional, Any, Union
from pathlib import Path
import requests
from dotenv import load_dotenv
from pydantic import BaseModel
# Try to import MCP SDK components with fallbacks for different versions
try:
# Try standard imports first (newer versions)
from mcp.server.fastmcp import FastMCP
from mcp.server.sse import SseServerTransport
from mcp.server import Server
HAS_MCP_SDK = True
except ImportError:
try:
# Try alternate imports (older versions)
from mcp import FastMCP
from mcp.sse import SseServerTransport
from mcp import Server
HAS_MCP_SDK = True
except ImportError:
logging.error("Failed to import MCP SDK. Make sure it's installed.")
logging.error("If you're using a virtual environment, activate it first.")
logging.error("Install with: pip install mcp")
HAS_MCP_SDK = False
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.routing import Mount, Route
import uvicorn
# Logging: level-tagged, timestamped lines at INFO and above.
logging.basicConfig(
    format='[%(levelname)s] %(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger("resemble-mcp")

# Environment: load_dotenv() runs first so the lookups below see its values.
load_dotenv()

# Resemble AI settings. Only the API key is mandatory; the output directory
# and audio format fall back to "./output" and "mp3".
RESEMBLE_API_BASE_URL = "https://app.resemble.ai/api/v2"
RESEMBLE_API_KEY = os.environ.get("RESEMBLE_API_KEY")
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./output")
AUDIO_FORMAT = os.environ.get("AUDIO_FORMAT", "mp3")

# Generated audio is written here, so make sure the directory exists up front.
output_dir_path = Path(OUTPUT_DIR)
output_dir_path.mkdir(parents=True, exist_ok=True)

# Fail fast at import time: nothing below can work without an API key.
if not RESEMBLE_API_KEY:
    logger.error("[Setup] Missing RESEMBLE_API_KEY in environment variables")
    raise ValueError("RESEMBLE_API_KEY environment variable is required")

# FastMCP instance that the @mcp.tool() decorators in this module register on.
mcp = FastMCP("resemble-ai")
# One voice entry as returned by the list_voices tool.
# (Documented with comments rather than a docstring so the generated
# pydantic schema is left untouched.)
class VoiceModel(BaseModel):
    id: str  # filled from the API record's "uuid" field by list_voices
    name: str  # filled from the API record's "name" field
    gender: str = "Unknown"  # optional metadata; "Unknown" when not supplied
    language: str = "Unknown"  # optional metadata; "Unknown" when not supplied
    accent: str = "Unknown"  # optional metadata; "Unknown" when not supplied
    description: str = ""  # optional free text; empty when not supplied
# Return payload of the list_voices tool: the formatted voices wrapped in a
# single "voices" field.
class ListVoicesResponse(BaseModel):
    voices: List[VoiceModel]
# Return payload of the generate_tts tool. Failures are reported through
# success=False plus a message rather than by raising.
class GenerateTTSResponse(BaseModel):
    success: bool  # True when audio was produced and returned/saved
    message: str  # human-readable status or error text
    audio_data: Optional[str] = None  # base64 audio, set for return_type "base64"
    file_path: Optional[str] = None  # saved file location, set for file output
# Resemble AI API Client
class ResembleClient:
    """Small HTTP client for the Resemble AI REST API.

    Every request carries an ``Authorization: Token <api_key>`` header and is
    sent as JSON. The public methods are coroutines; the blocking ``requests``
    call behind each one runs in a worker thread so the event loop that serves
    MCP traffic is never stalled while waiting on the network.
    """

    def __init__(self, api_key: str, base_url: str, timeout: float = 60.0):
        """Initialize the Resemble AI client.

        Args:
            api_key: Resemble API token placed in the Authorization header.
            base_url: API root; endpoint paths are appended to it.
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Token {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        logger.info("[Setup] Initializing Resemble AI client")

    @staticmethod
    def _items(payload: Any) -> List[Dict[str, Any]]:
        """Return the record list from a response payload.

        Accepts either a bare list or a dict wrapping the list under
        ``"items"``. Any other shape (including ``"items": null``) yields an
        empty list instead of leaking a non-list to callers.
        """
        if isinstance(payload, dict):
            payload = payload.get("items")
        return payload if isinstance(payload, list) else []

    async def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send one API request off the event loop and return the decoded JSON.

        Raises whatever ``requests`` raises for transport errors, timeouts and
        non-2xx responses (via ``raise_for_status``).
        """
        response = await asyncio.to_thread(
            requests.request,
            method,
            f"{self.base_url}{path}",
            headers=self.headers,
            timeout=self.timeout,
            **kwargs,
        )
        response.raise_for_status()
        return response.json()

    async def get_voices(self) -> List[Dict[str, Any]]:
        """Get list of available voices.

        Returns:
            The voice records from ``GET /voices`` (empty if the response
            carries none).

        Raises:
            Exception: Any request or decoding failure is logged and re-raised.
        """
        logger.info("[API] Fetching available voices")
        try:
            voices = self._items(await self._request("GET", "/voices"))
        except Exception as e:
            logger.error("[Error] Failed to fetch voices: %s", e)
            raise
        logger.info("[API] Successfully retrieved %d voices", len(voices))
        return voices

    async def generate_tts(self, text: str, voice_id: str) -> Dict[str, Any]:
        """Generate text-to-speech audio.

        The clip is created under the first project returned by
        ``GET /projects``, using the synchronous clip endpoint.

        Args:
            text: Text to synthesize.
            voice_id: UUID of the voice to speak with.

        Returns:
            The decoded JSON response of the clip creation call.

        Raises:
            ValueError: If the account has no projects.
            Exception: Any request or decoding failure is logged and re-raised.
        """
        logger.info("[API] Generating TTS for text (length: %d)", len(text))
        try:
            # A project UUID is required for the clip endpoint; use the first one.
            projects = self._items(await self._request("GET", "/projects"))
            if not projects:
                raise ValueError("No projects found in your Resemble account")
            project_uuid = projects[0]['uuid']
            logger.info("[API] Using project UUID: %s", project_uuid)

            payload = {
                "body": text,
                "voice_uuid": voice_id,
                "output_format": AUDIO_FORMAT
            }
            result = await self._request(
                "POST",
                f"/projects/{project_uuid}/clips/sync",
                json=payload,
            )
            logger.info("[API] Successfully generated TTS")
            return result
        except Exception as e:
            logger.error("[Error] Failed to generate TTS: %s", e)
            raise
# Module-level client instance used by the list_voices and generate_tts tools;
# created once at import time from the API key and base URL defined earlier.
resemble_client = ResembleClient(RESEMBLE_API_KEY, RESEMBLE_API_BASE_URL)
# Helper function to extract audio URL from any response structure
def extract_audio_url(response: Dict[str, Any]) -> Optional[str]:
    """Extract audio URL from Resemble API response regardless of structure.

    Candidate keys are tried in priority order (``audio_src`` first, then
    ``url``, ``audio_url``, ``clip_url``). For each key the top level of the
    response is checked before the nested ``item`` dict. Keys that are present
    but hold ``None``, an empty string or a non-string value are skipped so
    they cannot mask a usable URL elsewhere in the payload.

    Args:
        response: Decoded JSON response from the clip endpoint.

    Returns:
        The first non-empty string URL found, or ``None`` when there is none
        (including when ``response`` is not a dict).
    """
    if not isinstance(response, dict):
        return None

    # Search the top level first, then the nested "item" record if it is a dict.
    containers = [response]
    nested = response.get("item")
    if isinstance(nested, dict):
        containers.append(nested)

    for key in ("audio_src", "url", "audio_url", "clip_url"):
        for container in containers:
            candidate = container.get(key)
            if isinstance(candidate, str) and candidate:
                return candidate

    # No audio URL found
    return None
@mcp.tool()
async def list_voices() -> ListVoicesResponse:
    """List available voice models from Resemble AI."""
    # NOTE: the docstring above is kept verbatim because the tool decorator
    # may surface it to clients as the tool description.
    logger.info("[Tool] Executing list_voices")
    try:
        voices = await resemble_client.get_voices()
        # Map raw API records onto VoiceModel. Optional fields use
        # `get(...) or default` rather than `get(..., default)`: a key that is
        # present with a null value would otherwise reach a `str` field as
        # None and fail validation for the whole listing.
        formatted_voices = [
            VoiceModel(
                id=voice.get("uuid"),
                name=voice.get("name"),
                gender=voice.get("gender") or "Unknown",
                language=voice.get("language") or "Unknown",
                accent=voice.get("accent") or "Unknown",
                description=voice.get("description") or "",
            )
            for voice in voices
        ]
        return ListVoicesResponse(voices=formatted_voices)
    except Exception as e:
        # Log for the server operator, then let the caller see the failure.
        logger.error(f"[Error] list_voices failed: {str(e)}")
        raise
@mcp.tool()
async def generate_tts(
    text: str,
    voice_id: str,
    return_type: str = "file",
    output_filename: Optional[str] = None
) -> GenerateTTSResponse:
    """Generate voice audio from text.
    Args:
    text: Text to convert to speech
    voice_id: ID of the voice to use
    return_type: How to return the audio: 'file' or 'base64'
    output_filename: Filename for the output (without extension)
    """
    # NOTE: the docstring above is kept verbatim because the tool decorator
    # may surface it to clients as the tool description.
    #
    # Failures never propagate: every error is logged and reported through a
    # GenerateTTSResponse with success=False.
    logger.info(f"[Tool] Executing generate_tts with {len(text)} characters of text")
    try:
        # Generate the TTS
        result = await resemble_client.generate_tts(text, voice_id)

        # Get the audio URL from the response
        audio_url = extract_audio_url(result)
        if not audio_url:
            return GenerateTTSResponse(
                success=False,
                message="No audio URL found in the response. Response structure may have changed."
            )

        # Download the audio in a worker thread with a bounded wait, so a slow
        # or hung download cannot block the event loop.
        audio_response = await asyncio.to_thread(requests.get, audio_url, timeout=60)
        audio_response.raise_for_status()
        audio_data = audio_response.content

        if return_type == "base64":
            encoded_audio = base64.b64encode(audio_data).decode("utf-8")
            return GenerateTTSResponse(
                success=True,
                message="Audio generated successfully",
                audio_data=encoded_audio
            )

        # Any other return_type saves to a file (the original behavior).
        requested_name = output_filename or f"resemble_tts_{voice_id.split('-')[0]}"
        # The name comes from the tool caller: keep only its final path
        # component (treating "\" as a separator too) so values such as
        # "../../x" or "/etc/x" cannot write outside OUTPUT_DIR.
        safe_stem = Path(requested_name.replace("\\", "/")).name or "resemble_tts"
        file_path = os.path.join(OUTPUT_DIR, f"{safe_stem}.{AUDIO_FORMAT}")
        with open(file_path, "wb") as f:
            f.write(audio_data)
        return GenerateTTSResponse(
            success=True,
            message="Audio saved to file successfully",
            file_path=file_path
        )
    except Exception as e:
        logger.error(f"[Error] generate_tts failed: {str(e)}")
        return GenerateTTSResponse(
            success=False,
            message=f"Error generating audio: {str(e)}"
        )
def create_starlette_app(mcp_server, debug=False):
    """
    Create a Starlette application with the MCP endpoints.

    Args:
        mcp_server: Server object carrying the SSE transport on its
            ``_transport`` (preferred) or ``transport`` attribute.
        debug: Passed through to ``Starlette(debug=...)``.

    Returns:
        A Starlette app exposing ``/sse`` and, when the transport supports it,
        the ``/messages/`` POST endpoint.

    Raises:
        ValueError: If no transport is attached to ``mcp_server``.
    """
    # Imported locally: the module-level starlette imports do not include it.
    from starlette.responses import Response

    # Prefer the private attribute, fall back to the public one.
    sse = getattr(mcp_server, "_transport", None) or getattr(mcp_server, "transport", None)
    if not sse:
        logger.error("No transport found on MCP server object. Cannot create Starlette app.")
        raise ValueError("No transport found on MCP server object")

    async def handle_sse(request: Request):
        """Serve one SSE connection and run the MCP session over it."""
        if not hasattr(sse, "connect_sse"):
            logger.error("Transport does not have connect_sse method")
            raise ValueError("Transport does not support SSE connections")
        try:
            if hasattr(request, "scope") and hasattr(request, "receive") and hasattr(request, "_send"):
                # ASGI-style API: hand the raw scope/receive/send to the transport.
                async with sse.connect_sse(
                    request.scope,
                    request.receive,
                    request._send,  # noqa: SLF001
                ) as (read_stream, write_stream):
                    await mcp_server.run(
                        read_stream,
                        write_stream,
                        mcp_server.create_initialization_options(),
                    )
                # Starlette calls whatever the endpoint returns as a response;
                # returning None here raises once the client disconnects.
                return Response()
            # Simplified API: the transport handles the request object itself.
            return await sse.connect_sse(request)
        except (TypeError, AttributeError) as e:
            logger.error(f"Error connecting SSE: {str(e)}")
            raise

    routes = [
        Route("/sse", endpoint=handle_sse),
    ]
    # Client -> server messages arrive as POSTs. handle_post_message has the
    # ASGI (scope, receive, send) signature, so it is mounted as an app; a
    # Route would call it with a single Request argument and fail.
    if hasattr(sse, "handle_post_message"):
        routes.append(Mount("/messages/", app=sse.handle_post_message))

    return Starlette(debug=debug, routes=routes)
def start_server(host="0.0.0.0", port=8083):
    """
    Start the MCP server with SSE transport.

    Args:
        host: Host to bind the server to
        port: Port to run the server on

    Raises:
        ValueError: If the SSE transport cannot be attached to the server.
        Exception: Any other startup failure is logged with its traceback and
            re-raised.
    """
    logger.info(f"Starting Resemble AI MCP Server on {host}:{port}")
    try:
        # Check if MCP SDK is available
        if not HAS_MCP_SDK:
            logger.error("MCP SDK is not available. Please install it with: pip install mcp")
            return

        # Create the SSE transport
        sse_transport = SseServerTransport("/messages/")

        # Prefer the low-level server that FastMCP already owns: the tools in
        # this module are registered on `mcp`, and a freshly constructed Server
        # would not know about them. The official SDK examples serve FastMCP
        # tools this way (`mcp._mcp_server`); verify against the installed
        # SDK version.
        mcp_server = getattr(mcp, "_mcp_server", None)
        if mcp_server is None:
            # No underlying server exposed: build one, trying the
            # transport-in-constructor API first.
            try:
                logger.debug("Trying to create Server with transport in constructor")
                mcp_server = Server(mcp, transport=sse_transport)
            except TypeError:
                logger.debug("Falling back to setting transport after server creation")
                mcp_server = Server(mcp)

        # create_starlette_app reads the transport from the server object, so
        # attach it unless the constructor already did.
        attached = getattr(mcp_server, "_transport", None) or getattr(mcp_server, "transport", None)
        if attached is None:
            try:
                # Try setting as public attribute first
                mcp_server.transport = sse_transport
            except (AttributeError, TypeError):
                # Fall back to setting as private attribute
                try:
                    mcp_server._transport = sse_transport
                except (AttributeError, TypeError):
                    logger.error("Could not set transport on server object")
                    raise ValueError("Could not set transport on server object")

        transport_attr = getattr(mcp_server, "_transport", None) or getattr(mcp_server, "transport", None)
        logger.debug(f"Server transport attribute: {transport_attr}")

        # Create the Starlette app
        app = create_starlette_app(mcp_server, debug=True)

        # Run the server (blocks until shutdown)
        uvicorn.run(app, host=host, port=port)
    except Exception as e:
        # One log record carrying both the message and the traceback.
        logger.exception(f"Failed to start MCP server: {str(e)}")
        raise
if __name__ == "__main__":
    # Script entry point: read --host/--port from the command line and serve.
    import argparse

    cli = argparse.ArgumentParser(description="Resemble AI MCP Server")
    cli.add_argument(
        "--port",
        type=int,
        default=8083,
        help="Port to run the server on",
    )
    cli.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host to bind the server to",
    )
    options = cli.parse_args()
    start_server(host=options.host, port=options.port)