# resemble_http_server.py (14.4 kB)
"""
Resemble AI Voice Generation HTTP Server
This is a simplified implementation that serves as a fallback
when the MCP SDK is not available or has compatibility issues.
It implements the MCP protocol but without using the MCP SDK directly.
"""
import os
import json
import base64
import logging
import asyncio
from typing import Dict, List, Optional, Any, Union
from pathlib import Path
import requests
from dotenv import load_dotenv
from pydantic import BaseModel
from fastapi import FastAPI, Request, Response, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from sse_starlette.sse import EventSourceResponse
import uvicorn
# Configure logging: one shared format for every record emitted by this process.
logging.basicConfig(
    format='[%(levelname)s] %(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger("resemble-http")

# Load environment variables (via python-dotenv) before any setting is read.
load_dotenv()

# Resemble AI API configuration; OUTPUT_DIR and AUDIO_FORMAT fall back to defaults.
RESEMBLE_API_KEY = os.environ.get("RESEMBLE_API_KEY")
RESEMBLE_API_BASE_URL = "https://app.resemble.ai/api/v2"
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./output")
AUDIO_FORMAT = os.environ.get("AUDIO_FORMAT", "mp3")

# Create the directory that receives generated audio (and any missing parents).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# The server cannot talk to the API without credentials, so refuse to start.
if not RESEMBLE_API_KEY:
    logger.error("[Setup] Missing RESEMBLE_API_KEY in environment variables")
    raise ValueError("RESEMBLE_API_KEY environment variable is required")
# Models
class VoiceModel(BaseModel):
    """Schema describing a single voice entry."""

    # Identifier and display name of the voice.
    uuid: str
    name: str

    # Flags describing the voice; both are required.
    is_public: bool
    is_cloned: bool

    # Free-form description; optional and absent by default.
    description: Optional[str] = None
class ListVoicesResponse(BaseModel):
    """Schema for the result of the list_voices tool: a list of voice entries."""

    # One VoiceModel per available voice.
    voices: List[VoiceModel]
class GenerateTTSRequest(BaseModel):
    """Schema for the arguments accepted by the generate_tts tool."""

    # Text to synthesize and the voice to synthesize it with (both required).
    text: str
    voice_id: str

    # How the audio is handed back: 'file' or 'base64'.
    return_type: str = "file"

    # Optional name for the output file; no name is set by default.
    output_filename: Optional[str] = None
class GenerateTTSResponse(BaseModel):
    """Schema for the result of the generate_tts tool."""

    # Outcome flag plus a human-readable status message.
    success: bool
    message: str

    # Location of the saved audio file, when one is reported.
    file_path: Optional[str] = None

    # Base64-encoded audio content, when one is reported.
    base64_audio: Optional[str] = None
# Client for Resemble AI API
class ResembleClient:
    """Synchronous client for the Resemble AI HTTP API.

    Every request carries the token-based ``Authorization`` header built in
    ``__init__`` and is bounded by ``timeout`` so that an unresponsive API
    cannot block the caller forever (``requests`` applies no timeout unless
    one is passed explicitly).
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = RESEMBLE_API_BASE_URL,
        timeout: float = 60.0,
    ):
        """Initialize the client.

        Args:
            api_key: API token placed in the ``Authorization`` header.
            base_url: Root URL that endpoint paths are appended to.
            timeout: Seconds passed as ``timeout`` to each HTTP request.
        """
        self.api_key = api_key
        self.base_url = base_url
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Token token={api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        logger.info("[Setup] Initializing Resemble AI client")

    def get_voices(self) -> Dict[str, Any]:
        """Fetch the available voices.

        Returns:
            The decoded JSON body of ``GET {base_url}/voices``.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-success HTTP status; the error is logged and re-raised.
        """
        logger.info("[API] Fetching available voices from Resemble AI")
        try:
            response = requests.get(
                f"{self.base_url}/voices",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            logger.error(f"[Error] Failed to fetch voices: {str(e)}")
            raise

    def generate_audio(self, text: str, voice_uuid: str) -> Dict[str, Any]:
        """Request synthesis of ``text`` with the given voice.

        Args:
            text: Text sent as the clip body.
            voice_uuid: Identifier of the voice; its first 8 characters are
                also used in the clip title.

        Returns:
            The decoded JSON body of ``POST {base_url}/clips``.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-success HTTP status; the error is logged and re-raised.
        """
        logger.info(f"[API] Generating audio for voice {voice_uuid}")
        try:
            payload = {
                "title": f"generated_{voice_uuid[:8]}",
                "voice_uuid": voice_uuid,
                "body": text,
                "output_format": AUDIO_FORMAT,
            }
            response = requests.post(
                f"{self.base_url}/clips",
                headers=self.headers,
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            logger.error(f"[Error] Failed to generate audio: {str(e)}")
            raise

    def download_audio(self, clip_id: str, output_path: str) -> str:
        """Download a generated clip and write it to ``output_path``.

        Args:
            clip_id: Identifier of the clip to download.
            output_path: Destination file; it is opened in binary write mode,
                so an existing file is overwritten.

        Returns:
            ``output_path``, once the file has been written.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-success HTTP status; the error is logged and re-raised.
        """
        logger.info(f"[API] Downloading audio clip {clip_id}")
        try:
            response = requests.get(
                f"{self.base_url}/clips/{clip_id}/audio",
                headers=self.headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
            # Save to file
            with open(output_path, 'wb') as f:
                f.write(response.content)
            logger.info(f"[Success] Saved audio to {output_path}")
            return output_path
        except requests.RequestException as e:
            logger.error(f"[Error] Failed to download audio: {str(e)}")
            raise
# Shared Resemble AI client used by the tool implementations.
resemble_client = ResembleClient(api_key=RESEMBLE_API_KEY)

# FastAPI application that hosts the SSE and message endpoints.
app = FastAPI(title="Resemble AI Voice Generation API")

# Enable CORS for every origin, method and header.
# NOTE(review): wildcard origins are combined with allow_credentials=True;
# confirm this is intended before exposing the server beyond trusted networks.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# SSE connection event manager
class ConnectionManager:
    """Track one asyncio queue per SSE client and fan messages out to them."""

    def __init__(self):
        """Start with no connected clients."""
        self.active_connections = []

    @staticmethod
    def _build_register_event():
        """Return a freshly built "register" event advertising both tools."""
        list_voices_tool = {
            "name": "list_voices",
            "description": "List available voice models from Resemble AI.",
            "parameters": {
                "type": "object",
                "properties": {
                    "random_string": {
                        "type": "string",
                        "description": "Dummy parameter for no-parameter tools"
                    }
                },
                "required": ["random_string"]
            }
        }
        generate_tts_tool = {
            "name": "generate_tts",
            "description": "Generate voice audio from text.\n \n Args:\n text: Text to convert to speech\n voice_id: ID of the voice to use\n return_type: How to return the audio: 'file' or 'base64'\n output_filename: Filename for the output (without extension)\n ",
            "parameters": {
                "type": "object",
                "properties": {
                    "text": {"type": "string", "title": "Text"},
                    "voice_id": {"type": "string", "title": "Voice Id"},
                    "return_type": {
                        "type": "string",
                        "title": "Return Type",
                        "default": "file"
                    },
                    "output_filename": {
                        "anyOf": [{"type": "string"}, {"type": "null"}],
                        "title": "Output Filename",
                        "default": None
                    }
                },
                "required": ["text", "voice_id"],
                "title": "generate_ttsArguments"
            }
        }
        return {"type": "register", "tools": [list_voices_tool, generate_tts_tool]}

    async def connect(self):
        """Register a new client and return its queue.

        The queue already holds the "register" event when it is returned, so
        that event is the first item the client's stream reads.
        """
        client_queue = asyncio.Queue()
        self.active_connections.append(client_queue)
        logger.info(f"[SSE] Client connected. Active connections: {len(self.active_connections)}")
        await client_queue.put(self._build_register_event())
        return client_queue

    def disconnect(self, queue):
        """Forget a client's queue; unknown queues are ignored."""
        if queue in self.active_connections:
            self.active_connections.remove(queue)
            logger.info(f"[SSE] Client disconnected. Active connections: {len(self.active_connections)}")

    async def send_message(self, message):
        """Put ``message`` on the queue of every connected client."""
        for client_queue in self.active_connections:
            await client_queue.put(message)


# Single manager instance shared by the endpoints and result senders.
connection_manager = ConnectionManager()
# SSE Endpoint
@app.get("/sse")
async def sse_endpoint(request: Request):
    """Open a server-sent-events stream for one client.

    A queue is registered with the connection manager and every item placed
    on it is forwarded to the client as one SSE event whose ``data`` field is
    the JSON-encoded item. A ``None`` item ends the stream.

    Args:
        request: The incoming request (currently unused).

    Returns:
        An ``EventSourceResponse`` streaming the queued messages.
    """
    queue = await connection_manager.connect()

    async def event_generator():
        try:
            while True:
                payload = await queue.get()
                if payload is None:
                    break
                # A yielded dict is interpreted as SSE fields, so the protocol
                # message (keys such as "type"/"tools") must travel inside
                # "data" rather than being yielded as-is.
                yield {"data": json.dumps(payload)}
        finally:
            # Unregister on every exit path (sentinel, cancellation, error)
            # so closed streams do not keep receiving broadcasts.
            connection_manager.disconnect(queue)

    return EventSourceResponse(event_generator())
# Message endpoint
@app.post("/messages/")
async def message_endpoint(request: Request, background_tasks: BackgroundTasks):
    """Accept a client message and schedule it for background processing.

    Args:
        request: Incoming request whose body must be a JSON object.
        background_tasks: Task collection that runs ``handle_message`` after
            the response has been sent.

    Returns:
        ``{"status": "ok"}`` once the message is queued, or an HTTP 400
        response with an error description when the body is not a JSON object.
    """
    try:
        data = await request.json()
    except ValueError:
        # JSON decoding and text decoding errors are both ValueError subclasses.
        logger.error("[Message] Rejected request: body is not valid JSON")
        return Response(
            content=json.dumps({"status": "error", "message": "Request body must be valid JSON"}),
            status_code=400,
            media_type="application/json",
        )

    if not isinstance(data, dict):
        # The handler reads fields by key, so only JSON objects are usable.
        logger.error("[Message] Rejected request: body is not a JSON object")
        return Response(
            content=json.dumps({"status": "error", "message": "Request body must be a JSON object"}),
            status_code=400,
            media_type="application/json",
        )

    logger.info(f"[Message] Received: {json.dumps(data)[:100]}...")
    # Process the message after the response is returned.
    background_tasks.add_task(handle_message, data)
    return {"status": "ok"}
async def handle_message(data):
    """Dispatch an incoming message to the matching tool and report the outcome.

    Only messages whose ``type`` is ``"invoke"`` are acted on. The tool
    functions perform blocking HTTP and file I/O, so they are run in the
    event loop's default executor to keep SSE streams responsive. The outcome
    is broadcast as a ``tool_result`` or, on failure, a ``tool_error``.

    Args:
        data: Decoded message with ``type``, ``name``, ``parameters`` and
            ``invoke_id`` entries.

    This coroutine does not raise; failures are logged.
    """
    invoke_id = None  # stays None until an invoke message has been recognised
    try:
        if data.get("type", "") != "invoke":
            return

        name = data.get("name", "")
        # An explicit JSON null for "parameters" is treated like an empty object.
        parameters = data.get("parameters") or {}
        invoke_id = data.get("invoke_id", "")
        logger.info(f"[Invoke] Tool: {name}, ID: {invoke_id}")

        loop = asyncio.get_running_loop()
        if name == "list_voices":
            result = await loop.run_in_executor(None, list_voices)
        elif name == "generate_tts":
            def run_generate_tts():
                return generate_tts(
                    text=parameters.get("text", ""),
                    voice_id=parameters.get("voice_id", ""),
                    return_type=parameters.get("return_type", "file"),
                    output_filename=parameters.get("output_filename"),
                )

            result = await loop.run_in_executor(None, run_generate_tts)
        else:
            logger.error(f"[Error] Unknown tool: {name}")
            await send_tool_error(invoke_id, f"Unknown tool: {name}")
            return

        await send_tool_result(invoke_id, result)
    except Exception as e:
        logger.error(f"[Error] Failed to handle message: {str(e)}")
        logger.exception(e)
        if invoke_id is not None:
            # Tell the caller the invocation failed instead of leaving it
            # waiting for a result that will never arrive.
            try:
                await send_tool_error(invoke_id, str(e))
            except Exception as send_error:
                logger.exception(send_error)
async def send_tool_result(invoke_id, result):
    """Broadcast a successful tool outcome.

    Args:
        invoke_id: Identifier of the invocation being answered.
        result: Value produced by the tool.
    """
    await connection_manager.send_message(
        {"type": "tool_result", "invoke_id": invoke_id, "result": result}
    )
async def send_tool_error(invoke_id, error_message):
    """Broadcast a failed tool outcome.

    Args:
        invoke_id: Identifier of the invocation being answered.
        error_message: Description of what went wrong.
    """
    await connection_manager.send_message(
        {"type": "tool_error", "invoke_id": invoke_id, "error": error_message}
    )
# Tool implementations
def list_voices():
    """Return the available voices as ``{"voices": [...]}``.

    Each entry carries ``uuid``, ``name``, ``is_public``, ``is_cloned`` and
    ``description``; keys missing from the API data fall back to an empty
    string or ``False``.

    Raises:
        Exception: Any error from the client is logged and re-raised.
    """
    try:
        payload = resemble_client.get_voices()
        # NOTE(review): assumes the API response lists voices under a
        # top-level "voices" key; confirm against the actual response schema.
        return {
            "voices": [
                {
                    "uuid": entry.get("uuid", ""),
                    "name": entry.get("name", ""),
                    "is_public": entry.get("is_public", False),
                    "is_cloned": entry.get("is_cloned", False),
                    "description": entry.get("description", ""),
                }
                for entry in payload.get("voices", [])
            ]
        }
    except Exception as e:
        logger.error(f"[Error] Failed to list voices: {str(e)}")
        raise
def generate_tts(text, voice_id, return_type="file", output_filename=None):
    """Generate voice audio from text and return it as a file path or base64.

    All arguments are validated before any API call is made. The output name
    is joined to ``OUTPUT_DIR``, and because it comes from the client it must
    be a plain file name: path separators, NUL bytes and the names ``.`` /
    ``..`` are rejected so the audio cannot be written outside that directory.

    Args:
        text: Text to convert to speech; must be non-empty.
        voice_id: ID of the voice to use; must be non-empty.
        return_type: ``"base64"`` returns the encoded audio; any other value
            returns the path of the saved file.
        output_filename: File name without extension. Defaults to
            ``generated_`` followed by the first 8 characters of ``voice_id``.

    Returns:
        A dict with ``success``, ``message`` and either ``base64_audio`` or
        ``file_path``.

    Raises:
        ValueError: If an argument is invalid (raised without logging here),
            or if the API response contains no clip id.
        Exception: Errors during generation or download are logged and
            re-raised.
    """
    # Validate inputs up front so bad requests fail before the API is called.
    if not text:
        raise ValueError("Text is required")
    if not voice_id:
        raise ValueError("Voice ID is required")

    stem = str(output_filename) if output_filename else f"generated_{voice_id[:8]}"
    if stem in (".", "..") or any(ch in stem for ch in ("/", "\\", "\x00")):
        raise ValueError("Output filename must be a plain file name without path components")

    file_path = os.path.join(OUTPUT_DIR, f"{stem}.{AUDIO_FORMAT}")

    try:
        # Generate audio
        result = resemble_client.generate_audio(text, voice_id)
        clip_id = result.get("id")
        if not clip_id:
            raise ValueError("Failed to generate audio clip")

        # Download audio
        resemble_client.download_audio(clip_id, file_path)

        # Return appropriate response based on return_type
        if return_type == "base64":
            with open(file_path, "rb") as audio_file:
                encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
            return {
                "success": True,
                "message": "Successfully generated audio",
                "base64_audio": encoded_audio,
            }
        return {
            "success": True,
            "message": "Successfully generated audio",
            "file_path": file_path,
        }
    except Exception as e:
        logger.error(f"[Error] Failed to generate TTS: {str(e)}")
        raise
def start_server(host="0.0.0.0", port=8083):
    """Run the FastAPI app under uvicorn.

    Args:
        host: Interface address to bind to.
        port: TCP port to listen on.
    """
    bind_options = {"host": host, "port": port}
    logger.info(f"Starting Resemble AI HTTP Server on {host}:{port}")
    uvicorn.run(app, **bind_options)
if __name__ == "__main__":
    # Script entry point: read the bind address from the command line.
    import argparse

    cli = argparse.ArgumentParser(description="Resemble AI HTTP Server")
    cli.add_argument("--host", default="0.0.0.0", help="Host to bind the server to")
    cli.add_argument("--port", type=int, default=8083, help="Port to run the server on")
    options = cli.parse_args()
    start_server(host=options.host, port=options.port)