# We provide all the information about MCP servers via our MCP API:
#   curl -X GET 'https://glama.ai/api/mcp/v1/servers/mamertofabian/elevenlabs-mcp-server'
# If you have feedback or need assistance with the MCP directory API, please join our Discord server.
import logging
import os
import time
import requests
from pathlib import Path
from typing import Dict, List, Optional, TypedDict
from dotenv import load_dotenv
load_dotenv()

# Configure logging from the environment, falling back to ERROR.
log_level = os.getenv("ELEVENLABS_LOG_LEVEL", "ERROR").upper()
valid_levels = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
if log_level not in valid_levels:
    # Fix: warn *before* overwriting log_level, so the message reports the
    # actual invalid value instead of always printing "ERROR".
    print(f"Invalid log level {log_level}. Using ERROR. Valid levels are: {', '.join(valid_levels)}")
    log_level = "ERROR"
logging.basicConfig(
    level=getattr(logging, log_level),
    format='%(asctime)s - %(levelname)s - %(message)s'
)
class VoiceData(TypedDict):
    """Shape of one voice record as returned by ElevenLabsAPI.get_voices()."""

    voice_id: str  # unique ElevenLabs voice identifier
    name: str  # human-readable voice name
    category: str  # e.g. premade/cloned; "" when the API omits it
    labels: Dict[str, str]  # arbitrary metadata tags; {} when omitted
    description: str  # free-text description; "" when omitted
    preview_url: str  # URL of a sample clip; "" when omitted
    high_quality_base_model_ids: List[str]  # models with HQ support; [] when omitted
from pydub import AudioSegment
import io
from datetime import datetime
from tenacity import retry, stop_after_attempt, wait_exponential
class ElevenLabsAPI:
    """Client for the ElevenLabs text-to-speech REST API.

    Configuration (API key, default voice, model, and voice settings) is read
    from environment variables in __init__. Multi-part scripts are rendered by
    generate_full_audio(), which uses request stitching on models that support
    it and concatenates the resulting segments with pydub.
    """

    # Supported models and their capabilities. wait_time is the pause in
    # seconds inserted between consecutive segment requests.
    MODELS = {
        "eleven_multilingual_v2": {"description": "Our most lifelike model with rich emotional expression", "languages": "32",
                                   "supports_stitching": True, "supports_style": True, "wait_time": 0.1},
        "eleven_flash_v2_5": {"description": "Ultra-fast model optimized for real-time use (~75ms†)", "languages": "32",
                              "supports_stitching": False, "supports_style": False, "wait_time": 0.1},
        "eleven_flash_v2": {"description": "Ultra-fast model optimized for real-time use (~75ms†)", "languages": "English",
                            "supports_stitching": False, "supports_style": False, "wait_time": 0.1}
    }

    # Seconds to wait for any single HTTP call. Without a timeout a stalled
    # connection would hang the tenacity retry loop indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read configuration from the environment.

        Raises:
            ValueError: if ELEVENLABS_API_KEY is unset, or if
                ELEVENLABS_MODEL_ID names a model not listed in MODELS.
        """
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        if not self.api_key:
            logging.error("ELEVENLABS_API_KEY environment variable not set")
            raise ValueError("ELEVENLABS_API_KEY environment variable not set")
        # `or` (rather than a getenv default) so empty-string values also fall back.
        self.voice_id = os.getenv("ELEVENLABS_VOICE_ID") or "iEw1wkYocsNy7I7pteSN"
        self.model_id = os.getenv("ELEVENLABS_MODEL_ID") or "eleven_multilingual_v2"
        logging.info(f"Initializing ElevenLabsAPI with model_id: {self.model_id}")
        # Validate model_id against the supported set before any API call.
        if self.model_id not in self.MODELS:
            logging.error(f"Invalid model_id: {self.model_id}. Valid models: {list(self.MODELS.keys())}")
            raise ValueError(f"Invalid model_id: {self.model_id}. Must be one of {list(self.MODELS.keys())}")
        self.stability = float(os.getenv("ELEVENLABS_STABILITY", "0.5"))
        self.similarity_boost = float(os.getenv("ELEVENLABS_SIMILARITY_BOOST", "0.75"))
        self.style = float(os.getenv("ELEVENLABS_STYLE", "0.1"))
        self.base_url = "https://api.elevenlabs.io/v1"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def get_voices(self) -> List[VoiceData]:
        """Fetch available voices from ElevenLabs API.

        Returns:
            A list of VoiceData dicts; optional fields default to ""/{}/[].

        Raises:
            Exception: if the API responds with a non-200 status (the retry
                decorator re-attempts up to 3 times with exponential backoff).
        """
        headers = {
            "Accept": "application/json",
            "xi-api-key": self.api_key
        }
        response = requests.get(
            f"{self.base_url}/voices",
            headers=headers,
            timeout=self.REQUEST_TIMEOUT  # fix: never hang on a dead connection
        )
        if response.status_code == 200:
            voices_data = response.json()["voices"]
            return [
                {
                    "voice_id": voice["voice_id"],
                    "name": voice["name"],
                    "category": voice.get("category", ""),
                    "labels": voice.get("labels", {}),
                    "description": voice.get("description", ""),
                    "preview_url": voice.get("preview_url", ""),
                    "high_quality_base_model_ids": voice.get("high_quality_base_model_ids", [])
                }
                for voice in voices_data
            ]
        else:
            raise Exception(f"Failed to fetch voices: {response.text}")

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def generate_audio_segment(self, text: str, voice_id: str, output_file: Optional[str] = None,
                               previous_text: Optional[str] = None, next_text: Optional[str] = None,
                               previous_request_ids: Optional[List[str]] = None, debug_info: Optional[List[str]] = None) -> tuple[bytes, str]:
        """Generate audio using specified voice with context conditioning.

        Args:
            text: Text to synthesize.
            voice_id: ElevenLabs voice to use.
            output_file: Optional path; when given, the MP3 bytes are also
                written to this file.
            previous_text: Script text preceding this part (stitching models only).
            next_text: Script text following this part (stitching models only).
            previous_request_ids: Request ids of earlier segments; only the
                last 3 are sent, for stitching continuity.
            debug_info: Optional list that collects the raw API error text.

        Returns:
            Tuple of (MP3 bytes, request-id response header).

        Raises:
            Exception: on a non-200 API response or a network failure.
        """
        headers = {
            "Accept": "application/json",
            "xi-api-key": self.api_key,
            "Content-Type": "application/json"
        }
        data = {
            "text": text,
            "model_id": self.model_id,
            "voice_settings": {
                "stability": self.stability,
                "similarity_boost": self.similarity_boost
            }
        }
        if self.MODELS[self.model_id]["supports_style"]:
            # Fix: "style" is a voice_settings field in the ElevenLabs TTS API;
            # as a top-level request field it was silently ignored.
            data["voice_settings"]["style"] = self.style
        # Add context conditioning if model supports it
        if self.MODELS[self.model_id]["supports_stitching"]:
            if previous_text is not None:
                data["previous_text"] = previous_text
            if next_text is not None:
                data["next_text"] = next_text
            if previous_request_ids:
                data["previous_request_ids"] = previous_request_ids[-3:]  # Maximum of 3 previous IDs
        logging.info(f"Generating audio for text length: {len(text)} chars using voice_id: {voice_id}")
        logging.debug(f"Generation parameters: stability={self.stability}, similarity_boost={self.similarity_boost}, model={self.model_id}")
        try:
            response = requests.post(
                f"{self.base_url}/text-to-speech/{voice_id}",
                json=data,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT  # fix: never hang on a dead connection
            )
            logging.debug(f"API response status: {response.status_code}")
            if response.status_code == 200:
                logging.info("Audio generation successful")
                if output_file:
                    with open(output_file, 'wb') as f:
                        f.write(response.content)
                return response.content, response.headers["request-id"]
            else:
                # Fix: debug_info defaults to None; appending unconditionally
                # raised AttributeError and masked the real API error.
                if debug_info is not None:
                    debug_info.append(response.text)
                error_message = f"Failed to generate audio: {response.text} \n\n{debug_info} \n\n{data}"
                logging.error(f"API error response: {response.status_code}")
                logging.error(f"API error details: {response.text}")
                logging.error(f"Request data: {data}")
                raise Exception(error_message)
        except requests.exceptions.RequestException as e:
            error_message = f"Network error during API call: {str(e)}"
            logging.error(error_message)
            raise Exception(error_message)

    def generate_full_audio(self, script_parts: List[Dict], output_dir: Path) -> tuple[str, List[str], int]:
        """Generate audio for multiple parts using request stitching. Returns tuple of (output_file_path, debug_info, completed_parts)"""
        # Create output directory if it doesn't exist. parents=True so a
        # missing intermediate directory does not raise FileNotFoundError.
        output_dir.mkdir(parents=True, exist_ok=True)
        # Final output file path with unique file name
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_file = output_dir / f"full_audio_{timestamp}.mp3"
        debug_info = []
        debug_info.append("ElevenLabsAPI - Starting generate_full_audio")
        debug_info.append(f"Input script_parts: {script_parts}")
        # Initialize segments list and request IDs tracking
        segments = []
        previous_request_ids = []
        failed_parts = []
        completed_parts = 0
        debug_info.append("Processing all_texts")
        # Pre-extract every part's text so each request can send the full
        # preceding/following script as stitching context.
        all_texts = []
        for part in script_parts:
            debug_info.append(f"Processing text from part: {part}")
            text = str(part.get('text', ''))
            debug_info.append(f"Extracted text: {text}")
            all_texts.append(text)
        debug_info.append(f"Final all_texts: {all_texts}")
        for i, part in enumerate(script_parts):
            debug_info.append(f"Processing part {i}: {part}")
            # Per-part voice override; fall back to the configured default.
            part_voice_id = part.get('voice_id')
            if not part_voice_id:
                part_voice_id = self.voice_id
            text = str(part.get('text', ''))
            if not text:
                continue  # skip empty parts entirely
            debug_info.append(f"Using voice ID: {part_voice_id}")
            # Determine previous and next text for context
            is_first = i == 0
            is_last = i == len(script_parts) - 1
            previous_text = None if is_first else " ".join(all_texts[:i])
            next_text = None if is_last else " ".join(all_texts[i + 1:])
            try:
                logging.info(f"Processing part {i+1}/{len(script_parts)}")
                logging.info(f"Text length: {len(text)} chars")
                logging.debug(f"Context - Previous text: {'Yes' if previous_text else 'No'}, Next text: {'Yes' if next_text else 'No'}")
                # Generate audio with context conditioning
                audio_content, request_id = self.generate_audio_segment(
                    text=text,
                    voice_id=part_voice_id,
                    previous_text=previous_text,
                    next_text=next_text,
                    previous_request_ids=previous_request_ids,
                    debug_info=debug_info
                )
                debug_info.append(f"Successfully generated audio for part {i}")
                completed_parts += 1
                # Add request ID to history
                previous_request_ids.append(request_id)
                # Convert audio content to AudioSegment and add to segments
                audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_content))
                segments.append(audio_segment)
                # Pause between requests per the model's configured wait_time.
                time.sleep(self.MODELS[self.model_id]["wait_time"])
            except Exception as e:
                # Best-effort: record the failure and continue with remaining parts.
                debug_info.append(f"Error generating audio: {e}")
                failed_parts.append(part)
                continue
        # Combine all segments
        if segments:
            final_audio = segments[0]
            for segment in segments[1:]:
                final_audio = final_audio + segment
            # Export combined audio
            final_audio.export(output_file, format="mp3")
            if failed_parts:
                debug_info.append(f"Failed parts: {failed_parts}")
            else:
                logging.debug("All parts generated successfully")
                debug_info.append("All parts generated successfully")
            debug_info.append(f"Model: {self.model_id}")
            logging.debug(f"Model: {self.model_id}")
            return str(output_file), debug_info, completed_parts
        else:
            error_msg = "\n".join([
                "No audio segments were generated. Debug info:",
                *debug_info
            ])
            logging.error("No audio segments were generated. Debug info: %s", debug_info)
            raise Exception(error_msg)