Skip to main content
Glama

ElevenLabs MCP Server

"""Client for the ElevenLabs text-to-speech API.

Supports listing voices, generating a single audio segment with context
conditioning ("request stitching"), and rendering a multi-part script
into one combined MP3 file.
"""

import io
import logging
import os
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, TypedDict

import requests
from dotenv import load_dotenv
from pydub import AudioSegment
from tenacity import retry, stop_after_attempt, wait_exponential

load_dotenv()

# Configure logging from the environment, falling back to ERROR for
# unknown values.
_VALID_LOG_LEVELS = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
log_level = os.getenv("ELEVENLABS_LOG_LEVEL", "ERROR").upper()
if log_level not in _VALID_LOG_LEVELS:
    # BUG FIX: report the invalid value *before* overwriting it — the
    # original reassigned first, so the message always said "ERROR".
    print(
        f"Invalid log level {log_level}. Using ERROR. "
        f"Valid levels are: {', '.join(_VALID_LOG_LEVELS)}"
    )
    log_level = "ERROR"

logging.basicConfig(
    level=getattr(logging, log_level),
    format='%(asctime)s - %(levelname)s - %(message)s'
)


class VoiceData(TypedDict):
    """Shape of one voice record returned by the /voices endpoint."""

    voice_id: str
    name: str
    category: str
    labels: Dict[str, str]
    description: str
    preview_url: str
    high_quality_base_model_ids: List[str]


class ElevenLabsAPI:
    """Thin wrapper around the ElevenLabs v1 REST API."""

    # Known model ids and their capabilities. `wait_time` is the pause in
    # seconds inserted between consecutive segment requests.
    MODELS = {
        "eleven_multilingual_v2": {
            "description": "Our most lifelike model with rich emotional expression",
            "languages": "32",
            "supports_stitching": True,
            "supports_style": True,
            "wait_time": 0.1,
        },
        "eleven_flash_v2_5": {
            "description": "Ultra-fast model optimized for real-time use (~75ms†)",
            "languages": "32",
            "supports_stitching": False,
            "supports_style": False,
            "wait_time": 0.1,
        },
        "eleven_flash_v2": {
            "description": "Ultra-fast model optimized for real-time use (~75ms†)",
            "languages": "English",
            "supports_stitching": False,
            "supports_style": False,
            "wait_time": 0.1,
        },
    }

    def __init__(self):
        """Read configuration from the environment and validate it.

        Raises:
            ValueError: if ELEVENLABS_API_KEY is unset, or if the
                configured model id is not one of MODELS.
        """
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        if not self.api_key:
            logging.error("ELEVENLABS_API_KEY environment variable not set")
            raise ValueError("ELEVENLABS_API_KEY environment variable not set")

        # Fall back to a hard-coded default voice/model when not configured.
        self.voice_id = os.getenv("ELEVENLABS_VOICE_ID") or "iEw1wkYocsNy7I7pteSN"
        self.model_id = os.getenv("ELEVENLABS_MODEL_ID") or "eleven_multilingual_v2"
        logging.info(f"Initializing ElevenLabsAPI with model_id: {self.model_id}")

        if self.model_id not in self.MODELS:
            logging.error(
                f"Invalid model_id: {self.model_id}. Valid models: {list(self.MODELS.keys())}"
            )
            raise ValueError(
                f"Invalid model_id: {self.model_id}. Must be one of {list(self.MODELS.keys())}"
            )

        # Voice-settings knobs; see the ElevenLabs voice-settings docs.
        self.stability = float(os.getenv("ELEVENLABS_STABILITY", "0.5"))
        self.similarity_boost = float(os.getenv("ELEVENLABS_SIMILARITY_BOOST", "0.75"))
        self.style = float(os.getenv("ELEVENLABS_STYLE", "0.1"))
        self.base_url = "https://api.elevenlabs.io/v1"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def get_voices(self) -> List[VoiceData]:
        """Fetch available voices from ElevenLabs API.

        Returns:
            One VoiceData dict per available voice.

        Raises:
            Exception: if the API responds with a non-200 status.
        """
        headers = {
            "Accept": "application/json",
            "xi-api-key": self.api_key,
        }
        # timeout added so a stalled connection cannot hang forever;
        # tenacity retries transient failures.
        response = requests.get(
            f"{self.base_url}/voices", headers=headers, timeout=30
        )

        if response.status_code != 200:
            raise Exception(f"Failed to fetch voices: {response.text}")

        voices_data = response.json()["voices"]
        return [
            {
                "voice_id": voice["voice_id"],
                "name": voice["name"],
                "category": voice.get("category", ""),
                "labels": voice.get("labels", {}),
                "description": voice.get("description", ""),
                "preview_url": voice.get("preview_url", ""),
                "high_quality_base_model_ids": voice.get("high_quality_base_model_ids", []),
            }
            for voice in voices_data
        ]

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def generate_audio_segment(
        self,
        text: str,
        voice_id: str,
        output_file: Optional[str] = None,
        previous_text: Optional[str] = None,
        next_text: Optional[str] = None,
        previous_request_ids: Optional[List[str]] = None,
        debug_info: Optional[List[str]] = None,
    ) -> tuple[bytes, str]:
        """Generate audio using specified voice with context conditioning.

        Args:
            text: text to synthesize.
            voice_id: ElevenLabs voice id to use.
            output_file: optional path; when given, the MP3 bytes are also
                written to this file.
            previous_text / next_text: surrounding script text, passed to
                models that support request stitching.
            previous_request_ids: earlier request ids for stitching; only
                the last 3 are sent (API maximum).
            debug_info: optional list that collects error details.

        Returns:
            Tuple of (audio bytes, request id from the response headers).

        Raises:
            Exception: on a non-200 API response or a network failure.
        """
        headers = {
            "Accept": "application/json",
            "xi-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        data = {
            "text": text,
            "model_id": self.model_id,
            "voice_settings": {
                "stability": self.stability,
                "similarity_boost": self.similarity_boost,
            },
        }
        if self.MODELS[self.model_id]["supports_style"]:
            # BUG FIX: `style` belongs inside `voice_settings` per the
            # ElevenLabs API; the original set it at the payload top level,
            # where it had no effect.
            data["voice_settings"]["style"] = self.style

        # Context conditioning ("request stitching") for supporting models.
        if self.MODELS[self.model_id]["supports_stitching"]:
            if previous_text is not None:
                data["previous_text"] = previous_text
            if next_text is not None:
                data["next_text"] = next_text
            if previous_request_ids:
                # Maximum of 3 previous IDs
                data["previous_request_ids"] = previous_request_ids[-3:]

        logging.info(f"Generating audio for text length: {len(text)} chars using voice_id: {voice_id}")
        logging.debug(f"Generation parameters: stability={self.stability}, similarity_boost={self.similarity_boost}, model={self.model_id}")

        try:
            response = requests.post(
                f"{self.base_url}/text-to-speech/{voice_id}",
                json=data,
                headers=headers,
                timeout=120,  # generation can be slow; never hang forever
            )
            logging.debug(f"API response status: {response.status_code}")

            if response.status_code == 200:
                logging.info("Audio generation successful")
                if output_file:
                    with open(output_file, 'wb') as f:
                        f.write(response.content)
                return response.content, response.headers["request-id"]

            # BUG FIX: debug_info defaults to None; guard before appending so
            # the real API error is raised instead of an AttributeError.
            if debug_info is None:
                debug_info = []
            debug_info.append(response.text)
            error_message = f"Failed to generate audio: {response.text} \n\n{debug_info} \n\n{data}"
            logging.error(f"API error response: {response.status_code}")
            logging.error(f"API error details: {response.text}")
            logging.error(f"Request data: {data}")
            raise Exception(error_message)
        except requests.exceptions.RequestException as e:
            error_message = f"Network error during API call: {str(e)}"
            logging.error(error_message)
            raise Exception(error_message)

    def generate_full_audio(self, script_parts: List[Dict], output_dir: Path) -> tuple[str, List[str], int]:
        """Generate audio for multiple parts using request stitching.

        Args:
            script_parts: dicts with at least a 'text' key; an optional
                'voice_id' key overrides the configured default voice.
            output_dir: directory where the combined MP3 is written.

        Returns:
            Tuple of (output_file_path, debug_info, completed_parts).

        Raises:
            Exception: if no audio segment could be generated at all.
        """
        # parents=True also creates missing ancestor directories.
        output_dir.mkdir(parents=True, exist_ok=True)

        # Final output file path with unique file name
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_file = output_dir / f"full_audio_{timestamp}.mp3"

        debug_info = []
        debug_info.append("ElevenLabsAPI - Starting generate_full_audio")
        debug_info.append(f"Input script_parts: {script_parts}")

        segments = []
        previous_request_ids = []
        failed_parts = []
        completed_parts = 0

        # Collect every part's text up front so each request can be given
        # the full preceding/following script as stitching context.
        debug_info.append("Processing all_texts")
        all_texts = []
        for part in script_parts:
            debug_info.append(f"Processing text from part: {part}")
            text = str(part.get('text', ''))
            debug_info.append(f"Extracted text: {text}")
            all_texts.append(text)
        debug_info.append(f"Final all_texts: {all_texts}")

        for i, part in enumerate(script_parts):
            debug_info.append(f"Processing part {i}: {part}")
            part_voice_id = part.get('voice_id') or self.voice_id
            text = str(part.get('text', ''))
            if not text:
                continue
            debug_info.append(f"Using voice ID: {part_voice_id}")

            # Determine previous and next text for context
            is_first = i == 0
            is_last = i == len(script_parts) - 1
            previous_text = None if is_first else " ".join(all_texts[:i])
            next_text = None if is_last else " ".join(all_texts[i + 1:])

            try:
                logging.info(f"Processing part {i+1}/{len(script_parts)}")
                logging.info(f"Text length: {len(text)} chars")
                logging.debug(f"Context - Previous text: {'Yes' if previous_text else 'No'}, Next text: {'Yes' if next_text else 'No'}")

                audio_content, request_id = self.generate_audio_segment(
                    text=text,
                    voice_id=part_voice_id,
                    previous_text=previous_text,
                    next_text=next_text,
                    previous_request_ids=previous_request_ids,
                    debug_info=debug_info,
                )
                debug_info.append(f"Successfully generated audio for part {i}")
                completed_parts += 1

                # Add request ID to history for stitching of later parts.
                previous_request_ids.append(request_id)
                audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_content))
                segments.append(audio_segment)

                # Brief per-model pause between consecutive requests.
                time.sleep(self.MODELS[self.model_id]["wait_time"])
            except Exception as e:
                # Best-effort: record the failure and continue with the
                # remaining parts rather than aborting the whole script.
                debug_info.append(f"Error generating audio: {e}")
                failed_parts.append(part)
                continue

        if not segments:
            error_msg = "\n".join([
                "No audio segments were generated. Debug info:",
                *debug_info,
            ])
            logging.error("No audio segments were generated. Debug info: %s", debug_info)
            raise Exception(error_msg)

        # Concatenate all segments in order and export as one MP3.
        final_audio = segments[0]
        for segment in segments[1:]:
            final_audio = final_audio + segment
        final_audio.export(output_file, format="mp3")

        if failed_parts:
            debug_info.append(f"Failed parts: {failed_parts}")
        else:
            logging.debug("All parts generated successfully")
            debug_info.append("All parts generated successfully")

        debug_info.append(f"Model: {self.model_id}")
        logging.debug(f"Model: {self.model_id}")
        return str(output_file), debug_info, completed_parts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mamertofabian/elevenlabs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.