ElevenLabs MCP Server

generate_audio_simple

Convert plain text into audio using ElevenLabs text-to-speech with default voice settings. Specify text and optionally choose a voice ID.

Instructions

Generate audio from plain text using default voice settings

Input Schema

Name      Required  Description                              Default
text      Yes       Plain text to convert to audio
voice_id  No        Optional voice ID to use for generation
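
For reference, a minimal client-side invocation over stdio using the official mcp Python SDK might look like the sketch below; the launch command and package name are assumptions that depend on how the server is installed:

    import asyncio

    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    async def main():
        # Assumed launch command; adjust to your installation
        server_params = StdioServerParameters(command="uvx", args=["elevenlabs-mcp-server"])
        async with stdio_client(server_params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                # Only "text" is required; "voice_id" falls back to the server default
                result = await session.call_tool(
                    "generate_audio_simple",
                    {"text": "Hello from the ElevenLabs MCP server!"}
                )
                print(result)

    asyncio.run(main())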

Implementation Reference

  • Registration of the generate_audio_simple tool with its input schema and description in the list_tools handler (see the registration sketch after this list):

    types.Tool(
        name="generate_audio_simple",
        description="Generate audio from plain text using default voice settings",
        inputSchema={
            "type": "object",
            "properties": {
                "text": {
                    "type": "string",
                    "description": "Plain text to convert to audio"
                },
                "voice_id": {
                    "type": "string",
                    "description": "Optional voice ID to use for generation"
                }
            },
            "required": ["text"]
        }
    ),
  • Main handler logic for the generate_audio_simple tool: validates input, records a job in the database, calls the ElevenLabsAPI to generate audio, and returns a text status plus an embedded base64 audio resource (a client-side decoding sketch follows this list):

    if name == "generate_audio_simple":
        debug_info.append("Processing simple audio request")
        debug_info.append(f"Arguments: {arguments}")

        text = arguments.get("text", "").strip()
        voice_id = arguments.get("voice_id")
        if not text:
            raise ValueError("Text cannot be empty")

        script_parts = [{
            "text": text,
            "voice_id": voice_id
        }]
        debug_info.append(f"Created script parts: {script_parts}")

        # Create job record
        job_id = str(uuid.uuid4())
        job = AudioJob(
            id=job_id,
            status="pending",
            script_parts=script_parts,
            total_parts=1
        )
        await self.db.insert_job(job)
        debug_info.append(f"Created job record: {job_id}")

        try:
            job.status = "processing"
            await self.db.update_job(job)

            # # Send progress notification
            # if hasattr(self.server, 'session'):
            #     await self.server.session.send_notification({
            #         "method": "notifications/progress",
            #         "params": {
            #             "progressToken": str(job.id),
            #             "progress": {
            #                 "kind": "begin",
            #                 "message": "Starting audio generation"
            #             }
            #         }
            #     })

            output_file, api_debug_info, completed_parts = self.api.generate_full_audio(
                script_parts,
                self.output_dir
            )
            debug_info.extend(api_debug_info)

            job.status = "completed"
            job.output_file = str(output_file)
            job.completed_parts = completed_parts
            await self.db.update_job(job)

            # # Send completion notification
            # if hasattr(self.server, 'session'):
            #     await self.server.session.send_notification({
            #         "method": "notifications/progress",
            #         "params": {
            #             "progressToken": str(job.id),
            #             "progress": {
            #                 "kind": "end",
            #                 "message": "Audio generation completed"
            #             }
            #         }
            #     })

        except Exception as e:
            job.status = "failed"
            job.error = str(e)
            await self.db.update_job(job)
            raise

        # Read the generated audio file and encode it as base64
        with open(output_file, 'rb') as f:
            audio_bytes = f.read()
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')

        # Generate a unique URI for the resource
        filename = Path(output_file).name
        resource_uri = f"audio://{filename}"

        # Return both a status message and the audio file content
        return [
            types.TextContent(
                type="text",
                text="\n".join([
                    "Audio generation successful. Debug info:",
                    *debug_info
                ])
            ),
            types.EmbeddedResource(
                type="resource",
                resource=types.BlobResourceContents(
                    uri=resource_uri,
                    name=filename,
                    blob=audio_base64,
                    mimeType="audio/mpeg"
                )
            )
        ]
  • Key helper called by the tool handler: generates an audio segment for each script part via the ElevenLabs TTS API with context stitching, combines the segments using pydub, and saves the full MP3 file (a sketch of the unshown generate_audio_segment helper follows this list):

    def generate_full_audio(self, script_parts: List[Dict], output_dir: Path) -> tuple[str, List[str], int]:
        """Generate audio for multiple parts using request stitching.
        Returns tuple of (output_file_path, debug_info, completed_parts)"""
        # Create output directory if it doesn't exist
        output_dir.mkdir(exist_ok=True)

        # Final output file path with a unique file name
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_file = output_dir / f"full_audio_{timestamp}.mp3"

        debug_info = []
        debug_info.append("ElevenLabsAPI - Starting generate_full_audio")
        debug_info.append(f"Input script_parts: {script_parts}")

        # Initialize segments list and request ID tracking
        segments = []
        previous_request_ids = []
        failed_parts = []
        completed_parts = 0

        # Collect all texts up front so each part can see its neighbors
        debug_info.append("Processing all_texts")
        all_texts = []
        for part in script_parts:
            debug_info.append(f"Processing text from part: {part}")
            text = str(part.get('text', ''))
            debug_info.append(f"Extracted text: {text}")
            all_texts.append(text)
        debug_info.append(f"Final all_texts: {all_texts}")

        for i, part in enumerate(script_parts):
            debug_info.append(f"Processing part {i}: {part}")
            part_voice_id = part.get('voice_id')
            if not part_voice_id:
                part_voice_id = self.voice_id
            text = str(part.get('text', ''))
            if not text:
                continue

            debug_info.append(f"Using voice ID: {part_voice_id}")

            # Determine previous and next text for context
            is_first = i == 0
            is_last = i == len(script_parts) - 1
            previous_text = None if is_first else " ".join(all_texts[:i])
            next_text = None if is_last else " ".join(all_texts[i + 1:])

            try:
                logging.info(f"Processing part {i+1}/{len(script_parts)}")
                logging.info(f"Text length: {len(text)} chars")
                logging.debug(f"Context - Previous text: {'Yes' if previous_text else 'No'}, Next text: {'Yes' if next_text else 'No'}")

                # Generate audio with context conditioning
                audio_content, request_id = self.generate_audio_segment(
                    text=text,
                    voice_id=part_voice_id,
                    previous_text=previous_text,
                    next_text=next_text,
                    previous_request_ids=previous_request_ids,
                    debug_info=debug_info
                )
                debug_info.append(f"Successfully generated audio for part {i}")
                completed_parts += 1

                # Add request ID to history
                previous_request_ids.append(request_id)

                # Convert audio content to an AudioSegment and collect it
                audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_content))
                segments.append(audio_segment)

                # Wait for the model-specific interval between requests
                time.sleep(self.MODELS[self.model_id]["wait_time"])

            except Exception as e:
                debug_info.append(f"Error generating audio: {e}")
                failed_parts.append(part)
                continue

        # Combine all segments
        if segments:
            final_audio = segments[0]
            for segment in segments[1:]:
                final_audio = final_audio + segment

            # Export combined audio
            final_audio.export(output_file, format="mp3")

            if failed_parts:
                debug_info.append(f"Failed parts: {failed_parts}")
            else:
                logging.debug("All parts generated successfully")
                debug_info.append("All parts generated successfully")

            debug_info.append(f"Model: {self.model_id}")
            logging.debug(f"Model: {self.model_id}")
            return str(output_file), debug_info, completed_parts
        else:
            error_msg = "\n".join([
                "No audio segments were generated. Debug info:",
                *debug_info
            ])
            logging.error("No audio segments were generated. Debug info: %s", debug_info)
            raise Exception(error_msg)
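
In the MCP Python SDK, the Tool definition above is typically returned from a decorated list-tools handler. A minimal sketch of that surrounding registration, with the server name assumed rather than taken from this project's source:

    from mcp.server import Server
    import mcp.types as types

    server = Server("elevenlabs-mcp-server")  # assumed server name

    @server.list_tools()
    async def handle_list_tools() -> list[types.Tool]:
        # Advertise generate_audio_simple with the schema shown above
        return [
            types.Tool(
                name="generate_audio_simple",
                description="Generate audio from plain text using default voice settings",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "text": {"type": "string", "description": "Plain text to convert to audio"},
                        "voice_id": {"type": "string", "description": "Optional voice ID to use for generation"}
                    },
                    "required": ["text"]
                }
            )
        ]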
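
Because the handler returns the audio as a base64-encoded EmbeddedResource, a client must decode the blob to recover playable MP3 bytes. A minimal sketch, assuming only the two-item result shape shown above (the helper name is hypothetical):

    import base64

    def save_embedded_audio(result_contents, out_path="output.mp3"):
        """Hypothetical helper: find the embedded base64 MP3 in a tool
        result and write the decoded bytes to disk."""
        for item in result_contents:
            resource = getattr(item, "resource", None)
            if resource is not None and getattr(resource, "blob", None):
                with open(out_path, "wb") as f:
                    f.write(base64.b64decode(resource.blob))
                return out_path
        raise ValueError("No embedded audio resource found in tool result")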
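
The generate_audio_segment helper is not reproduced on this page. The sketch below approximates what it must do, based on the parameters passed above and on the public ElevenLabs request-stitching API (previous_text, next_text, and previous_request_ids in the request body; a request-id response header); treat the endpoint details, the self.api_key attribute, and the three-ID history cap as assumptions rather than this project's exact code:

    import requests

    def generate_audio_segment(self, text, voice_id, previous_text=None,
                               next_text=None, previous_request_ids=None,
                               debug_info=None):
        """Approximate sketch: request one MP3 segment, passing neighboring
        text and prior request IDs so ElevenLabs can stitch segments
        together with consistent prosody."""
        payload = {"text": text, "model_id": self.model_id}
        if previous_text:
            payload["previous_text"] = previous_text
        if next_text:
            payload["next_text"] = next_text
        if previous_request_ids:
            # Assumption: the API accepts only the most recent few IDs
            payload["previous_request_ids"] = previous_request_ids[-3:]

        response = requests.post(
            f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
            headers={"xi-api-key": self.api_key},  # self.api_key is assumed
            json=payload,
            timeout=120,
        )
        response.raise_for_status()

        # The response header's request ID seeds the next call's stitching context
        request_id = response.headers.get("request-id", "")
        if debug_info is not None:
            debug_info.append(f"Segment request id: {request_id}")
        return response.content, request_id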

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mamertofabian/elevenlabs-mcp-server'
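
The same lookup from Python, assuming the endpoint returns JSON:

    import requests

    # Fetch this server's directory entry from the Glama MCP API
    url = "https://glama.ai/api/mcp/v1/servers/mamertofabian/elevenlabs-mcp-server"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    print(response.json())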

If you have feedback or need assistance with the MCP directory API, please join our Discord server.