tts_service.py (3.54 kB)
"""Text-to-speech service - orchestrates TTS operations.""" import time from ..constants import TTSVoice from ..domain import AudioProcessor from ..infrastructure import FileSystemRepository, OpenAIClientWrapper, SecurePathResolver from ..models import TTSResult from ..utils import split_text_for_tts class TTSService: """Service for text-to-speech operations.""" def __init__( self, file_repo: FileSystemRepository, openai_client: OpenAIClientWrapper, path_resolver: SecurePathResolver ): """Initialize the TTS service. Args: ---- file_repo: File system repository for I/O operations. openai_client: OpenAI API client wrapper. path_resolver: Secure path resolver for filename to path conversion. """ self.file_repo = file_repo self.openai_client = openai_client self.audio_processor = AudioProcessor() self.path_resolver = path_resolver async def create_speech( self, text_prompt: str, output_filename: str | None = None, model: str = "gpt-4o-mini-tts", voice: TTSVoice = "alloy", instructions: str | None = None, speed: float = 1.0, ) -> TTSResult: """Generate text-to-speech audio from text. Args: ---- text_prompt: Text to convert to speech. output_filename: Optional name for output file. model: TTS model to use. voice: Voice to use for TTS. instructions: Optional instructions for speech generation. speed: Speech speed (0.25 to 4.0). Returns: ------- TTSResult: Result with name of the generated audio file. """ # Determine output filename if output_filename is None: default_name = f"speech_{time.time_ns()}.mp3" else: default_name = output_filename # Resolve to full path output_file_path = self.path_resolver.resolve_output(output_filename, default_name) # Split text if it exceeds the API limit text_chunks = split_text_for_tts(text_prompt) if len(text_chunks) == 1: # Single chunk - process directly audio_bytes = await self.openai_client.text_to_speech( text=text_chunks[0], model=model, # type: ignore voice=voice, instructions=instructions, speed=speed, ) # Write audio file await self.file_repo.write_audio_file(output_file_path, audio_bytes) else: # Multiple chunks - process in parallel and concatenate print(f"Text exceeds TTS API limit, splitting into {len(text_chunks)} chunks") # Generate TTS for all chunks in parallel audio_chunks = await self.openai_client.generate_tts_chunks( text_chunks=text_chunks, model=model, # type: ignore voice=voice, instructions=instructions, speed=speed, ) # Concatenate audio chunks using domain logic combined_audio = await self.audio_processor.concatenate_audio_segments( audio_chunks=audio_chunks, format="mp3", ) # Write combined audio file await self.file_repo.write_audio_file(output_file_path, combined_audio) return TTSResult(output_file=output_file_path.name)
