tts_service.py (3.54 kB)
"""Text-to-speech service - orchestrates TTS operations.""" import time from ..constants import TTSVoice from ..domain import AudioProcessor from ..infrastructure import FileSystemRepository, OpenAIClientWrapper, SecurePathResolver from ..models import TTSResult from ..utils import split_text_for_tts class TTSService: """Service for text-to-speech operations.""" def __init__( self, file_repo: FileSystemRepository, openai_client: OpenAIClientWrapper, path_resolver: SecurePathResolver ): """Initialize the TTS service. Args: ---- file_repo: File system repository for I/O operations. openai_client: OpenAI API client wrapper. path_resolver: Secure path resolver for filename to path conversion. """ self.file_repo = file_repo self.openai_client = openai_client self.audio_processor = AudioProcessor() self.path_resolver = path_resolver async def create_speech( self, text_prompt: str, output_filename: str | None = None, model: str = "gpt-4o-mini-tts", voice: TTSVoice = "alloy", instructions: str | None = None, speed: float = 1.0, ) -> TTSResult: """Generate text-to-speech audio from text. Args: ---- text_prompt: Text to convert to speech. output_filename: Optional name for output file. model: TTS model to use. voice: Voice to use for TTS. instructions: Optional instructions for speech generation. speed: Speech speed (0.25 to 4.0). Returns: ------- TTSResult: Result with name of the generated audio file. """ # Determine output filename if output_filename is None: default_name = f"speech_{time.time_ns()}.mp3" else: default_name = output_filename # Resolve to full path output_file_path = self.path_resolver.resolve_output(output_filename, default_name) # Split text if it exceeds the API limit text_chunks = split_text_for_tts(text_prompt) if len(text_chunks) == 1: # Single chunk - process directly audio_bytes = await self.openai_client.text_to_speech( text=text_chunks[0], model=model, # type: ignore voice=voice, instructions=instructions, speed=speed, ) # Write audio file await self.file_repo.write_audio_file(output_file_path, audio_bytes) else: # Multiple chunks - process in parallel and concatenate print(f"Text exceeds TTS API limit, splitting into {len(text_chunks)} chunks") # Generate TTS for all chunks in parallel audio_chunks = await self.openai_client.generate_tts_chunks( text_chunks=text_chunks, model=model, # type: ignore voice=voice, instructions=instructions, speed=speed, ) # Concatenate audio chunks using domain logic combined_audio = await self.audio_processor.concatenate_audio_segments( audio_chunks=audio_chunks, format="mp3", ) # Write combined audio file await self.file_repo.write_audio_file(output_file_path, combined_audio) return TTSResult(output_file=output_file_path.name)
