Skip to main content
Glama
tts.py1.94 kB
"""Text-to-speech using Supertonic.""" import sys import numpy as np import sounddevice as sd from supertonic import TTS # Global TTS instance cache _tts: TTS | None = None _voice_style = None # Audio playback settings PLAYBACK_SAMPLE_RATE = 44100 # Supertonic outputs 44.1kHz audio def load_tts() -> TTS: """Load TTS model (cached after first load).""" global _tts, _voice_style if _tts is not None: return _tts print("Loading Supertonic TTS model...", file=sys.stderr) _tts = TTS(auto_download=True) _voice_style = _tts.get_voice_style(voice_name="M1") print("TTS model loaded.", file=sys.stderr) return _tts def speak(text: str, voice: str = "M1") -> dict: """ Synthesize and play speech from text. Args: text: The text to speak voice: Voice name (default: M1) Returns: dict with 'success' and 'duration' keys """ if not text.strip(): return {"success": False, "error": "No text provided", "duration": 0} try: global _voice_style tts = load_tts() # Get voice style if different from cached if voice != "M1" or _voice_style is None: _voice_style = tts.get_voice_style(voice_name=voice) # Synthesize speech wav, duration = tts.synthesize(text, voice_style=_voice_style) # Handle output format: wav is (1, samples), duration is array wav = np.asarray(wav, dtype=np.float32).flatten() duration_secs = float(np.asarray(duration).flatten()[0]) # Play audio (stereo for compatibility with DACs) stereo = np.column_stack([wav, wav]) sd.play(stereo, PLAYBACK_SAMPLE_RATE, blocking=True) return { "success": True, "duration": duration_secs, } except Exception as e: return { "success": False, "error": str(e), "duration": 0, }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jochiang/voice-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.