Skip to main content
Glama

Voice Generation MCP Server

by mylxsw
voice_generator.py4.31 kB
#!/usr/bin/env python3 """ Voice generation module that handles communication with the Minimax AI API. """ import binascii import json import requests from typing import Dict, Any, Optional class VoiceGeneratorError(Exception): """Custom exception for voice generation errors.""" pass class VoiceGenerator: """Handles voice generation using the Minimax AI API.""" def __init__(self, config: Dict[str, Any]): """ Initialize the VoiceGenerator with configuration. Args: config: Configuration dictionary containing API settings """ self.config = config self.base_url = config['api']['base_url'] self.group_id = config['api']['group_id'] self.api_key = config['api']['api_key'] self.audio_settings = config['audio_settings'] self.defaults = config['defaults'] def generate_voice(self, text: str, model: str = None, voice_id: str = None, speed: float = None) -> bytes: """ Generate voice audio from text. Args: text: The text to convert to speech model: The model to use (defaults to config value) voice_id: The voice ID to use (defaults to config value) speed: The speech speed (defaults to config value, typically 0.5-2.0) Returns: bytes: The audio data in binary format Raises: VoiceGeneratorError: If there's an error during generation """ if not text: raise VoiceGeneratorError("Text cannot be empty") # Use defaults if not provided model = model or self.defaults['model'] voice_id = voice_id or self.defaults['voice_id'] speed = speed if speed is not None else self.defaults['speed'] # Prepare API request url = f"{self.base_url}?GroupId={self.group_id}" headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } payload = self._create_payload(text, model, voice_id, speed) try: # Make the API request response = requests.post(url, headers=headers, json=payload) response.raise_for_status() # Parse the JSON response response_data = response.json() # Extract the hex-encoded audio data if "data" in response_data and "audio" in response_data["data"]: hex_audio = response_data["data"]["audio"] # Convert hex to binary return binascii.unhexlify(hex_audio) else: raise VoiceGeneratorError(f"Unexpected API response format: {response_data}") except requests.exceptions.RequestException as e: raise VoiceGeneratorError(f"Error making API request: {e}") except json.JSONDecodeError as e: raise VoiceGeneratorError(f"Error parsing API response: {e}") except binascii.Error as e: raise VoiceGeneratorError(f"Error decoding audio data: {e}") def _create_payload(self, text: str, model: str, voice_id: str, speed: float) -> Dict[str, Any]: """ Create the API request payload. Args: text: The text to convert to speech model: The model to use voice_id: The voice ID to use speed: The speech speed Returns: Dict[str, Any]: The request payload """ return { "model": model, "text": text, "timber_weights": [ { "voice_id": voice_id, "weight": 1 } ], "voice_setting": { "voice_id": "", "speed": speed, "pitch": 0, "vol": 1, "latex_read": False }, "audio_setting": { "sample_rate": self.audio_settings['sample_rate'], "bitrate": self.audio_settings['bitrate'], "format": self.audio_settings['format'] }, "language_boost": "auto" }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mylxsw/voice-gen-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server