Skip to main content
Glama

MCP FishAudio Server

by da-okazaki
tts.ts10.5 kB
import { FishAudioSDKService } from '../services/fishAudioSDK.js'; import { TTSToolParams, TTSToolResponse, TTSParams, AudioFormat, Mp3Bitrate, LatencyMode, FishAudioError } from '../types/index.js'; import { loadConfig, getOutputPath } from '../utils/config.js'; import { writeFileSync, createWriteStream } from 'fs'; import { playAudio, isAudioPlaybackSupported } from '../utils/audioPlayer.js'; import { RealTimeAudioPlayer } from '../utils/realTimePlayer.js'; import { ReferenceSelector } from '../utils/referenceSelector.js'; import { logger } from '../utils/logger.js'; export class TTSTool { name = 'fish_audio_tts'; description = 'Generate speech from text using Fish Audio TTS API'; inputSchema = { type: 'object' as const, properties: { text: { type: 'string', description: 'Text to convert to speech', maxLength: 10000 }, reference_id: { type: 'string', description: 'Voice model reference ID (optional)' }, reference_name: { type: 'string', description: 'Voice model name to search for (optional)' }, reference_tag: { type: 'string', description: 'Voice model tag to search for (optional)' }, streaming: { type: 'boolean', description: 'Enable HTTP streaming mode (optional)', default: false }, websocket_streaming: { type: 'boolean', description: 'Enable WebSocket streaming mode (optional)', default: false }, realtime_play: { type: 'boolean', description: 'Enable real-time audio playback during streaming (optional)', default: false }, format: { type: 'string', enum: ['mp3', 'wav', 'pcm', 'opus'], description: 'Output audio format (optional)', default: 'mp3' }, mp3_bitrate: { type: 'number', enum: [64, 128, 192], description: 'MP3 bitrate in kbps (optional)', default: 128 }, normalize: { type: 'boolean', description: 'Enable text normalization (optional)', default: true }, latency: { type: 'string', enum: ['normal', 'balanced'], description: 'Latency mode (optional)', default: 'balanced' }, output_path: { type: 'string', description: 'Custom output file path (optional)' }, auto_play: { type: 'boolean', description: 'Automatically play the generated audio (optional)', default: false } }, required: ['text'] }; private service: FishAudioSDKService; constructor() { this.service = new FishAudioSDKService(); } async run(input: TTSToolParams): Promise<TTSToolResponse> { try { // Validate input if (!input.text || input.text.trim().length === 0) { return { success: false, error: 'Text input is required' }; } if (input.text.length > 10000) { return { success: false, error: 'Text length exceeds maximum limit of 10,000 characters' }; } const config = loadConfig(); // Select reference ID based on input parameters let selectedReferenceId: string | undefined; if (input.reference_id || input.reference_name || input.reference_tag) { // Use reference selector if references are configured if (config.references && config.references.length > 0) { const selector = new ReferenceSelector(config.references, config.defaultReference); selectedReferenceId = selector.selectReference({ id: input.reference_id, name: input.reference_name, tag: input.reference_tag }); if (!selectedReferenceId && (input.reference_name || input.reference_tag)) { return { success: false, error: `No reference found matching: ${input.reference_name || input.reference_tag}` }; } } else { // Fallback to direct ID if no references configured selectedReferenceId = input.reference_id; } } else { // Use default reference or backward compatible referenceId selectedReferenceId = config.defaultReference || config.referenceId; } // Prepare parameters const ttsParams = { text: input.text, referenceId: selectedReferenceId, format: (input.format || config.outputFormat) as AudioFormat, mp3Bitrate: (input.mp3_bitrate || config.mp3Bitrate) as Mp3Bitrate, normalize: input.normalize !== false, latency: (input.latency || 'balanced') as LatencyMode, streaming: input.streaming ?? config.streaming }; // Determine output path const outputPath = input.output_path || getOutputPath(ttsParams.format || 'mp3'); // Determine if auto-play is enabled const shouldAutoPlay = input.auto_play ?? config.autoPlay; // Determine if WebSocket streaming is enabled const useWebSocketStreaming = input.websocket_streaming ?? config.websocketStreaming; // WebSocket streaming mode with real-time playback if (useWebSocketStreaming) { return await this.handleWebSocketStreaming(input, ttsParams, outputPath, shouldAutoPlay || false); } if (ttsParams.streaming) { // HTTP Streaming mode const totalBytes = await this.service.generateSpeechStream(ttsParams, outputPath); // Auto-play if requested if (shouldAutoPlay && isAudioPlaybackSupported()) { try { await playAudio(outputPath); } catch (playError) { logger.error('Audio playback failed:', playError); } } return { success: true, file_path: outputPath, format: ttsParams.format, played: shouldAutoPlay || false }; } else { // Non-streaming mode const response = await this.service.generateSpeech(ttsParams); // Save to file if output path is specified if (outputPath) { writeFileSync(outputPath, response.audio); // Auto-play if requested if (shouldAutoPlay && isAudioPlaybackSupported()) { try { await playAudio(outputPath); } catch (playError) { logger.error('Audio playback failed:', playError); } } return { success: true, file_path: outputPath, format: response.format, played: shouldAutoPlay || false }; } else { // Return base64 encoded audio return { success: true, audio_data: response.audio.toString('base64'), format: response.format }; } } } catch (error) { if (error instanceof FishAudioError) { return { success: false, error: `${error.message} (${error.code})` }; } return { success: false, error: error instanceof Error ? error.message : 'Unknown error occurred' }; } } private async handleWebSocketStreaming( input: TTSToolParams, ttsParams: TTSParams, outputPath: string, shouldAutoPlay: boolean ): Promise<TTSToolResponse> { const writeStream = outputPath ? createWriteStream(outputPath) : null; let realTimePlayer: RealTimeAudioPlayer | null = null; let totalBytes = 0; const audioChunks: Buffer[] = []; try { // Set up real-time player if requested const config = loadConfig(); const shouldRealtimePlay = input.realtime_play ?? config.realtimePlay; if (shouldRealtimePlay && isAudioPlaybackSupported()) { realTimePlayer = new RealTimeAudioPlayer(); realTimePlayer.start(ttsParams.format || 'opus'); } // Split text into chunks for streaming const textChunks = this.splitTextIntoChunks(input.text); // Stream via WebSocket const audioStream = this.service.generateSpeechWebSocket(ttsParams, textChunks); for await (const audioChunk of audioStream) { totalBytes += audioChunk.length; // Write to file if output path specified if (writeStream) { writeStream.write(audioChunk); } // Play in real-time if requested if (realTimePlayer) { realTimePlayer.write(audioChunk); } // Collect chunks for post-playback if auto-play is enabled if (shouldAutoPlay && !shouldRealtimePlay) { audioChunks.push(audioChunk); } } // Close write stream if (writeStream) { writeStream.end(); } // Stop real-time player if (realTimePlayer) { realTimePlayer.stop(); } // Auto-play collected audio if requested (and not already played in real-time) let played = false; if (shouldAutoPlay && !shouldRealtimePlay && outputPath && isAudioPlaybackSupported()) { try { await playAudio(outputPath); played = true; } catch (playError) { logger.error('Audio playback failed:', playError); } } else if (shouldRealtimePlay) { played = true; } return { success: true, file_path: outputPath || undefined, format: ttsParams.format, played, streaming_mode: 'websocket', total_bytes: totalBytes }; } catch (error) { // Clean up on error if (writeStream) { writeStream.end(); } if (realTimePlayer) { realTimePlayer.stop(); } throw error; } } private splitTextIntoChunks(text: string, chunkSize: number = 100): string[] { const sentences = text.match(/[^.!?]+[.!?]+/g) || [text]; const chunks: string[] = []; let currentChunk = ''; for (const sentence of sentences) { if (currentChunk.length + sentence.length > chunkSize && currentChunk.length > 0) { chunks.push(currentChunk.trim()); currentChunk = sentence; } else { currentChunk += sentence; } } if (currentChunk.trim()) { chunks.push(currentChunk.trim()); } return chunks; } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/da-okazaki/mcp-fish-audio-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server