Skip to main content
Glama
index.ts (11 kB)
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequest,
  CallToolRequestSchema,
  ListToolsRequestSchema,
  Tool,
} from "@modelcontextprotocol/sdk/types.js";
import { randomUUID } from 'crypto';
import { readdir, rm } from 'fs/promises';
import { KokoroTTS, KokoroVoice } from "kokoro-js";
import player from 'node-wav-player';
import { homedir, tmpdir } from 'os';
import { join } from 'path';

// Speech-rate bounds shared by the environment check, the tool schema and
// the runtime argument validation so the three can never drift apart.
const MIN_SPEECH_SPEED = 0.5;
const MAX_SPEECH_SPEED = 2.0;

// Configuration from environment variables
const DEFAULT_SPEECH_SPEED = parseFloat(process.env.MCP_DEFAULT_SPEECH_SPEED || "1.1");
if (
  isNaN(DEFAULT_SPEECH_SPEED) ||
  DEFAULT_SPEECH_SPEED < MIN_SPEECH_SPEED ||
  DEFAULT_SPEECH_SPEED > MAX_SPEECH_SPEED
) {
  throw new Error("MCP_DEFAULT_SPEECH_SPEED must be a number between 0.5 and 2.0");
}
const DEFAULT_VOICE = (process.env.MCP_DEFAULT_VOICE || "af_bella") as KokoroVoice;
const MAX_RETRIES = 3;
const RETRY_DELAY_MS = 1000;

// Voices whose overall grade is outside this set are hidden from list_voices.
const ACCEPTABLE_VOICE_GRADES = new Set(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+']);

// Type definitions for tool arguments
interface TextToSpeechArgs {
  text: string;
  voice?: KokoroVoice;
}

interface TextToSpeechWithOptionsArgs extends TextToSpeechArgs {
  speed?: number;
}

/** Raw, unvalidated tool arguments as delivered by the MCP client. */
type RawToolArguments = Record<string, unknown>;

// Tool definitions
const textToSpeechTool: Tool = {
  name: "text_to_speech",
  description: "Convert text to speech and play it through system audio",
  inputSchema: {
    type: "object",
    properties: {
      text: {
        type: "string",
        description: "The text to convert to speech",
        minLength: 1,
        maxLength: 1000,
      },
      voice: {
        type: "string",
        description: "The voice to use for speech synthesis (e.g. 'af_bella'). Use list_voices to see available options.",
      },
    },
    required: ["text"],
  },
};

const textToSpeechWithOptionsTool: Tool = {
  name: "text_to_speech_with_options",
  description: "Convert text to speech with customizable speed",
  inputSchema: {
    type: "object",
    properties: {
      text: {
        type: "string",
        description: "The text to convert to speech",
        minLength: 1,
        maxLength: 1000,
      },
      voice: {
        type: "string",
        description: "The voice to use for speech synthesis (e.g. 'af_bella'). Use list_voices to see available options.",
      },
      speed: {
        type: "number",
        description: "Speech rate multiplier (0.5 to 2.0)",
        minimum: MIN_SPEECH_SPEED,
        maximum: MAX_SPEECH_SPEED,
      },
    },
    required: ["text"],
  },
};

const listVoicesTool: Tool = {
  name: "list_voices",
  description: "List all available voices for text-to-speech",
  inputSchema: {
    type: "object",
    properties: {},
    required: [],
  },
};

const getModelStatusTool: Tool = {
  name: "get_model_status",
  description: "Get the current status of the TTS model initialization",
  inputSchema: {
    type: "object",
    properties: {},
    required: [],
  },
};

/**
 * Extracts the mandatory `text` argument.
 *
 * @throws Error when `text` is absent, empty, or not a string.
 */
function requireText(args: RawToolArguments): string {
  const { text } = args;
  if (typeof text !== "string" || text.length === 0) {
    throw new Error("Missing required argument: text");
  }
  return text;
}

/**
 * Extracts the optional `voice` argument. An absent, null or empty value
 * yields `undefined` so the configured default voice is used.
 *
 * @throws Error when `voice` is present but not a string.
 */
function optionalVoice(args: RawToolArguments): KokoroVoice | undefined {
  const { voice } = args;
  if (voice == null || voice === "") {
    return undefined;
  }
  if (typeof voice !== "string") {
    throw new Error("Argument 'voice' must be a string");
  }
  return voice as KokoroVoice;
}

/**
 * Extracts the optional `speed` argument and enforces the range that the
 * tool schema advertises.
 *
 * @throws Error when `speed` is present but not a finite number in range.
 */
function optionalSpeed(args: RawToolArguments): number | undefined {
  const { speed } = args;
  if (speed == null) {
    return undefined;
  }
  if (
    typeof speed !== "number" ||
    !Number.isFinite(speed) ||
    speed < MIN_SPEECH_SPEED ||
    speed > MAX_SPEECH_SPEED
  ) {
    throw new Error(
      `Argument 'speed' must be a number between ${MIN_SPEECH_SPEED.toFixed(1)} and ${MAX_SPEECH_SPEED.toFixed(1)}`
    );
  }
  return speed;
}

/**
 * Ensures a speech tool actually received an arguments object.
 *
 * @throws Error when the request carried no arguments.
 */
function requireArguments(args: RawToolArguments | undefined): RawToolArguments {
  if (!args) {
    throw new Error("No arguments provided");
  }
  return args;
}

/** Validates the arguments of the `text_to_speech` tool. */
function parseTextToSpeechArgs(raw: RawToolArguments | undefined): TextToSpeechArgs {
  const args = requireArguments(raw);
  return { text: requireText(args), voice: optionalVoice(args) };
}

/** Validates the arguments of the `text_to_speech_with_options` tool. */
function parseTextToSpeechWithOptionsArgs(
  raw: RawToolArguments | undefined,
): TextToSpeechWithOptionsArgs {
  const args = requireArguments(raw);
  return {
    text: requireText(args),
    voice: optionalVoice(args),
    speed: optionalSpeed(args),
  };
}

/**
 * Lazily-initialized wrapper around the Kokoro TTS model.
 *
 * Model loading starts in the constructor and runs in the background; every
 * public method that needs the model awaits that load first. Loading is
 * retried up to MAX_RETRIES times, deleting cached model files between
 * attempts.
 */
class TTSClient {
  private ttsInstance: KokoroTTS | null = null;
  private readonly modelId = "onnx-community/Kokoro-82M-v1.0-ONNX";
  private initializationPromise: Promise<void> | null = null;
  private initializationError: Error | null = null;
  private initializationStartTime: number | null = null;
  private retryCount = 0;

  constructor() {
    // Start initialization immediately but don't block
    this.startInitialization();
  }

  /**
   * Best-effort removal of cached model files so the next load attempt
   * re-downloads them. Never throws.
   */
  private async cleanupModelFiles(): Promise<void> {
    const modelFile = [...this.modelId.split('/'), 'onnx', 'model_quantized.onnx'];
    const modelPaths: string[] = [
      join(homedir(), '.cache', 'huggingface', 'transformers', ...modelFile),
    ];

    // rm() does not expand glob patterns, so each npx cache directory has to
    // be enumerated explicitly instead of using a '**' path segment.
    const npxRoot = join(homedir(), '.npm', '_npx');
    try {
      for (const entry of await readdir(npxRoot)) {
        modelPaths.push(
          join(npxRoot, entry, 'node_modules', '@huggingface', 'transformers', '.cache', ...modelFile),
        );
      }
    } catch {
      // No npx cache directory (or it is unreadable): nothing to clean there.
    }

    for (const path of modelPaths) {
      try {
        // No `force` here: a missing file must not be reported as cleaned up.
        await rm(path);
        console.error(`Cleaned up model file at ${path}`);
      } catch (error) {
        const code = error instanceof Error ? (error as NodeJS.ErrnoException).code : undefined;
        if (code !== 'ENOENT') {
          // Cleanup stays best-effort, but unexpected failures are surfaced.
          console.error(
            `Could not remove model file at ${path}: ${error instanceof Error ? error.message : String(error)}`
          );
        }
      }
    }
  }

  /** Resolves after `ms` milliseconds. */
  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /** Kicks off model loading once; subsequent calls are no-ops. */
  private startInitialization(): void {
    if (this.initializationPromise) return;

    this.initializationStartTime = Date.now();
    const initialization = this.initializeWithRetry();
    // Mark the rejection as handled so a failed background load does not
    // become an unhandled rejection; awaiters of the stored promise still
    // observe the failure.
    initialization.catch(() => undefined);
    this.initializationPromise = initialization;
  }

  /**
   * Loads the model, retrying up to MAX_RETRIES times.
   *
   * @throws Error after the final failed attempt; the same error is stored
   *   in `initializationError` for `getStatus`.
   */
  private async initializeWithRetry(): Promise<void> {
    while (this.retryCount < MAX_RETRIES) {
      try {
        if (this.retryCount > 0) {
          console.error(`Retrying model initialization (attempt ${this.retryCount + 1}/${MAX_RETRIES})...`);
          await this.cleanupModelFiles();
          await this.delay(RETRY_DELAY_MS);
        }

        this.ttsInstance = await KokoroTTS.from_pretrained(this.modelId, {
          dtype: "q8",
        });
        return;
      } catch (error) {
        this.retryCount++;
        const isLastAttempt = this.retryCount >= MAX_RETRIES;
        // Normalize non-Error throwables so the final failure is always
        // recorded and propagated, whatever was thrown.
        const reason = error instanceof Error ? error.message : String(error);

        console.error(`Model initialization failed${isLastAttempt ? ' (final attempt)' : ''}: ${reason}`);

        if (isLastAttempt) {
          this.initializationError = new Error(
            `Failed to initialize model after ${MAX_RETRIES} attempts. ` +
            `Last error: ${reason}\n` +
            `Try manually removing the model file and running again.`
          );
          throw this.initializationError;
        }
      }
    }
  }

  /**
   * Reports where model initialization currently stands.
   *
   * @returns The status plus, depending on the state, the elapsed load time,
   *   the failure message and the number of failed attempts so far.
   */
  async getStatus(): Promise<{
    status: 'uninitialized' | 'initializing' | 'ready' | 'error';
    elapsedMs?: number;
    error?: string;
    retryCount?: number;
  }> {
    if (!this.initializationPromise) {
      return { status: 'uninitialized' };
    }
    if (this.initializationError) {
      return {
        status: 'error',
        error: this.initializationError.message,
        retryCount: this.retryCount,
      };
    }
    if (!this.ttsInstance) {
      return {
        status: 'initializing',
        elapsedMs: Date.now() - (this.initializationStartTime || 0),
        retryCount: this.retryCount,
      };
    }
    return { status: 'ready', retryCount: this.retryCount };
  }

  /**
   * Waits for model initialization to finish, starting it if necessary.
   *
   * @throws Error when initialization ultimately failed.
   */
  async waitForInit(): Promise<void> {
    if (!this.initializationPromise) {
      this.startInitialization();
    }
    await this.initializationPromise;
  }

  /**
   * Lists the names of the voices whose overall grade is C+ or better.
   *
   * @throws Error when the model could not be initialized.
   */
  async listVoices(): Promise<KokoroVoice[]> {
    await this.waitForInit();
    if (!this.ttsInstance) {
      throw new Error("TTS model not initialized");
    }

    // @ts-ignore -- the voice metadata shape is asserted locally
    const allVoices = this.ttsInstance.voices as unknown as {[voice: string]: {overallGrade: string; gender: string}};
    return Object.entries(allVoices)
      .filter(([, details]) => ACCEPTABLE_VOICE_GRADES.has(details.overallGrade))
      .map(([voiceName]) => voiceName) as KokoroVoice[];
  }

  /**
   * Synthesizes `text` and plays it through the system audio device,
   * resolving once playback has finished.
   *
   * @param text Text to speak.
   * @param voice Voice to use; falls back to the configured default.
   * @param speed Speech rate multiplier; falls back to the configured default.
   * @throws Error when the model is unavailable, or synthesis/playback fails.
   */
  async generateAndPlayAudio(text: string, voice?: KokoroVoice, speed?: number): Promise<void> {
    await this.waitForInit();
    if (!this.ttsInstance) {
      throw new Error("TTS model not initialized");
    }

    const audio = await this.ttsInstance.generate(text, {
      // `||` on purpose: an empty voice name also falls back to the default.
      voice: voice || DEFAULT_VOICE,
      // @ts-ignore -- `speed` is passed through to the model
      speed: speed ?? DEFAULT_SPEECH_SPEED,
    });

    // A random name avoids collisions between calls within the same millisecond.
    const tempFile = join(tmpdir(), `${randomUUID()}.wav`);
    try {
      await audio.save(tempFile);
      await player.play({
        path: tempFile,
        sync: true,
      });
    } finally {
      // Always remove the temporary WAV, even when saving or playback failed.
      try {
        await rm(tempFile, { force: true });
      } catch (cleanupError) {
        console.error(`Could not remove temporary audio file ${tempFile}:`, cleanupError);
      }
    }
  }
}

/**
 * Creates the MCP server, registers the tool handlers and connects it to
 * stdio. All diagnostics go to stderr because stdout carries the protocol.
 */
async function main() {
  console.error("Starting Speech MCP Server...");

  const server = new Server(
    {
      name: "Speech MCP Server",
      version: "1.0.0",
    },
    {
      capabilities: {
        tools: {},
      },
    },
  );

  const ttsClient = new TTSClient();

  server.setRequestHandler(
    CallToolRequestSchema,
    async (request: CallToolRequest) => {
      console.error("Received CallToolRequest:", request);
      try {
        switch (request.params.name) {
          case "text_to_speech": {
            const args = parseTextToSpeechArgs(request.params.arguments);
            await ttsClient.generateAndPlayAudio(args.text, args.voice);
            return {
              content: [{
                type: "text",
                text: `Successfully generated and played audio${args.voice ? ` using voice: ${args.voice}` : ''}`
              }],
            };
          }

          case "text_to_speech_with_options": {
            const args = parseTextToSpeechWithOptionsArgs(request.params.arguments);
            await ttsClient.generateAndPlayAudio(args.text, args.voice, args.speed);
            // Report the speed that was actually used, not a hard-coded 1.0.
            const effectiveSpeed = args.speed ?? DEFAULT_SPEECH_SPEED;
            return {
              content: [{
                type: "text",
                text: `Successfully generated and played audio${args.voice ? ` using voice: ${args.voice}` : ''} (speed: ${effectiveSpeed})`
              }],
            };
          }

          // The two tools below take no parameters, so a request without an
          // `arguments` object is valid for them.
          case "list_voices": {
            const voices = await ttsClient.listVoices();
            return {
              content: [{
                type: "text",
                text: `Available voices:\n${voices.join('\n')}`
              }],
            };
          }

          case "get_model_status": {
            const status = await ttsClient.getStatus();
            let message = `Model status: ${status.status}`;
            if (status.elapsedMs) {
              message += ` (${Math.round(status.elapsedMs / 1000)}s elapsed)`;
            }
            if (status.error) {
              message += `\nError: ${status.error}`;
            }
            return {
              content: [{ type: "text", text: message }],
            };
          }

          default:
            throw new Error(`Unknown tool: ${request.params.name}`);
        }
      } catch (error) {
        console.error("Error executing tool:", error);
        return {
          // Flag the result as a tool-level failure so clients can tell it
          // apart from a successful call.
          isError: true,
          content: [
            {
              type: "text",
              text: JSON.stringify({
                error: error instanceof Error ? error.message : String(error),
              }),
            },
          ],
        };
      }
    },
  );

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    console.error("Received ListToolsRequest");
    return {
      tools: [
        textToSpeechTool,
        textToSpeechWithOptionsTool,
        listVoicesTool,
        getModelStatusTool,
      ],
    };
  });

  const transport = new StdioServerTransport();
  console.error("Connecting server to transport...");
  await server.connect(transport);

  console.error("Speech MCP Server running on stdio");
}

main().catch((error) => {
  console.error("Fatal error in main():", error);
  process.exit(1);
});

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hammeiam/koroko-speech-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.