/**
* VoiceBox MCP Server
*
* Model Context Protocol server for VOICEVOX text-to-speech
* Integrates with voicebox-tts API (Celery + Redis queue system)
*
* Optimized for performance with fetch API and minimal dependencies
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
// Configuration
/**
 * Base URL of the VoiceBox TTS HTTP API.
 *
 * Read from the VOICEBOX_API_URL environment variable. `||` (not `??`) is
 * intentional: an empty variable also falls back to the default.
 */
const API_BASE = process.env.VOICEBOX_API_URL || 'http://localhost:5001';

/**
 * VoiceBox TTS API client built on the global fetch API.
 *
 * Each public method performs one HTTP request against `baseUrl` and resolves
 * with the parsed JSON body. Payload shapes are defined by the server and are
 * not validated here.
 *
 * Every method rejects with an Error when the request fails at the network
 * level, when the server answers with a non-2xx status (the message carries
 * the status line and the start of the response body), or when the body is
 * not valid JSON.
 */
class VoiceBoxClient {
  private readonly baseUrl: string;

  /**
   * @param baseUrl Root URL of the API; trailing slashes are ignored.
   */
  constructor(baseUrl: string = API_BASE) {
    // Strip trailing slashes so `${baseUrl}/tts` never contains `//`.
    this.baseUrl = baseUrl.replace(/\/+$/, '');
  }

  /**
   * Sends one request and returns the decoded JSON body.
   *
   * @param path Path (plus query string) appended to the base URL; must start with `/`.
   * @param init Optional fetch options (method, headers, body).
   * @throws Error on a non-2xx status or a body that is not valid JSON.
   */
  private async request(path: string, init?: Parameters<typeof fetch>[1]): Promise<any> {
    const method = init?.method ?? 'GET';
    const response = await fetch(`${this.baseUrl}${path}`, init);

    if (!response.ok) {
      // Keep the start of the body: error payloads usually explain the failure.
      let detail = '';
      try {
        detail = (await response.text()).trim().slice(0, 300);
      } catch {
        // Body could not be read; the status line alone is reported.
      }
      const statusLine = `HTTP ${response.status} ${response.statusText}`.trim();
      throw new Error(
        `VoiceBox API ${method} ${path} failed: ${statusLine}` + (detail ? ` - ${detail}` : ''),
      );
    }

    try {
      return await response.json();
    } catch {
      throw new Error(`VoiceBox API ${method} ${path} returned a non-JSON response`);
    }
  }

  /**
   * Create TTS task (non-blocking).
   *
   * POSTs `{ text, speaker }` as JSON to `/tts`.
   *
   * @param text Text to synthesize.
   * @param speaker Speaker ID; defaults to 1.
   * @returns The server's response, expected to carry `task_id` and `status`.
   */
  async createTTSTask(text: string, speaker: number = 1): Promise<{ task_id: string; status: string }> {
    return this.request('/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, speaker }),
    });
  }

  /**
   * Get task status from `/tts/{taskId}`.
   *
   * @param taskId Task identifier; percent-encoded so it always stays a single path segment.
   */
  async getTaskStatus(taskId: string): Promise<any> {
    return this.request(`/tts/${encodeURIComponent(taskId)}`);
  }

  /**
   * Get system health from `/health`.
   */
  async getHealth(): Promise<any> {
    return this.request('/health');
  }

  /**
   * Get metrics from `/metrics`.
   */
  async getMetrics(): Promise<any> {
    return this.request('/metrics');
  }

  /**
   * Get errors from `/errors`.
   *
   * @param limit Value sent as the `limit` query parameter; defaults to 10.
   */
  async getErrors(limit: number = 10): Promise<any> {
    return this.request(`/errors?limit=${encodeURIComponent(String(limit))}`);
  }
}
// Single API client used by the tool handlers below; it targets API_BASE.
const client = new VoiceBoxClient(API_BASE);
/**
 * Speaker ID -> display label lookup.
 *
 * Used to describe the `speaker` parameter, to answer `voicebox_speakers`,
 * and to label the chosen voice in the `voicebox_speak` confirmation.
 *
 * NOTE(review): upstream VOICEVOX style IDs appear to differ from this table
 * (e.g. ID 2 is usually 四国めたん ノーマル there) — confirm against the speaker
 * list reported by the backend before relying on these labels.
 */
const SPEAKERS: Record<number, string> = Object.fromEntries<string>([
  [0, '四国めたん (あまあま)'],
  [1, '四国めたん (ノーマル)'],
  [2, '四国めたん (セクシー)'],
  [3, 'ずんだもん (ノーマル)'],
  [4, 'ずんだもん (あまあま)'],
  [5, 'ずんだもん (悲嘆)'],
  [6, '春日部つむぎ (ノーマル)'],
  [7, '春日部つむぎ (あまあま)'],
  [8, '春日部つむぎ (ツンデレ)'],
]);
/**
 * MCP server instance.
 *
 * Identifies itself as `voicebox-mcp` version `1.0.0` and declares only the
 * `tools` capability; the request handlers are registered on it below.
 */
const server = new Server(
  { name: 'voicebox-mcp', version: '1.0.0' },
  { capabilities: { tools: {} } },
);
/**
 * Tool listing handler.
 *
 * Advertises five tools: `voicebox_speak` (text required, speaker optional),
 * `voicebox_status` (task_id required), and the argument-less
 * `voicebox_speakers`, `voicebox_health` and `voicebox_metrics`.
 */
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // One "id: name" line per SPEAKERS entry, embedded in the speaker description.
  const speakerCatalogue = Object.entries(SPEAKERS)
    .map(([id, label]) => `${id}: ${label}`)
    .join('\n');

  // Builds a fresh input schema for a tool that takes no arguments.
  const noArguments = () => ({ type: 'object' as const, properties: {} });

  const speakTool = {
    name: 'voicebox_speak',
    description: 'Convert text to speech using VOICEVOX. Non-blocking - queues task and returns immediately with task_id.',
    inputSchema: {
      type: 'object' as const,
      properties: {
        text: { type: 'string', description: 'Text to synthesize' },
        speaker: {
          type: 'number',
          description: `Speaker ID. Available speakers:\n${speakerCatalogue}`,
          default: 1,
        },
      },
      required: ['text'],
    },
  };

  const statusTool = {
    name: 'voicebox_status',
    description: 'Check the status of a TTS task',
    inputSchema: {
      type: 'object' as const,
      properties: {
        task_id: { type: 'string', description: 'Task ID to check' },
      },
      required: ['task_id'],
    },
  };

  const speakersTool = {
    name: 'voicebox_speakers',
    description: 'List available VOICEVOX speakers',
    inputSchema: noArguments(),
  };

  const healthTool = {
    name: 'voicebox_health',
    description: 'Check VoiceBox TTS system health',
    inputSchema: noArguments(),
  };

  const metricsTool = {
    name: 'voicebox_metrics',
    description: 'Get system metrics and statistics',
    inputSchema: noArguments(),
  };

  return { tools: [speakTool, statusTool, speakersTool, healthTool, metricsTool] };
});
/**
 * Returns `value` when it is a string with non-whitespace content.
 *
 * @throws Error carrying `message` for any other value (missing, wrong type, blank).
 */
function requireNonBlankString(value: unknown, message: string): string {
  if (typeof value !== 'string' || value.trim().length === 0) {
    throw new Error(message);
  }
  return value;
}

/**
 * Resolves the optional `speaker` argument.
 *
 * @returns 1 when the argument is absent (undefined or null), otherwise the given ID.
 * @throws Error when the argument is present but not a non-negative integer.
 */
function resolveSpeakerId(value: unknown): number {
  if (value === undefined || value === null) {
    return 1;
  }
  if (typeof value !== 'number' || !Number.isInteger(value) || value < 0) {
    throw new Error('Speaker must be a non-negative integer');
  }
  return value;
}

/**
 * Renders an arbitrary API payload as readable text: strings pass through,
 * everything else is JSON-encoded so objects do not print as "[object Object]".
 */
function describePayload(payload: unknown): string {
  if (typeof payload === 'string') {
    return payload;
  }
  // JSON.stringify yields undefined for undefined input; fall back to String().
  return JSON.stringify(payload) ?? String(payload);
}

/**
 * Handle tool calls.
 *
 * Dispatches on the tool name, validates the arguments each tool needs, calls
 * the API client where required and formats the outcome as a single text
 * content item. Any error thrown along the way (invalid arguments, unknown
 * tool, failed API call) is returned as a text result flagged `isError`
 * instead of propagating.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  const params = args ?? {};

  try {
    switch (name) {
      case 'voicebox_speak': {
        // Validate by type, not by cast: arguments arrive as untyped JSON.
        const text = requireNonBlankString(params.text, 'Text is required');
        const speaker = resolveSpeakerId(params.speaker);

        // Create task (NON-BLOCKING - immediate return)
        const task = await client.createTTSTask(text, speaker);
        if (task?.task_id == null) {
          // Never announce success for a response that carries no task ID.
          throw new Error(`TTS API did not return a task ID: ${describePayload(task)}`);
        }

        return {
          content: [
            {
              type: 'text',
              text: `✅ TTS task queued!\n\n` +
                `Task ID: ${task.task_id}\n` +
                `Status: ${task.status}\n` +
                `Speaker: ${SPEAKERS[speaker] || `Speaker ${speaker}`}\n` +
                `Text: ${text}\n\n` +
                `📝 Check status with voicebox_status tool.`,
            },
          ],
        };
      }

      case 'voicebox_status': {
        const taskId = requireNonBlankString(params.task_id, 'Task ID is required');
        const status = await client.getTaskStatus(taskId);

        let statusText = `Task: ${taskId}\nStatus: ${status.status}`;
        if (status.status === 'SUCCESS' && status.result) {
          statusText += `\n\nAudio: ${status.result.audio_path}\nSize: ${status.result.file_size} bytes`;
        } else if (status.status === 'FAILURE') {
          statusText += `\n\nError: ${describePayload(status.result)}`;
        } else if (status.result) {
          statusText += `\n\nProgress: ${status.result.status || 'Processing...'}`;
        }

        return {
          content: [{ type: 'text', text: statusText }],
        };
      }

      case 'voicebox_speakers': {
        const speakerList = Object.entries(SPEAKERS)
          .map(([id, label]) => `${id}: ${label}`)
          .join('\n');

        return {
          content: [{
            type: 'text',
            text: `🎤 Available VOICEVOX Speakers:\n\n${speakerList}`,
          }],
        };
      }

      case 'voicebox_health': {
        const health = await client.getHealth();

        return {
          content: [{
            type: 'text',
            text: `VoiceBox TTS Health: ${health.status}\n` +
              `Service: ${health.service}`,
          }],
        };
      }

      case 'voicebox_metrics': {
        const metrics = await client.getMetrics();
        // Missing sections fall back to zeroes rather than failing the call.
        const stats = metrics?.stats || {};

        return {
          content: [{
            type: 'text',
            text: `📊 VoiceBox Metrics:\n\n` +
              `Tasks completed: ${stats.counters?.tasks_completed || 0}\n` +
              `Tasks failed: ${stats.counters?.tasks_failed || 0}\n` +
              `Success rate: ${((stats.success_rate || 0) * 100).toFixed(1)}%\n` +
              `Active tasks: ${stats.active_tasks || 0}\n` +
              `Tasks last hour: ${stats.tasks_last_hour || 0}`,
          }],
        };
      }

      default:
        throw new Error(`Unknown tool: ${name}`);
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [{ type: 'text', text: `❌ Error: ${errorMessage}` }],
      isError: true,
    };
  }
});
/**
 * Entry point: connects the server to a stdio transport.
 *
 * After the connection is established, `console.error` is replaced with a
 * no-op, so later `console.error` calls in this process print nothing.
 *
 * NOTE(review): this also hides any later diagnostics written through
 * console.error — confirm that is still wanted.
 */
async function main(): Promise<void> {
  await server.connect(new StdioServerTransport());

  // Suppress stderr output
  console.error = () => {};
}

// A startup failure is logged and ends the process with exit code 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});