Audio Transcriber MCP Server
by Ichigo3766
Verified
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
CallToolRequestSchema,
ErrorCode,
ListToolsRequestSchema,
McpError,
} from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai';
import fs from 'fs';
import path from 'path';
import { promisify } from 'util';
// OpenAI client setup, driven entirely by environment variables:
//   OPENAI_API_KEY  - required; module load fails without it
//   OPENAI_BASE_URL - optional; forwarded to the client as `baseURL` when set
//   OPENAI_MODEL    - optional; falls back to "whisper-1" when unset or empty
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
  throw new Error('OPENAI_API_KEY environment variable is required');
}
const OPENAI_BASE_URL = process.env.OPENAI_BASE_URL;
const OPENAI_MODEL = process.env.OPENAI_MODEL || "whisper-1";

// `baseURL` is only present on the config object when a non-empty value was
// supplied, so the client never receives an explicit `undefined` for it.
const config: { apiKey: string; baseURL?: string } = OPENAI_BASE_URL
  ? { apiKey: OPENAI_API_KEY, baseURL: OPENAI_BASE_URL }
  : { apiKey: OPENAI_API_KEY };

const openai = new OpenAI(config);
/** Arguments accepted by the `transcribe_audio` tool. */
interface TranscribeArgs {
  /** Path of the audio file to transcribe. */
  filepath: string;
  /** Save flag; may arrive as a real boolean or as a string. */
  save_to_file?: boolean | string;
  /** Language code forwarded to the transcription request. */
  language?: string;
}

/**
 * Runtime type guard for the raw tool arguments.
 *
 * Accepts any value and reports whether it is a non-null object with a string
 * `filepath`, an optional boolean-or-string `save_to_file`, and an optional
 * string `language`. Extra properties are ignored.
 *
 * @param args - Untrusted value to validate.
 * @returns `true` when `args` can safely be used as {@link TranscribeArgs}.
 */
const isValidTranscribeArgs = (args: unknown): args is TranscribeArgs => {
  if (typeof args !== 'object' || args === null) {
    return false;
  }
  // Reading through an `unknown`-valued record keeps every property check
  // below honest: nothing is usable until its `typeof` has been tested.
  const { filepath, save_to_file, language } = args as Record<string, unknown>;
  const saveFlagOk =
    save_to_file === undefined ||
    typeof save_to_file === 'boolean' ||
    typeof save_to_file === 'string';
  const languageOk = language === undefined || typeof language === 'string';
  return typeof filepath === 'string' && saveFlagOk && languageOk;
};
/**
 * MCP server exposing a single tool, `transcribe_audio`, which sends an audio
 * file to the OpenAI transcription endpoint and returns the resulting text.
 *
 * All diagnostics are written with `console.error` (stderr); the server is
 * connected over `StdioServerTransport`, so stdout is presumably reserved for
 * protocol traffic.
 */
class AudioTranscriberServer {
  private readonly server: Server;

  /**
   * Creates the underlying MCP server, registers the tool handlers, and
   * installs a SIGINT handler that closes the server before exiting.
   */
  constructor() {
    this.server = new Server(
      {
        name: 'audio-transcriber',
        version: '0.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );
    this.setupToolHandlers();
    this.server.onerror = (error) => console.error('[MCP Error]', error);
    process.on('SIGINT', async () => {
      try {
        await this.server.close();
      } catch (err) {
        console.error('[ERROR] Failed to close server cleanly:', err);
      } finally {
        // Exit even when close() rejects; otherwise Ctrl-C would leave the
        // process running with only a logged rejection.
        process.exit(0);
      }
    });
  }

  /** Registers the tool listing and the `transcribe_audio` call handler. */
  private setupToolHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'transcribe_audio',
          description: 'Transcribe an audio file using OpenAI Whisper API',
          inputSchema: {
            type: 'object',
            properties: {
              filepath: {
                type: 'string',
                description: 'Absolute path to the audio file',
              },
              save_to_file: {
                type: 'boolean',
                description: 'Whether to save the transcription to a file next to the audio file',
              },
              language: {
                type: 'string',
                description: 'Language of the audio in ISO-639-1 format (e.g. "en", "es"). Default is "en".',
              },
            },
            required: ['filepath'],
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (request.params.name !== 'transcribe_audio') {
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${request.params.name}`
        );
      }
      const args = request.params.arguments;
      if (!isValidTranscribeArgs(args)) {
        throw new McpError(
          ErrorCode.InvalidParams,
          'Invalid transcribe arguments'
        );
      }

      // Protocol-level problems (unknown tool, bad arguments) are thrown above.
      // Anything that goes wrong from here on is reported as a tool result
      // with `isError: true` instead of a thrown error.
      try {
        const { filepath, save_to_file, language = "en" } = args;

        const audioPath = await this.resolveAudioPath(filepath);
        console.error(`[DEBUG] Requested file path: ${audioPath}`);
        await this.assertReadable(audioPath);
        console.error(`[DEBUG] File exists and is readable: ${audioPath}`);

        const transcription = await this.transcribe(audioPath, language);
        console.error(`[DEBUG] Transcription completed successfully`);

        // The flag may arrive as a string; only "true" (any letter case)
        // counts as enabled in that form.
        const shouldSaveToFile = typeof save_to_file === 'string'
          ? save_to_file.toLowerCase() === 'true'
          : Boolean(save_to_file);
        if (shouldSaveToFile) {
          await this.saveTranscription(audioPath, transcription);
        }

        return {
          content: [
            {
              type: 'text',
              text: transcription,
            },
          ],
        };
      } catch (error: unknown) {
        console.error('[ERROR] Transcription failed:', error);
        return {
          content: [
            {
              type: 'text',
              text: `Error transcribing audio: ${this.describeError(error)}`,
            },
          ],
          isError: true,
        };
      }
    });
  }

  /**
   * Maps the caller-supplied path string to an existing regular file.
   *
   * Three interpretations are tried, in this order, and the first one that
   * names a regular file wins:
   *  1. backslashes removed and percent-escapes decoded (the historical
   *     behaviour — presumably meant to undo shell-style `\ ` escaping and
   *     URL-encoded paths);
   *  2. backslashes removed, no decoding (covers names with a literal `%`);
   *  3. the trimmed input exactly as given (covers paths that legitimately
   *     contain backslashes, such as Windows-style paths).
   *
   * @param filepath - Path string received from the tool call.
   * @returns The first candidate that is a regular file.
   * @throws Error when no candidate exists, or when the only existing
   *   candidates are not regular files (for example a directory).
   */
  private async resolveAudioPath(filepath: string): Promise<string> {
    const trimmed = filepath.trim();
    const unescaped = filepath.replace(/\\/g, '').trim();
    let decoded = unescaped;
    try {
      decoded = decodeURIComponent(unescaped);
    } catch {
      // decodeURIComponent throws URIError on a malformed escape such as a
      // bare '%'; in that case the text is treated as a literal file name.
    }

    const candidates = [...new Set([decoded, unescaped, trimmed])];
    let firstNonFile: string | undefined;
    for (const candidate of candidates) {
      const kind = await this.classifyPath(candidate);
      if (kind === 'file') {
        return candidate;
      }
      if (kind === 'other' && firstNonFile === undefined) {
        firstNonFile = candidate;
      }
    }

    if (firstNonFile !== undefined) {
      throw new Error(`Path is not a regular file: ${firstNonFile}`);
    }
    throw new Error(`Audio file not found: ${decoded}`);
  }

  /**
   * Reports whether `candidate` is a regular file, some other filesystem
   * entry, or cannot be stat'ed at all (any stat failure counts as missing).
   */
  private async classifyPath(candidate: string): Promise<'file' | 'other' | 'missing'> {
    try {
      const stats = await promisify(fs.stat)(candidate);
      return stats.isFile() ? 'file' : 'other';
    } catch {
      return 'missing';
    }
  }

  /**
   * Verifies that the current process may read `audioPath`.
   *
   * @throws Error with a path-specific message when the access check fails.
   */
  private async assertReadable(audioPath: string): Promise<void> {
    try {
      await promisify(fs.access)(audioPath, fs.constants.R_OK);
    } catch {
      throw new Error(`Audio file not readable: ${audioPath}`);
    }
  }

  /**
   * Streams the file to the transcription endpoint and returns the text.
   *
   * @param audioPath - Resolved path of the audio file.
   * @param language - Language code forwarded with the request.
   * @returns The `text` field of the API response.
   */
  private async transcribe(audioPath: string, language: string): Promise<string> {
    console.error(`[DEBUG] Sending transcription request to OpenAI API`);
    const fileStream = fs.createReadStream(audioPath);
    try {
      const response = await openai.audio.transcriptions.create({
        file: fileStream,
        model: OPENAI_MODEL,
        language: language
      });
      return response.text;
    } finally {
      // Release the file handle whether the request succeeded or failed.
      fileStream.destroy();
    }
  }

  /**
   * Writes the transcription to `<audio name without extension>.txt` in the
   * same directory as the audio file.
   */
  private async saveTranscription(audioPath: string, transcription: string): Promise<void> {
    const audioDir = path.dirname(audioPath);
    const audioName = path.basename(audioPath, path.extname(audioPath));
    const transcriptionPath = path.join(audioDir, `${audioName}.txt`);
    console.error(`[DEBUG] Saving transcription to: ${transcriptionPath}`);
    await promisify(fs.writeFile)(transcriptionPath, transcription);
    console.error(`[DEBUG] File saved successfully`);
  }

  /**
   * Produces a human-readable message for a caught value of unknown type:
   * the non-empty string `message` property when there is one, otherwise the
   * value's string conversion.
   */
  private describeError(error: unknown): string {
    if (typeof error === 'object' && error !== null && 'message' in error) {
      const { message } = error as { message: unknown };
      if (typeof message === 'string' && message.length > 0) {
        return message;
      }
    }
    return String(error);
  }

  /**
   * Connects the server to a stdio transport. A connection failure is logged
   * and terminates the process with exit code 1.
   */
  async run() {
    try {
      const transport = new StdioServerTransport();
      await this.server.connect(transport);
      console.error('[INFO] Audio Transcriber MCP server running on stdio');
    } catch (err) {
      console.error('[FATAL] Failed to start server:', err);
      process.exit(1);
    }
  }
}
// Process-wide safety nets, followed by server start-up.

// A rejected promise that nobody handled is only logged; this handler does
// not terminate the process.
const logUnhandledRejection = (reason: unknown, promise: Promise<unknown>): void => {
  console.error('[ERROR] Unhandled Rejection at:', promise, 'reason:', reason);
};

// An uncaught exception is logged and the process exits with code 1 after a
// short delay, so the log output has time to flush.
const exitOnUncaughtException = (err: Error): void => {
  console.error('[FATAL] Uncaught Exception:', err);
  setTimeout(() => {
    process.exit(1);
  }, 500);
};

// Last-resort handler for a rejection coming out of `run()`.
const exitOnStartupFailure = (err: unknown): void => {
  console.error('[FATAL] Server initialization failed:', err);
  process.exit(1);
};

process.on('unhandledRejection', logUnhandledRejection);
process.on('uncaughtException', exitOnUncaughtException);

const server = new AudioTranscriberServer();
server.run().catch(exitOnStartupFailure);