/**
 * Voice Assistant Server
 * Bridges web UI, VS Code extension, and SCS-MCP tools
 */
const express = require('express');
const WebSocket = require('ws');
const cors = require('cors');
const { Client } = require('@modelcontextprotocol/sdk/client/index.js');
const { StdioClientTransport } = require('@modelcontextprotocol/sdk/client/stdio.js');
const path = require('path');
const fs = require('fs').promises;
const app = express();
app.use(cors());
app.use(express.json());
app.use(express.static('public'));
// Configuration
const PORT = process.env.PORT || 3000;
const WS_PORT = process.env.WS_PORT || 3001;
const PROJECT_ROOT = process.env.PROJECT_ROOT || process.cwd();
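// Example environment overrides (values shown are illustrative; the path is a placeholder):
//   PORT=3000 WS_PORT=3001 PROJECT_ROOT=/path/to/project node voice-assistant-server.js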
// MCP Clients
let scsMcpClient = null;
let elevenLabsClient = null;
// WebSocket servers
const wss = new WebSocket.Server({ port: WS_PORT });
const clients = new Map(); // Track connected clients
// Current context from VS Code
let currentEditorContext = null;
// Initialize MCP connections
async function initializeMCP() {
  // Initialize SCS-MCP client. Each client gets its own try/catch so that a
  // failure in one does not prevent the other from connecting, and a failed
  // client is reset to null so the status checks below stay accurate.
  try {
    console.log('Initializing SCS-MCP client...');
    const scsTransport = new StdioClientTransport({
      command: 'node',
      args: [path.join(__dirname, '../src/index.js')],
      env: { ...process.env, PROJECT_ROOT }
    });
    scsMcpClient = new Client({
      name: 'voice-assistant-scs',
      version: '1.0.0'
    }, {
      capabilities: {}
    });
    await scsMcpClient.connect(scsTransport);
    console.log('✅ SCS-MCP connected');
  } catch (error) {
    console.error('Failed to initialize SCS-MCP client:', error);
    scsMcpClient = null; // Continue without SCS-MCP for development/testing
  }

  // Initialize ElevenLabs MCP client
  try {
    console.log('Initializing ElevenLabs MCP client...');
    const elevenLabsTransport = new StdioClientTransport({
      command: 'npx',
      args: ['-y', '@modelcontextprotocol/server-elevenlabs'],
      env: { ...process.env }
    });
    elevenLabsClient = new Client({
      name: 'voice-assistant-elevenlabs',
      version: '1.0.0'
    }, {
      capabilities: {}
    });
    await elevenLabsClient.connect(elevenLabsTransport);
    console.log('✅ ElevenLabs MCP connected');
  } catch (error) {
    console.error('Failed to initialize ElevenLabs MCP client:', error);
    elevenLabsClient = null; // Continue without TTS for development/testing
  }
}
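// Optional sanity check (not wired into startup): after connect(), the MCP
// SDK's listTools() can confirm which SCS tools are actually available, e.g.:
//
//   const { tools } = await scsMcpClient.listTools();
//   console.log('SCS tools:', tools.map(t => t.name).join(', '));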
// WebSocket connection handler
wss.on('connection', (ws, req) => {
  const clientId = generateClientId();
  const clientType = req.url === '/vscode' ? 'vscode' : 'web';
  clients.set(clientId, {
    ws,
    type: clientType,
    connected: new Date()
  });
  console.log(`Client connected: ${clientType} (${clientId})`);

  // Send initial status
  ws.send(JSON.stringify({
    type: 'status',
    data: {
      connected: true,
      scsMcp: scsMcpClient !== null,
      elevenLabs: elevenLabsClient !== null,
      clientId
    }
  }));

  // Handle messages
  ws.on('message', async (message) => {
    try {
      const data = JSON.parse(message.toString());
      await handleClientMessage(clientId, data);
    } catch (error) {
      console.error('Error handling message:', error);
      ws.send(JSON.stringify({
        type: 'error',
        error: error.message
      }));
    }
  });

  // Handle disconnect
  ws.on('close', () => {
    clients.delete(clientId);
    console.log(`Client disconnected: ${clientType} (${clientId})`);
  });
});
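// Message protocol (as implemented by the handlers below): clients send JSON
// frames of the form
//   { type: 'voice',   text, context? }    – transcribed speech to act on
//   { type: 'context', data }              – editor state (VS Code clients only)
//   { type: 'command', command, context? } – direct command, treated as voice input
//   { type: 'tts',     text, voice? }      – request speech synthesis only
// and receive 'status', 'context_update', 'response', 'audio', and 'error' frames.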
// Handle client messages
async function handleClientMessage(clientId, message) {
  const client = clients.get(clientId);
  if (!client) return;

  switch (message.type) {
    case 'voice':
      await handleVoiceInput(clientId, message.text, message.context);
      break;
    case 'context':
      if (client.type === 'vscode') {
        currentEditorContext = message.data;
        broadcastToWebClients({
          type: 'context_update',
          data: currentEditorContext
        });
      }
      break;
    case 'command':
      await handleCommand(clientId, message.command, message.context);
      break;
    case 'tts':
      await handleTextToSpeech(clientId, message.text, message.voice);
      break;
    default:
      console.log('Unknown message type:', message.type);
  }
}
// Handle voice input
async function handleVoiceInput(clientId, text, additionalContext) {
  console.log(`Voice input from ${clientId}: "${text}"`);

  // Combine the latest editor context with any context sent alongside the input
  const context = {
    ...currentEditorContext,
    ...additionalContext,
    voiceInput: text
  };

  // Determine intent and route to the appropriate tool
  const intent = await determineIntent(text, context);
  console.log(`Detected intent: ${intent.tool}`);

  // Execute the SCS-MCP tool
  const result = await executeScsTool(intent.tool, intent.arguments);

  // Generate the response
  const response = await generateResponse(result, intent);

  // Send the response to the client
  sendToClient(clientId, {
    type: 'response',
    data: {
      text: response.text,
      code: response.code,
      action: intent.tool,
      context: context
    }
  });

  // Generate and send an audio response; generateSpeech returns null on
  // failure, so only send an audio frame when synthesis actually succeeded
  if (elevenLabsClient) {
    const audio = await generateSpeech(response.text);
    if (audio) {
      sendToClient(clientId, {
        type: 'audio',
        data: audio
      });
    }
  }
}
// Determine intent from voice input
async function determineIntent(text, context) {
  const lowerText = text.toLowerCase();

  // Intent patterns, checked in order; the first match wins
  const intents = [
    {
      patterns: ['review', 'check', 'analyze', 'look at'],
      tool: 'instant_review',
      extractArgs: (text, ctx) => ({
        code: ctx.selectedCode || ctx.currentFunction,
        file_path: ctx.currentFile
      })
    },
    {
      patterns: ['find similar', 'similar code', 'patterns like'],
      tool: 'find_similar',
      extractArgs: (text, ctx) => ({
        code: ctx.selectedCode || ctx.currentFunction,
        limit: 5
      })
    },
    {
      patterns: ['explain', 'what does', 'how does', 'tell me about'],
      tool: 'analyze_symbol',
      extractArgs: (text, ctx) => ({
        symbol_name: ctx.currentSymbol || extractSymbolFromText(text),
        include_usages: true
      })
    },
    {
      patterns: ['test', 'generate test', 'write test'],
      tool: 'test_gap_analyzer',
      extractArgs: (text, ctx) => ({
        code: ctx.selectedCode || ctx.currentFunction,
        file_path: ctx.currentFile
      })
    },
    {
      patterns: ['search for', 'find', 'locate', 'where is'],
      tool: 'search',
      extractArgs: (text, ctx) => ({
        query: extractSearchQuery(text),
        limit: 10
      })
    },
    {
      patterns: ['technical debt', 'code smell', 'refactor'],
      tool: 'debt_orchestrator',
      extractArgs: (text, ctx) => ({
        code: ctx.selectedCode || ctx.currentFunction,
        file_path: ctx.currentFile
      })
    },
    {
      patterns: ['optimize imports', 'clean imports', 'fix imports'],
      tool: 'import_optimizer',
      // Reading the file is asynchronous, so this extractor must be awaited
      extractArgs: async (text, ctx) => ({
        code: ctx.selectedCode || await getFileContent(ctx.currentFile),
        file_path: ctx.currentFile
      })
    },
    {
      patterns: ['model', 'which model', 'capabilities', 'can you'],
      tool: 'get_current_model_status',
      extractArgs: () => ({})
    },
    {
      patterns: ['cost', 'how much', 'price', 'expensive'],
      tool: 'estimate_operation_cost',
      extractArgs: (text, ctx) => ({
        operation: 'code_review',
        input_size: ctx.fileSize || 10000,
        output_size: 2000
      })
    }
  ];

  // Find the first matching intent (extractors may be async, so await them)
  for (const intent of intents) {
    if (intent.patterns.some(pattern => lowerText.includes(pattern))) {
      return {
        tool: intent.tool,
        arguments: await intent.extractArgs(text, context)
      };
    }
  }

  // Default to search if no specific intent matched
  return {
    tool: 'search',
    arguments: {
      query: text,
      limit: 10
    }
  };
}
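// Example: "review this function" matches the 'review' pattern and maps to
// instant_review with { code: <selected code or current function>, file_path:
// <current file> }; an utterance matching no pattern falls through to a plain
// search over the full text. Order matters: 'find similar' is checked before
// the broader 'find' pattern of the search intent.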
// Execute SCS-MCP tool
async function executeScsTool(toolName, args) {
  if (!scsMcpClient) {
    return {
      error: 'SCS-MCP not connected',
      fallback: generateFallbackResponse(toolName, args)
    };
  }
  try {
    console.log(`Executing tool: ${toolName}`, args);
    // The MCP SDK's callTool takes a single { name, arguments } parameter
    const result = await scsMcpClient.callTool({ name: toolName, arguments: args });
    return result;
  } catch (error) {
    console.error(`Tool execution failed: ${error.message}`);
    return {
      error: error.message,
      fallback: generateFallbackResponse(toolName, args)
    };
  }
}
// Generate response from tool result
async function generateResponse(result, intent) {
  if (result.error) {
    return {
      text: `I encountered an error: ${result.error}. ${result.fallback || ''}`,
      code: null
    };
  }

  // Parse the tool result
  const content = result.content?.[0]?.text || JSON.stringify(result);

  // Format the response based on the tool type
  switch (intent.tool) {
    case 'instant_review':
      return formatReviewResponse(content);
    case 'find_similar':
      return formatSimilarCodeResponse(content);
    case 'analyze_symbol':
      return formatSymbolAnalysisResponse(content);
    case 'test_gap_analyzer':
      return formatTestGapResponse(content);
    case 'get_current_model_status':
      return formatModelStatusResponse(content);
    case 'estimate_operation_cost':
      return formatCostEstimateResponse(content);
    default:
      return {
        text: content,
        code: null
      };
  }
}
// Response formatters
function formatReviewResponse(content) {
  const lines = content.split('\n');
  const summary = lines.slice(0, 3).join(' ');
  const details = lines.slice(3).join('\n');
  return {
    text: `Based on my review: ${summary}`,
    code: details
  };
}

function formatSimilarCodeResponse(content) {
  const matches = content.match(/Found (\d+) similar/);
  const count = matches ? matches[1] : 'several';
  return {
    text: `I found ${count} similar code patterns in your codebase. The most relevant matches are shown in the details.`,
    code: content
  };
}

function formatSymbolAnalysisResponse(content) {
  const lines = content.split('\n');
  const summary = lines[0];
  return {
    text: summary,
    code: content
  };
}

function formatTestGapResponse(content) {
  const gaps = content.match(/gaps?: (\d+)/i);
  const count = gaps ? gaps[1] : 'some';
  return {
    text: `I identified ${count} test gaps. Here are the suggested test cases.`,
    code: content
  };
}

function formatModelStatusResponse(content) {
  // Extract key info from the status text
  const model = content.match(/Model: ([\w-]+)/)?.[1] || 'unknown';
  const context = content.match(/Context window: ([\d,]+)/)?.[1] || 'unknown';
  return {
    text: `I'm currently using ${model} with a ${context} token context window. I support vision, functions, and JSON mode.`,
    code: content
  };
}

function formatCostEstimateResponse(content) {
  const cost = content.match(/Total cost: \$([\d.]+)/)?.[1] || 'unknown';
  return {
    text: `This operation would cost approximately $${cost}. I can suggest cheaper alternatives if needed.`,
    code: content
  };
}
// Generate speech using ElevenLabs
async function generateSpeech(text, voice = 'rachel') {
  if (!elevenLabsClient) {
    return null;
  }
  try {
    // As with the SCS client, callTool takes a single { name, arguments } object
    const result = await elevenLabsClient.callTool({
      name: 'text_to_speech',
      arguments: {
        text: text,
        voice_name: voice,
        model_id: 'eleven_turbo_v2',
        output_format: 'mp3_44100_128'
      }
    });
    return result.content?.[0]?.data;
  } catch (error) {
    console.error('Speech generation failed:', error);
    return null;
  }
}
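// Note: the audio payload is passed through exactly as the ElevenLabs MCP
// server returned it; the /api/tts endpoint below assumes it is base64-encoded
// MP3 data (see Buffer.from(audio, 'base64')). If your server returns a
// different shape, adjust the decoding there accordingly.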
// Helper functions
function generateClientId() {
  return Math.random().toString(36).substring(2, 15);
}

function sendToClient(clientId, message) {
  const client = clients.get(clientId);
  if (client && client.ws.readyState === WebSocket.OPEN) {
    client.ws.send(JSON.stringify(message));
  }
}

function broadcastToWebClients(message) {
  clients.forEach((client) => {
    if (client.type === 'web' && client.ws.readyState === WebSocket.OPEN) {
      client.ws.send(JSON.stringify(message));
    }
  });
}

function extractSymbolFromText(text) {
  // Extract a symbol name from phrases like "explain the foo function"
  const match = text.match(/(?:the|this)\s+(\w+)\s+(?:function|method|class|variable)/);
  return match ? match[1] : null;
}

function extractSearchQuery(text) {
  // Extract the search query from phrases like "search for error handling"
  const match = text.match(/(?:search for|find|locate)\s+(.+)/i);
  return match ? match[1] : text;
}
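// Examples (with a hypothetical symbol name):
//   extractSymbolFromText('explain the parseConfig function') → 'parseConfig'
//   extractSearchQuery('search for error handling')           → 'error handling'
// extractSearchQuery falls back to the full utterance when no trigger phrase
// is present, so a plain search still gets a usable query.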
async function getFileContent(filePath) {
  try {
    return await fs.readFile(filePath, 'utf-8');
  } catch (error) {
    return null;
  }
}

function generateFallbackResponse(toolName, args) {
  const fallbacks = {
    instant_review: "I would review your code for best practices, potential bugs, and improvements.",
    find_similar: "I would search for similar code patterns in your codebase.",
    analyze_symbol: "I would analyze the symbol's definition, usage, and relationships.",
    test_gap_analyzer: "I would identify missing test cases and generate test suggestions.",
    search: `I would search for: ${args.query}`,
    get_current_model_status: "I'm an AI assistant with advanced code analysis capabilities.",
    estimate_operation_cost: "This operation would use AI model tokens for processing."
  };
  return fallbacks[toolName] || "I would help you with that request.";
}
// Handle direct commands (from VS Code or web)
async function handleCommand(clientId, command, context) {
  // Treat the command as voice input
  await handleVoiceInput(clientId, command, context);
}

// Handle text-to-speech requests
async function handleTextToSpeech(clientId, text, voice) {
  const audio = await generateSpeech(text, voice);
  if (audio) {
    sendToClient(clientId, {
      type: 'audio',
      data: audio
    });
  }
}
// REST API endpoints
app.get('/health', (req, res) => {
  res.json({
    status: 'healthy',
    scsMcp: scsMcpClient !== null,
    elevenLabs: elevenLabsClient !== null,
    wsClients: clients.size
  });
});

app.post('/api/voice', async (req, res) => {
  const { text, context } = req.body;
  // Reject empty input up front; determineIntent assumes text is a string
  if (!text) {
    return res.status(400).json({ error: 'Missing "text" in request body' });
  }
  // Process as voice input
  const intent = await determineIntent(text, context || {});
  const result = await executeScsTool(intent.tool, intent.arguments);
  const response = await generateResponse(result, intent);
  res.json(response);
});

app.post('/api/tts', async (req, res) => {
  const { text, voice } = req.body;
  const audio = await generateSpeech(text, voice);
  if (audio) {
    res.type('audio/mpeg').send(Buffer.from(audio, 'base64'));
  } else {
    res.status(500).json({ error: 'TTS generation failed' });
  }
});
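// Quick smoke tests for the REST API (assuming the default ports above):
//   curl http://localhost:3000/health
//   curl -X POST http://localhost:3000/api/voice -H 'Content-Type: application/json' \
//        -d '{"text": "search for error handling"}'
//   curl -X POST http://localhost:3000/api/tts -H 'Content-Type: application/json' \
//        -d '{"text": "Hello"}' --output reply.mp3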
// Start server
async function start() {
  // Initialize MCP connections
  await initializeMCP();

  // Start the HTTP server
  app.listen(PORT, () => {
    console.log(`Voice Assistant Server running on http://localhost:${PORT}`);
    console.log(`WebSocket server running on ws://localhost:${WS_PORT}`);
  });
}
// Handle shutdown
process.on('SIGINT', async () => {
  console.log('\nShutting down...');

  // Close all WebSocket connections
  clients.forEach((client) => {
    client.ws.close();
  });

  // Disconnect MCP clients
  if (scsMcpClient) await scsMcpClient.close();
  if (elevenLabsClient) await elevenLabsClient.close();

  process.exit(0);
});
// Start the server
start().catch(console.error);