Skip to main content
Glama
voice-command-parser.tool.ts (12.6 kB)
/**
 * Voice Command Parser Tool for Home Assistant
 *
 * This tool parses natural language transcriptions into structured commands.
 * Uses pattern matching to extract intent, entities, and parameters from
 * voice input. Language selection/detection is delegated to the language
 * service; the command patterns defined in this file are English phrases.
 */

import { z } from "zod";
import { logger } from "../../utils/logger.js";
import { Tool } from "../../types/index.js";
import { MCPContext } from "../../mcp/types.js";
import { BaseTool } from "../base-tool.js";
import { getLanguageService } from "../../speech/languageService.js";

/** A group of regular expressions that all map to the same intent. */
interface CommandPattern {
  /** Intent name reported in the parsed command. */
  intent: string;
  /** Alternative phrasings; every one is tried and the best-scoring match wins. */
  patterns: RegExp[];
  /** Turns the regex capture groups of a successful match into command parameters. */
  extractParams: (input: string, match: RegExpMatchArray) => Record<string, unknown>;
}

/** Structured result produced by `parseCommand`. */
interface ParsedCommand {
  intent: string;
  action: string;
  target?: string;
  entities: string[];
  parameters: Record<string, unknown>;
  confidence: number;
  original_text: string;
}

/** Trims and lower-cases a captured entity name; a missing capture becomes "". */
function normalizeEntityName(raw: string | undefined): string {
  return raw?.trim().toLowerCase() ?? "";
}

/** RGB triples for the colour names recognised by the `set_color` patterns. */
const colorMap: Record<string, [number, number, number]> = {
  red: [255, 0, 0],
  blue: [0, 0, 255],
  green: [0, 255, 0],
  yellow: [255, 255, 0],
  white: [255, 255, 255],
  orange: [255, 165, 0],
  purple: [128, 0, 128],
  pink: [255, 192, 203],
};

// Pattern-based command parser.
//
// NOTE: a lazy capture `(.+?)` that is followed only by optional groups stops
// after a single character unless the pattern is anchored. Every pattern of
// that shape therefore ends in `$` so the capture extends to the end of the
// utterance (minus any optional suffix the pattern strips).
const commandPatterns: CommandPattern[] = [
  {
    intent: "turn_on",
    patterns: [
      /turn\s+(?:on|up)\s+(?:the\s+)?(.+?)(?:\s+(?:to|at|for|on|in))?$/i,
      /(?:turn|switch)\s+(.+?)\s+on(?:\s+(?:to|at))?/i,
      /(?:please\s+)?(?:could you\s+)?turn\s+on\s+(?:the\s+)?(.+)/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]),
    }),
  },
  {
    intent: "turn_off",
    patterns: [
      /turn\s+(?:off|down)\s+(?:the\s+)?(.+)/i,
      /(?:turn|switch)\s+(.+?)\s+off/i,
      /(?:please\s+)?(?:could you\s+)?turn\s+off\s+(?:the\s+)?(.+)/i,
      /disable\s+(?:the\s+)?(.+)/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]),
    }),
  },
  {
    intent: "set_temperature",
    patterns: [
      /(?:set|change|adjust)\s+(?:the\s+)?temperature\s+(?:to|at)\s+(\d+)\s*(?:degrees|°)?(?:\s+(?:in|for|at)\s+(.+))?/i,
      /(?:make it|let it|keep it)\s+(\d+)\s*(?:degrees)?(?:\s+(?:in|for)\s+(.+))?/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      temperature: parseInt(match[1] ?? "", 10),
      // When no location is spoken the command falls back to "bedroom".
      entity_name: normalizeEntityName(match[2]) || "bedroom",
    }),
  },
  {
    intent: "set_brightness",
    patterns: [
      // Groups: (1) percentage, (2) optional location.
      /(?:set|adjust|dim)\s+(?:the\s+)?brightness\s+(?:to|at)\s+(\d+)%?(?:\s+(?:in|for|at)\s+(.+))?/i,
      // Groups: (1) entity, (2) percentage - the opposite order of the pattern above.
      /set\s+(.+?)\s+(?:to|brightness)\s+(\d+)%?/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => {
      const first = match[1] ?? "";
      const second = match[2] ?? "";
      // The two patterns order their groups differently, so decide which
      // group carries the number instead of assuming it is always group 1.
      const valueIsFirst = /^\d+$/.test(first);
      const percent = parseInt(valueIsFirst ? first : second, 10);
      // Keep the spoken percentage within 0-100 so the result stays in 0-255.
      const clampedPercent = Math.min(100, Math.max(0, percent));
      return {
        brightness: Math.round((clampedPercent / 100) * 255),
        entity_name: normalizeEntityName(valueIsFirst ? second : first),
      };
    },
  },
  {
    intent: "set_color",
    patterns: [
      /(?:set|change|make)\s+(?:the\s+)?(.+?)\s+(?:to\s+)?(?:the\s+)?(red|blue|green|yellow|white|orange|purple|pink)(?:\s+(?:in|for|at)\s+(.+))?/i,
      /(?:turn|make)\s+(.+?)\s+(red|blue|green|yellow|white|orange|purple|pink)/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray): Record<string, unknown> => {
      const color = match[2]?.toLowerCase() ?? "white";
      return {
        entity_name: normalizeEntityName(match[1]),
        rgb_color: colorMap[color],
        color_name: color,
      };
    },
  },
  {
    intent: "open_cover",
    patterns: [
      /(?:open|raise|lift)\s+(?:the\s+)?(.+?)(?:\s+(?:blind|blinds|curtain|curtains|shade|shades))?(?:\s+(?:in|for|at)\s+(.+))?$/i,
      /(?:open)\s+(?:up\s+)?(?:the\s+)?(.+)/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]),
    }),
  },
  {
    intent: "close_cover",
    patterns: [
      /(?:close|lower|shut)\s+(?:the\s+)?(.+?)(?:\s+(?:blind|blinds|curtain|curtains|shade|shades))?(?:\s+(?:in|for|at)\s+(.+))?$/i,
      /(?:close)\s+(?:down\s+)?(?:the\s+)?(.+)/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]),
    }),
  },
  {
    intent: "lock_door",
    patterns: [
      // `\b` keeps this pattern from matching the "lock" inside "unlock".
      /\b(?:lock|secure)\s+(?:the\s+)?(.+?)(?:\s+(?:door|doors))?(?:\s+(?:in|for))?$/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]),
    }),
  },
  {
    intent: "unlock_door",
    patterns: [
      /(?:unlock|open)\s+(?:the\s+)?(.+?)(?:\s+(?:door|doors))?(?:\s+(?:in|for))?$/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]),
    }),
  },
  {
    intent: "start_vacuum",
    patterns: [
      /(?:start|begin|run|vacuum|clean)\s+(?:the\s+)?(.+?)(?:\s+(?:vacuum|cleaner|robot))?$/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      entity_name: normalizeEntityName(match[1]) || "vacuum",
    }),
  },
  {
    intent: "send_notification",
    patterns: [
      /(?:notify|send|alert)\s+(?:me|about|with)\s+(.+)/i,
      /(?:tell me|alert me|notify me)\s+(.+)/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      message: match[1]?.trim() || "",
    }),
  },
  {
    intent: "play_media",
    patterns: [
      /(?:play|start|put on)\s+(?:the\s+)?(.+?)(?:\s+(?:music|song|album|playlist|audio))?(?:\s+(?:on|in)\s+(.+))?$/i,
    ],
    extractParams: (_input: string, match: RegExpMatchArray) => ({
      media: match[1]?.trim() || "",
      entity_name: normalizeEntityName(match[2]),
    }),
  },
];

// Define the schema for our tool parameters using Zod
const voiceCommandParserSchema = z.object({
  transcription: z
    .string()
    .min(1)
    .describe("The voice transcription to parse into a command"),
  language: z
    .string()
    .optional()
    .describe("Language code (e.g., 'en', 'de', 'es', 'fr'). Auto-detected if not provided."),
  context: z
    .object({
      room: z.string().optional().describe("The current room context"),
      last_command: z.string().optional().describe("The last executed command"),
      available_entities: z
        .array(z.string())
        .optional()
        .describe("List of available Home Assistant entities"),
    })
    .optional()
    .describe("Optional context for better command parsing"),
});

type VoiceCommandParserParams = z.infer<typeof voiceCommandParserSchema>;

/**
 * Parse a voice transcription into a structured command.
 *
 * Sets the language on the shared language service (explicit code, or
 * auto-detected when the service is configured for it), runs the pattern
 * parser, and serialises the outcome.
 *
 * @param params - Tool parameters; `context` is accepted by the schema but is
 *   not consulted by the parser.
 * @returns A JSON string with `success`, `message`, `original_text`, `parsed`
 *   and, outside the error path, `language`. `success` is `false` when the
 *   best match scores below 0.3 or when parsing throws.
 */
async function executeVoiceCommandParserLogic(
  params: VoiceCommandParserParams,
): Promise<string> {
  const { transcription, language: explicitLanguage } = params;

  logger.debug(`Parsing voice transcription: "${transcription}"`, { language: explicitLanguage });

  try {
    const langService = getLanguageService();

    // Set language if provided
    if (explicitLanguage !== undefined) {
      langService.setLanguage(langService.normalizeLanguageCode(explicitLanguage));
    } else if (langService.config.detectAutomatic === true) {
      // Auto-detect language
      const detectedLang = langService.detectLanguage(transcription);
      langService.setLanguage(detectedLang);
    }

    const parsedCommand = parseCommand(transcription);

    if (parsedCommand.confidence < 0.3) {
      logger.warn(`Low confidence parsing for: "${transcription}"`, {
        language: langService.getLanguage(),
      });
      return JSON.stringify({
        success: false,
        message: "Could not understand the command. Please try again.",
        original_text: transcription,
        language: langService.getLanguage(),
        parsed: null,
      });
    }

    logger.info(
      `Parsed command: ${parsedCommand.intent} with confidence ${parsedCommand.confidence}`,
      { language: langService.getLanguage() },
    );

    return JSON.stringify({
      success: true,
      message: `Understood command: ${parsedCommand.intent}`,
      original_text: transcription,
      language: langService.getLanguage(),
      parsed: parsedCommand,
    });
  } catch (error) {
    logger.error("Error parsing voice command:", error);
    return JSON.stringify({
      success: false,
      message: error instanceof Error ? error.message : "Error parsing command",
      original_text: transcription,
      parsed: null,
    });
  }
}

/**
 * Parse a command from transcription.
 *
 * Every regex of every intent is tried; the match covering the largest share
 * of the utterance wins, and on a tie the pattern listed first is kept.
 *
 * @param transcription - Raw transcription text.
 * @param _langService - Unused; kept so existing call sites stay valid.
 * @returns The parsed command, or an `unknown` intent with confidence 0 when
 *   no pattern matches.
 */
function parseCommand(
  transcription: string,
  _langService?: ReturnType<typeof getLanguageService>,
): ParsedCommand {
  const trimmed = transcription.trim();
  // Collapse whitespace runs (including newlines) so the `$`-anchored patterns
  // are not defeated by line breaks inside the transcription.
  const normalized = trimmed.replace(/\s+/g, " ");

  let bestMatch: {
    pattern: CommandPattern;
    match: RegExpMatchArray;
    confidence: number;
  } | null = null;
  let highestConfidence = 0;

  // Try each pattern
  for (const pattern of commandPatterns) {
    for (const regex of pattern.patterns) {
      const match = normalized.match(regex);
      if (!match) {
        continue;
      }

      // Confidence grows with the share of the utterance the match covers:
      // 0.1 base plus up to 0.9 for a full-length match.
      const confidence = Math.min(1.0, (match[0].length / normalized.length) * 0.9 + 0.1);

      // Strict comparison: the earliest pattern wins a tie.
      if (confidence > highestConfidence) {
        highestConfidence = confidence;
        bestMatch = { pattern, match, confidence };
      }
    }
  }

  if (!bestMatch) {
    // No pattern matched - return a generic "unknown" command
    return {
      intent: "unknown",
      action: "help",
      entities: [],
      parameters: { raw_text: trimmed },
      confidence: 0,
      original_text: trimmed,
    };
  }

  const { pattern, match, confidence } = bestMatch;
  const params = pattern.extractParams(normalized, match);

  // Extract entities mentioned in the command
  const entities = extractEntities(normalized, params);
  const entityName = params.entity_name;

  return {
    intent: pattern.intent,
    action: pattern.intent,
    target: typeof entityName === "string" && entityName !== "" ? entityName : undefined,
    entities,
    parameters: params,
    confidence,
    original_text: trimmed,
  };
}

/** Room and device words that are reported as entities when spoken. */
const commonEntities: readonly string[] = [
  "bedroom",
  "living room",
  "kitchen",
  "bathroom",
  "hallway",
  "garage",
  "garden",
  "patio",
  "office",
  "front door",
  "back door",
  "garage door",
  "light",
  "lights",
  "lamp",
  "fan",
  "ac",
  "heater",
  "thermostat",
  "lock",
  "vacuum",
  "robot",
  "blinds",
  "curtains",
  "shades",
  "speaker",
  "tv",
];

// Whole-word matchers, built once. The names above contain only letters and
// spaces, so they can be embedded in a RegExp without escaping.
const commonEntityMatchers = commonEntities.map((name) => ({
  name,
  regex: new RegExp(`\\b${name}\\b`),
}));

/**
 * Extract entity mentions from command.
 *
 * @param text - The utterance to scan.
 * @param params - Parameters produced by the matched pattern; a non-empty
 *   string `entity_name` is listed first.
 * @returns Unique entity names: the main target (if any) followed by every
 *   common entity that appears in `text` as a whole word.
 */
function extractEntities(text: string, params: Record<string, unknown>): string[] {
  const entities = new Set<string>();

  // Add the main target entity if present (an empty name is not an entity)
  const entityName = params.entity_name;
  if (typeof entityName === "string" && entityName !== "") {
    entities.add(entityName);
  }

  // Whole-word matching avoids false hits such as "ac" inside "vacuum" or
  // "lock" inside "unlock".
  const lowerText = text.toLowerCase();
  for (const { name, regex } of commonEntityMatchers) {
    if (regex.test(lowerText)) {
      entities.add(name);
    }
  }

  return Array.from(entities);
}

// Export the tool object
export const voiceCommandParserTool: Tool = {
  name: "voice_command_parser",
  description:
    "Parse natural language voice transcriptions into structured Home Assistant commands. Extracts intent, entities, and parameters from voice input.",
  parameters: voiceCommandParserSchema,
  execute: (params: unknown): Promise<unknown> => {
    return executeVoiceCommandParserLogic(params as VoiceCommandParserParams);
  },
};

/**
 * VoiceCommandParserTool class extending BaseTool (for compatibility with src/index.ts)
 */
export class VoiceCommandParserTool extends BaseTool {
  constructor() {
    super({
      name: voiceCommandParserTool.name,
      description: voiceCommandParserTool.description,
      parameters: voiceCommandParserSchema,
      metadata: {
        category: "speech",
        version: "1.0.0",
        tags: ["voice", "speech", "nlp", "command_parsing"],
      },
    });
  }

  /**
   * Execute method for the BaseTool class.
   *
   * Validates `params` via `validateParams`, then delegates to the shared
   * parsing logic. Errors raised here are logged and rethrown.
   */
  public async execute(params: VoiceCommandParserParams, _context: MCPContext): Promise<string> {
    logger.debug(`Executing VoiceCommandParserTool with params: ${JSON.stringify(params)}`);

    try {
      const validatedParams = this.validateParams(params) as VoiceCommandParserParams;
      return await executeVoiceCommandParserLogic(validatedParams);
    } catch (error) {
      logger.error(`Error in VoiceCommandParserTool: ${String(error)}`);
      throw error;
    }
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jango-blockchained/advanced-homeassistant-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.