// @lifecycle canonical - Parses the unified operator command format.
/**
* Unified Command Parser
*
* Robust multi-strategy command parsing system that replaces fragile regex-based parsing
* with intelligent format detection, fallback strategies, and comprehensive validation.
*
* Features:
* - Multi-format detection (simple >>prompt, JSON objects, structured commands)
* - Fallback parsing strategies with confidence scoring
* - Comprehensive error messages with suggestions
* - Command validation and sanitization
*/
import {
normalizeSymbolicPrefixes,
stripStyleOperators,
findFrameworkOperatorOutsideQuotes,
stripFrameworkOperatorOutsideQuotes,
} from './parser-utils.js';
import { SymbolicCommandParser, createSymbolicCommandParser } from './symbolic-operator-parser.js';
import { Logger } from '../../../infra/logging/index.js';
import { PromptError, ValidationError, safeJsonParse } from '../../../shared/utils/index.js';
import type { ConvertedPrompt, ExecutionModifier, ExecutionModifiers } from '../types.js';
import type { CommandParseResultBase } from './types/command-parse-types.js';
import type {
OperatorDetectionResult,
SymbolicCommandParseResult,
SymbolicExecutionPlan,
} from './types/operator-types.js';
export type CommandParseResult = CommandParseResultBase<
OperatorDetectionResult,
SymbolicExecutionPlan
>;
/**
* Maps user-facing modifier names to canonical ExecutionModifier values.
*/
const VALID_MODIFIERS: Record<string, ExecutionModifier> = {
clean: 'clean',
judge: 'judge',
lean: 'lean',
framework: 'framework',
};
/**
* Parsing strategy interface
*/
interface ParsingStrategy {
name: string;
canHandle: (command: string) => boolean;
parse: (command: string) => CommandParseResult | null;
confidence: number;
}
/**
* Unified Command Parser Class
*/
export class UnifiedCommandParser {
private logger: Logger;
private strategies: ParsingStrategy[];
private symbolicParser: SymbolicCommandParser;
private registeredFrameworkIds: Set<string>;
// Parsing statistics for monitoring
// TODO: Wire stats to MetricsCollector for telemetry dashboard
// These are tracked but not yet exposed via system_control analytics
private stats = {
totalParses: 0,
successfulParses: 0,
failedParses: 0,
strategyUsage: new Map<string, number>(),
averageConfidence: 0,
};
/**
* @param logger - Logger instance
* @param registeredFrameworkIds - Optional set of registered framework IDs (uppercase).
* When provided, only @framework operators matching registered IDs are detected.
* Unregistered @word patterns are silently skipped (treated as literal text).
*/
constructor(logger: Logger, registeredFrameworkIds?: Set<string>) {
this.logger = logger;
this.registeredFrameworkIds = registeredFrameworkIds ?? new Set();
this.symbolicParser = createSymbolicCommandParser(logger, this.registeredFrameworkIds);
this.strategies = this.initializeStrategies();
this.logger.debug(
`UnifiedCommandParser initialized with ${this.strategies.length} parsing strategies`
);
}
/**
* Update the set of registered framework IDs.
* This allows late binding when FrameworkManager becomes available after construction.
* @param frameworkIds Set of framework IDs (will be normalized to uppercase)
*/
updateRegisteredFrameworkIds(frameworkIds: Set<string>): void {
// Normalize to uppercase for consistent matching
this.registeredFrameworkIds = new Set(Array.from(frameworkIds).map((id) => id.toUpperCase()));
// Recreate symbolic parser with updated framework IDs
this.symbolicParser = createSymbolicCommandParser(this.logger, this.registeredFrameworkIds);
this.logger.debug(
`[UnifiedCommandParser] Updated registered framework IDs: ${Array.from(this.registeredFrameworkIds).join(', ')}`
);
}
/**
* Extracts a single execution modifier prefix (e.g., %clean) from the command.
* Returns the command with the modifier stripped and the normalized modifier value.
*/
private extractModifier(command: string): { command: string; modifier?: ExecutionModifier } {
const trimmed = command.trimStart();
const match = trimmed.match(/^%\s*([a-zA-Z_-]+)/);
if (!match) {
return { command };
}
const modifierKey = match[1]?.toLowerCase();
if (!modifierKey) {
return { command };
}
const modifier = VALID_MODIFIERS[modifierKey];
if (!modifier) {
throw new ValidationError(
`Unknown execution modifier "%${modifierKey}". Supported modifiers: %clean, %judge, %lean, %framework.`
);
}
const remainder = trimmed.slice(match[0].length).trimStart();
if (/^%\s*[a-zA-Z_-]+/.test(remainder)) {
throw new ValidationError(
'Only one %modifier is allowed per command. Remove additional modifiers.'
);
}
return { command: remainder, modifier };
}
private buildModifiers(modifier?: ExecutionModifier): ExecutionModifiers | undefined {
if (!modifier) {
return undefined;
}
return {
clean: modifier === 'clean',
judge: modifier === 'judge',
lean: modifier === 'lean',
framework: modifier === 'framework',
};
}
/**
* Parse command string using multi-strategy approach
*/
async parseCommand(
command: string,
availablePrompts: ConvertedPrompt[]
): Promise<CommandParseResult> {
this.stats.totalParses++;
if (!command || command.trim().length === 0) {
this.stats.failedParses++;
throw new ValidationError('Command cannot be empty');
}
const trimmed = command.trim();
const { command: commandWithoutModifier, modifier: modifierToken } =
this.extractModifier(trimmed);
if (!commandWithoutModifier || commandWithoutModifier.trim().length === 0) {
throw new ValidationError('Command cannot be empty after applying modifier');
}
// Normalize symbolic prefixes centrally (ONE primary implementation)
const { normalized, hadPrefixes } = normalizeSymbolicPrefixes(commandWithoutModifier);
// Expand repetition operator before strategy selection (e.g., ">>p *3" → ">>p --> >>p --> >>p")
const preprocessed = this.symbolicParser.preprocessRepetition(normalized);
const hadRepetition = preprocessed !== normalized;
this.logger.debug(
`Parsing command: "${preprocessed.substring(0, 100)}..."${hadPrefixes ? ' (prefixes normalized)' : ''}${hadRepetition ? ' (repetition expanded)' : ''}`
);
// Try each strategy in order of confidence (now operating on preprocessed command)
const sortedStrategies = [...this.strategies].sort((a, b) => b.confidence - a.confidence);
for (const strategy of sortedStrategies) {
if (strategy.canHandle(preprocessed)) {
try {
const result = strategy.parse(preprocessed);
if (result) {
// Preserve original command in metadata for debugging/error messages
if (!result.metadata) {
result.metadata = {
originalCommand: '',
parseStrategy: '',
detectedFormat: '',
warnings: [],
};
}
result.metadata.originalCommand = trimmed;
if (hadPrefixes) {
result.metadata.prefixesNormalized = true;
}
if (hadRepetition) {
result.metadata.repetitionExpanded = true;
}
if (modifierToken) {
const hasExistingModifier =
result.modifiers?.clean === true ||
result.modifiers?.judge === true ||
result.modifiers?.lean === true ||
result.modifiers?.framework === true;
if (hasExistingModifier) {
throw new ValidationError(
'Only one %modifier is allowed per command. Remove the additional modifier.'
);
}
const modifiersValue = this.buildModifiers(modifierToken);
if (modifiersValue) {
result.modifiers = modifiersValue;
}
result.metadata.modifierToken = modifierToken;
}
// Validate that the prompt ID exists
await this.validatePromptExists(result.promptId, availablePrompts);
// Update statistics
this.stats.successfulParses++;
this.updateStrategyStats(strategy.name);
this.updateConfidenceStats(result.confidence);
this.logger.debug(
`Command parsed successfully using strategy: ${strategy.name} (confidence: ${result.confidence})`
);
return this.applyCommandType(result, normalized);
}
} catch (error) {
this.logger.debug(
`Strategy ${strategy.name} failed: ${error instanceof Error ? error.message : 'Unknown error'}`
);
continue;
}
}
}
// If no strategy succeeded, provide helpful error message
this.stats.failedParses++;
const errorMessage = this.generateHelpfulError(normalized, availablePrompts);
throw new ValidationError(errorMessage);
}
private applyCommandType(
result: CommandParseResult,
originalCommand: string
): CommandParseResult {
if (result.commandType) {
return result;
}
const hasChainOperator =
result.operators?.operatorTypes?.includes('chain') || /-->/i.test(originalCommand);
result.commandType = hasChainOperator ? 'chain' : 'single';
return result;
}
/**
* Initialize parsing strategies (STREAMLINED: 2 core strategies)
*/
private initializeStrategies(): ParsingStrategy[] {
return [
this.createSymbolicCommandStrategy(),
this.createSimpleCommandStrategy(),
this.createJsonCommandStrategy(),
];
}
private createSymbolicCommandStrategy(): ParsingStrategy {
return {
name: 'symbolic',
confidence: 0.97,
canHandle: (command: string) => {
// Match symbolic operators:
// - --> (chain)
// - :: or = followed by any value (quoted or unquoted)
// - @FRAMEWORK at start OR after whitespace (ONLY outside quoted strings)
// - + (parallel)
// - ? "condition" : >>branch (conditional - must match full pattern, not bare ?)
// - #id style selector (e.g., #analytical, #procedural)
// - * N repetition (e.g., >>prompt *3)
// Note: Bare ? in natural language (e.g., "is there a bug?") should NOT trigger symbolic
const hasConditionalOperator =
/\s*\?\s*["'](.+?)["']\s*:\s*(?:>>)?\s*([A-Za-z0-9_-]+)/.test(command);
const hasChainGateOrOther =
/-->|(::|=)\s*\S|\+|\s+\*\s*\d+/.test(command) || hasConditionalOperator;
const hasStyleOperator = /(?:^|\s)#[A-Za-z][A-Za-z0-9_-]*(?=\s|$|>>)/.test(command);
// Use quote-aware detection for framework operators to avoid matching @word inside quotes
const hasFrameworkOperator = findFrameworkOperatorOutsideQuotes(command) !== null;
return hasChainGateOrOther || hasStyleOperator || hasFrameworkOperator;
},
parse: (command: string): SymbolicCommandParseResult | null => {
// Command arrives already preprocessed (repetition expanded in parseCommand)
const operators = this.symbolicParser.detectOperators(command);
if (!operators.hasOperators) {
return null;
}
// Strip framework operator from command (quote-aware to preserve @word in quoted args)
let cleanCommand = stripFrameworkOperatorOutsideQuotes(command);
// Strip ALL gate operators - handles both named and anonymous formats:
// - Named colon: :: id:"criteria" or :: id:'criteria'
// - Named paren: :: id(criteria)
// - Anonymous quoted: :: "criteria" or :: 'criteria'
// - Anonymous unquoted: :: criteria (canonical refs or plain text)
cleanCommand = cleanCommand.replace(
/\s+(::|=)\s*(?:[a-z][a-z0-9_-]*:["'][^"']+["']|[a-z][a-z0-9_-]*\([^)]+\)|["'][^"']+["']|[^\s"']+)/gi,
''
);
// Strip style selector to avoid polluting base args
cleanCommand = stripStyleOperators(cleanCommand);
const baseSegment = cleanCommand.split(/-->|\+|\?/)[0]?.trim() ?? '';
const firstPromptMatch = baseSegment.match(/^(?:>>)?([A-Za-z0-9_-]+)(?:\s+([\s\S]*))?$/);
if (!firstPromptMatch) {
return null;
}
const basePromptId = firstPromptMatch[1];
if (!basePromptId) {
return null;
}
const baseArgs = (firstPromptMatch[2] ?? '').trim();
return this.symbolicParser.buildParseResult(command, operators, basePromptId, baseArgs);
},
};
}
/**
* Simple command strategy: >>prompt_name arguments (ENHANCED: More AI-friendly)
*/
private createSimpleCommandStrategy(): ParsingStrategy {
return {
name: 'simple',
confidence: 0.95, // Increased confidence for primary strategy
canHandle: (command: string) => {
const trimmed = command.trim();
// Skip if JSON-like (handled by JSON strategy)
if (trimmed.startsWith('{') || trimmed.startsWith('"')) return false;
// Skip if has symbolic operators (handled by symbolic strategy)
// Note: Gate operators (:: or =) must be preceded by whitespace to avoid
// matching argument assignment syntax like input="value"
// Note: Bare ? is allowed (natural language questions); only full conditional pattern triggers symbolic
// Use quote-aware detection for framework operators
const hasConditionalOp = /\s*\?\s*["'](.+?)["']\s*:\s*(?:>>)?\s*([A-Za-z0-9_-]+)/.test(
trimmed
);
const hasOtherOperators =
/-->|\s(::|=)\s*\S|\+|\s+\*\s*\d+/.test(trimmed) || hasConditionalOp;
const hasFrameworkOp = findFrameworkOperatorOutsideQuotes(trimmed) !== null;
if (hasOtherOperators || hasFrameworkOp) return false;
// Accept with or without >> or / prefix (bare prompt names now supported)
return /^(?:>>|\/)?[a-zA-Z][a-zA-Z0-9_-]*(?:\s|$)/.test(trimmed);
},
parse: (command: string): CommandParseResult | null => {
// Enhanced regex to handle more natural command formats
// Prefix (>> or /) is now optional to support bare prompt names
// Prompt name: starts with letter, contains letters/numbers/underscores/hyphens (no spaces)
const match = command.trim().match(/^(?:>>|\/)?([a-zA-Z][a-zA-Z0-9_-]*)(?:\s+([\s\S]*))?$/);
if (!match) return null;
const [, rawPromptId, rawArgs] = match;
if (!rawPromptId) {
return null;
}
// Clean up prompt ID: convert spaces and hyphens to underscores, normalize
const promptId = rawPromptId
.trim()
.toLowerCase()
.replace(/[\s-]+/g, '_') // Spaces and hyphens to underscores
.replace(/[^a-z0-9_]/g, '') // Remove invalid characters (except underscores)
.replace(/_+/g, '_') // Collapse multiple underscores
.replace(/^_|_$/g, ''); // Trim leading/trailing underscores
const warnings: string[] = [];
if (rawPromptId.trim() !== promptId) {
warnings.push(`Normalized prompt name: "${rawPromptId.trim()}" → "${promptId}"`);
}
// Detect if original had prefix for metadata
const hadPrefix = /^(>>|\/)/.test(command.trim());
const detectedFormat = hadPrefix ? 'prefixed prompt format' : 'bare prompt name';
return {
promptId: promptId,
rawArgs: (rawArgs || '').trim(),
format: 'simple',
confidence: 0.98, // High confidence for enhanced parsing
metadata: {
originalCommand: command,
parseStrategy: 'simple_enhanced',
detectedFormat,
warnings,
},
};
},
};
}
/**
* JSON command strategy: {"command": ">>prompt", "args": {...}}
*/
private createJsonCommandStrategy(): ParsingStrategy {
return {
name: 'json',
confidence: 0.85,
canHandle: (command: string) => {
const trimmed = command.trim();
return trimmed.startsWith('{') && trimmed.endsWith('}');
},
parse: (command: string): CommandParseResult | null => {
const parseResult = safeJsonParse(command);
if (!parseResult.success || !parseResult.data) {
return null;
}
let data = parseResult.data;
// Handle different JSON formats
let actualCommand = '';
let confidence = 0.8;
if (data.command) {
actualCommand = data.command;
confidence = 0.9;
} else if (data.prompt) {
actualCommand = data.prompt;
confidence = 0.85;
} else {
return null;
}
// Handle double-encoded JSON (command field is itself a JSON string)
// This can happen when clients double-escape JSON payloads
if (typeof actualCommand === 'string' && actualCommand.trim().startsWith('{')) {
const innerParse = safeJsonParse(actualCommand);
if (
innerParse.success === true &&
innerParse.data !== null &&
typeof innerParse.data === 'object'
) {
// Extract command from the inner JSON object with proper type guards
const innerData = innerParse.data as Record<string, unknown>;
const innerCommand = innerData['command'];
const innerPrompt = innerData['prompt'];
const innerArgs = innerData['args'];
if (typeof innerCommand === 'string') {
actualCommand = innerCommand;
// Merge args if present in inner object and not in outer
// Use bracket notation with type assertion for data.args access
const outerData = data as Record<string, unknown>;
if (innerArgs !== undefined && outerData['args'] === undefined) {
data = { ...outerData, args: innerArgs };
}
} else if (typeof innerPrompt === 'string') {
actualCommand = innerPrompt;
}
}
}
const { command: innerCommand, modifier: modifierToken } = this.extractModifier(
String(actualCommand)
);
// Recursively parse the inner command - try both strategies
// The symbolic strategy handles commands with operators (-->, ::, @, etc.)
// The simple strategy handles plain prompt names
const symbolicStrategy = this.createSymbolicCommandStrategy();
const simpleStrategy = this.createSimpleCommandStrategy();
let innerResult: CommandParseResult | null = null;
// Try symbolic strategy first (for chains and operators)
if (symbolicStrategy.canHandle(innerCommand)) {
innerResult = symbolicStrategy.parse(innerCommand);
}
// Fall back to simple strategy (for plain prompt names)
if (!innerResult && simpleStrategy.canHandle(innerCommand)) {
innerResult = simpleStrategy.parse(innerCommand);
}
if (!innerResult) return null;
if (modifierToken) {
const modifiersValue = this.buildModifiers(modifierToken);
if (modifiersValue) {
innerResult.modifiers = modifiersValue;
}
if (innerResult.metadata) {
innerResult.metadata.modifierToken = modifierToken;
innerResult.metadata.originalCommand =
typeof data.command === 'string' ? data.command : command;
}
}
const mods = innerResult.modifiers;
return {
promptId: innerResult.promptId,
rawArgs: data.args ? JSON.stringify(data.args) : innerResult.rawArgs,
format: 'json',
confidence,
...(mods !== undefined && { modifiers: mods }),
metadata: {
originalCommand: command,
parseStrategy: 'json',
detectedFormat: 'JSON wrapper with inner command',
warnings: innerResult.metadata?.warnings ?? [],
...(innerResult.metadata?.modifierToken !== undefined && {
modifierToken: innerResult.metadata.modifierToken,
}),
},
};
},
};
}
/**
* Validate that the prompt ID exists in available prompts
*/
private async validatePromptExists(
promptId: string,
availablePrompts: ConvertedPrompt[]
): Promise<void> {
// Check if this is a built-in command that should be routed (handled by prompt engine)
if (this.isBuiltinCommand(promptId)) {
return; // Built-in commands are valid and will be routed by the prompt engine
}
// Use case-insensitive matching to find the prompt
const found = availablePrompts.find(
(p) =>
p.id.toLowerCase() === promptId.toLowerCase() ||
p.name?.toLowerCase() === promptId.toLowerCase()
);
if (!found) {
const suggestions = this.generatePromptSuggestions(promptId, availablePrompts);
const msg =
suggestions !== ''
? `Unknown prompt "${promptId}". ${suggestions}`
: `Unknown prompt "${promptId}"`;
throw new PromptError(msg);
}
}
/**
* Check if command is a built-in system command
*/
private isBuiltinCommand(promptId: string): boolean {
const builtinCommands = [
'listprompts',
'listprompt',
'list_prompts',
'help',
'commands',
'status',
'health',
'analytics',
'metrics',
'gates',
'gate',
];
return builtinCommands.includes(promptId.toLowerCase());
}
/**
* Generate helpful prompt suggestions using multi-factor scoring
* Considers: prefix matches, word overlap, and Levenshtein distance
*/
private generatePromptSuggestions(promptId: string, availablePrompts: ConvertedPrompt[]): string {
const query = promptId.toLowerCase();
const scored = availablePrompts
.map((prompt) => {
const id = prompt.id.toLowerCase();
let score = 0;
// Exact prefix match (highest value - user typing partial name)
if (id.startsWith(query) || query.startsWith(id)) {
score += 100;
}
// Word overlap (medium value - related prompts)
const queryWords = query.split(/[_-]/);
const idWords = id.split(/[_-]/);
const wordOverlap = queryWords.filter((w) =>
idWords.some((iw) => iw.includes(w) || w.includes(iw))
).length;
score += wordOverlap * 30;
// Levenshtein distance (inverse - lower distance = higher score)
const distance = this.levenshteinDistance(query, id);
// Dynamic threshold based on query length (longer queries allow more edits)
const threshold = Math.max(3, Math.floor(query.length / 2));
if (distance <= threshold) {
score += Math.max(0, 50 - distance * 10);
}
return { prompt, score };
})
.filter((item) => item.score > 0)
.sort((a, b) => b.score - a.score)
.slice(0, 3);
if (scored.length > 0) {
return `Did you mean: ${scored.map((s) => s.prompt.id).join(', ')}?`;
}
return '';
}
/**
* Simple Levenshtein distance calculation
*/
private levenshteinDistance(a: string, b: string): number {
const matrix: number[][] = Array(b.length + 1)
.fill(null)
.map(() => Array<number>(a.length + 1).fill(0));
for (let i = 0; i <= a.length; i++) {
const row = matrix[0];
if (row) row[i] = i;
}
for (let j = 0; j <= b.length; j++) {
const row = matrix[j];
if (row) row[0] = j;
}
for (let j = 1; j <= b.length; j++) {
for (let i = 1; i <= a.length; i++) {
const aChar = a[i - 1];
const bChar = b[j - 1];
const indicator = aChar === bChar ? 0 : 1;
const currentRow = matrix[j];
const prevRow = matrix[j - 1];
if (currentRow && prevRow) {
const left = currentRow[i - 1] ?? 0;
const up = prevRow[i] ?? 0;
const diag = prevRow[i - 1] ?? 0;
currentRow[i] = Math.min(left + 1, up + 1, diag + indicator);
}
}
}
const finalRow = matrix[b.length];
return finalRow?.[a.length] ?? Math.max(a.length, b.length);
}
/**
* Generate concise error message for parsing failures
*/
private generateHelpfulError(command: string, availablePrompts: ConvertedPrompt[]): string {
// Extract prompt name for suggestions
const promptMatch = command.match(/^(?:>>|\/)?([a-zA-Z][a-zA-Z0-9_-]*)/);
const promptName = promptMatch?.[1];
if (promptName) {
const suggestions = this.generatePromptSuggestions(promptName, availablePrompts);
return suggestions !== ''
? `Unknown prompt "${promptName}". ${suggestions}`
: `Unknown prompt "${promptName}"`;
}
// Specific parse errors
if (command.startsWith('{')) {
return `Invalid JSON: "${command.slice(0, 50)}${command.length > 50 ? '...' : ''}"`;
}
if (command.startsWith('>') && !command.startsWith('>>')) {
return `Single ">" detected (XML encoding issue). Use: ${command.slice(1).split(' ')[0]}`;
}
return `Parse error: "${command.slice(0, 50)}${command.length > 50 ? '...' : ''}"`;
}
/**
* Update strategy usage statistics
*/
private updateStrategyStats(strategyName: string): void {
const current = this.stats.strategyUsage.get(strategyName) || 0;
this.stats.strategyUsage.set(strategyName, current + 1);
}
/**
* Update confidence statistics
*/
private updateConfidenceStats(confidence: number): void {
const totalSuccessful = this.stats.successfulParses;
this.stats.averageConfidence =
(this.stats.averageConfidence * (totalSuccessful - 1) + confidence) / totalSuccessful;
}
/**
* Get parsing statistics for monitoring
*/
getStats(): typeof this.stats {
return {
...this.stats,
strategyUsage: new Map(this.stats.strategyUsage),
};
}
/**
* Reset statistics (useful for testing or fresh starts)
*/
resetStats(): void {
this.stats = {
totalParses: 0,
successfulParses: 0,
failedParses: 0,
strategyUsage: new Map(),
averageConfidence: 0,
};
}
}
/**
* Factory function to create unified command parser
* @param logger - Logger instance
* @param registeredFrameworkIds - Optional set of registered framework IDs (uppercase).
* When provided, only @framework operators matching registered IDs are detected.
*/
export function createUnifiedCommandParser(
logger: Logger,
registeredFrameworkIds?: Set<string>
): UnifiedCommandParser {
return new UnifiedCommandParser(logger, registeredFrameworkIds);
}