import { PropositionalFormula, PropositionalOperator } from '../types.js';
/**
* Interface for variable mapping
*/
interface VariableMapping {
statement: string;
variable: string;
description?: string;
}
/**
* Enhanced Parser for propositional logic formulas
* with improved natural language processing and multi-letter variable support
*/
export class EnhancedPropositionalParser {
private variableMap: Map<string, string>; // Maps statements to variable names
private nextVarIndex: number = 0; // Used for automatic variable naming
constructor() {
this.variableMap = new Map<string, string>();
}
/**
* Parse a string into a propositional formula with enhanced NLP support
* @param input Input string to parse
* @returns Parsed propositional formula and variable mapping
*/
parse(input: string): {
formula: PropositionalFormula;
variableMap: Map<string, string>;
} {
try {
// Reset state for new parse
this.variableMap = new Map<string, string>();
this.nextVarIndex = 0;
// Check if input contains common natural language patterns
const preprocessed = this.preprocessNaturalLanguage(input);
// Process statements and extract variables
this.extractPropositionalStatements(preprocessed);
// Replace statements with variables
const simplifiedInput = this.replaceStatementsWithVariables(preprocessed);
// Parse the simplified input using the base parser logic
const tokens = this.tokenize(simplifiedInput);
const formula = this.parseExpression(tokens);
return {
formula,
variableMap: this.variableMap
};
} catch (error) {
// Provide helpful error messages
throw this.enhanceError(error, input);
}
}
/**
* Preprocess natural language patterns into formal logic
* @param input Input text
* @returns Preprocessed text
*/
private preprocessNaturalLanguage(input: string): string {
// Store the original input for reference
const original = input;
// First, handle any argument structures (premise/conclusion)
let processedInput = input;
// Remove periods from the end of sentences to avoid tokenization issues
processedInput = processedInput.replace(/\.\s+/g, ' ');
processedInput = processedInput.replace(/\.$/, '');
// Common natural language patterns for logical connectives
const patterns = [
// If-then pattern for implies
{
regex: /\b(if|when|whenever)\s+([^,]+),?\s+then\s+([^,.]+)/gi,
replacement: "($2) implies ($3)"
},
// Unless pattern
{
regex: /\b([^,.]+)\s+unless\s+([^,.]+)/gi,
replacement: "($2) or ($1)"
},
// Either-or pattern
{
regex: /\b(either)\s+([^,]+)\s+or\s+([^,.]+)/gi,
replacement: "($2) or ($3)"
},
// Both-and pattern
{
regex: /\b(both)\s+([^,]+)\s+and\s+([^,.]+)/gi,
replacement: "($2) and ($3)"
},
// Neither-nor pattern
{
regex: /\b(neither)\s+([^,]+)\s+nor\s+([^,.]+)/gi,
replacement: "not(($2) or ($3))"
},
// Only if pattern (reverse implication)
{
regex: /\b([^,]+)\s+only\s+if\s+([^,.]+)/gi,
replacement: "($1) implies ($2)"
},
// If and only if pattern
{
regex: /\b([^,]+)\s+if\s+and\s+only\s+if\s+([^,.]+)/gi,
replacement: "($1) iff ($2)"
},
// Not the case that pattern
{
regex: /\bit\s+is\s+not\s+the\s+case\s+that\s+([^,.]+)/gi,
replacement: "not ($1)"
},
// Sufficient condition
{
regex: /\b([^,]+)\s+is\s+sufficient\s+for\s+([^,.]+)/gi,
replacement: "($1) implies ($2)"
},
// Necessary condition
{
regex: /\b([^,]+)\s+is\s+necessary\s+for\s+([^,.]+)/gi,
replacement: "($2) implies ($1)"
},
// Handle "but" as "and"
{
regex: /\s+but\s+/gi,
replacement: " and "
}
];
// Apply each pattern transformation
for (const pattern of patterns) {
processedInput = processedInput.replace(pattern.regex, pattern.replacement);
}
return processedInput;
}
/**
* Extract and map propositional statements to variables
* @param input Input text
* @returns Map of statements to variables
*/
private extractPropositionalStatements(input: string): Map<string, string> {
// FIRST: Extract longer statements within parentheses (complete phrases)
const parenthesizedRegex = /\(([^()]+)\)/g;
const parenthesizedMatches = Array.from(input.matchAll(parenthesizedRegex));
for (const match of parenthesizedMatches) {
const statement = match[1].trim();
// Skip if it's an already parsed statement
if (this.isComplexFormula(statement) || this.variableMap.has(statement)) {
continue;
}
// Create a meaningful variable name
const variableName = this.createVariableName(statement);
this.variableMap.set(statement, variableName);
}
// SECOND: Extract common phrase patterns that aren't parenthesized
// This catches patterns like "the X is Y", "it is X", "X is Y"
this.extractPhrasePatterns(input);
// THIRD: Process simple variable names, but skip if they're part of already-mapped phrases
const simpleStatementRegex = /\b([A-Za-z][A-Za-z0-9_]*)\b(?!\s*\()/g;
const simpleMatches = Array.from(input.matchAll(simpleStatementRegex));
// Conclusion markers that should never be mapped as variables
const conclusionMarkers = ['therefore', 'thus', 'hence', 'so', 'consequently',
'ergo', 'wherefore', 'accordingly', 'conclusion'];
for (const match of simpleMatches) {
const statement = match[0].trim();
// Skip if it's an operator, conclusion marker, or already mapped
if (this.isLogicalOperator(statement) ||
conclusionMarkers.includes(statement.toLowerCase()) ||
this.variableMap.has(statement)) {
continue;
}
// Skip if this word is part of any already-mapped statement (case-insensitive check)
let isPartOfMappedStatement = false;
const lowerStatement = statement.toLowerCase();
for (const mappedStatement of this.variableMap.keys()) {
const lowerMapped = mappedStatement.toLowerCase();
// Check if this word appears in the mapped statement (case-insensitive)
if (lowerMapped.includes(lowerStatement) && lowerMapped !== lowerStatement) {
isPartOfMappedStatement = true;
break;
}
// Also check if there's a case-insensitive duplicate
if (lowerMapped === lowerStatement && mappedStatement !== statement) {
isPartOfMappedStatement = true;
break;
}
}
if (isPartOfMappedStatement) {
continue;
}
// Map single tokens directly (they're likely already variables)
if (/^[A-Za-z][A-Za-z0-9_]*$/.test(statement)) {
this.variableMap.set(statement, statement);
}
}
return this.variableMap;
}
/**
* Extract common phrase patterns from input
* Handles patterns like "the X is Y", "it is X", "X is Y"
* @param input Input text
*/
private extractPhrasePatterns(input: string): void {
// Define a pattern to exclude logical operators, conclusion markers, and sentence boundaries
// This creates a negative lookahead to stop at these markers
// Also stop at uppercase letters (which typically start new sentences)
const notOperator = '(?!\\s+(?:and|or|not|implies|iff|xor|then|if|therefore|thus|hence|so|consequently)\\b)(?!\\s+[A-Z])';
// Common phrase patterns (ordered from most specific to most general)
// Each pattern now stops before logical operators, conclusion markers, and uppercase letters
const phrasePatterns = [
// "it is X" pattern (e.g., "it is raining")
// Matches: it is [words that aren't operators/conclusion markers/uppercase]
// Limit to reasonable adjectives/participles (1-2 words after "is")
new RegExp(`\\b(it\\s+is\\s+[a-z]+(?:${notOperator}\\s+[a-z]+)?)\\b`, 'gi'),
// "the X is Y" pattern (e.g., "the ground is wet", "the sky is blue")
// Matches: the [1-2 words] is [1-2 words that aren't operators/uppercase]
new RegExp(`\\b(the\\s+[a-z]+(?:\\s+[a-z]+)?\\s+is\\s+[a-z]+(?:${notOperator}\\s+[a-z]+)?)\\b`, 'gi'),
// "the X are Y" pattern (e.g., "the trees are green")
new RegExp(`\\b(the\\s+[a-z]+(?:\\s+[a-z]+)?\\s+are\\s+[a-z]+(?:${notOperator}\\s+[a-z]+)?)\\b`, 'gi'),
// "X is Y" pattern without "the" (e.g., "grass is green")
// Only match if X is not a common article/pronoun
new RegExp(`\\b((?:(?!it|he|she|we|they|there)\\b)[a-z]+(?:\\s+[a-z]+)?\\s+is\\s+[a-z]+(?:${notOperator}\\s+[a-z]+)?)\\b`, 'gi'),
// "X are Y" pattern without "the" (e.g., "birds are flying")
new RegExp(`\\b((?:(?!they|we|there)\\b)[a-z]+(?:\\s+[a-z]+)?\\s+are\\s+[a-z]+(?:${notOperator}\\s+[a-z]+)?)\\b`, 'gi'),
// "X verb Y" pattern for other common verbs
new RegExp(`\\b([a-z]+\\s+(?:has|have|will|can|must|should|would)\\s+[a-z]+(?:${notOperator}\\s+[a-z]+)?)\\b`, 'gi'),
];
// Track all extracted phrases to avoid duplicates
const extractedPhrases = new Set<string>();
for (const pattern of phrasePatterns) {
const matches = Array.from(input.matchAll(pattern));
for (const match of matches) {
const phrase = match[1].trim();
const lowerPhrase = phrase.toLowerCase();
// Skip if already mapped, is a complex formula, or already extracted (case-insensitive)
if (this.variableMap.has(phrase) ||
this.isComplexFormula(phrase) ||
extractedPhrases.has(phrase) ||
extractedPhrases.has(lowerPhrase)) {
continue;
}
// Skip if this phrase is a substring of an already-mapped phrase (case-insensitive)
let isSubphrase = false;
for (const mappedPhrase of this.variableMap.keys()) {
const lowerMapped = mappedPhrase.toLowerCase();
if ((lowerMapped.includes(lowerPhrase) && lowerMapped !== lowerPhrase) ||
(lowerMapped === lowerPhrase && mappedPhrase !== phrase)) {
isSubphrase = true;
break;
}
}
if (isSubphrase) {
continue;
}
// Skip if it contains logical operators (it's not a simple statement)
if (this.containsLogicalOperators(phrase)) {
continue;
}
// Map this phrase
const variableName = this.createVariableName(phrase);
this.variableMap.set(phrase, variableName);
extractedPhrases.add(lowerPhrase); // Store lowercase for deduplication
}
}
}
/**
* Check if a phrase contains logical operators or conclusion markers (for phrase extraction)
* @param phrase The phrase to check
* @returns Whether the phrase contains logical operators or conclusion markers
*/
private containsLogicalOperators(phrase: string): boolean {
const operatorWords = ['and', 'or', 'not', 'implies', 'iff', 'xor', 'then', 'if',
'therefore', 'thus', 'hence', 'so', 'consequently'];
const lowerPhrase = phrase.toLowerCase();
for (const op of operatorWords) {
// Use word boundaries to avoid matching partial words
const regex = new RegExp(`\\b${op}\\b`);
if (regex.test(lowerPhrase)) {
return true;
}
}
return false;
}
/**
* Create a meaningful variable name from a statement
* @param statement The statement to create a variable name for
* @returns A variable name
*/
private createVariableName(statement: string): string {
// If it's already a simple variable, use it directly
if (/^[A-Za-z][A-Za-z0-9_]*$/.test(statement)) {
return statement;
}
// Try to extract a meaningful name by:
// 1. Looking for the subject and verb
// 2. Capitalizing the first letter
// 3. Removing spaces and special characters
// Extract key words by removing stop words
const stopWords = ['the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'to', 'of', 'and', 'or', 'not'];
const words = statement.split(/\s+/);
const significantWords = words.filter(word => !stopWords.includes(word.toLowerCase()));
if (significantWords.length > 0) {
// Take the first significant word and capitalize it
const base = significantWords[0].replace(/[^a-zA-Z0-9]/g, '');
if (base.length > 0) {
return base.charAt(0).toUpperCase() + base.slice(1);
}
}
// Fallback: use auto-generated variable name
return `Var${++this.nextVarIndex}`;
}
/**
* Replace statements with their corresponding variables
* @param input Input text
* @returns Text with statements replaced by variables
*/
private replaceStatementsWithVariables(input: string): string {
let result = input;
// Sort statements by length (desc) to avoid partial replacements
const sortedStatements = Array.from(this.variableMap.keys())
.sort((a, b) => b.length - a.length);
for (const statement of sortedStatements) {
const variable = this.variableMap.get(statement)!;
// Replace full statements in parentheses
const regex = new RegExp(`\\(${this.escapeRegExp(statement)}\\)`, 'g');
result = result.replace(regex, variable);
// Also replace the statement if it appears outside parentheses
// but only if it's a complete word
const wordRegex = new RegExp(`\\b${this.escapeRegExp(statement)}\\b`, 'g');
result = result.replace(wordRegex, variable);
}
return result;
}
/**
* Check if a statement contains logical operators (is a complex formula)
* @param statement The statement to check
* @returns Whether the statement is a complex formula
*/
private isComplexFormula(statement: string): boolean {
const operatorTerms = ['and', 'or', 'not', 'implies', 'iff', 'xor',
'∧', '∨', '¬', '→', '↔', '⊕'];
for (const op of operatorTerms) {
if (statement.includes(op)) {
return true;
}
}
return false;
}
/**
* Check if a token is a logical operator
* @param token The token to check
* @returns Whether the token is a logical operator
*/
private isLogicalOperator(token: string): boolean {
const operators = ['and', 'or', 'not', 'implies', 'iff', 'xor',
'∧', '∨', '¬', '→', '↔', '⊕'];
return operators.includes(token.toLowerCase());
}
/**
* Escape special regex characters in a string
* @param string String to escape
* @returns Escaped string
*/
private escapeRegExp(string: string): string {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
* Tokenize the input string
* @param input Input string
* @returns Array of tokens
*/
private tokenize(input: string): string[] {
// Replace Unicode symbols and alternative operators with text equivalents
const normalized = input
.replace(/∧/g, 'and')
.replace(/∨/g, 'or')
.replace(/¬/g, 'not')
.replace(/→/g, 'implies')
.replace(/↔/g, 'iff')
.replace(/⊕/g, 'xor')
// Support alternative operators
.replace(/&&/g, 'and')
.replace(/\|\|/g, 'or')
.replace(/!/g, 'not ')
.replace(/->/g, ' implies ')
.replace(/:/g, ' implies ')
.replace(/=/g, ' iff ')
.replace(/\biff\b/g, ' iff ')
.replace(/\band\b/g, ' and ')
.replace(/\bor\b/g, ' or ')
.replace(/\bnot\b/g, ' not ')
.replace(/\bimplies\b/g, ' implies ')
.replace(/\bxor\b/g, ' xor ');
// Split into tokens
return normalized
.replace(/\(/g, ' ( ')
.replace(/\)/g, ' ) ')
.split(/\s+/)
.filter(token => token.length > 0);
}
/**
* Parse an expression from tokens
* @param tokens Array of tokens
* @returns Parsed propositional formula
*/
private parseExpression(tokens: string[]): PropositionalFormula {
return this.parseImplication(tokens, 0).formula;
}
/**
* Parse an implication expression (lowest precedence)
* @param tokens Array of tokens
* @param index Current index
* @returns Parse result
*/
private parseImplication(tokens: string[], index: number): {
formula: PropositionalFormula;
nextIndex: number;
} {
let result = this.parseIff(tokens, index);
while (
result.nextIndex < tokens.length &&
(tokens[result.nextIndex] === 'implies' || tokens[result.nextIndex] === '→')
) {
const operator = this.getOperator(tokens[result.nextIndex]);
result.nextIndex++; // Skip operator
const right = this.parseImplication(tokens, result.nextIndex);
result = {
formula: {
type: 'binary',
operator: operator as Exclude<PropositionalOperator, 'not' | '¬'>,
left: result.formula,
right: right.formula
},
nextIndex: right.nextIndex
};
}
return result;
}
/**
* Parse an iff/xor expression
* @param tokens Array of tokens
* @param index Current index
* @returns Parse result
*/
private parseIff(tokens: string[], index: number): {
formula: PropositionalFormula;
nextIndex: number;
} {
let result = this.parseOr(tokens, index);
while (
result.nextIndex < tokens.length &&
(
tokens[result.nextIndex] === 'iff' ||
tokens[result.nextIndex] === '↔' ||
tokens[result.nextIndex] === 'xor' ||
tokens[result.nextIndex] === '⊕'
)
) {
const operator = this.getOperator(tokens[result.nextIndex]);
result.nextIndex++; // Skip operator
const right = this.parseIff(tokens, result.nextIndex);
result = {
formula: {
type: 'binary',
operator: operator as Exclude<PropositionalOperator, 'not' | '¬'>,
left: result.formula,
right: right.formula
},
nextIndex: right.nextIndex
};
}
return result;
}
/**
* Parse an or expression
* @param tokens Array of tokens
* @param index Current index
* @returns Parse result
*/
private parseOr(tokens: string[], index: number): {
formula: PropositionalFormula;
nextIndex: number;
} {
let result = this.parseAnd(tokens, index);
while (
result.nextIndex < tokens.length &&
(tokens[result.nextIndex] === 'or' || tokens[result.nextIndex] === '∨')
) {
const operator = this.getOperator(tokens[result.nextIndex]);
result.nextIndex++; // Skip operator
const right = this.parseOr(tokens, result.nextIndex);
result = {
formula: {
type: 'binary',
operator: operator as Exclude<PropositionalOperator, 'not' | '¬'>,
left: result.formula,
right: right.formula
},
nextIndex: right.nextIndex
};
}
return result;
}
/**
* Parse an and expression
* @param tokens Array of tokens
* @param index Current index
* @returns Parse result
*/
private parseAnd(tokens: string[], index: number): {
formula: PropositionalFormula;
nextIndex: number;
} {
let result = this.parseFactor(tokens, index);
while (
result.nextIndex < tokens.length &&
(tokens[result.nextIndex] === 'and' || tokens[result.nextIndex] === '∧')
) {
const operator = this.getOperator(tokens[result.nextIndex]);
result.nextIndex++; // Skip operator
const right = this.parseAnd(tokens, result.nextIndex);
result = {
formula: {
type: 'binary',
operator: operator as Exclude<PropositionalOperator, 'not' | '¬'>,
left: result.formula,
right: right.formula
},
nextIndex: right.nextIndex
};
}
return result;
}
/**
* Parse a factor (not, variable, or parenthesized expression)
* @param tokens Array of tokens
* @param index Current index
* @returns Parse result
*/
private parseFactor(tokens: string[], index: number): {
formula: PropositionalFormula;
nextIndex: number;
} {
if (index >= tokens.length) {
throw new Error('Unexpected end of input');
}
const token = tokens[index];
// Handle negation
if (token === 'not' || token === '¬') {
const result = this.parseFactor(tokens, index + 1);
return {
formula: {
type: 'unary',
operator: this.getOperator(token) as 'not' | '¬',
operand: result.formula
},
nextIndex: result.nextIndex
};
}
// Handle parenthesized expression
if (token === '(') {
let depth = 1;
let closingIndex = index + 1;
while (depth > 0 && closingIndex < tokens.length) {
if (tokens[closingIndex] === '(') {
depth++;
} else if (tokens[closingIndex] === ')') {
depth--;
}
if (depth > 0) {
closingIndex++;
}
}
if (depth > 0 || tokens[closingIndex] !== ')') {
throw new Error('Mismatched parentheses');
}
// Parse the tokens inside the parentheses
const subTokens = tokens.slice(index + 1, closingIndex);
const result = this.parseExpression(subTokens);
return {
formula: result,
nextIndex: closingIndex + 1
};
}
// Handle variable
if (this.isVariable(token)) {
return {
formula: {
type: 'variable',
name: token
},
nextIndex: index + 1
};
}
throw new Error(`Unexpected token: ${token}`);
}
/**
* Check if a token is a variable name
* @param token Token to check
* @returns Whether the token is a variable
*/
private isVariable(token: string): boolean {
// Allow multi-character variables (letters, numbers, underscores)
return /^[A-Za-z][A-Za-z0-9_]*$/.test(token) && !this.isLogicalOperator(token);
}
/**
* Get standardized operator
* @param token Operator token
* @returns Standardized operator
*/
private getOperator(token: string): PropositionalOperator {
switch (token.toLowerCase()) {
case 'and':
case '∧':
return 'and';
case 'or':
case '∨':
return 'or';
case 'not':
case '¬':
return 'not';
case 'implies':
case '→':
return 'implies';
case 'iff':
case '↔':
return 'iff';
case 'xor':
case '⊕':
return 'xor';
default:
throw new Error(`Unknown operator: ${token}`);
}
}
/**
* Enhance error messages with helpful information
* @param error Original error
* @param input Original input
* @returns Enhanced error
*/
private enhanceError(error: any, input: string): Error {
const errorMessage = error instanceof Error ? error.message : String(error);
if (errorMessage.includes('Unexpected token')) {
const token = errorMessage.match(/Unexpected token: (.*)/)?.[1] || 'unknown';
return new Error(`Syntax error: "${token}" is not a valid propositional logic token.
Allowed tokens:
• Variables: Letters, numbers, and underscores (must start with a letter)
• Operators: and, or, not, implies, iff, xor
• Symbols: ∧, ∨, ¬, →, ↔, ⊕
• Parentheses: ( )
Valid examples:
✓ Rains and Wet
✓ (Rains or Cloudy) implies Wet
✓ not Sunny and Warm
Common patterns:
- "if X then Y" becomes "X implies Y"
- "X unless Y" becomes "Y or X"
- "either X or Y" becomes "X or Y"
- "both X and Y" becomes "X and Y"
Your input: "${input}"`);
}
if (errorMessage.includes('Mismatched parentheses')) {
return new Error(`Syntax error: Mismatched parentheses.
Check that each opening parenthesis '(' has a matching closing parenthesis ')'.
Your input: "${input}"`);
}
if (errorMessage.includes('Unexpected end of input')) {
return new Error(`Syntax error: Incomplete expression.
The expression appears to be incomplete. Check for:
• Missing operands (e.g., "P and" is incomplete)
• Unclosed parentheses
• Missing right-hand side of binary operators
Your input: "${input}"`);
}
// Default enhanced error
return new Error(`Syntax error: ${errorMessage}
Please check your input follows propositional logic syntax.
Use "Rains and Wet", "Sunny or Cloudy", "not Raining", "Cloudy implies Wet", etc.
Natural language patterns are supported:
- "if it rains then the streets are wet"
- "it's either sunny or cloudy"
- "the streets are wet only if it rains"
Your input: "${input}"`);
}
/**
* Format the variable mapping as a string for display
* @returns Formatted variable mapping
*/
getVariableMappingString(): string {
let result = "Variable mapping:\n";
for (const [statement, variable] of this.variableMap.entries()) {
result += `• ${variable}: ${statement}\n`;
}
return result;
}
}