# PDSL Parser Design
**Version:** 0.1.0
**Architecture:** Multi-stage compilation pipeline
This document describes the complete parser architecture for converting PDSL to ProbLog.
## Table of Contents
1. [Architecture Overview](#architecture-overview)
2. [Lexical Analysis](#lexical-analysis)
3. [Syntactic Analysis](#syntactic-analysis)
4. [Semantic Analysis](#semantic-analysis)
5. [Type Checking](#type-checking)
6. [Code Generation](#code-generation)
7. [Error Handling](#error-handling)
8. [Testing Strategy](#testing-strategy)
9. [Performance Considerations](#performance-considerations)
10. [Implementation Timeline](#implementation-timeline)
## Architecture Overview
### Pipeline Stages
```
┌─────────────────────────────────────────────────────────────┐
│ PDSL Compilation Pipeline │
└─────────────────────────────────────────────────────────────┘
Source Text
│
▼
┌──────────────┐
│ LEXER │ Tokenization
│ (Scanner) │ → Stream of tokens
└──────────────┘
│
▼
┌──────────────┐
│ PARSER │ Syntactic Analysis
│ (Recursive │ → Abstract Syntax Tree (AST)
│ Descent) │
└──────────────┘
│
▼
┌──────────────┐
│ SEMANTIC │ Semantic Analysis
│ ANALYZER │ → Annotated AST
└──────────────┘
│
▼
┌──────────────┐
│ TYPE │ Type Checking
│ CHECKER │ → Type-validated AST
└──────────────┘
│
▼
┌──────────────┐
│ CODE │ ProbLog Generation
│ GENERATOR │ → ProbLog program
└──────────────┘
│
▼
ProbLog Output
```
### Module Structure
```
src/probabilistic/
├── index.ts # Main entry point
├── lexer/
│ ├── lexer.ts # Tokenizer
│ ├── token.ts # Token definitions
│ └── tokenTypes.ts # Token type enums
├── parser/
│ ├── parser.ts # Recursive descent parser
│ ├── ast.ts # AST node definitions
│ └── parserUtils.ts # Helper functions
├── semantics/
│ ├── analyzer.ts # Semantic analysis
│ ├── symbolTable.ts # Symbol table for scoping
│ └── validator.ts # Validation rules
├── types/
│ ├── typeChecker.ts # Type checking
│ ├── typeInference.ts # Type inference
│ └── types.ts # Type definitions
├── codegen/
│ ├── generator.ts # ProbLog code generator
│ ├── optimizer.ts # AST optimization
│ └── templates.ts # Code templates
└── errors/
├── errorHandler.ts # Error reporting
└── errorTypes.ts # Error type definitions
```
## Lexical Analysis
### Token Types
```typescript
// src/probabilistic/lexer/tokenTypes.ts

// Every token category the lexer can emit. String values double as readable
// names in error messages and test output.
export enum TokenType {
  // Keywords
  PROBABILISTIC_MODEL = 'PROBABILISTIC_MODEL',
  OBSERVE = 'OBSERVE',
  QUERY = 'QUERY',
  LEARN = 'LEARN',
  PARAMETERS = 'PARAMETERS',
  FROM = 'FROM',
  DATASET = 'DATASET',
  NOT = 'NOT',
  TRUE = 'TRUE',
  FALSE = 'FALSE',
  // Identifiers and literals
  VARIABLE = 'VARIABLE', // X, Y, Person (identifier starting uppercase)
  CONSTANT = 'CONSTANT', // alice, flu (identifier starting lowercase, non-keyword)
  PROBABILITY = 'PROBABILITY', // 0.7, 0.95 (numeric literal in [0, 1])
  NUMBER = 'NUMBER', // 42, 3.14 (numeric literal outside [0, 1])
  STRING = 'STRING', // "file.csv"
  // Operators
  PROB_ANNOTATION = 'PROB_ANNOTATION', // ::
  IMPLICATION = 'IMPLICATION', // :-
  COMMA = 'COMMA', // ,
  SEMICOLON = 'SEMICOLON', // ;
  // Delimiters
  LPAREN = 'LPAREN', // (
  RPAREN = 'RPAREN', // )
  LBRACE = 'LBRACE', // {
  RBRACE = 'RBRACE', // }
  LBRACKET = 'LBRACKET', // [
  RBRACKET = 'RBRACKET', // ]
  // Special
  COMMENT = 'COMMENT', // # comment (scanned but filtered out by tokenize())
  // NOTE(review): NEWLINE is declared but the lexer draft treats '\n' as
  // skippable whitespace and never emits it — confirm whether it is needed.
  NEWLINE = 'NEWLINE', // \n
  EOF = 'EOF', // End of file
  UNKNOWN = 'UNKNOWN' // Error token
}

// A single lexeme with its 1-based source position.
export interface Token {
  type: TokenType;   // category from TokenType
  value: string;     // raw text (STRING tokens store the unquoted content)
  line: number;      // 1-based line of the token's first character
  column: number;    // 1-based column of the token's first character
  length: number;    // characters consumed from the source
}
```
### Lexer Implementation
```typescript
// src/probabilistic/lexer/lexer.ts

/**
 * Converts PDSL source text into a flat token stream.
 *
 * Fixes over the previous draft:
 * - scanComment / scanTwoChar / scanSingleChar / createToken were referenced
 *   but never defined; they are specified here.
 * - An unrecognized character was never consumed, so tokenize() looped
 *   forever on it; the UNKNOWN path now advances past it.
 * - Newline bookkeeping is centralized in advance(), so line/column stay
 *   correct inside strings as well as between tokens (the old code reset
 *   column to 1 *before* advancing, leaving every line starting at column 2).
 */
export class Lexer {
  private source: string;
  private position: number = 0;
  private line: number = 1;    // 1-based line of the character at `position`
  private column: number = 1;  // 1-based column of the character at `position`
  private tokens: Token[] = [];

  constructor(source: string) {
    this.source = source;
  }

  /**
   * Tokenizes the whole source.
   * @returns all tokens (comments filtered out), terminated by one EOF token.
   */
  public tokenize(): Token[] {
    while (!this.isAtEnd()) {
      this.skipWhitespace();
      if (this.isAtEnd()) break;
      const token = this.scanToken();
      // Comments are scanned (to keep positions correct) but not emitted.
      if (token.type !== TokenType.COMMENT) {
        this.tokens.push(token);
      }
    }
    this.tokens.push(this.createToken(TokenType.EOF, ''));
    return this.tokens;
  }

  /** Scans exactly one token starting at the current position. */
  private scanToken(): Token {
    const char = this.peek();
    // Comments run from '#' to end of line.
    if (char === '#') {
      return this.scanComment();
    }
    // Two-character operators; must precede any single ':' handling.
    if (char === ':' && this.peekNext() === ':') {
      return this.scanTwoChar(TokenType.PROB_ANNOTATION, '::');
    }
    if (char === ':' && this.peekNext() === '-') {
      return this.scanTwoChar(TokenType.IMPLICATION, ':-');
    }
    // Numbers (probabilities are classified inside scanNumber).
    if (this.isDigit(char)) {
      return this.scanNumber();
    }
    // Strings
    if (char === '"') {
      return this.scanString();
    }
    // Keywords, constants and variables all start with a letter.
    if (this.isAlpha(char)) {
      return this.scanIdentifier();
    }
    // Single-character tokens
    switch (char) {
      case '(': return this.scanSingleChar(TokenType.LPAREN);
      case ')': return this.scanSingleChar(TokenType.RPAREN);
      case '{': return this.scanSingleChar(TokenType.LBRACE);
      case '}': return this.scanSingleChar(TokenType.RBRACE);
      case '[': return this.scanSingleChar(TokenType.LBRACKET);
      case ']': return this.scanSingleChar(TokenType.RBRACKET);
      case ',': return this.scanSingleChar(TokenType.COMMA);
      case ';': return this.scanSingleChar(TokenType.SEMICOLON);
      default: {
        // BUG FIX: consume the offending character. The old code returned an
        // UNKNOWN token without advancing, so tokenize() never made progress
        // and spun forever on unrecognized input.
        const startColumn = this.column;
        this.advance();
        return {
          type: TokenType.UNKNOWN,
          value: char,
          line: this.line,
          column: startColumn,
          length: 1
        };
      }
    }
  }

  /** Scans an integer or decimal literal; values in [0, 1] are PROBABILITY. */
  private scanNumber(): Token {
    const start = this.position;
    const startColumn = this.column;
    while (this.isDigit(this.peek())) {
      this.advance();
    }
    // Decimal point only when followed by a digit (so "1." is not consumed).
    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // consume '.'
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }
    const value = this.source.substring(start, this.position);
    const numValue = parseFloat(value);
    // NOTE: integers 0 and 1 also classify as PROBABILITY; the parser treats
    // both PROBABILITY and NUMBER as numeric terms, so this is harmless.
    const type = (numValue >= 0 && numValue <= 1)
      ? TokenType.PROBABILITY
      : TokenType.NUMBER;
    return {
      type,
      value,
      line: this.line,
      column: startColumn,
      length: value.length
    };
  }

  /** Scans a double-quoted string; the stored value excludes the quotes. */
  private scanString(): Token {
    const start = this.position;
    const startLine = this.line;      // report multi-line strings at their start
    const startColumn = this.column;
    this.advance(); // opening "
    while (!this.isAtEnd() && this.peek() !== '"') {
      this.advance(); // advance() tracks embedded newlines
    }
    if (this.isAtEnd()) {
      throw new LexerError('Unterminated string', startLine, startColumn);
    }
    this.advance(); // closing "
    const value = this.source.substring(start + 1, this.position - 1);
    return {
      type: TokenType.STRING,
      value,
      line: startLine,
      column: startColumn,
      length: this.position - start
    };
  }

  /** Scans a keyword, VARIABLE (leading uppercase) or CONSTANT. */
  private scanIdentifier(): Token {
    const start = this.position;
    const startColumn = this.column;
    while (this.isAlphaNumeric(this.peek()) || this.peek() === '_') {
      this.advance();
    }
    const value = this.source.substring(start, this.position);
    // Keywords take precedence over constants/variables.
    const type = this.getKeywordType(value);
    // Non-keywords starting with an uppercase letter are logic variables.
    if (type === TokenType.CONSTANT && this.isUpperCase(value[0])) {
      return {
        type: TokenType.VARIABLE,
        value,
        line: this.line,
        column: startColumn,
        length: value.length
      };
    }
    return {
      type,
      value,
      line: this.line,
      column: startColumn,
      length: value.length
    };
  }

  /** Consumes '#' through end of line; the newline is left for whitespace. */
  private scanComment(): Token {
    const start = this.position;
    const startColumn = this.column;
    while (!this.isAtEnd() && this.peek() !== '\n') {
      this.advance();
    }
    const value = this.source.substring(start, this.position);
    return {
      type: TokenType.COMMENT,
      value,
      line: this.line,
      column: startColumn,
      length: value.length
    };
  }

  /** Consumes one character and wraps it as a token of the given type. */
  private scanSingleChar(type: TokenType): Token {
    const startColumn = this.column;
    const char = this.advance();
    return { type, value: char, line: this.line, column: startColumn, length: 1 };
  }

  /** Consumes a known two-character operator such as '::' or ':-'. */
  private scanTwoChar(type: TokenType, value: string): Token {
    const startColumn = this.column;
    this.advance();
    this.advance();
    return { type, value, line: this.line, column: startColumn, length: 2 };
  }

  /** Builds a token at the current position (used for the synthetic EOF). */
  private createToken(type: TokenType, value: string): Token {
    return {
      type,
      value,
      line: this.line,
      column: this.column,
      length: value.length
    };
  }

  /** Maps reserved words to their token types; everything else is CONSTANT. */
  private getKeywordType(value: string): TokenType {
    const keywords: Record<string, TokenType> = {
      'probabilistic_model': TokenType.PROBABILISTIC_MODEL,
      'observe': TokenType.OBSERVE,
      'query': TokenType.QUERY,
      'learn': TokenType.LEARN,
      'parameters': TokenType.PARAMETERS,
      'from': TokenType.FROM,
      'dataset': TokenType.DATASET,
      'not': TokenType.NOT,
      'true': TokenType.TRUE,
      'false': TokenType.FALSE,
    };
    return keywords[value] || TokenType.CONSTANT;
  }

  // Character-class helpers
  private isAlpha(char: string): boolean {
    return /[a-zA-Z]/.test(char);
  }

  private isDigit(char: string): boolean {
    return /[0-9]/.test(char);
  }

  private isAlphaNumeric(char: string): boolean {
    return this.isAlpha(char) || this.isDigit(char);
  }

  private isUpperCase(char: string): boolean {
    return /[A-Z]/.test(char);
  }

  /** Current character without consuming it; '\0' at end of input. */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.source[this.position];
  }

  /** One character of lookahead; '\0' at end of input. */
  private peekNext(): string {
    if (this.position + 1 >= this.source.length) return '\0';
    return this.source[this.position + 1];
  }

  /**
   * Consumes one character. All line/column bookkeeping lives here so every
   * scan path (strings included) keeps positions consistent.
   */
  private advance(): string {
    const char = this.source[this.position++];
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  private isAtEnd(): boolean {
    return this.position >= this.source.length;
  }

  /** Skips spaces, tabs, carriage returns and newlines between tokens. */
  private skipWhitespace(): void {
    while (!this.isAtEnd()) {
      const char = this.peek();
      if (char === ' ' || char === '\t' || char === '\r' || char === '\n') {
        this.advance(); // advance() handles newline line/column updates
      } else {
        break;
      }
    }
  }
}
```
## Syntactic Analysis
### AST Node Definitions
```typescript
// src/probabilistic/parser/ast.ts

// Every node the parser can produce. All nodes carry a `location` so later
// stages can report errors against the original source.
// NOTE(review): the union lists Statement and Term alongside their own
// members, which is redundant but harmless — consider trimming.
export type ASTNode =
  | Program
  | Model
  | Statement
  | ProbabilisticFact
  | ProbabilisticRule
  | DeterministicFact
  | AnnotatedDisjunction
  | Observation
  | Query
  | LearningDirective
  | Atom
  | Literal
  | Term;

// Root node: one Program per compilation, holding every parsed model.
export interface Program {
  type: 'Program';
  models: Model[];
  location: Location;
}

// A `probabilistic_model Name { ... }` block.
export interface Model {
  type: 'Model';
  name: string;
  statements: Statement[];
  location: Location;
}

// The statement-level alternatives allowed inside a model body
// (a discriminated union on `type`).
export type Statement =
  | ProbabilisticFact
  | ProbabilisticRule
  | DeterministicFact
  | AnnotatedDisjunction
  | Observation
  | Query
  | LearningDirective;

// `0.7 :: rain` — a fact annotated with a probability.
export interface ProbabilisticFact {
  type: 'ProbabilisticFact';
  probability: number;
  atom: Atom;
  location: Location;
}

// `0.9 :: flies(X) :- bird(X), not penguin(X)` — annotated rule.
export interface ProbabilisticRule {
  type: 'ProbabilisticRule';
  probability: number;
  head: Atom;
  body: Literal[];
  location: Location;
}

// `bird(tweety)` — a fact that always holds.
export interface DeterministicFact {
  type: 'DeterministicFact';
  atom: Atom;
  location: Location;
}

// `0.3 :: a; 0.7 :: b` — mutually exclusive probabilistic choices.
export interface AnnotatedDisjunction {
  type: 'AnnotatedDisjunction';
  choices: ProbabilisticFact[];
  location: Location;
}

// `observe fever` — evidence; the literal may be negated.
export interface Observation {
  type: 'Observation';
  literal: Literal;
  location: Location;
}

// `query flu` — marginal-probability query target.
export interface Query {
  type: 'Query';
  atom: Atom;
  location: Location;
}

// `learn parameters from dataset("file.csv")`.
export interface LearningDirective {
  type: 'LearningDirective';
  dataset: string;
  location: Location;
}

// `predicate(arg1, ..., argN)`; arity 0 means a bare predicate name.
export interface Atom {
  type: 'Atom';
  predicate: string;
  arguments: Term[];
  location: Location;
}

// An atom with optional negation, as used in rule bodies and observations.
export interface Literal {
  type: 'Literal';
  negated: boolean;
  atom: Atom;
  location: Location;
}

// Argument positions of an atom: a variable, constant, number, or nested atom.
export type Term =
  | Variable
  | Constant
  | Number
  | Atom;

// Logic variable — identifiers starting with an uppercase letter.
export interface Variable {
  type: 'Variable';
  name: string;
  location: Location;
}

// Ground symbol — identifiers starting with a lowercase letter.
export interface Constant {
  type: 'Constant';
  value: string;
  location: Location;
}

// Numeric literal term.
// NOTE(review): this interface shadows the global `Number` type within this
// module — consider renaming (e.g. NumberLiteral) to avoid confusion.
export interface Number {
  type: 'Number';
  value: number;
  location: Location;
}

// 1-based source position shared by all nodes and diagnostics.
export interface Location {
  line: number;
  column: number;
  length: number;
}
```
### Parser Implementation
```typescript
// src/probabilistic/parser/parser.ts

/**
 * Recursive-descent parser over the lexer's token stream.
 *
 * Fixes over the previous draft:
 * - Model names starting with an uppercase letter (e.g. "Medical") lex as
 *   VARIABLE, but parseModel only accepted CONSTANT; it now accepts both.
 * - Out-of-range probability annotations such as "1.5" lex as NUMBER and were
 *   rejected with a syntax error; they now parse so the semantic analyzer can
 *   report InvalidProbability with a helpful message.
 * - Deterministic rules (`head :- body` with no annotation) were unparseable;
 *   they are desugared to a ProbabilisticRule with probability 1.0.
 * - getLocation() read tokens[-1] when called before any token was consumed.
 */
export class Parser {
  private tokens: Token[];
  private current: number = 0;

  constructor(tokens: Token[]) {
    this.tokens = tokens;
  }

  /** Parses the whole token stream into a Program node. */
  public parse(): Program {
    const models: Model[] = [];
    while (!this.isAtEnd()) {
      models.push(this.parseModel());
    }
    return {
      type: 'Program',
      models,
      location: this.getLocation()
    };
  }

  /** model := 'probabilistic_model' name '{' statement* '}' */
  private parseModel(): Model {
    this.consume(TokenType.PROBABILISTIC_MODEL, "Expected 'probabilistic_model'");
    // Model names conventionally start uppercase, which the lexer classifies
    // as VARIABLE rather than CONSTANT — accept either token type here.
    if (!this.check(TokenType.CONSTANT) && !this.check(TokenType.VARIABLE)) {
      throw new ParseError('Expected model name', this.peek().line, this.peek().column);
    }
    const name = this.advance().value;
    this.consume(TokenType.LBRACE, "Expected '{'");
    const statements: Statement[] = [];
    while (!this.check(TokenType.RBRACE) && !this.isAtEnd()) {
      statements.push(this.parseStatement());
    }
    this.consume(TokenType.RBRACE, "Expected '}'");
    return {
      type: 'Model',
      name,
      statements,
      location: this.getLocation()
    };
  }

  /** Dispatches on the leading token of a statement. */
  private parseStatement(): Statement {
    if (this.check(TokenType.OBSERVE)) {
      return this.parseObservation();
    }
    if (this.check(TokenType.QUERY)) {
      return this.parseQuery();
    }
    if (this.check(TokenType.LEARN)) {
      return this.parseLearningDirective();
    }
    // Accept NUMBER as well as PROBABILITY: annotations outside [0, 1] lex as
    // NUMBER, and rejecting them here would mask the real (semantic) error.
    if (this.check(TokenType.PROBABILITY) || this.check(TokenType.NUMBER)) {
      return this.parseProbabilisticStatement();
    }
    // Otherwise, a deterministic fact or rule.
    return this.parseDeterministicFact();
  }

  /** prob '::' atom followed by ';' (disjunction), ':-' (rule) or nothing. */
  private parseProbabilisticStatement(): Statement {
    const firstProb = this.advance();
    this.consume(TokenType.PROB_ANNOTATION, "Expected '::'");
    const firstAtom = this.parseAtom();
    // Annotated disjunction: `p1 :: a1; p2 :: a2; ...`
    if (this.check(TokenType.SEMICOLON)) {
      return this.parseAnnotatedDisjunction(firstProb, firstAtom);
    }
    // Probabilistic rule: `p :: head :- body`
    if (this.check(TokenType.IMPLICATION)) {
      this.advance(); // consume :-
      const body = this.parseBody();
      return {
        type: 'ProbabilisticRule',
        probability: parseFloat(firstProb.value),
        head: firstAtom,
        body,
        location: this.getLocation()
      };
    }
    // Simple probabilistic fact: `p :: atom`
    return {
      type: 'ProbabilisticFact',
      probability: parseFloat(firstProb.value),
      atom: firstAtom,
      location: this.getLocation()
    };
  }

  /** Continues a disjunction after its first `prob :: atom` choice. */
  private parseAnnotatedDisjunction(
    firstProb: Token,
    firstAtom: Atom
  ): AnnotatedDisjunction {
    const choices: ProbabilisticFact[] = [{
      type: 'ProbabilisticFact',
      probability: parseFloat(firstProb.value),
      atom: firstAtom,
      location: this.getLocation()
    }];
    while (this.match(TokenType.SEMICOLON)) {
      // NUMBER is tolerated for the same reason as in parseStatement.
      const prob = this.check(TokenType.NUMBER)
        ? this.advance()
        : this.consume(TokenType.PROBABILITY, "Expected probability");
      this.consume(TokenType.PROB_ANNOTATION, "Expected '::'");
      const atom = this.parseAtom();
      choices.push({
        type: 'ProbabilisticFact',
        probability: parseFloat(prob.value),
        atom,
        location: this.getLocation()
      });
    }
    return {
      type: 'AnnotatedDisjunction',
      choices,
      location: this.getLocation()
    };
  }

  /**
   * Parses `atom` or `atom :- body`. A deterministic rule is represented as
   * a ProbabilisticRule with probability 1.0 so downstream stages need no
   * extra node type.
   */
  private parseDeterministicFact(): Statement {
    const atom = this.parseAtom();
    if (this.match(TokenType.IMPLICATION)) {
      const body = this.parseBody();
      return {
        type: 'ProbabilisticRule',
        probability: 1.0,
        head: atom,
        body,
        location: this.getLocation()
      };
    }
    return {
      type: 'DeterministicFact',
      atom,
      location: this.getLocation()
    };
  }

  /** observation := 'observe' literal */
  private parseObservation(): Observation {
    this.consume(TokenType.OBSERVE, "Expected 'observe'");
    const literal = this.parseLiteral();
    return {
      type: 'Observation',
      literal,
      location: this.getLocation()
    };
  }

  /** query := 'query' atom */
  private parseQuery(): Query {
    this.consume(TokenType.QUERY, "Expected 'query'");
    const atom = this.parseAtom();
    return {
      type: 'Query',
      atom,
      location: this.getLocation()
    };
  }

  /** learning := 'learn' 'parameters' 'from' 'dataset' '(' STRING ')' */
  private parseLearningDirective(): LearningDirective {
    this.consume(TokenType.LEARN, "Expected 'learn'");
    this.consume(TokenType.PARAMETERS, "Expected 'parameters'");
    this.consume(TokenType.FROM, "Expected 'from'");
    this.consume(TokenType.DATASET, "Expected 'dataset'");
    this.consume(TokenType.LPAREN, "Expected '('");
    const datasetToken = this.consume(TokenType.STRING, "Expected dataset path");
    this.consume(TokenType.RPAREN, "Expected ')'");
    return {
      type: 'LearningDirective',
      dataset: datasetToken.value,
      location: this.getLocation()
    };
  }

  /** body := literal (',' literal)* */
  private parseBody(): Literal[] {
    const literals: Literal[] = [];
    literals.push(this.parseLiteral());
    while (this.match(TokenType.COMMA)) {
      literals.push(this.parseLiteral());
    }
    return literals;
  }

  /** literal := 'not'? atom */
  private parseLiteral(): Literal {
    const negated = this.match(TokenType.NOT);
    const atom = this.parseAtom();
    return {
      type: 'Literal',
      negated,
      atom,
      location: this.getLocation()
    };
  }

  /** atom := CONSTANT ( '(' term (',' term)* ')' )? */
  private parseAtom(): Atom {
    const predicateToken = this.consume(TokenType.CONSTANT, "Expected predicate name");
    const predicate = predicateToken.value;
    let args: Term[] = [];
    if (this.match(TokenType.LPAREN)) {
      args = this.parseArgumentList();
      this.consume(TokenType.RPAREN, "Expected ')'");
    }
    return {
      type: 'Atom',
      predicate,
      arguments: args,
      location: this.getLocation()
    };
  }

  private parseArgumentList(): Term[] {
    const args: Term[] = [];
    args.push(this.parseTerm());
    while (this.match(TokenType.COMMA)) {
      args.push(this.parseTerm());
    }
    return args;
  }

  /** term := VARIABLE | NUMBER | PROBABILITY | CONSTANT | nested atom */
  private parseTerm(): Term {
    if (this.check(TokenType.VARIABLE)) {
      const token = this.advance();
      return {
        type: 'Variable',
        name: token.value,
        location: this.getLocation()
      };
    }
    // The lexer classifies numerics in [0, 1] as PROBABILITY; both are
    // numeric terms in argument position.
    if (this.check(TokenType.NUMBER) || this.check(TokenType.PROBABILITY)) {
      const token = this.advance();
      return {
        type: 'Number',
        value: parseFloat(token.value),
        location: this.getLocation()
      };
    }
    if (this.check(TokenType.CONSTANT)) {
      // A constant followed by '(' is a nested atom, e.g. f(g(x)).
      const lookahead = this.peek(1);
      if (lookahead && lookahead.type === TokenType.LPAREN) {
        return this.parseAtom();
      }
      const token = this.advance();
      return {
        type: 'Constant',
        value: token.value,
        location: this.getLocation()
      };
    }
    throw new ParseError(
      `Expected term, got ${this.peek().type}`,
      this.peek().line,
      this.peek().column
    );
  }

  // Helper methods

  /** Consumes the current token if it matches any of the given types. */
  private match(...types: TokenType[]): boolean {
    for (const type of types) {
      if (this.check(type)) {
        this.advance();
        return true;
      }
    }
    return false;
  }

  /** True when the current token has the given type (EOF never matches). */
  private check(type: TokenType): boolean {
    if (this.isAtEnd()) return false;
    return this.peek().type === type;
  }

  /** Consumes and returns the current token (EOF is never consumed). */
  private advance(): Token {
    if (!this.isAtEnd()) this.current++;
    return this.previous();
  }

  private isAtEnd(): boolean {
    return this.peek().type === TokenType.EOF;
  }

  /** Lookahead without consuming; offset 0 is the current token. */
  private peek(offset: number = 0): Token {
    return this.tokens[this.current + offset];
  }

  private previous(): Token {
    return this.tokens[this.current - 1];
  }

  /** Consumes a token of the given type or raises a ParseError. */
  private consume(type: TokenType, message: string): Token {
    if (this.check(type)) return this.advance();
    throw new ParseError(message, this.peek().line, this.peek().column);
  }

  /** Location of the most recently consumed token. */
  private getLocation(): Location {
    // BUG FIX: before any token is consumed, previous() would read
    // tokens[-1]; fall back to the current token in that case.
    const token = this.current > 0 ? this.previous() : this.peek();
    return {
      line: token.line,
      column: token.column,
      length: token.length
    };
  }
}
```
## Semantic Analysis
### Symbol Table
```typescript
// src/probabilistic/semantics/symbolTable.ts

/** A named entity (predicate, constant or variable) tracked during analysis. */
export interface Symbol {
  name: string;
  type: SymbolType;
  arity: number;
  defined: Location;    // where the symbol was first defined
  usages: Location[];   // every subsequent definition/usage site
}

export enum SymbolType {
  PREDICATE = 'PREDICATE',
  CONSTANT = 'CONSTANT',
  VARIABLE = 'VARIABLE'
}

/**
 * Scoped symbol table. Predicates are keyed as `name/arity` so the same name
 * may coexist at several arities; other symbols are keyed by bare name.
 * Lookups fall back to the parent scope when a key is absent locally.
 */
export class SymbolTable {
  private symbols: Map<string, Symbol> = new Map();
  private parent: SymbolTable | null = null;

  constructor(parent: SymbolTable | null = null) {
    this.parent = parent;
  }

  /** Records a definition; a repeated definition is treated as a usage. */
  public define(name: string, type: SymbolType, arity: number, location: Location): void {
    const key = this.keyFor(name, type === SymbolType.PREDICATE ? arity : undefined);
    const existing = this.symbols.get(key);
    if (existing !== undefined) {
      existing.usages.push(location);
      return;
    }
    this.symbols.set(key, {
      name,
      type,
      arity,
      defined: location,
      usages: []
    });
  }

  /** Resolves a symbol in this scope or any ancestor; null when unknown. */
  public lookup(name: string, arity?: number): Symbol | null {
    const local = this.symbols.get(this.keyFor(name, arity));
    if (local !== undefined) {
      return local;
    }
    return this.parent !== null ? this.parent.lookup(name, arity) : null;
  }

  /** All symbols defined directly in this scope (ancestors excluded). */
  public getAllSymbols(): Symbol[] {
    return [...this.symbols.values()];
  }

  /** Builds the map key: `name/arity` when an arity is given, else the name. */
  private keyFor(name: string, arity?: number): string {
    return arity !== undefined ? `${name}/${arity}` : name;
  }
}
```
### Semantic Analyzer
```typescript
// src/probabilistic/semantics/analyzer.ts

/**
 * Two-pass semantic analysis over a parsed Program:
 *   1. collect every predicate occurrence into the symbol table;
 *   2. validate statements (probability ranges, rule safety, arity use).
 * Errors are accumulated rather than thrown so all problems surface in a
 * single run.
 */
export class SemanticAnalyzer {
  private symbolTable: SymbolTable;
  private errors: SemanticError[] = [];

  constructor() {
    this.symbolTable = new SymbolTable();
  }

  /** Analyzes every model; returns collected errors and the symbol table. */
  public analyze(ast: Program): AnalysisResult {
    this.errors = [];
    for (const model of ast.models) {
      this.analyzeModel(model);
    }
    return {
      success: this.errors.length === 0,
      errors: this.errors,
      symbolTable: this.symbolTable
    };
  }

  private analyzeModel(model: Model): void {
    // First pass: collect all predicate definitions so later statements can
    // reference predicates defined further down the model.
    for (const stmt of model.statements) {
      this.collectPredicates(stmt);
    }
    // Second pass: validate usage.
    for (const stmt of model.statements) {
      this.validateStatement(stmt);
    }
  }

  /** Registers every atom occurring in a defining position. */
  private collectPredicates(stmt: Statement): void {
    switch (stmt.type) {
      case 'ProbabilisticFact':
      case 'DeterministicFact':
        this.defineAtom(stmt.atom);
        break;
      case 'ProbabilisticRule':
        this.defineAtom(stmt.head);
        for (const literal of stmt.body) {
          this.defineAtom(literal.atom);
        }
        break;
      case 'AnnotatedDisjunction':
        for (const choice of stmt.choices) {
          this.defineAtom(choice.atom);
        }
        break;
    }
  }

  private defineAtom(atom: Atom): void {
    this.symbolTable.define(
      atom.predicate,
      SymbolType.PREDICATE,
      atom.arguments.length,
      atom.location
    );
  }

  private validateStatement(stmt: Statement): void {
    switch (stmt.type) {
      case 'ProbabilisticFact':
        this.validateProbability(stmt.probability, stmt.location);
        this.validateAtom(stmt.atom);
        break;
      case 'ProbabilisticRule':
        this.validateProbability(stmt.probability, stmt.location);
        this.validateRule(stmt);
        break;
      case 'AnnotatedDisjunction':
        this.validateAnnotatedDisjunction(stmt);
        break;
      case 'Observation':
        this.validateLiteral(stmt.literal);
        break;
      case 'Query':
        this.validateAtom(stmt.atom);
        break;
    }
  }

  /** A probability annotation must lie in [0, 1]. */
  private validateProbability(prob: number, location: Location): void {
    if (prob < 0 || prob > 1) {
      // BUG FIX: only suggest the shifted value when it is itself legal
      // (the old code suggested e.g. "1.5" for input 15).
      const shifted = prob / 10;
      this.errors.push({
        type: 'InvalidProbability',
        message: `Probability ${prob} must be between 0.0 and 1.0`,
        location,
        suggestion: prob > 1 && shifted <= 1 ? `Did you mean ${shifted}?` : undefined
      });
    }
  }

  /** Choice probabilities of an annotated disjunction may sum to at most 1. */
  private validateAnnotatedDisjunction(ad: AnnotatedDisjunction): void {
    let sum = 0;
    for (const choice of ad.choices) {
      sum += choice.probability;
      this.validateAtom(choice.atom);
    }
    if (sum > 1.0) {
      this.errors.push({
        type: 'InvalidAnnotatedDisjunction',
        message: `Annotated disjunction probabilities sum to ${sum}, which exceeds 1.0`,
        location: ad.location,
        suggestion: 'Probabilities must sum to at most 1.0'
      });
    }
  }

  /** Rule safety: every variable in the head must occur in the body. */
  private validateRule(rule: ProbabilisticRule): void {
    const headVars = this.collectVariables(rule.head);
    const bodyVars = new Set<string>();
    for (const literal of rule.body) {
      const vars = this.collectVariables(literal.atom);
      vars.forEach(v => bodyVars.add(v));
    }
    for (const headVar of headVars) {
      if (!bodyVars.has(headVar)) {
        this.errors.push({
          type: 'UnsafeVariable',
          message: `Variable ${headVar} in rule head must appear in rule body`,
          location: rule.location,
          suggestion: `Add a literal containing ${headVar} to the rule body`
        });
      }
    }
    this.validateAtom(rule.head);
    for (const literal of rule.body) {
      this.validateLiteral(literal);
    }
  }

  /** Recursively gathers variable names from an atom's arguments. */
  private collectVariables(atom: Atom): Set<string> {
    const vars = new Set<string>();
    for (const arg of atom.arguments) {
      if (arg.type === 'Variable') {
        vars.add(arg.name);
      } else if (arg.type === 'Atom') {
        const nested = this.collectVariables(arg);
        nested.forEach(v => vars.add(v));
      }
    }
    return vars;
  }

  /**
   * Arity-consistency check. BUG FIX: the previous draft looked the symbol
   * up under its exact `name/arity` key, so `symbol.arity` always equaled
   * the call-site arity and the mismatch branch was unreachable. We now
   * search for a same-name predicate registered with a *different* arity.
   */
  private validateAtom(atom: Atom): void {
    const conflict = this.symbolTable
      .getAllSymbols()
      .find(s =>
        s.type === SymbolType.PREDICATE &&
        s.name === atom.predicate &&
        s.arity !== atom.arguments.length
      );
    if (conflict) {
      this.errors.push({
        type: 'ArityMismatch',
        message: `Predicate ${atom.predicate} used with ${atom.arguments.length} arguments, but defined with ${conflict.arity}`,
        location: atom.location,
        suggestion: `Use ${conflict.arity} arguments for ${atom.predicate}`
      });
    }
  }

  private validateLiteral(literal: Literal): void {
    this.validateAtom(literal.atom);
  }
}
```
## Type Checking
```typescript
// src/probabilistic/types/typeChecker.ts

/**
 * Final validation pass before code generation. Most structural checking
 * happens during semantic analysis; this stage hosts type-specific checks.
 */
export class TypeChecker {
  private errors: TypeError[] = [];

  /** Runs the type checks over every model and reports collected errors. */
  public check(ast: Program): TypeCheckResult {
    this.errors = [];
    ast.models.forEach(model => this.checkModel(model));
    return {
      success: this.errors.length === 0,
      errors: this.errors
    };
  }

  /** Checks each statement of a single model. */
  private checkModel(model: Model): void {
    model.statements.forEach(stmt => this.checkStatement(stmt));
  }

  /** Statement-level checks; currently only probability fields are checked. */
  private checkStatement(stmt: Statement): void {
    const hasProbability =
      stmt.type === 'ProbabilisticFact' || stmt.type === 'ProbabilisticRule';
    if (!hasProbability) {
      return;
    }
    // Guard against a malformed AST produced by an unchecked front end.
    if (typeof stmt.probability !== 'number') {
      this.errors.push({
        type: 'TypeError',
        message: 'Probability must be a number',
        location: stmt.location
      });
    }
  }
}
```
## Code Generation
```typescript
// src/probabilistic/codegen/generator.ts

/**
 * Emits a ProbLog program from a validated Program AST. Output is plain
 * text, one clause per line, with each model introduced by a comment header.
 */
export class CodeGenerator {
  private output: string[] = [];
  private indentLevel: number = 0;  // reserved for future nested constructs

  /** Generates ProbLog source for every model, joined with newlines. */
  public generate(ast: Program): string {
    this.output = [];
    this.indentLevel = 0;
    for (const model of ast.models) {
      this.generateModel(model);
    }
    return this.output.join('\n');
  }

  /** Emits a comment header followed by each statement of the model. */
  private generateModel(model: Model): void {
    this.emit(`% Model: ${model.name}`);
    this.emit('');
    for (const stmt of model.statements) {
      this.generateStatement(stmt);
    }
    this.emit('');
  }

  /** Translates one statement into its ProbLog clause. */
  private generateStatement(stmt: Statement): void {
    switch (stmt.type) {
      case 'ProbabilisticFact':
        this.emit(`${stmt.probability}::${this.generateAtom(stmt.atom)}.`);
        break;
      case 'ProbabilisticRule':
        this.emit(
          `${stmt.probability}::${this.generateAtom(stmt.head)} :- ${this.generateBody(stmt.body)}.`
        );
        break;
      case 'DeterministicFact':
        this.emit(`${this.generateAtom(stmt.atom)}.`);
        break;
      case 'AnnotatedDisjunction': {
        // Braces scope the `const` to this case (no-case-declarations).
        const choices = stmt.choices
          .map(c => `${c.probability}::${this.generateAtom(c.atom)}`)
          .join('; ');
        this.emit(`${choices}.`);
        break;
      }
      case 'Observation': {
        const atomStr = this.generateAtom(stmt.literal.atom);
        const value = stmt.literal.negated ? 'false' : 'true';
        this.emit(`evidence(${atomStr}, ${value}).`);
        break;
      }
      case 'Query':
        this.emit(`query(${this.generateAtom(stmt.atom)}).`);
        break;
      case 'LearningDirective':
        this.emit(`% Learn parameters from ${stmt.dataset}`);
        // BUG FIX: quote the path so it remains a single Prolog atom even
        // when it contains '.', '/' or uppercase letters (e.g. "data/train.csv").
        this.emit(`learn('${stmt.dataset}').`);
        break;
    }
  }

  /** `pred` for arity 0, otherwise `pred(arg1, ..., argN)`. */
  private generateAtom(atom: Atom): string {
    if (atom.arguments.length === 0) {
      return atom.predicate;
    }
    const args = atom.arguments.map(arg => this.generateTerm(arg)).join(', ');
    return `${atom.predicate}(${args})`;
  }

  /** Renders a single argument term. */
  private generateTerm(term: Term): string {
    switch (term.type) {
      case 'Variable':
        return term.name;
      case 'Constant':
        return term.value;
      case 'Number':
        return term.value.toString();
      case 'Atom':
        return this.generateAtom(term);
    }
  }

  /** Comma-joined rule body. */
  private generateBody(body: Literal[]): string {
    return body.map(lit => this.generateLiteral(lit)).join(', ');
  }

  /** Negation is rendered with ProbLog's negation-as-failure operator \+. */
  private generateLiteral(literal: Literal): string {
    const atom = this.generateAtom(literal.atom);
    return literal.negated ? `\\+ ${atom}` : atom;
  }

  /** Appends one line at the current indent level. */
  private emit(line: string): void {
    const indent = ' '.repeat(this.indentLevel);
    this.output.push(indent + line);
  }
}
```
## Error Handling
```typescript
// src/probabilistic/errors/errorHandler.ts

/** A single compiler diagnostic with a source position and optional hint. */
export interface CompilerError {
  type: string;         // machine-readable category, e.g. 'InvalidProbability'
  message: string;      // human-readable description
  location: Location;   // 1-based position in the original source
  suggestion?: string;  // optional fix hint shown under the excerpt
}

/**
 * Collects diagnostics during compilation and renders them with a source
 * excerpt and a caret marker under the offending span.
 */
export class ErrorReporter {
  private errors: CompilerError[] = [];
  private source: string;

  constructor(source: string) {
    this.source = source;
  }

  /** Records one diagnostic. */
  public report(error: CompilerError): void {
    this.errors.push(error);
  }

  /** True once at least one diagnostic has been recorded. */
  public hasErrors(): boolean {
    return this.errors.length > 0;
  }

  /** All diagnostics in the order they were reported. */
  public getErrors(): CompilerError[] {
    return this.errors;
  }

  /** Renders every diagnostic, blank-line separated. */
  public formatErrors(): string {
    return this.errors.map(err => this.formatError(err)).join('\n\n');
  }

  private formatError(error: CompilerError): string {
    const lines = this.source.split('\n');
    // Guard: an out-of-range line number must not crash the reporter itself.
    const lineText = lines[error.location.line - 1] ?? '';
    const prefix = `  Line ${error.location.line}: `;
    let output = `Error: ${error.message}\n`;
    output += `${prefix}${lineText}\n`;
    // BUG FIX: columns are 1-based and the excerpt is prefixed with
    // "  Line N: "; the old code ignored the prefix width and used the raw
    // column, so the caret never lined up with the offending text.
    const padding = prefix.length + Math.max(0, error.location.column - 1);
    output += ' '.repeat(padding) + '^'.repeat(Math.max(1, error.location.length)) + '\n';
    if (error.suggestion) {
      output += `  Suggestion: ${error.suggestion}\n`;
    }
    return output;
  }
}
```
## Testing Strategy
### Unit Tests
```typescript
// tests/probabilistic/lexer.test.ts
describe('Lexer', () => {
  test('tokenizes probabilities', () => {
    const lexer = new Lexer('0.7 :: rain');
    const tokens = lexer.tokenize();
    expect(tokens[0].type).toBe(TokenType.PROBABILITY);
    expect(tokens[0].value).toBe('0.7');
  });
  test('tokenizes keywords', () => {
    const lexer = new Lexer('observe query');
    const tokens = lexer.tokenize();
    expect(tokens[0].type).toBe(TokenType.OBSERVE);
    expect(tokens[1].type).toBe(TokenType.QUERY);
  });
  test('distinguishes variables from constants', () => {
    const lexer = new Lexer('flies(X) bird(sparrow)');
    const tokens = lexer.tokenize();
    // Token stream: flies ( X ) bird ( sparrow ) — indices 0..7.
    expect(tokens[2].type).toBe(TokenType.VARIABLE); // X
    // BUG FIX: 'bird' is at index 4, not 5 (index 5 is the LPAREN).
    expect(tokens[4].type).toBe(TokenType.CONSTANT); // bird
  });
});
// tests/probabilistic/parser.test.ts
describe('Parser', () => {
  // Each test lexes a snippet and parses it; AST-shape assertions are left
  // as TODOs in this design draft.
  // NOTE(review): these snippets are bare statements, but parse() as drafted
  // expects a 'probabilistic_model ... { ... }' wrapper — confirm the
  // intended entry point for statement-level parsing.
  test('parses probabilistic facts', () => {
    const tokens = new Lexer('0.7 :: rain').tokenize();
    const ast = new Parser(tokens).parse();
    // Assertions...
  });
  test('parses rules with body', () => {
    const source = '0.9 :: flies(X) :- bird(X), not penguin(X)';
    const tokens = new Lexer(source).tokenize();
    const ast = new Parser(tokens).parse();
    // Assertions...
  });
  test('parses annotated disjunctions', () => {
    const source = '0.3 :: a; 0.7 :: b';
    const tokens = new Lexer(source).tokenize();
    const ast = new Parser(tokens).parse();
    // Assertions...
  });
});
// tests/probabilistic/semantics.test.ts
// NOTE(review): parseSource is a helper not defined in this document —
// presumably it lexes, parses and wraps bare statements in a model.
describe('Semantic Analyzer', () => {
  // NOTE(review): 'flies(X) :- bird(Y)' is a deterministic rule, which the
  // parser draft does not accept (no unannotated-rule production) — confirm
  // the grammar supports it before relying on this test.
  test('detects unsafe variables', () => {
    const source = 'flies(X) :- bird(Y)';
    const ast = parseSource(source);
    const result = new SemanticAnalyzer().analyze(ast);
    expect(result.errors).toHaveLength(1);
    expect(result.errors[0].type).toBe('UnsafeVariable');
  });
  // NOTE(review): '1.5' lexes as NUMBER, not PROBABILITY, so the parser draft
  // rejects it before semantic analysis runs — confirm the parser tolerates
  // out-of-range annotations so this diagnostic can fire.
  test('detects invalid probabilities', () => {
    const source = '1.5 :: impossible';
    const ast = parseSource(source);
    const result = new SemanticAnalyzer().analyze(ast);
    expect(result.errors[0].type).toBe('InvalidProbability');
  });
});
// tests/probabilistic/codegen.test.ts
// NOTE(review): parseSource is assumed to wrap bare statements in a model;
// the generator itself operates on a full Program AST.
describe('Code Generator', () => {
  test('generates valid ProbLog', () => {
    const source = '0.7 :: rain';
    const ast = parseSource(source);
    const problog = new CodeGenerator().generate(ast);
    expect(problog).toContain('0.7::rain.');
  });
  test('translates observations to evidence', () => {
    const source = 'observe fever';
    const ast = parseSource(source);
    const problog = new CodeGenerator().generate(ast);
    expect(problog).toContain('evidence(fever, true).');
  });
});
```
### Integration Tests
```typescript
// tests/probabilistic/integration.test.ts
// NOTE(review): compilePDSL is the pipeline entry point; it is referenced
// here but not specified in this document — presumably lexer → parser →
// analyzer → type checker → generator.
describe('End-to-end compilation', () => {
  // NOTE(review): 'Medical' starts uppercase and therefore lexes as VARIABLE,
  // while parseModel as drafted consumes a CONSTANT for the model name —
  // confirm the parser accepts uppercase model names.
  test('compiles medical diagnosis example', () => {
    const source = `
probabilistic_model Medical {
0.01 :: flu
0.9 :: fever :- flu
observe fever
query flu
}
`;
    const result = compilePDSL(source);
    expect(result.success).toBe(true);
    expect(result.problog).toContain('0.01::flu.');
    expect(result.problog).toContain('evidence(fever, true).');
  });
});
```
## Performance Considerations
1. **Lexer optimization:** Use string interning for identifiers
2. **Parser optimization:** Implement operator precedence climbing
3. **AST optimization:** Constant folding, dead code elimination
4. **Code generation:** Template caching, incremental compilation
## Implementation Timeline
See [PROBABILISTIC_DSL_SUMMARY.md](PROBABILISTIC_DSL_SUMMARY.md) for detailed timeline.
---
**This parser design provides a robust foundation for PDSL compilation to ProbLog.**