# PDSL Parser Design
**Version:** 0.1.0
**Architecture:** Multi-stage compilation pipeline
This document describes the complete parser architecture for converting PDSL to ProbLog.
## Table of Contents
1. [Architecture Overview](#architecture-overview)
2. [Lexical Analysis](#lexical-analysis)
3. [Syntactic Analysis](#syntactic-analysis)
4. [Semantic Analysis](#semantic-analysis)
5. [Type Checking](#type-checking)
6. [Code Generation](#code-generation)
7. [Error Handling](#error-handling)
8. [Testing Strategy](#testing-strategy)
9. [Performance Considerations](#performance-considerations)
10. [Implementation Timeline](#implementation-timeline)
## Architecture Overview
### Pipeline Stages
```
┌─────────────────────────────────────────────────────────────┐
│ PDSL Compilation Pipeline │
└─────────────────────────────────────────────────────────────┘
Source Text
│
▼
┌──────────────┐
│ LEXER │ Tokenization
│ (Scanner) │ → Stream of tokens
└──────────────┘
│
▼
┌──────────────┐
│ PARSER │ Syntactic Analysis
│ (Recursive │ → Abstract Syntax Tree (AST)
│ Descent) │
└──────────────┘
│
▼
┌──────────────┐
│ SEMANTIC │ Semantic Analysis
│ ANALYZER │ → Annotated AST
└──────────────┘
│
▼
┌──────────────┐
│ TYPE │ Type Checking
│ CHECKER │ → Type-validated AST
└──────────────┘
│
▼
┌──────────────┐
│ CODE │ ProbLog Generation
│ GENERATOR │ → ProbLog program
└──────────────┘
│
▼
ProbLog Output
```
### Module Structure
```
src/probabilistic/
├── index.ts # Main entry point
├── lexer/
│ ├── lexer.ts # Tokenizer
│ ├── token.ts # Token definitions
│ └── tokenTypes.ts # Token type enums
├── parser/
│ ├── parser.ts # Recursive descent parser
│ ├── ast.ts # AST node definitions
│ └── parserUtils.ts # Helper functions
├── semantics/
│ ├── analyzer.ts # Semantic analysis
│ ├── symbolTable.ts # Symbol table for scoping
│ └── validator.ts # Validation rules
├── types/
│ ├── typeChecker.ts # Type checking
│ ├── typeInference.ts # Type inference
│ └── types.ts # Type definitions
├── codegen/
│ ├── generator.ts # ProbLog code generator
│ ├── optimizer.ts # AST optimization
│ └── templates.ts # Code templates
└── errors/
├── errorHandler.ts # Error reporting
└── errorTypes.ts # Error type definitions
```
## Lexical Analysis
### Token Types
```typescript
// src/probabilistic/lexer/tokenTypes.ts

// Every token category the lexer can emit. String values double as readable
// names in error messages and test output.
export enum TokenType {
  // Keywords
  PROBABILISTIC_MODEL = 'PROBABILISTIC_MODEL',
  OBSERVE = 'OBSERVE',
  QUERY = 'QUERY',
  LEARN = 'LEARN',
  PARAMETERS = 'PARAMETERS',
  FROM = 'FROM',
  DATASET = 'DATASET',
  NOT = 'NOT',
  TRUE = 'TRUE',
  FALSE = 'FALSE',
  // Identifiers and literals
  VARIABLE = 'VARIABLE', // X, Y, Person (identifier starting uppercase)
  CONSTANT = 'CONSTANT', // alice, flu (identifier starting lowercase, non-keyword)
  PROBABILITY = 'PROBABILITY', // 0.7, 0.95 (numeric literal in [0, 1])
  NUMBER = 'NUMBER', // 42, 3.14 (numeric literal outside [0, 1])
  STRING = 'STRING', // "file.csv"
  // Operators
  PROB_ANNOTATION = 'PROB_ANNOTATION', // ::
  IMPLICATION = 'IMPLICATION', // :-
  COMMA = 'COMMA', // ,
  SEMICOLON = 'SEMICOLON', // ;
  // Delimiters
  LPAREN = 'LPAREN', // (
  RPAREN = 'RPAREN', // )
  LBRACE = 'LBRACE', // {
  RBRACE = 'RBRACE', // }
  LBRACKET = 'LBRACKET', // [
  RBRACKET = 'RBRACKET', // ]
  // Special
  COMMENT = 'COMMENT', // # comment (scanned but filtered out by tokenize())
  // NOTE(review): NEWLINE is declared but the lexer draft treats '\n' as
  // skippable whitespace and never emits it — confirm whether it is needed.
  NEWLINE = 'NEWLINE', // \n
  EOF = 'EOF', // End of file
  UNKNOWN = 'UNKNOWN' // Error token
}

// A single lexeme with its 1-based source position.
export interface Token {
  type: TokenType;   // category from TokenType
  value: string;     // raw text (STRING tokens store the unquoted content)
  line: number;      // 1-based line of the token's first character
  column: number;    // 1-based column of the token's first character
  length: number;    // characters consumed from the source
}
```
### Lexer Implementation
```typescript
// src/probabilistic/lexer/lexer.ts

/**
 * Converts PDSL source text into a flat token stream.
 *
 * Fixes over the previous draft:
 * - scanComment / scanTwoChar / scanSingleChar / createToken were referenced
 *   but never defined; they are specified here.
 * - An unrecognized character was never consumed, so tokenize() looped
 *   forever on it; the UNKNOWN path now advances past it.
 * - Newline bookkeeping is centralized in advance(), so line/column stay
 *   correct inside strings as well as between tokens (the old code reset
 *   column to 1 *before* advancing, leaving every line starting at column 2).
 */
export class Lexer {
  private source: string;
  private position: number = 0;
  private line: number = 1;    // 1-based line of the character at `position`
  private column: number = 1;  // 1-based column of the character at `position`
  private tokens: Token[] = [];

  constructor(source: string) {
    this.source = source;
  }

  /**
   * Tokenizes the whole source.
   * @returns all tokens (comments filtered out), terminated by one EOF token.
   */
  public tokenize(): Token[] {
    while (!this.isAtEnd()) {
      this.skipWhitespace();
      if (this.isAtEnd()) break;
      const token = this.scanToken();
      // Comments are scanned (to keep positions correct) but not emitted.
      if (token.type !== TokenType.COMMENT) {
        this.tokens.push(token);
      }
    }
    this.tokens.push(this.createToken(TokenType.EOF, ''));
    return this.tokens;
  }

  /** Scans exactly one token starting at the current position. */
  private scanToken(): Token {
    const char = this.peek();
    // Comments run from '#' to end of line.
    if (char === '#') {
      return this.scanComment();
    }
    // Two-character operators; must precede any single ':' handling.
    if (char === ':' && this.peekNext() === ':') {
      return this.scanTwoChar(TokenType.PROB_ANNOTATION, '::');
    }
    if (char === ':' && this.peekNext() === '-') {
      return this.scanTwoChar(TokenType.IMPLICATION, ':-');
    }
    // Numbers (probabilities are classified inside scanNumber).
    if (this.isDigit(char)) {
      return this.scanNumber();
    }
    // Strings
    if (char === '"') {
      return this.scanString();
    }
    // Keywords, constants and variables all start with a letter.
    if (this.isAlpha(char)) {
      return this.scanIdentifier();
    }
    // Single-character tokens
    switch (char) {
      case '(': return this.scanSingleChar(TokenType.LPAREN);
      case ')': return this.scanSingleChar(TokenType.RPAREN);
      case '{': return this.scanSingleChar(TokenType.LBRACE);
      case '}': return this.scanSingleChar(TokenType.RBRACE);
      case '[': return this.scanSingleChar(TokenType.LBRACKET);
      case ']': return this.scanSingleChar(TokenType.RBRACKET);
      case ',': return this.scanSingleChar(TokenType.COMMA);
      case ';': return this.scanSingleChar(TokenType.SEMICOLON);
      default: {
        // BUG FIX: consume the offending character. The old code returned an
        // UNKNOWN token without advancing, so tokenize() never made progress
        // and spun forever on unrecognized input.
        const startColumn = this.column;
        this.advance();
        return {
          type: TokenType.UNKNOWN,
          value: char,
          line: this.line,
          column: startColumn,
          length: 1
        };
      }
    }
  }

  /** Scans an integer or decimal literal; values in [0, 1] are PROBABILITY. */
  private scanNumber(): Token {
    const start = this.position;
    const startColumn = this.column;
    while (this.isDigit(this.peek())) {
      this.advance();
    }
    // Decimal point only when followed by a digit (so "1." is not consumed).
    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // consume '.'
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }
    const value = this.source.substring(start, this.position);
    const numValue = parseFloat(value);
    // NOTE: integers 0 and 1 also classify as PROBABILITY; the parser treats
    // both PROBABILITY and NUMBER as numeric terms, so this is harmless.
    const type = (numValue >= 0 && numValue <= 1)
      ? TokenType.PROBABILITY
      : TokenType.NUMBER;
    return {
      type,
      value,
      line: this.line,
      column: startColumn,
      length: value.length
    };
  }

  /** Scans a double-quoted string; the stored value excludes the quotes. */
  private scanString(): Token {
    const start = this.position;
    const startLine = this.line;      // report multi-line strings at their start
    const startColumn = this.column;
    this.advance(); // opening "
    while (!this.isAtEnd() && this.peek() !== '"') {
      this.advance(); // advance() tracks embedded newlines
    }
    if (this.isAtEnd()) {
      throw new LexerError('Unterminated string', startLine, startColumn);
    }
    this.advance(); // closing "
    const value = this.source.substring(start + 1, this.position - 1);
    return {
      type: TokenType.STRING,
      value,
      line: startLine,
      column: startColumn,
      length: this.position - start
    };
  }

  /** Scans a keyword, VARIABLE (leading uppercase) or CONSTANT. */
  private scanIdentifier(): Token {
    const start = this.position;
    const startColumn = this.column;
    while (this.isAlphaNumeric(this.peek()) || this.peek() === '_') {
      this.advance();
    }
    const value = this.source.substring(start, this.position);
    // Keywords take precedence over constants/variables.
    const type = this.getKeywordType(value);
    // Non-keywords starting with an uppercase letter are logic variables.
    if (type === TokenType.CONSTANT && this.isUpperCase(value[0])) {
      return {
        type: TokenType.VARIABLE,
        value,
        line: this.line,
        column: startColumn,
        length: value.length
      };
    }
    return {
      type,
      value,
      line: this.line,
      column: startColumn,
      length: value.length
    };
  }

  /** Consumes '#' through end of line; the newline is left for whitespace. */
  private scanComment(): Token {
    const start = this.position;
    const startColumn = this.column;
    while (!this.isAtEnd() && this.peek() !== '\n') {
      this.advance();
    }
    const value = this.source.substring(start, this.position);
    return {
      type: TokenType.COMMENT,
      value,
      line: this.line,
      column: startColumn,
      length: value.length
    };
  }

  /** Consumes one character and wraps it as a token of the given type. */
  private scanSingleChar(type: TokenType): Token {
    const startColumn = this.column;
    const char = this.advance();
    return { type, value: char, line: this.line, column: startColumn, length: 1 };
  }

  /** Consumes a known two-character operator such as '::' or ':-'. */
  private scanTwoChar(type: TokenType, value: string): Token {
    const startColumn = this.column;
    this.advance();
    this.advance();
    return { type, value, line: this.line, column: startColumn, length: 2 };
  }

  /** Builds a token at the current position (used for the synthetic EOF). */
  private createToken(type: TokenType, value: string): Token {
    return {
      type,
      value,
      line: this.line,
      column: this.column,
      length: value.length
    };
  }

  /** Maps reserved words to their token types; everything else is CONSTANT. */
  private getKeywordType(value: string): TokenType {
    const keywords: Record<string, TokenType> = {
      'probabilistic_model': TokenType.PROBABILISTIC_MODEL,
      'observe': TokenType.OBSERVE,
      'query': TokenType.QUERY,
      'learn': TokenType.LEARN,
      'parameters': TokenType.PARAMETERS,
      'from': TokenType.FROM,
      'dataset': TokenType.DATASET,
      'not': TokenType.NOT,
      'true': TokenType.TRUE,
      'false': TokenType.FALSE,
    };
    return keywords[value] || TokenType.CONSTANT;
  }

  // Character-class helpers
  private isAlpha(char: string): boolean {
    return /[a-zA-Z]/.test(char);
  }

  private isDigit(char: string): boolean {
    return /[0-9]/.test(char);
  }

  private isAlphaNumeric(char: string): boolean {
    return this.isAlpha(char) || this.isDigit(char);
  }

  private isUpperCase(char: string): boolean {
    return /[A-Z]/.test(char);
  }

  /** Current character without consuming it; '\0' at end of input. */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.source[this.position];
  }

  /** One character of lookahead; '\0' at end of input. */
  private peekNext(): string {
    if (this.position + 1 >= this.source.length) return '\0';
    return this.source[this.position + 1];
  }

  /**
   * Consumes one character. All line/column bookkeeping lives here so every
   * scan path (strings included) keeps positions consistent.
   */
  private advance(): string {
    const char = this.source[this.position++];
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  private isAtEnd(): boolean {
    return this.position >= this.source.length;
  }

  /** Skips spaces, tabs, carriage returns and newlines between tokens. */
  private skipWhitespace(): void {
    while (!this.isAtEnd()) {
      const char = this.peek();
      if (char === ' ' || char === '\t' || char === '\r' || char === '\n') {
        this.advance(); // advance() handles newline line/column updates
      } else {
        break;
      }
    }
  }
}
```
## Syntactic Analysis
### AST Node Definitions
```typescript
// src/probabilistic/parser/ast.ts

// Every node the parser can produce. All nodes carry a `location` so later
// stages can report errors against the original source.
// NOTE(review): the union lists Statement and Term alongside their own
// members, which is redundant but harmless — consider trimming.
export type ASTNode =
  | Program
  | Model
  | Statement
  | ProbabilisticFact
  | ProbabilisticRule
  | DeterministicFact
  | AnnotatedDisjunction
  | Observation
  | Query
  | LearningDirective
  | Atom
  | Literal
  | Term;

// Root node: one Program per compilation, holding every parsed model.
export interface Program {
  type: 'Program';
  models: Model[];
  location: Location;
}

// A `probabilistic_model Name { ... }` block.
export interface Model {
  type: 'Model';
  name: string;
  statements: Statement[];
  location: Location;
}

// The statement-level alternatives allowed inside a model body
// (a discriminated union on `type`).
export type Statement =
  | ProbabilisticFact
  | ProbabilisticRule
  | DeterministicFact
  | AnnotatedDisjunction
  | Observation
  | Query
  | LearningDirective;

// `0.7 :: rain` — a fact annotated with a probability.
export interface ProbabilisticFact {
  type: 'ProbabilisticFact';
  probability: number;
  atom: Atom;
  location: Location;
}

// `0.9 :: flies(X) :- bird(X), not penguin(X)` — annotated rule.
export interface ProbabilisticRule {
  type: 'ProbabilisticRule';
  probability: number;
  head: Atom;
  body: Literal[];
  location: Location;
}

// `bird(tweety)` — a fact that always holds.
export interface DeterministicFact {
  type: 'DeterministicFact';
  atom: Atom;
  location: Location;
}

// `0.3 :: a; 0.7 :: b` — mutually exclusive probabilistic choices.
export interface AnnotatedDisjunction {
  type: 'AnnotatedDisjunction';
  choices: ProbabilisticFact[];
  location: Location;
}

// `observe fever` — evidence; the literal may be negated.
export interface Observation {
  type: 'Observation';
  literal: Literal;
  location: Location;
}

// `query flu` — marginal-probability query target.
export interface Query {
  type: 'Query';
  atom: Atom;
  location: Location;
}

// `learn parameters from dataset("file.csv")`.
export interface LearningDirective {
  type: 'LearningDirective';
  dataset: string;
  location: Location;
}

// `predicate(arg1, ..., argN)`; arity 0 means a bare predicate name.
export interface Atom {
  type: 'Atom';
  predicate: string;
  arguments: Term[];
  location: Location;
}

// An atom with optional negation, as used in rule bodies and observations.
export interface Literal {
  type: 'Literal';
  negated: boolean;
  atom: Atom;
  location: Location;
}

// Argument positions of an atom: a variable, constant, number, or nested atom.
export type Term =
  | Variable
  | Constant
  | Number
  | Atom;

// Logic variable — identifiers starting with an uppercase letter.
export interface Variable {
  type: 'Variable';
  name: string;
  location: Location;
}

// Ground symbol — identifiers starting with a lowercase letter.
export interface Constant {
  type: 'Constant';
  value: string;
  location: Location;
}

// Numeric literal term.
// NOTE(review): this interface shadows the global `Number` type within this
// module — consider renaming (e.g. NumberLiteral) to avoid confusion.
export interface Number {
  type: 'Number';
  value: number;
  location: Location;
}

// 1-based source position shared by all nodes and diagnostics.
export interface Location {
  line: number;
  column: number;
  length: number;
}
```
### Parser Implementation
```typescript
// src/probabilistic/parser/parser.ts

/**
 * Recursive-descent parser over the lexer's token stream.
 *
 * Fixes over the previous draft:
 * - Model names starting with an uppercase letter (e.g. "Medical") lex as
 *   VARIABLE, but parseModel only accepted CONSTANT; it now accepts both.
 * - Out-of-range probability annotations such as "1.5" lex as NUMBER and were
 *   rejected with a syntax error; they now parse so the semantic analyzer can
 *   report InvalidProbability with a helpful message.
 * - Deterministic rules (`head :- body` with no annotation) were unparseable;
 *   they are desugared to a ProbabilisticRule with probability 1.0.
 * - getLocation() read tokens[-1] when called before any token was consumed.
 */
export class Parser {
  private tokens: Token[];
  private current: number = 0;

  constructor(tokens: Token[]) {
    this.tokens = tokens;
  }

  /** Parses the whole token stream into a Program node. */
  public parse(): Program {
    const models: Model[] = [];
    while (!this.isAtEnd()) {
      models.push(this.parseModel());
    }
    return {
      type: 'Program',
      models,
      location: this.getLocation()
    };
  }

  /** model := 'probabilistic_model' name '{' statement* '}' */
  private parseModel(): Model {
    this.consume(TokenType.PROBABILISTIC_MODEL, "Expected 'probabilistic_model'");
    // Model names conventionally start uppercase, which the lexer classifies
    // as VARIABLE rather than CONSTANT — accept either token type here.
    if (!this.check(TokenType.CONSTANT) && !this.check(TokenType.VARIABLE)) {
      throw new ParseError('Expected model name', this.peek().line, this.peek().column);
    }
    const name = this.advance().value;
    this.consume(TokenType.LBRACE, "Expected '{'");
    const statements: Statement[] = [];
    while (!this.check(TokenType.RBRACE) && !this.isAtEnd()) {
      statements.push(this.parseStatement());
    }
    this.consume(TokenType.RBRACE, "Expected '}'");
    return {
      type: 'Model',
      name,
      statements,
      location: this.getLocation()
    };
  }

  /** Dispatches on the leading token of a statement. */
  private parseStatement(): Statement {
    if (this.check(TokenType.OBSERVE)) {
      return this.parseObservation();
    }
    if (this.check(TokenType.QUERY)) {
      return this.parseQuery();
    }
    if (this.check(TokenType.LEARN)) {
      return this.parseLearningDirective();
    }
    // Accept NUMBER as well as PROBABILITY: annotations outside [0, 1] lex as
    // NUMBER, and rejecting them here would mask the real (semantic) error.
    if (this.check(TokenType.PROBABILITY) || this.check(TokenType.NUMBER)) {
      return this.parseProbabilisticStatement();
    }
    // Otherwise, a deterministic fact or rule.
    return this.parseDeterministicFact();
  }

  /** prob '::' atom followed by ';' (disjunction), ':-' (rule) or nothing. */
  private parseProbabilisticStatement(): Statement {
    const firstProb = this.advance();
    this.consume(TokenType.PROB_ANNOTATION, "Expected '::'");
    const firstAtom = this.parseAtom();
    // Annotated disjunction: `p1 :: a1; p2 :: a2; ...`
    if (this.check(TokenType.SEMICOLON)) {
      return this.parseAnnotatedDisjunction(firstProb, firstAtom);
    }
    // Probabilistic rule: `p :: head :- body`
    if (this.check(TokenType.IMPLICATION)) {
      this.advance(); // consume :-
      const body = this.parseBody();
      return {
        type: 'ProbabilisticRule',
        probability: parseFloat(firstProb.value),
        head: firstAtom,
        body,
        location: this.getLocation()
      };
    }
    // Simple probabilistic fact: `p :: atom`
    return {
      type: 'ProbabilisticFact',
      probability: parseFloat(firstProb.value),
      atom: firstAtom,
      location: this.getLocation()
    };
  }

  /** Continues a disjunction after its first `prob :: atom` choice. */
  private parseAnnotatedDisjunction(
    firstProb: Token,
    firstAtom: Atom
  ): AnnotatedDisjunction {
    const choices: ProbabilisticFact[] = [{
      type: 'ProbabilisticFact',
      probability: parseFloat(firstProb.value),
      atom: firstAtom,
      location: this.getLocation()
    }];
    while (this.match(TokenType.SEMICOLON)) {
      // NUMBER is tolerated for the same reason as in parseStatement.
      const prob = this.check(TokenType.NUMBER)
        ? this.advance()
        : this.consume(TokenType.PROBABILITY, "Expected probability");
      this.consume(TokenType.PROB_ANNOTATION, "Expected '::'");
      const atom = this.parseAtom();
      choices.push({
        type: 'ProbabilisticFact',
        probability: parseFloat(prob.value),
        atom,
        location: this.getLocation()
      });
    }
    return {
      type: 'AnnotatedDisjunction',
      choices,
      location: this.getLocation()
    };
  }

  /**
   * Parses `atom` or `atom :- body`. A deterministic rule is represented as
   * a ProbabilisticRule with probability 1.0 so downstream stages need no
   * extra node type.
   */
  private parseDeterministicFact(): Statement {
    const atom = this.parseAtom();
    if (this.match(TokenType.IMPLICATION)) {
      const body = this.parseBody();
      return {
        type: 'ProbabilisticRule',
        probability: 1.0,
        head: atom,
        body,
        location: this.getLocation()
      };
    }
    return {
      type: 'DeterministicFact',
      atom,
      location: this.getLocation()
    };
  }

  /** observation := 'observe' literal */
  private parseObservation(): Observation {
    this.consume(TokenType.OBSERVE, "Expected 'observe'");
    const literal = this.parseLiteral();
    return {
      type: 'Observation',
      literal,
      location: this.getLocation()
    };
  }

  /** query := 'query' atom */
  private parseQuery(): Query {
    this.consume(TokenType.QUERY, "Expected 'query'");
    const atom = this.parseAtom();
    return {
      type: 'Query',
      atom,
      location: this.getLocation()
    };
  }

  /** learning := 'learn' 'parameters' 'from' 'dataset' '(' STRING ')' */
  private parseLearningDirective(): LearningDirective {
    this.consume(TokenType.LEARN, "Expected 'learn'");
    this.consume(TokenType.PARAMETERS, "Expected 'parameters'");
    this.consume(TokenType.FROM, "Expected 'from'");
    this.consume(TokenType.DATASET, "Expected 'dataset'");
    this.consume(TokenType.LPAREN, "Expected '('");
    const datasetToken = this.consume(TokenType.STRING, "Expected dataset path");
    this.consume(TokenType.RPAREN, "Expected ')'");
    return {
      type: 'LearningDirective',
      dataset: datasetToken.value,
      location: this.getLocation()
    };
  }

  /** body := literal (',' literal)* */
  private parseBody(): Literal[] {
    const literals: Literal[] = [];
    literals.push(this.parseLiteral());
    while (this.match(TokenType.COMMA)) {
      literals.push(this.parseLiteral());
    }
    return literals;
  }

  /** literal := 'not'? atom */
  private parseLiteral(): Literal {
    const negated = this.match(TokenType.NOT);
    const atom = this.parseAtom();
    return {
      type: 'Literal',
      negated,
      atom,
      location: this.getLocation()
    };
  }

  /** atom := CONSTANT ( '(' term (',' term)* ')' )? */
  private parseAtom(): Atom {
    const predicateToken = this.consume(TokenType.CONSTANT, "Expected predicate name");
    const predicate = predicateToken.value;
    let args: Term[] = [];
    if (this.match(TokenType.LPAREN)) {
      args = this.parseArgumentList();
      this.consume(TokenType.RPAREN, "Expected ')'");
    }
    return {
      type: 'Atom',
      predicate,
      arguments: args,
      location: this.getLocation()
    };
  }

  private parseArgumentList(): Term[] {
    const args: Term[] = [];
    args.push(this.parseTerm());
    while (this.match(TokenType.COMMA)) {
      args.push(this.parseTerm());
    }
    return args;
  }

  /** term := VARIABLE | NUMBER | PROBABILITY | CONSTANT | nested atom */
  private parseTerm(): Term {
    if (this.check(TokenType.VARIABLE)) {
      const token = this.advance();
      return {
        type: 'Variable',
        name: token.value,
        location: this.getLocation()
      };
    }
    // The lexer classifies numerics in [0, 1] as PROBABILITY; both are
    // numeric terms in argument position.
    if (this.check(TokenType.NUMBER) || this.check(TokenType.PROBABILITY)) {
      const token = this.advance();
      return {
        type: 'Number',
        value: parseFloat(token.value),
        location: this.getLocation()
      };
    }
    if (this.check(TokenType.CONSTANT)) {
      // A constant followed by '(' is a nested atom, e.g. f(g(x)).
      const lookahead = this.peek(1);
      if (lookahead && lookahead.type === TokenType.LPAREN) {
        return this.parseAtom();
      }
      const token = this.advance();
      return {
        type: 'Constant',
        value: token.value,
        location: this.getLocation()
      };
    }
    throw new ParseError(
      `Expected term, got ${this.peek().type}`,
      this.peek().line,
      this.peek().column
    );
  }

  // Helper methods

  /** Consumes the current token if it matches any of the given types. */
  private match(...types: TokenType[]): boolean {
    for (const type of types) {
      if (this.check(type)) {
        this.advance();
        return true;
      }
    }
    return false;
  }

  /** True when the current token has the given type (EOF never matches). */
  private check(type: TokenType): boolean {
    if (this.isAtEnd()) return false;
    return this.peek().type === type;
  }

  /** Consumes and returns the current token (EOF is never consumed). */
  private advance(): Token {
    if (!this.isAtEnd()) this.current++;
    return this.previous();
  }

  private isAtEnd(): boolean {
    return this.peek().type === TokenType.EOF;
  }

  /** Lookahead without consuming; offset 0 is the current token. */
  private peek(offset: number = 0): Token {
    return this.tokens[this.current + offset];
  }

  private previous(): Token {
    return this.tokens[this.current - 1];
  }

  /** Consumes a token of the given type or raises a ParseError. */
  private consume(type: TokenType, message: string): Token {
    if (this.check(type)) return this.advance();
    throw new ParseError(message, this.peek().line, this.peek().column);
  }

  /** Location of the most recently consumed token. */
  private getLocation(): Location {
    // BUG FIX: before any token is consumed, previous() would read
    // tokens[-1]; fall back to the current token in that case.
    const token = this.current > 0 ? this.previous() : this.peek();
    return {
      line: token.line,
      column: token.column,
      length: token.length
    };
  }
}
```
## Semantic Analysis
### Symbol Table
```typescript
// src/probabilistic/semantics/symbolTable.ts

/** A named entity (predicate, constant or variable) tracked during analysis. */
export interface Symbol {
  name: string;
  type: SymbolType;
  arity: number;
  defined: Location;    // where the symbol was first defined
  usages: Location[];   // every subsequent definition/usage site
}

export enum SymbolType {
  PREDICATE = 'PREDICATE',
  CONSTANT = 'CONSTANT',
  VARIABLE = 'VARIABLE'
}

/**
 * Scoped symbol table. Predicates are keyed as `name/arity` so the same name
 * may coexist at several arities; other symbols are keyed by bare name.
 * Lookups fall back to the parent scope when a key is absent locally.
 */
export class SymbolTable {
  private symbols: Map<string, Symbol> = new Map();
  private parent: SymbolTable | null = null;

  constructor(parent: SymbolTable | null = null) {
    this.parent = parent;
  }

  /** Records a definition; a repeated definition is treated as a usage. */
  public define(name: string, type: SymbolType, arity: number, location: Location): void {
    const key = this.keyFor(name, type === SymbolType.PREDICATE ? arity : undefined);
    const existing = this.symbols.get(key);
    if (existing !== undefined) {
      existing.usages.push(location);
      return;
    }
    this.symbols.set(key, {
      name,
      type,
      arity,
      defined: location,
      usages: []
    });
  }

  /** Resolves a symbol in this scope or any ancestor; null when unknown. */
  public lookup(name: string, arity?: number): Symbol | null {
    const local = this.symbols.get(this.keyFor(name, arity));
    if (local !== undefined) {
      return local;
    }
    return this.parent !== null ? this.parent.lookup(name, arity) : null;
  }

  /** All symbols defined directly in this scope (ancestors excluded). */
  public getAllSymbols(): Symbol[] {
    return [...this.symbols.values()];
  }

  /** Builds the map key: `name/arity` when an arity is given, else the name. */
  private keyFor(name: string, arity?: number): string {
    return arity !== undefined ? `${name}/${arity}` : name;
  }
}
```
### Semantic Analyzer
```typescript
// src/probabilistic/semantics/analyzer.ts

/**
 * Two-pass semantic analysis over a parsed Program:
 *   1. collect every predicate occurrence into the symbol table;
 *   2. validate statements (probability ranges, rule safety, arity use).
 * Errors are accumulated rather than thrown so all problems surface in a
 * single run.
 */
export class SemanticAnalyzer {
  private symbolTable: SymbolTable;
  private errors: SemanticError[] = [];

  constructor() {
    this.symbolTable = new SymbolTable();
  }

  /** Analyzes every model; returns collected errors and the symbol table. */
  public analyze(ast: Program): AnalysisResult {
    this.errors = [];
    for (const model of ast.models) {
      this.analyzeModel(model);
    }
    return {
      success: this.errors.length === 0,
      errors: this.errors,
      symbolTable: this.symbolTable
    };
  }

  private analyzeModel(model: Model): void {
    // First pass: collect all predicate definitions so later statements can
    // reference predicates defined further down the model.
    for (const stmt of model.statements) {
      this.collectPredicates(stmt);
    }
    // Second pass: validate usage.
    for (const stmt of model.statements) {
      this.validateStatement(stmt);
    }
  }

  /** Registers every atom occurring in a defining position. */
  private collectPredicates(stmt: Statement): void {
    switch (stmt.type) {
      case 'ProbabilisticFact':
      case 'DeterministicFact':
        this.defineAtom(stmt.atom);
        break;
      case 'ProbabilisticRule':
        this.defineAtom(stmt.head);
        for (const literal of stmt.body) {
          this.defineAtom(literal.atom);
        }
        break;
      case 'AnnotatedDisjunction':
        for (const choice of stmt.choices) {
          this.defineAtom(choice.atom);
        }
        break;
    }
  }

  private defineAtom(atom: Atom): void {
    this.symbolTable.define(
      atom.predicate,
      SymbolType.PREDICATE,
      atom.arguments.length,
      atom.location
    );
  }

  private validateStatement(stmt: Statement): void {
    switch (stmt.type) {
      case 'ProbabilisticFact':
        this.validateProbability(stmt.probability, stmt.location);
        this.validateAtom(stmt.atom);
        break;
      case 'ProbabilisticRule':
        this.validateProbability(stmt.probability, stmt.location);
        this.validateRule(stmt);
        break;
      case 'AnnotatedDisjunction':
        this.validateAnnotatedDisjunction(stmt);
        break;
      case 'Observation':
        this.validateLiteral(stmt.literal);
        break;
      case 'Query':
        this.validateAtom(stmt.atom);
        break;
    }
  }

  /** A probability annotation must lie in [0, 1]. */
  private validateProbability(prob: number, location: Location): void {
    if (prob < 0 || prob > 1) {
      // BUG FIX: only suggest the shifted value when it is itself legal
      // (the old code suggested e.g. "1.5" for input 15).
      const shifted = prob / 10;
      this.errors.push({
        type: 'InvalidProbability',
        message: `Probability ${prob} must be between 0.0 and 1.0`,
        location,
        suggestion: prob > 1 && shifted <= 1 ? `Did you mean ${shifted}?` : undefined
      });
    }
  }

  /** Choice probabilities of an annotated disjunction may sum to at most 1. */
  private validateAnnotatedDisjunction(ad: AnnotatedDisjunction): void {
    let sum = 0;
    for (const choice of ad.choices) {
      sum += choice.probability;
      this.validateAtom(choice.atom);
    }
    if (sum > 1.0) {
      this.errors.push({
        type: 'InvalidAnnotatedDisjunction',
        message: `Annotated disjunction probabilities sum to ${sum}, which exceeds 1.0`,
        location: ad.location,
        suggestion: 'Probabilities must sum to at most 1.0'
      });
    }
  }

  /** Rule safety: every variable in the head must occur in the body. */
  private validateRule(rule: ProbabilisticRule): void {
    const headVars = this.collectVariables(rule.head);
    const bodyVars = new Set<string>();
    for (const literal of rule.body) {
      const vars = this.collectVariables(literal.atom);
      vars.forEach(v => bodyVars.add(v));
    }
    for (const headVar of headVars) {
      if (!bodyVars.has(headVar)) {
        this.errors.push({
          type: 'UnsafeVariable',
          message: `Variable ${headVar} in rule head must appear in rule body`,
          location: rule.location,
          suggestion: `Add a literal containing ${headVar} to the rule body`
        });
      }
    }
    this.validateAtom(rule.head);
    for (const literal of rule.body) {
      this.validateLiteral(literal);
    }
  }

  /** Recursively gathers variable names from an atom's arguments. */
  private collectVariables(atom: Atom): Set<string> {
    const vars = new Set<string>();
    for (const arg of atom.arguments) {
      if (arg.type === 'Variable') {
        vars.add(arg.name);
      } else if (arg.type === 'Atom') {
        const nested = this.collectVariables(arg);
        nested.forEach(v => vars.add(v));
      }
    }
    return vars;
  }

  /**
   * Arity-consistency check. BUG FIX: the previous draft looked the symbol
   * up under its exact `name/arity` key, so `symbol.arity` always equaled
   * the call-site arity and the mismatch branch was unreachable. We now
   * search for a same-name predicate registered with a *different* arity.
   */
  private validateAtom(atom: Atom): void {
    const conflict = this.symbolTable
      .getAllSymbols()
      .find(s =>
        s.type === SymbolType.PREDICATE &&
        s.name === atom.predicate &&
        s.arity !== atom.arguments.length
      );
    if (conflict) {
      this.errors.push({
        type: 'ArityMismatch',
        message: `Predicate ${atom.predicate} used with ${atom.arguments.length} arguments, but defined with ${conflict.arity}`,
        location: atom.location,
        suggestion: `Use ${conflict.arity} arguments for ${atom.predicate}`
      });
    }
  }

  private validateLiteral(literal: Literal): void {
    this.validateAtom(literal.atom);
  }
}
```
## Type Checking
```typescript
// src/probabilistic/types/typeChecker.ts

/**
 * Final validation pass before code generation. Most structural checking
 * happens during semantic analysis; this stage hosts type-specific checks.
 */
export class TypeChecker {
  private errors: TypeError[] = [];

  /** Runs the type checks over every model and reports collected errors. */
  public check(ast: Program): TypeCheckResult {
    this.errors = [];
    ast.models.forEach(model => this.checkModel(model));
    return {
      success: this.errors.length === 0,
      errors: this.errors
    };
  }

  /** Checks each statement of a single model. */
  private checkModel(model: Model): void {
    model.statements.forEach(stmt => this.checkStatement(stmt));
  }

  /** Statement-level checks; currently only probability fields are checked. */
  private checkStatement(stmt: Statement): void {
    const hasProbability =
      stmt.type === 'ProbabilisticFact' || stmt.type === 'ProbabilisticRule';
    if (!hasProbability) {
      return;
    }
    // Guard against a malformed AST produced by an unchecked front end.
    if (typeof stmt.probability !== 'number') {
      this.errors.push({
        type: 'TypeError',
        message: 'Probability must be a number',
        location: stmt.location
      });
    }
  }
}
```
## Code Generation
```typescript
// src/probabilistic/codegen/generator.ts

/**
 * Emits a ProbLog program from a validated Program AST. Output is plain
 * text, one clause per line, with each model introduced by a comment header.
 */
export class CodeGenerator {
  private output: string[] = [];
  private indentLevel: number = 0;  // reserved for future nested constructs

  /** Generates ProbLog source for every model, joined with newlines. */
  public generate(ast: Program): string {
    this.output = [];
    this.indentLevel = 0;
    for (const model of ast.models) {
      this.generateModel(model);
    }
    return this.output.join('\n');
  }

  /** Emits a comment header followed by each statement of the model. */
  private generateModel(model: Model): void {
    this.emit(`% Model: ${model.name}`);
    this.emit('');
    for (const stmt of model.statements) {
      this.generateStatement(stmt);
    }
    this.emit('');
  }

  /** Translates one statement into its ProbLog clause. */
  private generateStatement(stmt: Statement): void {
    switch (stmt.type) {
      case 'ProbabilisticFact':
        this.emit(`${stmt.probability}::${this.generateAtom(stmt.atom)}.`);
        break;
      case 'ProbabilisticRule':
        this.emit(
          `${stmt.probability}::${this.generateAtom(stmt.head)} :- ${this.generateBody(stmt.body)}.`
        );
        break;
      case 'DeterministicFact':
        this.emit(`${this.generateAtom(stmt.atom)}.`);
        break;
      case 'AnnotatedDisjunction': {
        // Braces scope the `const` to this case (no-case-declarations).
        const choices = stmt.choices
          .map(c => `${c.probability}::${this.generateAtom(c.atom)}`)
          .join('; ');
        this.emit(`${choices}.`);
        break;
      }
      case 'Observation': {
        const atomStr = this.generateAtom(stmt.literal.atom);
        const value = stmt.literal.negated ? 'false' : 'true';
        this.emit(`evidence(${atomStr}, ${value}).`);
        break;
      }
      case 'Query':
        this.emit(`query(${this.generateAtom(stmt.atom)}).`);
        break;
      case 'LearningDirective':
        this.emit(`% Learn parameters from ${stmt.dataset}`);
        // BUG FIX: quote the path so it remains a single Prolog atom even
        // when it contains '.', '/' or uppercase letters (e.g. "data/train.csv").
        this.emit(`learn('${stmt.dataset}').`);
        break;
    }
  }

  /** `pred` for arity 0, otherwise `pred(arg1, ..., argN)`. */
  private generateAtom(atom: Atom): string {
    if (atom.arguments.length === 0) {
      return atom.predicate;
    }
    const args = atom.arguments.map(arg => this.generateTerm(arg)).join(', ');
    return `${atom.predicate}(${args})`;
  }

  /** Renders a single argument term. */
  private generateTerm(term: Term): string {
    switch (term.type) {
      case 'Variable':
        return term.name;
      case 'Constant':
        return term.value;
      case 'Number':
        return term.value.toString();
      case 'Atom':
        return this.generateAtom(term);
    }
  }

  /** Comma-joined rule body. */
  private generateBody(body: Literal[]): string {
    return body.map(lit => this.generateLiteral(lit)).join(', ');
  }

  /** Negation is rendered with ProbLog's negation-as-failure operator \+. */
  private generateLiteral(literal: Literal): string {
    const atom = this.generateAtom(literal.atom);
    return literal.negated ? `\\+ ${atom}` : atom;
  }

  /** Appends one line at the current indent level. */
  private emit(line: string): void {
    const indent = ' '.repeat(this.indentLevel);
    this.output.push(indent + line);
  }
}
```
## Error Handling
```typescript
// src/probabilistic/errors/errorHandler.ts

/** A single compiler diagnostic with a source position and optional hint. */
export interface CompilerError {
  type: string;         // machine-readable category, e.g. 'InvalidProbability'
  message: string;      // human-readable description
  location: Location;   // 1-based position in the original source
  suggestion?: string;  // optional fix hint shown under the excerpt
}

/**
 * Collects diagnostics during compilation and renders them with a source
 * excerpt and a caret marker under the offending span.
 */
export class ErrorReporter {
  private errors: CompilerError[] = [];
  private source: string;

  constructor(source: string) {
    this.source = source;
  }

  /** Records one diagnostic. */
  public report(error: CompilerError): void {
    this.errors.push(error);
  }

  /** True once at least one diagnostic has been recorded. */
  public hasErrors(): boolean {
    return this.errors.length > 0;
  }

  /** All diagnostics in the order they were reported. */
  public getErrors(): CompilerError[] {
    return this.errors;
  }

  /** Renders every diagnostic, blank-line separated. */
  public formatErrors(): string {
    return this.errors.map(err => this.formatError(err)).join('\n\n');
  }

  private formatError(error: CompilerError): string {
    const lines = this.source.split('\n');
    // Guard: an out-of-range line number must not crash the reporter itself.
    const lineText = lines[error.location.line - 1] ?? '';
    const prefix = `  Line ${error.location.line}: `;
    let output = `Error: ${error.message}\n`;
    output += `${prefix}${lineText}\n`;
    // BUG FIX: columns are 1-based and the excerpt is prefixed with
    // "  Line N: "; the old code ignored the prefix width and used the raw
    // column, so the caret never lined up with the offending text.
    const padding = prefix.length + Math.max(0, error.location.column - 1);
    output += ' '.repeat(padding) + '^'.repeat(Math.max(1, error.location.length)) + '\n';
    if (error.suggestion) {
      output += `  Suggestion: ${error.suggestion}\n`;
    }
    return output;
  }
}
```
## Testing Strategy
### Unit Tests
```typescript
// tests/probabilistic/lexer.test.ts
describe('Lexer', () => {
  test('tokenizes probabilities', () => {
    const lexer = new Lexer('0.7 :: rain');
    const tokens = lexer.tokenize();
    expect(tokens[0].type).toBe(TokenType.PROBABILITY);
    expect(tokens[0].value).toBe('0.7');
  });
  test('tokenizes keywords', () => {
    const lexer = new Lexer('observe query');
    const tokens = lexer.tokenize();
    expect(tokens[0].type).toBe(TokenType.OBSERVE);
    expect(tokens[1].type).toBe(TokenType.QUERY);
  });
  test('distinguishes variables from constants', () => {
    const lexer = new Lexer('flies(X) bird(sparrow)');
    const tokens = lexer.tokenize();
    // Token stream: flies ( X ) bird ( sparrow ) — indices 0..7.
    expect(tokens[2].type).toBe(TokenType.VARIABLE); // X
    // BUG FIX: 'bird' is at index 4, not 5 (index 5 is the LPAREN).
    expect(tokens[4].type).toBe(TokenType.CONSTANT); // bird
  });
});
// tests/probabilistic/parser.test.ts
describe('Parser', () => {
  // Each test lexes a snippet and parses it; AST-shape assertions are left
  // as TODOs in this design draft.
  // NOTE(review): these snippets are bare statements, but parse() as drafted
  // expects a 'probabilistic_model ... { ... }' wrapper — confirm the
  // intended entry point for statement-level parsing.
  test('parses probabilistic facts', () => {
    const tokens = new Lexer('0.7 :: rain').tokenize();
    const ast = new Parser(tokens).parse();
    // Assertions...
  });
  test('parses rules with body', () => {
    const source = '0.9 :: flies(X) :- bird(X), not penguin(X)';
    const tokens = new Lexer(source).tokenize();
    const ast = new Parser(tokens).parse();
    // Assertions...
  });
  test('parses annotated disjunctions', () => {
    const source = '0.3 :: a; 0.7 :: b';
    const tokens = new Lexer(source).tokenize();
    const ast = new Parser(tokens).parse();
    // Assertions...
  });
});
// tests/probabilistic/semantics.test.ts
// NOTE(review): parseSource is a helper not defined in this document —
// presumably it lexes, parses and wraps bare statements in a model.
describe('Semantic Analyzer', () => {
  // NOTE(review): 'flies(X) :- bird(Y)' is a deterministic rule, which the
  // parser draft does not accept (no unannotated-rule production) — confirm
  // the grammar supports it before relying on this test.
  test('detects unsafe variables', () => {
    const source = 'flies(X) :- bird(Y)';
    const ast = parseSource(source);
    const result = new SemanticAnalyzer().analyze(ast);
    expect(result.errors).toHaveLength(1);
    expect(result.errors[0].type).toBe('UnsafeVariable');
  });
  // NOTE(review): '1.5' lexes as NUMBER, not PROBABILITY, so the parser draft
  // rejects it before semantic analysis runs — confirm the parser tolerates
  // out-of-range annotations so this diagnostic can fire.
  test('detects invalid probabilities', () => {
    const source = '1.5 :: impossible';
    const ast = parseSource(source);
    const result = new SemanticAnalyzer().analyze(ast);
    expect(result.errors[0].type).toBe('InvalidProbability');
  });
});
// tests/probabilistic/codegen.test.ts
// NOTE(review): parseSource is assumed to wrap bare statements in a model;
// the generator itself operates on a full Program AST.
describe('Code Generator', () => {
  test('generates valid ProbLog', () => {
    const source = '0.7 :: rain';
    const ast = parseSource(source);
    const problog = new CodeGenerator().generate(ast);
    expect(problog).toContain('0.7::rain.');
  });
  test('translates observations to evidence', () => {
    const source = 'observe fever';
    const ast = parseSource(source);
    const problog = new CodeGenerator().generate(ast);
    expect(problog).toContain('evidence(fever, true).');
  });
});
```
### Integration Tests
```typescript
// tests/probabilistic/integration.test.ts
// NOTE(review): compilePDSL is the pipeline entry point; it is referenced
// here but not specified in this document — presumably lexer → parser →
// analyzer → type checker → generator.
describe('End-to-end compilation', () => {
  // NOTE(review): 'Medical' starts uppercase and therefore lexes as VARIABLE,
  // while parseModel as drafted consumes a CONSTANT for the model name —
  // confirm the parser accepts uppercase model names.
  test('compiles medical diagnosis example', () => {
    const source = `
probabilistic_model Medical {
0.01 :: flu
0.9 :: fever :- flu
observe fever
query flu
}
`;
    const result = compilePDSL(source);
    expect(result.success).toBe(true);
    expect(result.problog).toContain('0.01::flu.');
    expect(result.problog).toContain('evidence(fever, true).');
  });
});
```
## Performance Considerations
1. **Lexer optimization:** Use string interning for identifiers
2. **Parser optimization:** Implement operator precedence climbing
3. **AST optimization:** Constant folding, dead code elimination
4. **Code generation:** Template caching, incremental compilation
## Implementation Timeline
See [PROBABILISTIC_DSL_SUMMARY.md](PROBABILISTIC_DSL_SUMMARY.md) for detailed timeline.
---
**This parser design provides a robust foundation for PDSL compilation to ProbLog.**