/**
* Parser for the Probabilistic Domain-Specific Language (PDSL)
*
* Implements a recursive descent parser that transforms tokens into an AST.
*/
import { Token, TokenType } from './probabilisticLexer.js';
import {
Program,
Model,
Statement,
ProbabilisticFact,
ProbabilisticRule,
DeterministicFact,
AnnotatedDisjunction,
Observation,
Query,
LearningDirective,
Atom,
Literal,
Term,
Variable,
Constant,
NumberTerm,
createLocation,
Location,
} from './probabilisticAST.js';
// ============================================================================
// Parser Class
// ============================================================================
/**
 * Recursive descent parser that turns a PDSL token stream into a `Program` AST.
 *
 * Comment tokens are discarded on construction. The stream is normally
 * terminated by an EOF token; if it is not (or the stream is empty), running
 * off the end of the token array is treated as end of input instead of
 * crashing. Every syntax error is reported by throwing a `ParseError`.
 */
export class Parser {
  /** Token stream with comment tokens removed. */
  private readonly tokens: Token[];

  /** Index of the next token to be consumed. */
  private current = 0;

  /**
   * @param tokens Tokens to parse; tokens of type `COMMENT` are ignored.
   */
  constructor(tokens: Token[]) {
    this.tokens = tokens.filter(t => t.type !== TokenType.COMMENT);
  }

  /**
   * Parse the token stream into an AST.
   *
   * @returns A `Program` holding every model found before end of input
   *          (possibly none).
   * @throws ParseError on the first syntax error.
   */
  public parse(): Program {
    const models: Model[] = [];
    while (!this.isAtEnd()) {
      models.push(this.parseModel());
    }
    return {
      type: 'Program',
      models,
      location: createLocation(1, 1),
    };
  }

  // ==========================================================================
  // Model Parsing
  // ==========================================================================

  /**
   * Parse `probabilistic_model <name> { <statement>* }`.
   *
   * The model name may be a CONSTANT or a VARIABLE token, because CamelCase
   * names start with an uppercase letter.
   */
  private parseModel(): Model {
    const startToken = this.consume(
      TokenType.PROBABILISTIC_MODEL,
      "Expected 'probabilistic_model'"
    );

    // Remember the offending token before trying to match, for the error.
    const nameToken = this.peek();
    if (!this.match(TokenType.CONSTANT, TokenType.VARIABLE)) {
      throw this.error('Expected model name', nameToken);
    }
    const name = this.previous().value;

    this.consume(TokenType.LBRACE, "Expected '{' after model name");
    const statements: Statement[] = [];
    while (!this.check(TokenType.RBRACE) && !this.isAtEnd()) {
      statements.push(this.parseStatement());
    }
    this.consume(TokenType.RBRACE, "Expected '}' after model body");

    return {
      type: 'Model',
      name,
      statements,
      location: createLocation(startToken.line, startToken.column),
    };
  }

  // ==========================================================================
  // Statement Parsing
  // ==========================================================================

  /**
   * Parse one statement inside a model body, dispatching on the current token:
   * `observe`, `query`, `learn`, a probability followed by `::`, or a
   * CONSTANT starting a deterministic fact/rule.
   */
  private parseStatement(): Statement {
    if (this.check(TokenType.OBSERVE)) {
      return this.parseObservation();
    }
    if (this.check(TokenType.QUERY)) {
      return this.parseQuery();
    }
    if (this.check(TokenType.LEARN)) {
      return this.parseLearningDirective();
    }

    // Probabilistic fact/rule or annotated disjunction: a probability token
    // (PROBABILITY, NUMBER or VARIABLE) immediately followed by '::'.
    if (this.checkProbability() && this.checkNext(TokenType.PROB_ANNOTATION)) {
      return this.parseProbabilisticStatement();
    }

    // Deterministic fact, or a rule when the atom is followed by ':-'.
    if (this.check(TokenType.CONSTANT)) {
      const saved = this.current;
      try {
        const atom = this.parseAtom();
        if (this.match(TokenType.IMPLICATION)) {
          // A rule without an explicit probability is emitted as a
          // ProbabilisticRule with probability 1.0.
          const body = this.parseBody();
          return {
            type: 'ProbabilisticRule',
            probability: 1.0,
            head: atom,
            body,
            location: atom.location,
          };
        }
        return {
          type: 'DeterministicFact',
          atom,
          location: atom.location,
        };
      } catch (e) {
        // Leave the cursor where the statement started before propagating.
        this.current = saved;
        throw e;
      }
    }

    throw this.error('Expected statement', this.peek());
  }

  /**
   * Parse a statement of the form `<prob> :: <atom> ...`, which is either an
   * annotated disjunction (`;` follows), a probabilistic rule (`:-` follows)
   * or a plain probabilistic fact.
   */
  private parseProbabilisticStatement(): Statement {
    const probToken = this.advance();
    this.consume(TokenType.PROB_ANNOTATION, "Expected '::'");
    const firstAtom = this.parseAtom();

    if (this.check(TokenType.SEMICOLON)) {
      return this.parseAnnotatedDisjunction(probToken.value, firstAtom, probToken);
    }

    const probability = this.parseProbability(probToken.value);
    const location = createLocation(probToken.line, probToken.column);

    if (this.match(TokenType.IMPLICATION)) {
      const body = this.parseBody();
      return {
        type: 'ProbabilisticRule',
        probability,
        head: firstAtom,
        body,
        location,
      };
    }

    return {
      type: 'ProbabilisticFact',
      probability,
      atom: firstAtom,
      location,
    };
  }

  /**
   * Parse the remaining `; <prob> :: <atom>` choices of an annotated
   * disjunction whose first choice has already been consumed.
   *
   * @param firstProb  Raw text of the first choice's probability token.
   * @param firstAtom  Atom of the first choice.
   * @param startToken Probability token that opened the statement; its
   *                   position is used for the disjunction and its first
   *                   choice, matching how other probabilistic statements are
   *                   located.
   */
  private parseAnnotatedDisjunction(
    firstProb: string,
    firstAtom: Atom,
    startToken: Token
  ): AnnotatedDisjunction {
    const choices: ProbabilisticFact[] = [
      {
        type: 'ProbabilisticFact',
        probability: this.parseProbability(firstProb),
        atom: firstAtom,
        location: createLocation(startToken.line, startToken.column),
      },
    ];

    while (this.match(TokenType.SEMICOLON)) {
      // Accept the same probability token kinds as the first choice.
      const probToken = this.consumeProbability(
        'Expected probability in annotated disjunction'
      );
      this.consume(TokenType.PROB_ANNOTATION, "Expected '::'");
      const atom = this.parseAtom();
      choices.push({
        type: 'ProbabilisticFact',
        probability: this.parseProbability(probToken.value),
        atom,
        location: createLocation(probToken.line, probToken.column),
      });
    }

    return {
      type: 'AnnotatedDisjunction',
      choices,
      location: createLocation(startToken.line, startToken.column),
    };
  }

  /** Parse `observe <literal>`. */
  private parseObservation(): Observation {
    const startToken = this.consume(TokenType.OBSERVE, "Expected 'observe'");
    const literal = this.parseLiteral();
    return {
      type: 'Observation',
      literal,
      location: createLocation(startToken.line, startToken.column),
    };
  }

  /** Parse `query <atom>`. */
  private parseQuery(): Query {
    const startToken = this.consume(TokenType.QUERY, "Expected 'query'");
    const atom = this.parseAtom();
    return {
      type: 'Query',
      atom,
      location: createLocation(startToken.line, startToken.column),
    };
  }

  /** Parse `learn parameters from dataset ( <string> )`. */
  private parseLearningDirective(): LearningDirective {
    const startToken = this.consume(TokenType.LEARN, "Expected 'learn'");
    this.consume(TokenType.PARAMETERS, "Expected 'parameters'");
    this.consume(TokenType.FROM, "Expected 'from'");
    this.consume(TokenType.DATASET, "Expected 'dataset'");
    this.consume(TokenType.LPAREN, "Expected '('");
    const datasetToken = this.consume(TokenType.STRING, 'Expected dataset path');
    this.consume(TokenType.RPAREN, "Expected ')'");
    return {
      type: 'LearningDirective',
      dataset: datasetToken.value,
      location: createLocation(startToken.line, startToken.column),
    };
  }

  // ==========================================================================
  // Logical Constructs
  // ==========================================================================

  /** Parse a rule body: one or more comma-separated literals. */
  private parseBody(): Literal[] {
    const literals: Literal[] = [this.parseLiteral()];
    while (this.match(TokenType.COMMA)) {
      literals.push(this.parseLiteral());
    }
    return literals;
  }

  /** Parse an atom optionally preceded by a NOT token. */
  private parseLiteral(): Literal {
    const startToken = this.peek();
    const negated = this.match(TokenType.NOT);
    const atom = this.parseAtom();
    return {
      type: 'Literal',
      negated,
      atom,
      location: createLocation(startToken.line, startToken.column),
    };
  }

  /**
   * Parse `<predicate>` or `<predicate>( <args> )`. A predicate without
   * parentheses yields an atom with an empty argument list.
   */
  private parseAtom(): Atom {
    const predicateToken = this.consume(
      TokenType.CONSTANT,
      'Expected predicate name'
    );
    let args: Term[] = [];
    if (this.match(TokenType.LPAREN)) {
      args = this.parseArgumentList();
      this.consume(TokenType.RPAREN, "Expected ')' after arguments");
    }
    return {
      type: 'Atom',
      predicate: predicateToken.value,
      args,
      location: createLocation(predicateToken.line, predicateToken.column),
    };
  }

  /**
   * Parse zero or more comma-separated terms. The closing ')' is left for the
   * caller to consume.
   */
  private parseArgumentList(): Term[] {
    const args: Term[] = [];
    if (this.check(TokenType.RPAREN)) {
      return args;
    }
    do {
      args.push(this.parseTerm());
    } while (this.match(TokenType.COMMA));
    return args;
  }

  /** Parse a single term: variable, number, constant, or nested atom. */
  private parseTerm(): Term {
    const token = this.peek();

    if (this.match(TokenType.VARIABLE)) {
      return {
        type: 'Variable',
        name: token.value,
        location: createLocation(token.line, token.column),
      };
    }

    // NUMBER and PROBABILITY tokens both become numeric terms here.
    if (this.match(TokenType.NUMBER, TokenType.PROBABILITY)) {
      return {
        type: 'Number',
        value: parseFloat(token.value),
        location: createLocation(token.line, token.column),
      };
    }

    if (this.check(TokenType.CONSTANT)) {
      // A constant directly followed by '(' is a nested atom.
      if (this.checkNext(TokenType.LPAREN)) {
        return this.parseAtom();
      }
      this.advance();
      return {
        type: 'Constant',
        value: token.value,
        location: createLocation(token.line, token.column),
      };
    }

    throw this.error('Expected term (variable, constant, number, or atom)', token);
  }

  // ==========================================================================
  // Helper Methods
  // ==========================================================================

  /**
   * Convert the text of a probability token into its AST value.
   *
   * @returns The numeric value when `parseFloat` can read one, otherwise the
   *          text unchanged (a variable name used for parameter learning).
   */
  private parseProbability(value: string): number | string {
    const num = parseFloat(value);
    return isNaN(num) ? value : num;
  }

  /** True when the current token can serve as a probability annotation. */
  private checkProbability(): boolean {
    return (
      this.check(TokenType.PROBABILITY) ||
      this.check(TokenType.NUMBER) ||
      this.check(TokenType.VARIABLE)
    );
  }

  /**
   * Consume and return a probability token (PROBABILITY, NUMBER or VARIABLE).
   *
   * @throws ParseError carrying `message` when the current token is none of
   *         those.
   */
  private consumeProbability(message: string): Token {
    if (this.checkProbability()) {
      return this.advance();
    }
    throw this.error(message, this.peek());
  }

  /**
   * Consume the current token if it has one of the given types.
   *
   * @returns true when a token was consumed.
   */
  private match(...types: TokenType[]): boolean {
    for (const type of types) {
      if (this.check(type)) {
        this.advance();
        return true;
      }
    }
    return false;
  }

  /** True when not at end of input and the current token has the given type. */
  private check(type: TokenType): boolean {
    if (this.isAtEnd()) return false;
    return this.peek().type === type;
  }

  /** True when a token follows the current one and has the given type. */
  private checkNext(type: TokenType): boolean {
    const next: Token | undefined = this.tokens[this.current + 1];
    return next !== undefined && next.type === type;
  }

  /**
   * Move past the current token unless already at end of input.
   *
   * @returns The token immediately before the (possibly updated) cursor.
   */
  private advance(): Token {
    if (!this.isAtEnd()) this.current++;
    return this.previous();
  }

  /**
   * True when the cursor is on the EOF token, or has run past the last token
   * of a stream that carries no EOF token.
   */
  private isAtEnd(): boolean {
    return (
      this.current >= this.tokens.length ||
      this.peek().type === TokenType.EOF
    );
  }

  /**
   * Current token, without advancing. Callers must not dereference the result
   * when `isAtEnd()` reports true on a stream without an EOF token, because
   * there is no token at the cursor in that case.
   */
  private peek(): Token {
    return this.tokens[this.current];
  }

  /** Token immediately before the cursor. */
  private previous(): Token {
    return this.tokens[this.current - 1];
  }

  /**
   * Consume a token of the expected type.
   *
   * @throws ParseError carrying `message` when the current token differs.
   */
  private consume(type: TokenType, message: string): Token {
    if (this.check(type)) return this.advance();
    throw this.error(message, this.peek());
  }

  /**
   * Build (but do not throw) a ParseError positioned at `token`.
   *
   * When `token` is missing because the stream ended without an EOF token,
   * the last available token is used for the position; an empty stream is
   * reported at line 1, column 1.
   */
  private error(message: string, token: Token | undefined): Error {
    const anchor: Token | undefined =
      token !== undefined ? token : this.tokens[this.tokens.length - 1];
    if (anchor === undefined) {
      return new ParseError(message, 1, 1, '');
    }
    return new ParseError(message, anchor.line, anchor.column, anchor.value);
  }
}
// ============================================================================
// Error Classes
// ============================================================================
/**
 * Error thrown for a syntax error in the token stream.
 *
 * The error message embeds the position and the text of the offending token;
 * the same details are also exposed as fields for programmatic use.
 */
export class ParseError extends Error {
  /** Line of the offending token. */
  public line: number;
  /** Column of the offending token. */
  public column: number;
  /** Text of the offending token. */
  public tokenValue: string;

  /**
   * @param message    Description of what the parser expected.
   * @param line       Line of the offending token.
   * @param column     Column of the offending token.
   * @param tokenValue Text of the offending token, quoted in the message.
   */
  constructor(
    message: string,
    line: number,
    column: number,
    tokenValue: string
  ) {
    const headline = `Parse error at line ${line}, column ${column}: ${message}`;
    const context = ` Near: "${tokenValue}"`;
    super([headline, context].join('\n'));

    this.line = line;
    this.column = column;
    this.tokenValue = tokenValue;
    this.name = 'ParseError';
  }
}
// ============================================================================
// Utility Functions
// ============================================================================
/**
 * Parse a PDSL token stream into an AST.
 *
 * Convenience wrapper that builds a `Parser` over `tokens` and returns the
 * result of its `parse()` method.
 *
 * @param tokens Tokens to parse.
 * @returns The parsed `Program`.
 */
export function parse(tokens: Token[]): Program {
  return new Parser(tokens).parse();
}