// ASP Lexer - Tokenizes ASP/Clingo syntax
// Supports full ASP-Core-2 standard
/**
 * Every token category used by the ASP lexer.
 *
 * Each member is the literal stored in `Token.type`. The trailing comment on
 * each line shows the source text that category stands for.
 */
export type TokenType =
| 'ATOM' // Identifier starting with a lowercase letter (predicate or constant name)
| 'VARIABLE' // Identifier starting with an uppercase letter or '_'
| 'NUMBER' // Unsigned run of decimal digits
| 'STRING' // Double-quoted string literal (token value excludes the quotes)
| 'LPAREN' // (
| 'RPAREN' // )
| 'LBRACE' // {
| 'RBRACE' // }
| 'LBRACKET' // [
| 'RBRACKET' // ]
| 'DOT' // .
| 'COMMA' // ,
| 'SEMICOLON' // ;
| 'COLON' // :
| 'COLONDASH' // :-
| 'COLONWAVE' // :~
| 'PIPE' // |
| 'DOTDOT' // ..
| 'NOT' // the keyword `not`
| 'MINUS' // - (classical negation or arithmetic; the lexer does not distinguish them)
| 'PLUS' // +
| 'STAR' // *
| 'SLASH' // /
| 'BACKSLASH' // \
| 'PERCENT' // % (the lexer in this file treats '%' as a comment start, so it never emits this)
| 'STARSTAR' // **
| 'EQ' // =
| 'NEQ' // !=
| 'LT' // <
| 'GT' // >
| 'LEQ' // <=
| 'GEQ' // >=
| 'HASH' // '#' plus any following lowercase letters/underscores that form no known aggregate or directive
| 'AGGREGATE' // #count, #sum, #min, #max
| 'DIRECTIVE' // #show, #hide, #minimize, #maximize, #const
| 'AT' // @
| 'COMMENT' // % comment (comments are skipped by the lexer in this file, never emitted)
| 'EOF' // End of input; always the last token returned by the lexer
| 'NEWLINE'; // Line break (the lexer in this file skips newlines and never emits this)
/** A single lexical token together with the position at which it starts. */
export interface Token {
// Category of the token.
type: TokenType;
// Text of the token. For STRING tokens this is the content between the
// quotes after escape processing; for EOF it is the empty string.
value: string;
// 1-based line number of the token's first character.
line: number;
// 1-based column number of the token's first character.
column: number;
}
/**
 * Hand-written lexer that turns ASP/Clingo source text into a flat token list.
 *
 * Whitespace, newlines, line comments (`% ...`) and block comments
 * (`%* ... *%`) are skipped; they never appear in the output. Every token
 * carries the 1-based line and column of its first character.
 *
 * Usage: `new ASPLexer(source).tokenize()`.
 */
export class ASPLexer {
  /** Words (including the leading '#') that are lexed as AGGREGATE tokens. */
  private static readonly AGGREGATES: ReadonlySet<string> = new Set([
    '#count',
    '#sum',
    '#min',
    '#max',
  ]);

  /** Words (including the leading '#') that are lexed as DIRECTIVE tokens. */
  private static readonly DIRECTIVES: ReadonlySet<string> = new Set([
    '#show',
    '#hide',
    '#minimize',
    '#maximize',
    '#const',
  ]);

  private input: string;
  private position: number = 0;
  private line: number = 1;
  private column: number = 1;

  /**
   * @param input Complete source text to tokenize.
   */
  constructor(input: string) {
    this.input = input;
  }

  /**
   * Scans the whole input and returns its tokens, terminated by one EOF token.
   *
   * The cursor is reset on entry, so calling this method repeatedly on the
   * same instance always yields the same result.
   *
   * @returns All tokens in source order; the last element has type 'EOF'.
   * @throws Error on an unexpected character or an unterminated string.
   */
  tokenize(): Token[] {
    // Start from a clean cursor so the method is safe to call more than once.
    this.position = 0;
    this.line = 1;
    this.column = 1;

    const tokens: Token[] = [];
    while (!this.isAtEnd()) {
      this.skipWhitespaceExceptNewline();
      if (this.isAtEnd()) break;

      // '%' always opens a comment: '%*' a block comment, anything else a line comment.
      if (this.peek() === '%') {
        if (this.peekNext() === '*') {
          this.skipBlockComment();
        } else {
          this.skipLineComment();
        }
        continue;
      }

      // scanToken yields null for input that produces no token (newlines).
      const token = this.scanToken();
      if (token !== null) {
        tokens.push(token);
      }
    }

    tokens.push({
      type: 'EOF',
      value: '',
      line: this.line,
      column: this.column,
    });
    return tokens;
  }

  /**
   * Consumes one token starting at the current position.
   *
   * @returns The token, or null when the consumed character was a newline.
   * @throws Error when the character cannot start any token.
   */
  private scanToken(): Token | null {
    const start = { line: this.line, column: this.column };
    const char = this.advance();

    switch (char) {
      case '(': return this.makeToken('LPAREN', char, start);
      case ')': return this.makeToken('RPAREN', char, start);
      case '{': return this.makeToken('LBRACE', char, start);
      case '}': return this.makeToken('RBRACE', char, start);
      case '[': return this.makeToken('LBRACKET', char, start);
      case ']': return this.makeToken('RBRACKET', char, start);
      case ',': return this.makeToken('COMMA', char, start);
      case ';': return this.makeToken('SEMICOLON', char, start);
      case '|': return this.makeToken('PIPE', char, start);
      case '@': return this.makeToken('AT', char, start);
      case '+': return this.makeToken('PLUS', char, start);
      case '-': return this.makeToken('MINUS', char, start);
      case '/': return this.makeToken('SLASH', char, start);
      case '\\': return this.makeToken('BACKSLASH', char, start);
      case '=': return this.makeToken('EQ', char, start);
      case '\n':
        // advance() has already moved the cursor to the start of the next line.
        return null;
      case '.':
        if (this.peek() === '.') {
          this.advance();
          return this.makeToken('DOTDOT', '..', start);
        }
        return this.makeToken('DOT', char, start);
      case ':':
        if (this.peek() === '-') {
          this.advance();
          return this.makeToken('COLONDASH', ':-', start);
        }
        if (this.peek() === '~') {
          this.advance();
          return this.makeToken('COLONWAVE', ':~', start);
        }
        return this.makeToken('COLON', char, start);
      case '*':
        if (this.peek() === '*') {
          this.advance();
          return this.makeToken('STARSTAR', '**', start);
        }
        return this.makeToken('STAR', char, start);
      case '!':
        // '!' is only valid as the first half of '!='.
        if (this.peek() === '=') {
          this.advance();
          return this.makeToken('NEQ', '!=', start);
        }
        throw new Error(`Unexpected character '!' at line ${start.line}, column ${start.column}`);
      case '<':
        if (this.peek() === '=') {
          this.advance();
          return this.makeToken('LEQ', '<=', start);
        }
        return this.makeToken('LT', char, start);
      case '>':
        if (this.peek() === '=') {
          this.advance();
          return this.makeToken('GEQ', '>=', start);
        }
        return this.makeToken('GT', char, start);
      case '#':
        return this.scanDirectiveOrAggregate(start);
      case '"':
        return this.scanString(start);
      default:
        if (this.isDigit(char)) {
          return this.scanNumber(char, start);
        }
        if (this.isUppercase(char) || char === '_') {
          return this.scanVariable(char, start);
        }
        if (this.isLowercase(char)) {
          return this.scanAtomOrKeyword(char, start);
        }
        throw new Error(`Unexpected character '${char}' at line ${start.line}, column ${start.column}`);
    }
  }

  /** Reads the remaining decimal digits of a number whose first digit is already consumed. */
  private scanNumber(firstChar: string, start: { line: number; column: number }): Token {
    let value = firstChar;
    while (this.isDigit(this.peek())) {
      value += this.advance();
    }
    return this.makeToken('NUMBER', value, start);
  }

  /** Reads the rest of a variable name (letters, digits, underscores). */
  private scanVariable(firstChar: string, start: { line: number; column: number }): Token {
    let value = firstChar;
    while (this.isAlphanumeric(this.peek()) || this.peek() === '_') {
      value += this.advance();
    }
    return this.makeToken('VARIABLE', value, start);
  }

  /** Reads the rest of a lowercase identifier; the word `not` becomes a NOT token. */
  private scanAtomOrKeyword(firstChar: string, start: { line: number; column: number }): Token {
    let value = firstChar;
    while (this.isAlphanumeric(this.peek()) || this.peek() === '_') {
      value += this.advance();
    }
    return this.makeToken(value === 'not' ? 'NOT' : 'ATOM', value, start);
  }

  /**
   * Reads a string literal whose opening quote is already consumed.
   *
   * The token value is the unescaped content: `\n` becomes a line feed, and
   * any other escaped character (notably `\\` and `\"`) stands for itself.
   *
   * @throws Error when the input ends before the closing quote.
   */
  private scanString(start: { line: number; column: number }): Token {
    let value = '';
    while (!this.isAtEnd() && this.peek() !== '"') {
      if (this.peek() === '\\') {
        this.advance(); // the backslash itself
        if (!this.isAtEnd()) {
          const escaped = this.advance();
          value += escaped === 'n' ? '\n' : escaped;
        }
      } else {
        // Raw newlines are accepted; advance() keeps line/column in sync.
        value += this.advance();
      }
    }
    if (this.isAtEnd()) {
      throw new Error(`Unterminated string at line ${start.line}, column ${start.column}`);
    }
    this.advance(); // closing quote
    return this.makeToken('STRING', value, start);
  }

  /**
   * Reads the lowercase word following an already-consumed '#'.
   *
   * @returns An AGGREGATE or DIRECTIVE token for known words, otherwise a HASH
   *          token whose value is '#' plus whatever word was read (possibly none).
   */
  private scanDirectiveOrAggregate(start: { line: number; column: number }): Token {
    let value = '#';
    while (this.isLowercase(this.peek()) || this.peek() === '_') {
      value += this.advance();
    }
    if (ASPLexer.AGGREGATES.has(value)) {
      return this.makeToken('AGGREGATE', value, start);
    }
    if (ASPLexer.DIRECTIVES.has(value)) {
      return this.makeToken('DIRECTIVE', value, start);
    }
    return this.makeToken('HASH', value, start);
  }

  /** Skips spaces, tabs and carriage returns, stopping at a newline or any other character. */
  private skipWhitespaceExceptNewline(): void {
    while (!this.isAtEnd() && this.isWhitespaceExceptNewline(this.peek())) {
      this.advance();
    }
  }

  /** Skips to (but not past) the next newline or the end of input. */
  private skipLineComment(): void {
    while (!this.isAtEnd() && this.peek() !== '\n') {
      this.advance();
    }
  }

  /**
   * Skips a `%* ... *%` block comment; the cursor must be on the opening '%'.
   * A comment that is never closed silently extends to the end of input.
   */
  private skipBlockComment(): void {
    this.advance(); // %
    this.advance(); // *
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '%') {
        this.advance(); // *
        this.advance(); // %
        return;
      }
      this.advance();
    }
  }

  /** Returns the current character without consuming it, or '\0' at end of input. */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.input.charAt(this.position);
  }

  /** Returns the character after the current one without consuming, or '\0' if there is none. */
  private peekNext(): string {
    if (this.position + 1 >= this.input.length) return '\0';
    return this.input.charAt(this.position + 1);
  }

  /**
   * Consumes and returns the current character, updating line and column.
   * This is the single place where position tracking happens, so every
   * scanner (strings and comments included) stays consistent across newlines.
   */
  private advance(): string {
    const char = this.input.charAt(this.position);
    this.position++;
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  private isAtEnd(): boolean {
    return this.position >= this.input.length;
  }

  private isDigit(char: string): boolean {
    return char >= '0' && char <= '9';
  }

  private isUppercase(char: string): boolean {
    return char >= 'A' && char <= 'Z';
  }

  private isLowercase(char: string): boolean {
    return char >= 'a' && char <= 'z';
  }

  private isAlphanumeric(char: string): boolean {
    return this.isDigit(char) || this.isUppercase(char) || this.isLowercase(char);
  }

  private isWhitespaceExceptNewline(char: string): boolean {
    return char === ' ' || char === '\t' || char === '\r';
  }

  /** Builds a token positioned at `start`, the location of its first character. */
  private makeToken(type: TokenType, value: string, start: { line: number; column: number }): Token {
    return {
      type,
      value,
      line: start.line,
      column: start.column,
    };
  }
}