// lexer.ts (9.49 kB)
/**
 * ZigNet Lexer
 * Tokenizes Zig source code
 */
/**
 * Every category of token the lexer can emit.
 *
 * This is a string enum in which each member's value is identical to its own
 * name, so a token's `type` is readable as-is when printed.
 */
export enum TokenType {
  // Keywords (recognised through the KEYWORDS lookup table)
  FN = 'FN',
  CONST = 'CONST',
  VAR = 'VAR',
  STRUCT = 'STRUCT',
  UNION = 'UNION',
  ENUM = 'ENUM',
  IF = 'IF',
  ELSE = 'ELSE',
  WHILE = 'WHILE',
  FOR = 'FOR',
  RETURN = 'RETURN',
  BREAK = 'BREAK',
  CONTINUE = 'CONTINUE',
  COMPTIME = 'COMPTIME',
  INLINE = 'INLINE',
  // Types (built-in type names, also recognised through the KEYWORDS table)
  I32 = 'I32',
  I64 = 'I64',
  U32 = 'U32',
  F32 = 'F32',
  F64 = 'F64',
  BOOL = 'BOOL',
  VOID = 'VOID',
  // Literals
  NUMBER = 'NUMBER', // run of digits with an optional fractional part
  STRING = 'STRING', // single- or double-quoted; token value is the unescaped contents
  IDENT = 'IDENT', // [a-zA-Z_][a-zA-Z0-9_]* that is not a keyword
  TRUE = 'TRUE', // the word `true`
  FALSE = 'FALSE', // the word `false`
  // Operators
  PLUS = 'PLUS', // +
  MINUS = 'MINUS', // -
  STAR = 'STAR', // *
  SLASH = 'SLASH', // /
  PERCENT = 'PERCENT', // %
  EQ = 'EQ', // ==
  NEQ = 'NEQ', // !=
  LT = 'LT', // <
  GT = 'GT', // >
  LTE = 'LTE', // <=
  GTE = 'GTE', // >=
  ASSIGN = 'ASSIGN', // =
  PLUS_ASSIGN = 'PLUS_ASSIGN', // +=
  AND = 'AND', // &&
  OR = 'OR', // ||
  NOT = 'NOT', // !
  // Punctuation
  LPAREN = 'LPAREN', // (
  RPAREN = 'RPAREN', // )
  LBRACE = 'LBRACE', // {
  RBRACE = 'RBRACE', // }
  LBRACKET = 'LBRACKET', // [
  RBRACKET = 'RBRACKET', // ]
  COLON = 'COLON', // :
  SEMICOLON = 'SEMICOLON', // ;
  COMMA = 'COMMA', // ,
  DOT = 'DOT', // .
  ARROW = 'ARROW', // ->
  FAT_ARROW = 'FAT_ARROW', // =>
  // Special
  EOF = 'EOF', // pushed by the lexer after the last real token; value is ''
  ERROR = 'ERROR', // lexing problem; the token value carries the message
}
/**
 * One lexical unit: a category, the text associated with it, and the source
 * position (line and column) supplied by whoever created the token.
 */
export class Token {
  public type: TokenType;
  public value: string;
  public line: number;
  public column: number;

  /**
   * @param type - Category of the token.
   * @param value - Text carried by the token.
   * @param line - Line number to associate with the token.
   * @param column - Column number to associate with the token.
   */
  constructor(type: TokenType, value: string, line: number, column: number) {
    this.type = type;
    this.value = value;
    this.line = line;
    this.column = column;
  }

  /**
   * Renders the token for debugging.
   *
   * @returns Text of the form `Token(TYPE, "value", line:column)`.
   */
  toString(): string {
    const quoted = `"${this.value}"`;
    const position = `${this.line}:${this.column}`;
    return `Token(${this.type}, ${quoted}, ${position})`;
  }
}
/**
 * Reserved words mapped to their token types.
 *
 * Besides control-flow and declaration keywords, the table also holds the
 * built-in type names and the `true` / `false` literals. Entries are listed
 * as [source text, token type] pairs and turned into a plain lookup object.
 */
const KEYWORDS: Record<string, TokenType> = Object.fromEntries<TokenType>([
  // Declarations and control flow
  ['fn', TokenType.FN],
  ['const', TokenType.CONST],
  ['var', TokenType.VAR],
  ['struct', TokenType.STRUCT],
  ['union', TokenType.UNION],
  ['enum', TokenType.ENUM],
  ['if', TokenType.IF],
  ['else', TokenType.ELSE],
  ['while', TokenType.WHILE],
  ['for', TokenType.FOR],
  ['return', TokenType.RETURN],
  ['break', TokenType.BREAK],
  ['continue', TokenType.CONTINUE],
  ['comptime', TokenType.COMPTIME],
  ['inline', TokenType.INLINE],
  // Built-in type names
  ['i32', TokenType.I32],
  ['i64', TokenType.I64],
  ['u32', TokenType.U32],
  ['f32', TokenType.F32],
  ['f64', TokenType.F64],
  ['bool', TokenType.BOOL],
  ['void', TokenType.VOID],
  // Boolean literals
  ['true', TokenType.TRUE],
  ['false', TokenType.FALSE],
]);
/**
 * Characters that may follow a backslash inside a string literal, mapped to
 * the character each escape stands for. Listed as [escape letter, result]
 * pairs and turned into a plain lookup object.
 */
const ESCAPE_SEQUENCES: Record<string, string> = Object.fromEntries<string>([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['\\', '\\'],
  ['"', '"'],
  ["'", "'"],
]);
/**
 * Turns source text into a flat list of tokens.
 *
 * Usage: `new Lexer(source).tokenize()`. The result always ends with a single
 * EOF token. Problems (an unexpected character, an unterminated string) are
 * reported in-band as ERROR tokens whose value is the message; the lexer never
 * throws for malformed input.
 *
 * Line and column numbers are 1-based. Every token records the position of its
 * first character, except the "Unterminated string" ERROR token, which records
 * the position where the input ended.
 */
export class Lexer {
  // Character-class patterns, created once instead of on every loop iteration.
  // None of them is global/sticky, so sharing them carries no lastIndex state.
  private static readonly WHITESPACE = /\s/;
  private static readonly DIGIT = /[0-9]/;
  private static readonly IDENT_START = /[a-zA-Z_]/;
  private static readonly IDENT_PART = /[a-zA-Z0-9_]/;

  private position = 0;
  private line = 1;
  private column = 1;
  private tokens: Token[] = [];

  /**
   * @param source - Complete source text to tokenize.
   */
  constructor(private source: string) {}

  /** Builds an ERROR token located at the current scan position. */
  private error(message: string): Token {
    return new Token(TokenType.ERROR, message, this.line, this.column);
  }

  /**
   * True once every character has been consumed. End of input is decided by
   * position only, so a literal NUL character in the source is not mistaken
   * for the end.
   */
  private isAtEnd(): boolean {
    return this.position >= this.source.length;
  }

  /**
   * Looks at a character without consuming it.
   *
   * @param offset - Distance ahead of the current position (0 = next character).
   * @returns The character, or '\0' when the requested position is past the end.
   */
  private peek(offset = 0): string {
    const pos = this.position + offset;
    if (pos >= this.source.length) return '\0';
    return this.source.charAt(pos);
  }

  /**
   * Consumes one character and updates line/column bookkeeping.
   *
   * @returns The consumed character, or '\0' (consuming nothing) at end of input.
   */
  private advance(): string {
    // Never step past the end: doing so would corrupt the column count and
    // hand `undefined` to callers that expect a string.
    if (this.isAtEnd()) return '\0';
    const char = this.source.charAt(this.position);
    this.position++;
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  /** Consumes the next character only if it equals `expected`. */
  private match(expected: string): boolean {
    if (this.isAtEnd() || this.peek() !== expected) return false;
    this.advance();
    return true;
  }

  /** Consumes any run of whitespace (as defined by the regex class `\s`). */
  private skipWhitespace(): void {
    while (!this.isAtEnd() && Lexer.WHITESPACE.test(this.peek())) {
      this.advance();
    }
  }

  /**
   * Consumes a `//` line comment if one starts at the current position. The
   * terminating newline is left in place for the whitespace skipper.
   *
   * @returns True when a comment was consumed.
   */
  private skipComment(): boolean {
    if (this.peek() !== '/' || this.peek(1) !== '/') return false;
    while (!this.isAtEnd() && this.peek() !== '\n') {
      this.advance();
    }
    return true;
  }

  /** Consumes and returns a run of letters, digits and underscores. */
  private readIdentifier(): string {
    const start = this.position;
    while (Lexer.IDENT_PART.test(this.peek())) {
      this.advance();
    }
    return this.source.slice(start, this.position);
  }

  /**
   * Consumes and returns a decimal number: digits, optionally followed by a
   * '.' and more digits. A '.' that is not followed by a digit is left alone
   * so that it is tokenized separately as DOT.
   */
  private readNumber(): string {
    const start = this.position;
    while (Lexer.DIGIT.test(this.peek())) {
      this.advance();
    }
    if (this.peek() === '.' && Lexer.DIGIT.test(this.peek(1))) {
      this.advance(); // '.'
      while (Lexer.DIGIT.test(this.peek())) {
        this.advance();
      }
    }
    return this.source.slice(start, this.position);
  }

  /**
   * Consumes a quoted string starting at the opening quote.
   *
   * @param quote - The quote character that opened the string (and must close it).
   * @returns The string contents with escapes resolved, or an ERROR token when
   *   the input ends before the closing quote.
   */
  private readString(quote: string): string | Token {
    let value = '';
    this.advance(); // opening quote
    while (!this.isAtEnd() && this.peek() !== quote) {
      if (this.peek() === '\\') {
        this.advance(); // backslash
        if (this.isAtEnd()) break; // dangling backslash at end of input
        value += this.getEscapeSequence(this.advance());
      } else {
        value += this.advance();
      }
    }
    if (this.isAtEnd()) {
      return this.error('Unterminated string');
    }
    this.advance(); // closing quote
    return value;
  }

  /**
   * Resolves the character following a backslash. Unknown escapes yield the
   * character itself.
   */
  private getEscapeSequence(char: string): string {
    return Object.prototype.hasOwnProperty.call(ESCAPE_SEQUENCES, char)
      ? ESCAPE_SEQUENCES[char]
      : char;
  }

  /**
   * Classifies a word as a keyword or a plain identifier.
   *
   * Only the table's own entries count. A plain property read would also find
   * members inherited from Object.prototype, so identifiers such as
   * `constructor` or `toString` would otherwise get a non-TokenType type.
   */
  private keywordType(word: string): TokenType {
    return Object.prototype.hasOwnProperty.call(KEYWORDS, word)
      ? KEYWORDS[word]
      : TokenType.IDENT;
  }

  /**
   * Builds the token for an operator or punctuation mark.
   *
   * @param char - First character of the token; already consumed by the caller.
   * @param line - Line of `char`.
   * @param column - Column of `char`.
   * @returns The matching token, or an ERROR token ("Unexpected char: …") when
   *   `char` does not start any known operator. A lone `&` or `|` is an error.
   */
  private readOperator(char: string, line: number, column: number): Token {
    const make = (type: TokenType, text: string): Token =>
      new Token(type, text, line, column);

    switch (char) {
      case '+':
        return this.match('=') ? make(TokenType.PLUS_ASSIGN, '+=') : make(TokenType.PLUS, '+');
      case '-':
        return this.match('>') ? make(TokenType.ARROW, '->') : make(TokenType.MINUS, '-');
      case '*':
        return make(TokenType.STAR, '*');
      case '/':
        return make(TokenType.SLASH, '/');
      case '%':
        return make(TokenType.PERCENT, '%');
      case '=':
        if (this.match('=')) return make(TokenType.EQ, '==');
        if (this.match('>')) return make(TokenType.FAT_ARROW, '=>');
        return make(TokenType.ASSIGN, '=');
      case '!':
        return this.match('=') ? make(TokenType.NEQ, '!=') : make(TokenType.NOT, '!');
      case '<':
        return this.match('=') ? make(TokenType.LTE, '<=') : make(TokenType.LT, '<');
      case '>':
        return this.match('=') ? make(TokenType.GTE, '>=') : make(TokenType.GT, '>');
      case '&':
        if (this.match('&')) return make(TokenType.AND, '&&');
        break; // a single '&' is not a token
      case '|':
        if (this.match('|')) return make(TokenType.OR, '||');
        break; // a single '|' is not a token
      case '(':
        return make(TokenType.LPAREN, '(');
      case ')':
        return make(TokenType.RPAREN, ')');
      case '{':
        return make(TokenType.LBRACE, '{');
      case '}':
        return make(TokenType.RBRACE, '}');
      case '[':
        return make(TokenType.LBRACKET, '[');
      case ']':
        return make(TokenType.RBRACKET, ']');
      case ':':
        return make(TokenType.COLON, ':');
      case ';':
        return make(TokenType.SEMICOLON, ';');
      case ',':
        return make(TokenType.COMMA, ',');
      case '.':
        return make(TokenType.DOT, '.');
      default:
        break;
    }
    return make(TokenType.ERROR, `Unexpected char: ${char}`);
  }

  /**
   * Tokenizes the whole source.
   *
   * Scanning always starts from the beginning, so calling this method more
   * than once returns an equivalent list each time rather than appending to
   * the previous result.
   *
   * @returns All tokens in source order, terminated by exactly one EOF token.
   */
  tokenize(): Token[] {
    // Reset scan state so a repeated call does not accumulate tokens.
    this.position = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];

    while (!this.isAtEnd()) {
      this.skipWhitespace();
      // Go back to the top after a comment: more whitespace or comments may follow.
      if (this.skipComment()) continue;
      if (this.isAtEnd()) break;

      const char = this.peek();
      // Remember where the token starts before consuming anything.
      const line = this.line;
      const column = this.column;

      // Numbers
      if (Lexer.DIGIT.test(char)) {
        this.tokens.push(new Token(TokenType.NUMBER, this.readNumber(), line, column));
        continue;
      }

      // Strings
      if (char === '"' || char === "'") {
        const value = this.readString(char);
        this.tokens.push(
          value instanceof Token ? value : new Token(TokenType.STRING, value, line, column)
        );
        continue;
      }

      // Identifiers and keywords
      if (Lexer.IDENT_START.test(char)) {
        const word = this.readIdentifier();
        this.tokens.push(new Token(this.keywordType(word), word, line, column));
        continue;
      }

      // Operators and punctuation
      this.advance();
      this.tokens.push(this.readOperator(char, line, column));
    }

    this.tokens.push(new Token(TokenType.EOF, '', this.line, this.column));
    return this.tokens;
  }
}