"""
C64 BASIC Tokenizer

Translates BASIC program text to the C64's in-memory representation.

C64 BASIC in-memory format:
- The program starts at $0801 (2049 decimal).
- Each line: [next_ptr_lo][next_ptr_hi][line_num_lo][line_num_hi][tokenized_code...][0x00]
- The program ends with two zero bytes (0x00, 0x00).
- Keywords are tokenized to single bytes ($80-$CB).
- Keywords are NOT tokenized inside string literals (quoted text).
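
Worked example, hand-assembled from the rules above: the one-line program

    10 PRINT "HI"

is stored at $0801 as:

    0C 08           next-line pointer ($080C, little-endian)
    0A 00           line number 10 (little-endian)
    99              PRINT token
    20 22 48 49 22  space, then "HI" (quotes included) in PETSCII
    00              end-of-line terminator
    00 00           end-of-program marker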
"""
import re
from typing import Tuple

# BASIC program start address
BASIC_START = 0x0801  # 2049 decimal

# C64 BASIC V2 tokens (keyword -> token byte).
# Matching order is handled by SORTED_KEYWORDS below, which tries longer
# keywords first so that "PRINT#" wins over "PRINT", "INPUT#" over "INPUT", etc.
BASIC_TOKENS = {
    # Statements (starting at $80)
    "END": 0x80,
    "FOR": 0x81,
    "NEXT": 0x82,
    "DATA": 0x83,
    "INPUT#": 0x84,
    "INPUT": 0x85,
    "DIM": 0x86,
    "READ": 0x87,
    "LET": 0x88,
    "GOTO": 0x89,
    "RUN": 0x8A,
    "IF": 0x8B,
    "RESTORE": 0x8C,
    "GOSUB": 0x8D,
    "RETURN": 0x8E,
    "REM": 0x8F,
    "STOP": 0x90,
    "ON": 0x91,
    "WAIT": 0x92,
    "LOAD": 0x93,
    "SAVE": 0x94,
    "VERIFY": 0x95,
    "DEF": 0x96,
    "POKE": 0x97,
    "PRINT#": 0x98,
    "PRINT": 0x99,
    "CONT": 0x9A,
    "LIST": 0x9B,
    "CLR": 0x9C,
    "CMD": 0x9D,
    "SYS": 0x9E,
    "OPEN": 0x9F,
    "CLOSE": 0xA0,
    "GET": 0xA1,
    "NEW": 0xA2,
    # Secondary keywords
    "TAB(": 0xA3,
    "TO": 0xA4,
    "FN": 0xA5,
    "SPC(": 0xA6,
    "THEN": 0xA7,
    "NOT": 0xA8,
    "STEP": 0xA9,
    # Operators
    "+": 0xAA,
    "-": 0xAB,
    "*": 0xAC,
    "/": 0xAD,
    "^": 0xAE,  # Power/exponentiation (the PETSCII up-arrow on real hardware)
    "AND": 0xAF,
    "OR": 0xB0,
    ">": 0xB1,
    "=": 0xB2,
    "<": 0xB3,
    # Functions
    "SGN": 0xB4,
    "INT": 0xB5,
    "ABS": 0xB6,
    "USR": 0xB7,
    "FRE": 0xB8,
    "POS": 0xB9,
    "SQR": 0xBA,
    "RND": 0xBB,
    "LOG": 0xBC,
    "EXP": 0xBD,
    "COS": 0xBE,
    "SIN": 0xBF,
    "TAN": 0xC0,
    "ATN": 0xC1,
    "PEEK": 0xC2,
    "LEN": 0xC3,
    "STR$": 0xC4,
    "VAL": 0xC5,
    "ASC": 0xC6,
    "CHR$": 0xC7,
    "LEFT$": 0xC8,
    "RIGHT$": 0xC9,
    "MID$": 0xCA,
    "GO": 0xCB,  # Allows "GO TO" as an alternative spelling of GOTO
}

# Reverse mapping: token byte -> keyword (for debugging/listing)
TOKEN_TO_KEYWORD = {v: k for k, v in BASIC_TOKENS.items()}

# Keywords sorted by length (longest first) for proper tokenization.
# This ensures "PRINT#" is matched before "PRINT", "INPUT#" before "INPUT", etc.
SORTED_KEYWORDS = sorted(BASIC_TOKENS.keys(), key=len, reverse=True)

# Single-character operators. These are always tokenized: they can never be
# part of a variable name, so they are exempt from the word-boundary check
# in tokenize_line().
ALWAYS_TOKENIZE_OPS = {"+", "-", "*", "/", "^", ">", "=", "<"}


def tokenize_line(line_text: str) -> bytes:
    """
    Tokenize a single BASIC line (without its line number).

    Handles string literals correctly: keywords inside quotes are not
    tokenized.

    Args:
        line_text: The BASIC code without the line number

    Returns:
        Tokenized bytes
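
    Doctest-style sketch, hand-derived from the token table above
    ($99 = PRINT, $B2 = "="; note that FOR is left alone inside the
    variable name FOREST):
        >>> tokenize_line('PRINT "HI"').hex(' ')
        '99 20 22 48 49 22'
        >>> tokenize_line('FOREST=1').hex(' ')
        '46 4f 52 45 53 54 b2 31'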
"""
result = bytearray()
i = 0
in_string = False
in_rem = False
# Convert to uppercase for tokenization (C64 BASIC is case-insensitive for keywords)
# But we preserve original for string contents
upper_line = line_text.upper()
while i < len(line_text):
char = line_text[i]
upper_char = upper_line[i]
# Handle string literals
if char == '"':
result.append(ord(char))
in_string = not in_string
i += 1
continue
# Inside strings or after REM, don't tokenize
if in_string or in_rem:
# Convert lowercase to uppercase PETSCII (C64 screen codes)
if 'a' <= char <= 'z':
result.append(ord(char) - 32) # Convert to uppercase
else:
result.append(ord(char))
i += 1
continue
# Skip spaces
if char == ' ':
result.append(0x20)
i += 1
continue
# Try to match keywords (longest first)
matched = False
for keyword in SORTED_KEYWORDS:
if upper_line[i:i + len(keyword)] == keyword:
# Check if this is a valid keyword boundary
# (not part of a variable name like "FOREST" containing "FOR")
if len(keyword) > 1 and keyword not in ALWAYS_TOKENIZE_OPS:
# Check if next character would make this part of a variable name
next_pos = i + len(keyword)
if next_pos < len(line_text):
next_char = upper_line[next_pos]
# If followed by alphanumeric, it's a variable name, not keyword
if next_char.isalnum() or next_char == '$' or next_char == '%':
# Exception: keywords ending with ( or $ are always tokenized
if not (keyword.endswith('(') or keyword.endswith('$')):
continue
# Check if preceded by alphanumeric (part of variable name)
if i > 0:
prev_char = upper_line[i - 1]
if prev_char.isalnum() or prev_char == '$' or prev_char == '%':
continue
result.append(BASIC_TOKENS[keyword])
i += len(keyword)
matched = True
# After REM, everything is comment (not tokenized)
if keyword == "REM":
in_rem = True
break
if matched:
continue
# Not a keyword, add as PETSCII
if 'a' <= char <= 'z':
result.append(ord(char) - 32) # Convert to uppercase PETSCII
else:
result.append(ord(char))
i += 1
return bytes(result)


def parse_basic_line(line: str) -> Tuple[int, str]:
    """
    Parse a BASIC line into a line number and code.

    Args:
        line: Complete BASIC line (e.g., '10 PRINT "HELLO"')

    Returns:
        Tuple of (line_number, code_without_line_number)

    Raises:
        ValueError: If the line number is invalid or missing
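
    A doctest-style example:
        >>> parse_basic_line('10 PRINT "HI"')
        (10, 'PRINT "HI"')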
"""
line = line.strip()
if not line:
raise ValueError("Empty line")
# Extract line number
match = re.match(r'^(\d+)\s*(.*)', line)
if not match:
raise ValueError(f"Invalid line format (no line number): {line}")
line_num = int(match.group(1))
code = match.group(2)
if line_num < 0 or line_num > 63999:
raise ValueError(f"Line number out of range (0-63999): {line_num}")
return line_num, code


def basic_to_bytes(program_text: str, start_address: int = BASIC_START) -> bytes:
    """
    Convert a complete BASIC program to the C64 in-memory format.

    Args:
        program_text: Multi-line BASIC program text
        start_address: Memory address where the program will be loaded
            (default $0801)

    Returns:
        Bytes ready to be written to C64 memory

    Raises:
        ValueError: If the program has invalid syntax or line numbers
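
    A doctest-style sketch, hand-assembled from the line format described
    in the module docstring:
        >>> basic_to_bytes('10 PRINT "HI"').hex(' ')
        '0c 08 0a 00 99 20 22 48 49 22 00 00 00'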
"""
lines = []
# Parse all lines
for line in program_text.strip().split('\n'):
line = line.strip()
if not line:
continue
# Skip comment lines (not BASIC REM, but input comments like # or //)
if line.startswith('#') or line.startswith('//'):
continue
try:
line_num, code = parse_basic_line(line)
tokenized = tokenize_line(code)
lines.append((line_num, tokenized))
except ValueError as e:
raise ValueError(f"Error parsing line: {e}")
# Sort by line number
lines.sort(key=lambda x: x[0])
# Check for duplicate line numbers
seen = set()
for line_num, _ in lines:
if line_num in seen:
raise ValueError(f"Duplicate line number: {line_num}")
seen.add(line_num)
# Build the memory image
result = bytearray()
current_addr = start_address
for line_num, tokenized in lines:
# Calculate next line pointer
# Line format: [next_lo][next_hi][linenum_lo][linenum_hi][code...][0x00]
line_length = 4 + len(tokenized) + 1 # 4 bytes header + code + terminator
next_addr = current_addr + line_length
# Add next line pointer (little-endian)
result.append(next_addr & 0xFF)
result.append((next_addr >> 8) & 0xFF)
# Add line number (little-endian)
result.append(line_num & 0xFF)
result.append((line_num >> 8) & 0xFF)
# Add tokenized code
result.extend(tokenized)
# Add line terminator
result.append(0x00)
current_addr = next_addr
# Add end-of-program marker (null pointer)
result.append(0x00)
result.append(0x00)
return bytes(result)


def get_program_end_address(program_bytes: bytes, start_address: int = BASIC_START) -> int:
    """
    Calculate the end address of the BASIC program.

    This is the address of the first byte after the program, which is used
    to set the BASIC start-of-variables pointer.

    Args:
        program_bytes: The tokenized program bytes
        start_address: Memory address where the program starts

    Returns:
        Address of the first byte after the program
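
    Example (a 13-byte program at the default $0801 = 2049):
        >>> get_program_end_address(bytes(13))
        2062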
"""
return start_address + len(program_bytes)


def create_prg_file(program_bytes: bytes, start_address: int = BASIC_START) -> bytes:
    """
    Create a .PRG file from a tokenized BASIC program.

    PRG files consist of a 2-byte little-endian load address header followed
    by the program data.

    Args:
        program_bytes: The tokenized program bytes
        start_address: Memory address where the program loads (default $0801)

    Returns:
        Complete PRG file bytes
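
    Example (header for the default load address $0801, followed here by
    just the two-byte end-of-program marker):
        >>> create_prg_file(bytes([0, 0])).hex(' ')
        '01 08 00 00'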
"""
result = bytearray()
# Load address (little-endian)
result.append(start_address & 0xFF)
result.append((start_address >> 8) & 0xFF)
# Program data
result.extend(program_bytes)
return bytes(result)
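

if __name__ == "__main__":
    # Minimal usage sketch: tokenize a two-line program and write it out as
    # a loadable .PRG image. "demo.prg" is an arbitrary example file name.
    demo_program = '10 PRINT "HELLO"\n20 GOTO 10'
    prg = create_prg_file(basic_to_bytes(demo_program))
    with open("demo.prg", "wb") as f:
        f.write(prg)
    print(f"Wrote {len(prg)} bytes to demo.prg")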