DM20 Protocol

Overview Schema Related Servers Score Discussions

module_parser.py•12.3 KiB

""" Adventure module parser for extracting structure from PDF TOC. Uses the existing library index system to parse adventure module PDFs and extract structured information about chapters, NPCs, encounters, and locations using pattern matching on TOC entries. """ import logging import re from pathlib import Path from ..library.manager import LibraryManager from ..library.models import TOCEntry from .models.module import ( ContentType, ModuleElement, NPCReference, EncounterReference, LocationReference, ModuleStructure, ) logger = logging.getLogger("dm20-protocol") class ModuleParser: """Parses adventure module PDFs into structured data. Uses the existing library index TOC extraction system to understand adventure module organization. Identifies chapters, encounters, NPCs, and locations from TOC entries using pattern matching. """ # Regex patterns for detecting different content types NPC_PATTERNS = [ r"$NPC$$", # Explicit (NPC) suffix ] ENCOUNTER_PATTERNS = [ r"^Encounter:", # Explicit encounter prefix r"^Area\s+\d+", # "Area 12" r"^Room\s+\d+", # "Room 5" r"^[A-Z]\d+\.", # "K1.", "E3." r"^Battle\s+of", # "Battle of..." r"^Fight\s+at", # "Fight at..." ] LOCATION_PATTERNS = [ r"^Areas?\s+of", # "Area of X", "Areas of X" r"^Map\s+of", # "Map of X" r"^The\s+[A-Z][a-z]+\s+(Castle|Village|Town|Tavern|Temple|Dungeon)", # Location names ] # Patterns for appendix detection APPENDIX_PATTERNS = [ r"^Appendix", r"^Dramatis\s+Personae", r"^NPCs$", ] def __init__(self, library_path: str): """Initialize with path to the library directory. Args: library_path: Path to the library root directory """ self.library_manager = LibraryManager(Path(library_path)) logger.debug(f"ModuleParser initialized with library at {library_path}") def parse_module(self, source_id: str) -> ModuleStructure | None: """Parse a module from the library index. Loads the index for the given source_id and extracts structural information from its TOC. 
Args: source_id: The library source identifier Returns: ModuleStructure if successful, None if source not found or not indexed """ # Load the index index = self.library_manager.get_index(source_id) if not index: logger.warning(f"No index found for source_id: {source_id}") return None logger.debug(f"Parsing module structure for {source_id}") # Extract structural components chapters = self.extract_chapters(index.toc) npcs = self.extract_npcs(index.toc) encounters = self.extract_encounters(index.toc) locations = self.extract_locations(index.toc) structure = ModuleStructure( module_id=source_id, title=index.filename.replace(".pdf", "").replace(".PDF", ""), source_file=index.filename, chapters=chapters, npcs=npcs, encounters=encounters, locations=locations, ) logger.debug( f"Parsed module: {len(chapters)} chapters, " f"{len(npcs)} NPCs, {len(encounters)} encounters, " f"{len(locations)} locations" ) return structure def extract_chapters(self, toc_entries: list[TOCEntry]) -> list[ModuleElement]: """Extract chapter hierarchy from TOC entries. Builds a tree of chapters -> sections -> subsections. Calculates page_end from next entry's page_start. 
Args: toc_entries: List of TOC entries from the library index Returns: Flat list of ModuleElement objects with parent-child relationships """ elements: list[ModuleElement] = [] def process_entries( entries: list[TOCEntry], parent_name: str | None = None, depth: int = 0 ) -> None: for i, entry in enumerate(entries): # Determine content type - check appendix pattern first if self._matches_patterns(entry.title, self.APPENDIX_PATTERNS): content_type = ContentType.APPENDIX elif depth == 0: content_type = ContentType.CHAPTER else: content_type = ContentType.SECTION # Calculate page_end from next sibling page_end = None if i + 1 < len(entries): page_end = entries[i + 1].page - 1 elif entry.end_page is not None: page_end = entry.end_page # Create element element = ModuleElement( name=entry.title, content_type=content_type, page_start=entry.page, page_end=page_end, parent=parent_name, children=[child.title for child in entry.children], ) elements.append(element) # Process children recursively if entry.children: process_entries(entry.children, entry.title, depth + 1) process_entries(toc_entries) return elements def extract_npcs(self, toc_entries: list[TOCEntry]) -> list[NPCReference]: """Identify NPC entries in TOC. 
Patterns to detect: - Entries containing "(NPC)" suffix - Proper-cased names in encounter sections - "Appendix: NPCs" or "Dramatis Personae" sections Args: toc_entries: List of TOC entries from the library index Returns: List of NPCReference objects """ npcs: list[NPCReference] = [] chapter_stack: list[str] = [] def process_entries(entries: list[TOCEntry], depth: int = 0) -> None: for entry in entries: # Track chapter context if depth == 0: # Top-level entry - update chapter if len(chapter_stack) == 0: chapter_stack.append(entry.title) else: chapter_stack[0] = entry.title # Check if this entry looks like an NPC if self._matches_patterns(entry.title, self.NPC_PATTERNS): # Extract NPC name (remove (NPC) suffix if present) name = re.sub(r"\s*$NPC$\s*$", "", entry.title).strip() current_chapter = chapter_stack[0] if chapter_stack else "" npc = NPCReference( name=name, chapter=current_chapter, page=entry.page, ) npcs.append(npc) logger.debug(f"Found NPC: {name} at page {entry.page}") # Process children if entry.children: process_entries(entry.children, depth + 1) process_entries(toc_entries) return npcs def extract_encounters(self, toc_entries: list[TOCEntry]) -> list[EncounterReference]: """Identify encounter entries in TOC. 
Patterns to detect: - "Encounter:" prefix - Area numbers (K1., Area 12, Room 5) - "Battle of X", "Fight at Y" Args: toc_entries: List of TOC entries from the library index Returns: List of EncounterReference objects """ encounters: list[EncounterReference] = [] chapter_stack: list[str] = [] location_stack: list[str] = [] def process_entries(entries: list[TOCEntry], depth: int = 0) -> None: for entry in entries: # Track chapter context if depth == 0: if len(chapter_stack) == 0: chapter_stack.append(entry.title) else: chapter_stack[0] = entry.title # Update current location if this looks like a location if self._matches_patterns(entry.title, self.LOCATION_PATTERNS): if len(location_stack) == 0: location_stack.append(entry.title) else: location_stack[0] = entry.title # Check if this entry looks like an encounter if self._matches_patterns(entry.title, self.ENCOUNTER_PATTERNS): # Determine encounter type encounter_type = "combat" if "social" in entry.title.lower(): encounter_type = "social" elif "puzzle" in entry.title.lower(): encounter_type = "puzzle" elif "exploration" in entry.title.lower(): encounter_type = "exploration" current_chapter = chapter_stack[0] if chapter_stack else "" current_location = location_stack[0] if location_stack else current_chapter encounter = EncounterReference( name=entry.title, location=current_location, chapter=current_chapter, page=entry.page, encounter_type=encounter_type, ) encounters.append(encounter) logger.debug(f"Found encounter: {entry.title} at page {entry.page}") # Process children if entry.children: process_entries(entry.children, depth + 1) process_entries(toc_entries) return encounters def extract_locations(self, toc_entries: list[TOCEntry]) -> list[LocationReference]: """Identify location/area entries in TOC. 
Patterns to detect: - "Areas of X" - "Map of X" - Chapter titles that are location names - Numbered areas within location chapters Args: toc_entries: List of TOC entries from the library index Returns: List of LocationReference objects """ locations: list[LocationReference] = [] chapter_stack: list[str] = [] def process_entries( entries: list[TOCEntry], parent_location: str | None = None, depth: int = 0 ) -> None: for entry in entries: # Track chapter context if depth == 0: if len(chapter_stack) == 0: chapter_stack.append(entry.title) else: chapter_stack[0] = entry.title # Check if this entry looks like a location if self._matches_patterns(entry.title, self.LOCATION_PATTERNS): current_chapter = chapter_stack[0] if chapter_stack else "" location = LocationReference( name=entry.title, chapter=current_chapter, page=entry.page, parent_location=parent_location, sub_locations=[child.title for child in entry.children], ) locations.append(location) logger.debug(f"Found location: {entry.title} at page {entry.page}") # Process children with this location as parent if entry.children: process_entries(entry.children, entry.title, depth + 1) else: # Process children without changing parent if entry.children: process_entries(entry.children, parent_location, depth + 1) process_entries(toc_entries) return locations def _matches_patterns(self, text: str, patterns: list[str]) -> bool: """Check if text matches any of the given regex patterns. Args: text: Text to check patterns: List of regex patterns Returns: True if any pattern matches """ for pattern in patterns: if re.search(pattern, text, re.IGNORECASE): return True return False

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

module_parser.py•12.3 KiB