DM20 Protocol

Overview Schema Related Servers Score Discussions

resolver.py•10.2 KiB

"""
Term resolver with O(1) lookup and accent normalization.
"""

import logging
import re
import unicodedata
from pathlib import Path
from typing import TYPE_CHECKING

import yaml

from .models import TermEntry

if TYPE_CHECKING:
    from dm20_protocol.rulebooks.manager import RulebookManager

logger = logging.getLogger(__name__)


class TermResolver:
    """Resolves bilingual D&D terms with O(1) lookup.

    Provides fast dictionary-based resolution of Italian and English term
    variants to canonical game entities. Handles accent normalization,
    case-insensitivity, and multi-word term matching.

    The resolver builds an internal lookup dictionary mapping all normalized
    variants (canonical, en, it_primary, all it_variants) to their TermEntry
    objects.

    Example:
        >>> resolver = TermResolver()
        >>> resolver.load_yaml(Path("core_terms.yaml"))
        >>> entry = resolver.resolve("palla di fuoco")
        >>> entry.canonical
        'fireball'
        >>> matches = resolver.resolve_in_text("Lancio Fireball con Furtività")
        >>> [(text, entry.canonical) for text, entry in matches]
        [('Fireball', 'fireball'), ('Furtività', 'stealth')]
    """

    def __init__(self) -> None:
        """Initialize an empty resolver."""
        # normalized variant -> entry
        self._lookup: dict[str, TermEntry] = {}
        # (normalized, original) pairs, kept sorted longest-first
        self._sorted_variants: list[tuple[str, str]] = []

    def _normalize(self, text: str) -> str:
        """Normalize text for accent-insensitive, case-insensitive matching.

        Uses Unicode NFD normalization to decompose accented characters,
        then strips combining marks. Also lowercases and strips whitespace.

        Args:
            text: Input text to normalize

        Returns:
            Normalized text (lowercase, no accents, stripped)

        Example:
            >>> resolver._normalize("Furtività")
            'furtivita'
            >>> resolver._normalize("  PALLA DI FUOCO  ")
            'palla di fuoco'
        """
        # Decompose accented characters into base character + combining marks
        decomposed = unicodedata.normalize("NFD", text.lower().strip())
        # Drop the combining marks (the accents), keeping base characters
        return "".join(c for c in decomposed if not unicodedata.combining(c))

    def _normalize_with_offsets(self, text: str) -> tuple[str, list[int]]:
        """Normalize text while remembering where each character came from.

        Applies the same lowercasing / NFD / combining-mark removal as
        ``_normalize`` but does not strip surrounding whitespace, and records
        for every character of the result the index of the character in
        ``text`` that produced it. This lets a match found in the normalized
        text be mapped back to the exact span of the original text.

        Args:
            text: Input text to normalize

        Returns:
            ``(normalized, offsets)`` where ``offsets[i]`` is the index in
            ``text`` of the source of ``normalized[i]``. ``offsets`` carries
            one extra trailing element equal to ``len(text)`` so that
            ``text[offsets[start]:offsets[end]]`` is valid for any match span.
        """
        lowered = text.lower()
        if len(lowered) == len(text):
            # Whole-string lowercasing kept a 1:1 character alignment, so use
            # it directly (keeps context-sensitive rules identical to
            # ``_normalize``).
            pieces = list(lowered)
        else:
            # A character expanded while lowercasing (e.g. U+0130); lowercase
            # per character so every piece still maps to one source index.
            pieces = [ch.lower() for ch in text]

        kept: list[str] = []
        offsets: list[int] = []
        for position, piece in enumerate(pieces):
            for ch in unicodedata.normalize("NFD", piece):
                if not unicodedata.combining(ch):
                    kept.append(ch)
                    offsets.append(position)
        offsets.append(len(text))  # sentinel for spans ending at the end
        return "".join(kept), offsets

    def load_yaml(self, path: Path) -> None:
        """Load term dictionary from YAML file.

        Parses the YAML file and builds the internal lookup dictionary.
        All variants (canonical, en, it_primary, it_variants) are mapped
        to their TermEntry for O(1) resolution. Any previously loaded data
        is replaced, but only after the whole file has been parsed
        successfully; on error the resolver keeps its previous contents.

        Expected YAML format:
            terms:
              - canonical: fireball
                category: spell
                en: Fireball
                it_primary: Palla di Fuoco
                it_variants: [Palla di fuoco, palla di fuoco]

        Args:
            path: Path to YAML file

        Raises:
            FileNotFoundError: If the YAML file doesn't exist
            yaml.YAMLError: If the YAML is malformed
            ValueError: If the top-level 'terms' key is missing
        """
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not isinstance(data, dict) or "terms" not in data:
            raise ValueError("YAML file must contain a 'terms' key")

        # Build into locals first so a bad entry cannot leave the resolver
        # half-populated.
        new_lookup: dict[str, TermEntry] = {}
        new_variants: list[tuple[str, str]] = []

        # An empty "terms:" key parses as None; treat it as an empty list.
        for term_data in data["terms"] or []:
            entry = TermEntry(**term_data)

            # Collect all variants for this entry
            variants = [
                entry.canonical,
                entry.en,
                entry.it_primary,
                *entry.it_variants,
            ]

            # Map normalized variants to entry
            for variant in variants:
                normalized = self._normalize(variant)
                if normalized:  # Skip empty strings
                    new_lookup[normalized] = entry
                    # Store both normalized and original for text scanning
                    new_variants.append((normalized, variant))

        # Sort variants by length (longest first) for greedy matching.
        # This ensures "palla di fuoco" matches before "palla".
        new_variants.sort(key=lambda x: len(x[0]), reverse=True)

        # Commit in place so existing references to the containers stay valid
        self._lookup.clear()
        self._lookup.update(new_lookup)
        self._sorted_variants[:] = new_variants

    def resolve(self, text: str) -> TermEntry | None:
        """Resolve a single term to its TermEntry.

        Performs O(1) dictionary lookup after normalization.
        Returns None for unknown terms (no errors, graceful passthrough).

        Args:
            text: Term to resolve (any variant)

        Returns:
            TermEntry if found, None otherwise

        Example:
            >>> entry = resolver.resolve("furtivita")  # accent-insensitive
            >>> entry.canonical
            'stealth'
            >>> resolver.resolve("unknown_term") is None
            True
        """
        normalized = self._normalize(text)
        return self._lookup.get(normalized)

    def resolve_in_text(self, text: str) -> list[tuple[str, TermEntry]]:
        """Find all known terms in a text string.

        Scans the input text for all known term variants and returns the
        original matched text spans with their resolved TermEntry objects.
        Handles multi-word terms (e.g., "Palla di Fuoco").

        Uses greedy matching with longest-first strategy to handle
        overlapping terms. Matching happens on a normalized copy of the
        text; each match is mapped back to the original text through a
        character-offset table, so the returned spans are correct even when
        the input has leading whitespace or pre-decomposed accents.

        Args:
            text: Input text to scan

        Returns:
            List of (matched_text, TermEntry) tuples in order of appearance

        Example:
            >>> matches = resolver.resolve_in_text("Lancio Fireball con Furtività")
            >>> [(m, e.canonical) for m, e in matches]
            [('Fireball', 'fireball'), ('Furtività', 'stealth')]
        """
        if not text:
            return []

        normalized_text, offsets = self._normalize_with_offsets(text)

        # taken[i] is True once normalized_text[i] belongs to an accepted match
        taken = [False] * len(normalized_text)
        # Several original spellings normalize to the same key; scan each once
        scanned: set[str] = set()
        # (start index in original text, matched original text, entry)
        found: list[tuple[int, str, TermEntry]] = []

        # Try to match each variant (sorted longest-first)
        for normalized_variant, _original_variant in self._sorted_variants:
            if not normalized_variant or normalized_variant in scanned:
                continue
            scanned.add(normalized_variant)

            # Cheap pre-check: a regex hit requires the literal substring
            if normalized_variant not in normalized_text:
                continue

            entry = self._lookup.get(normalized_variant)
            # Word boundaries on both sides; special characters are escaped
            pattern = r"\b" + re.escape(normalized_variant) + r"\b"

            for match in re.finditer(pattern, normalized_text):
                start, end = match.span()

                # Skip if any character is already part of a longer match
                if any(taken[start:end]):
                    continue
                taken[start:end] = [True] * (end - start)

                if entry is None:
                    continue

                # Map back to the original text (preserve case/accents).
                # offsets[end] is the source of the next kept character, so
                # combining marks attached to the last letter are included.
                origin = offsets[start]
                found.append((origin, text[origin:offsets[end]], entry))

        # Order of appearance, by actual match position
        found.sort(key=lambda item: item[0])
        return [(matched_text, entry) for _, matched_text, entry in found]

    # Map source internal storage attributes to TermEntry categories
    _RULEBOOK_STORAGE_MAP: list[tuple[str, str]] = [
        ("_spells", "spell"),
        ("_monsters", "monster"),
        ("_classes", "class"),
        ("_subclasses", "general"),
        ("_races", "race"),
        ("_subraces", "race"),
        ("_feats", "general"),
        ("_backgrounds", "general"),
        ("_items", "item"),
    ]

    def index_from_rulebook(self, manager: "RulebookManager") -> int:
        """Auto-index entity names from loaded rulebook sources.

        Iterates through all loaded sources in the RulebookManager and
        extracts entity names (spells, monsters, classes, races, items, etc.).
        Creates basic TermEntry objects for each, indexed for English-term
        recognition.

        Terms already present in the lookup dict (e.g., from YAML static
        dictionary) are skipped — curated translations always take priority.

        Auto-indexed terms have it_primary set equal to en (no Italian
        translation) since they are extracted programmatically without
        translation data.

        Args:
            manager: RulebookManager with loaded sources

        Returns:
            Count of newly indexed terms
        """
        count = 0

        for source in manager._sources.values():
            if not source.is_loaded:
                continue

            for storage_attr, term_category in self._RULEBOOK_STORAGE_MAP:
                storage = getattr(source, storage_attr, None)
                if not storage:
                    continue

                for index, definition in storage.items():
                    name = getattr(definition, "name", None)
                    if not name:
                        continue

                    normalized_name = self._normalize(name)
                    normalized_index = self._normalize(index)

                    # A name that normalizes to nothing (e.g. whitespace only)
                    # cannot be matched and would pollute the lookup.
                    if not normalized_name:
                        continue

                    # Skip if already in lookup (static YAML terms take priority)
                    if (
                        normalized_name in self._lookup
                        or normalized_index in self._lookup
                    ):
                        continue

                    entry = TermEntry(
                        canonical=index,
                        category=term_category,
                        en=name,
                        it_primary=name,
                        it_variants=[],
                    )

                    # Index both the display name and the index key
                    self._lookup[normalized_name] = entry
                    self._sorted_variants.append((normalized_name, name))

                    if normalized_index and normalized_index != normalized_name:
                        self._lookup[normalized_index] = entry
                        self._sorted_variants.append((normalized_index, index))

                    count += 1

        if count > 0:
            # Re-sort variants by length (longest first) for greedy matching
            self._sorted_variants.sort(key=lambda x: len(x[0]), reverse=True)
            logger.info("Indexed %d terms from rulebook sources", count)

        return count

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

resolver.py•10.2 KiB