SF Permits MCP Server

intent_router.py•18.9 KiB

"""Intent router — classify free-text queries and extract entities. Rule-based intent classification with priority ordering. No LLM calls. Used by the /ask endpoint to route conversational search box queries. Intents (priority order): 0. draft_response — conversational / email-style messages (pasted emails, greetings, multi-line questions, signatures) 1. lookup_permit — permit number detected 2. search_complaint — complaint/violation keywords 3. search_parcel — block/lot pattern 3.5 validate_plans — validation keywords 4. search_address — street address pattern 4.5 draft_response — long question fallback (>150 chars with ?) 5. search_person — person/company name search 6. analyze_project — project description with action verbs 7. general_question — fallback """ from __future__ import annotations import re from dataclasses import dataclass, field from difflib import get_close_matches # --------------------------------------------------------------------------- # Data structures # --------------------------------------------------------------------------- @dataclass class IntentResult: """Result of intent classification.""" intent: str confidence: float entities: dict = field(default_factory=dict) # --------------------------------------------------------------------------- # Regex patterns # --------------------------------------------------------------------------- # Priority 1: Permit numbers (9–15 digits, or letter + 6+ digits) PERMIT_NUMBER_RE = re.compile( r'\b(\d{9,15})\b' r'|' r'\b([A-Z]\d{6,})\b', re.IGNORECASE, ) # Priority 2: Block/lot BLOCK_LOT_RE = re.compile( r'block\s*(\d{3,5})\s*[,/]?\s*lot\s*(\d{1,4})', re.IGNORECASE, ) # Priority 2: Complaint/enforcement keywords # Use regex for word-boundary matching to avoid false positives # (e.g., "nov" matching inside "renovate") COMPLAINT_KEYWORDS_RE = re.compile( r'\b(?:complaints?|violations?|enforcement|cited|abated|' r'code\s+violations?|building\s+complaints?|notice\s+of\s+violations?|' 
r'inspection\s+results?|failed\s+inspections?|nov)\b', re.IGNORECASE, ) # Complaint number pattern: 9 digits starting with year (20XXXXXXX) COMPLAINT_NUMBER_RE = re.compile(r'\b(20\d{7})\b') # Priority 3.5: Validate keywords VALIDATE_KEYWORDS = [ "validate", "check my plans", "check plans", "epr compliance", "plan set", "upload pdf", "validate pdf", "check pdf", "epr check", "plan review compliance", ] # Priority 4: Address pattern # Two-pass approach: try with suffix first (greedy name), then bare number+name # NOTE: Street names can start with digits (e.g., "16th Ave", "3rd St", "22nd Blvd") ADDRESS_WITH_SUFFIX_RE = re.compile( r'(\d{1,5})\s+' r'(\d{0,3}(?:st|nd|rd|th)\s+|[A-Za-z][A-Za-z\s]{1,30}?)\s*' r'(St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Rd|Road|Dr(?:ive)?' r'|Way|Ct|Court|Ln|Lane|Pl(?:ace)?|Ter(?:race)?)\.?\b', re.IGNORECASE, ) # Bare address: number + word, excluding measurement words # Also support numbered streets (e.g., "723 16th") ADDRESS_BARE_RE = re.compile( r'(\d{1,5})\s+([A-Za-z][A-Za-z]{2,20}|\d{1,3}(?:st|nd|rd|th))', re.IGNORECASE, ) # Words that look like addresses but aren't _NOT_STREET_NAMES = { "sqft", "sq", "square", "feet", "budget", "cost", "dollars", "units", "unit", "stories", "story", "floors", "floor", "rooms", "room", "baths", "bath", "beds", "bed", "year", "years", "month", "months", "day", "days", "percent", "people", "seats", "per", } # Address-intent signal phrases ADDRESS_SIGNALS = [ "permits at", "permits for", "find permits", "search permits", "what's happening at", "what's going on at", "look up", "what's at", "show me", "anything at", ] # Pattern to strip city/state/zip/country tail from pasted mailing addresses # e.g., "146 Lake St 1425 San Francisco, CA 94118 US" → "146 Lake St 1425" _MAILING_TAIL_RE = re.compile( r',?\s*(?:San\s+Francisco|SF)\s*' # city r'(?:,?\s*CA(?:lifornia)?)?\s*' # state r'(?:\d{5}(?:-\d{4})?)?\s*' # zip r'(?:US|USA|United\s+States)?\s*$', # country re.IGNORECASE, ) # Unit / apt / suite / # numbers 
that can appear after the street suffix _UNIT_RE = re.compile( r'\s+(?:#|apt\.?|suite|ste\.?|unit|fl(?:oor)?\.?)\s*\w+', re.IGNORECASE, ) # Bare trailing 3-5 digit numbers after a street suffix (e.g., "146 Lake St 1425") _TRAILING_UNIT_RE = re.compile( r'((?:St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Rd|Road|Dr(?:ive)?' r'|Way|Ct|Court|Ln|Lane|Pl(?:ace)?|Ter(?:race)?)\.?)' r'\s+(\d{1,5})\s*$', re.IGNORECASE, ) # Priority 5: Person search patterns PERSON_PATTERNS = [ # "projects by John Smith" / "permits for Amy Lee" re.compile(r"(?:projects?|permits?|work|portfolio)\s+(?:by|for|of)\s+(.+)", re.IGNORECASE), # "find contractor Bob" / "search architect Jane" / "show expediter Tom" re.compile(r"(?:find|search|show|look\s*up)\s+(?:contractor|architect|engineer|consultant|expediter|owner)?\s*(.+)", re.IGNORECASE), # "Amy Lee's projects" / "Smith Construction's permits" re.compile(r"(.+?)(?:'s)\s+(?:projects?|permits?|work|portfolio|jobs?)", re.IGNORECASE), # "who is John Smith" / "tell me about Amy Lee" re.compile(r"(?:who\s+is|tell\s+me\s+about|info\s+on|details?\s+(?:on|for))\s+(.+)", re.IGNORECASE), ] # Roles that can appear in person search queries PERSON_ROLES = ["contractor", "architect", "engineer", "consultant", "owner", "designer"] # Common misspellings of roles — map to canonical form _ROLE_TYPOS = { "expiditer": "consultant", "expeditor": "consultant", "expiditor": "consultant", "expediter": "consultant", # old term maps to new canonical "architech": "architect", "architecht": "architect", "enginnier": "engineer", "enginner": "engineer", "contractr": "contractor", "contracter": "contractor", "desinger": "designer", "desginer": "designer", } # Words to strip from the start/end of extracted person names _NAME_NOISE_LEADING = re.compile( r'^(?:me|the|a|an|all|my|about|info\s+on|details?\s+on)\s+', re.IGNORECASE, ) _NAME_NOISE_TRAILING = re.compile( r"\s+(?:projects?|permits?|work|portfolio|jobs?|details?|info)$", re.IGNORECASE, ) # Priority 6: Analyze project signals 
ANALYZE_SIGNALS = [ "renovate", "remodel", "build", "construct", "convert", "install", "add", "replace", "retrofit", "upgrade", "i want to", "planning to", "how much will", "what permits do i need", "kitchen", "bathroom", "adu", "garage", "basement", "commercial", "restaurant", "tenant improvement", "new construction", "demolition", "addition", "solar", "seismic", "sprinkler", "elevator", ] # Priority 3.7 / 4.5: Draft response / email-style query # Explicit prefix commands — ALWAYS route to draft regardless of length DRAFT_EXPLICIT_RE = re.compile( r'^(?:draft|reply\s+to|respond\s+to):', re.IGNORECASE, ) # Broader signals — require word_count > 12 to avoid short address queries DRAFT_SIGNALS_RE = re.compile( r'(?:^(?:hi|hello|hey|dear)\b|' # greeting start r'client\s+(?:is\s+)?asking|' # "client is asking" r'homeowner\s+(?:wants|needs|asked)|' # "homeowner wants" r'^(?:draft|reply\s+to|respond\s+to):|' # explicit prefix r'how\s+(?:should|would|do)\s+(?:I|we)\s+respond|' # "how should I respond" r'(?:they|he|she)\s+(?:asked|wants?\s+to\s+know))', # "they asked" re.IGNORECASE | re.MULTILINE, ) # Entity extraction patterns COST_RE = re.compile(r'\$\s*([\d,]+)\s*([kK])?') SQFT_RE = re.compile(r'(\d[\d,]*)\s*(?:sq\.?\s*ft\.?|square\s*feet|sqft|sf)\b', re.IGNORECASE) # --------------------------------------------------------------------------- # Intent classification # --------------------------------------------------------------------------- def classify(text: str, neighborhoods: list[str] | None = None) -> IntentResult: """Classify a free-text query into an intent with extracted entities. Args: text: Raw user input from the search box. neighborhoods: Optional list of valid neighborhood names for fuzzy matching. Returns: IntentResult with intent name, confidence, and extracted entities. 
""" text = text.strip() if not text: return IntentResult(intent="general_question", confidence=0.0, entities={"query": ""}) text_lower = text.lower() word_count = len(text.split()) line_count = text.count('\n') + 1 # --- Priority 0: Conversational / email-style messages --- # Multi-line messages with greetings, signatures, or email structure # should ALWAYS route to draft_response, even if they contain keywords # like "complaint", "violation", "remodel", etc. These are pasted # emails or conversational questions, not search queries. has_explicit_draft = bool(DRAFT_EXPLICIT_RE.search(text)) has_draft_signal = bool(DRAFT_SIGNALS_RE.search(text)) is_multiline = line_count >= 3 has_greeting = bool(re.match(r'^(?:hi|hello|hey|dear)\b', text_lower)) has_signature = bool(re.search( r'(?:^[\u2014—-]{1,3}\s*\w|' # "— Karen" or "- Karen" r'(?:regards|thanks|sincerely|best|cheers),?\s*$|' # sign-off r'^sent\s+from\s+my\b)', # mobile signature text, re.IGNORECASE | re.MULTILINE, )) is_long = len(text) > 150 # Route to draft if the text looks like a conversation / pasted email: # - Explicit "draft:" / "reply to:" prefix → always # - Multi-line + greeting or signature → always (pasted email) # - Greeting + long text (>150 chars) → always (conversational question) # - Draft signal keywords + enough words (>12) → likely a question for AI if has_explicit_draft: return IntentResult( intent="draft_response", confidence=0.95, entities={"query": text}, ) if is_multiline and (has_greeting or has_signature): return IntentResult( intent="draft_response", confidence=0.90, entities={"query": text}, ) if has_greeting and is_long: return IntentResult( intent="draft_response", confidence=0.85, entities={"query": text}, ) if has_draft_signal and word_count > 12: return IntentResult( intent="draft_response", confidence=0.80, entities={"query": text}, ) # --- Priority 1: Permit number --- m = PERMIT_NUMBER_RE.search(text) if m: permit_num = m.group(1) or m.group(2) return IntentResult( 
intent="lookup_permit", confidence=0.95, entities={"permit_number": permit_num}, ) # --- Priority 2: Complaint / enforcement search --- # Check before block/lot so "violations on block 2920 lot 020" routes to complaints has_complaint_keyword = bool(COMPLAINT_KEYWORDS_RE.search(text_lower)) complaint_num_match = COMPLAINT_NUMBER_RE.search(text) if has_complaint_keyword: complaint_entities: dict = {} if complaint_num_match: complaint_entities["complaint_number"] = complaint_num_match.group(1) # Try to extract address for complaint search addr_suffix_m = ADDRESS_WITH_SUFFIX_RE.search(text) addr_m = addr_suffix_m or ADDRESS_BARE_RE.search(text) if addr_m: sn = addr_m.group(1) if sn: complaint_entities["street_number"] = sn street = addr_m.group(2).strip() # Include suffix (Dr, Ave, St, etc.) when available if addr_suffix_m and addr_m is addr_suffix_m: suffix = addr_m.group(3) if suffix: street = f"{street} {suffix}" if street and street.lower() not in _NOT_STREET_NAMES: complaint_entities["street_name"] = street # Try block/lot bl_m = BLOCK_LOT_RE.search(text) if bl_m: complaint_entities["block"] = bl_m.group(1) complaint_entities["lot"] = bl_m.group(2) return IntentResult( intent="search_complaint", confidence=0.9, entities=complaint_entities, ) # --- Priority 3: Block/lot --- m = BLOCK_LOT_RE.search(text) if m: return IntentResult( intent="search_parcel", confidence=0.9, entities={"block": m.group(1), "lot": m.group(2)}, ) # --- Priority 3.5: Validate plans --- if any(kw in text_lower for kw in VALIDATE_KEYWORDS): return IntentResult( intent="validate_plans", confidence=0.85, entities={}, ) # --- Priority 4: Address search --- has_address_signal = any(sig in text_lower for sig in ADDRESS_SIGNALS) is_short = word_count <= 8 # reuse word_count from Priority 0 # Pre-clean: strip mailing address tails and unit numbers so # "146 Lake St 1425 San Francisco, CA 94118 US" → "146 Lake St" addr_text = _MAILING_TAIL_RE.sub('', text).strip() addr_text = _UNIT_RE.sub('', 
addr_text).strip() m_trailing = _TRAILING_UNIT_RE.search(addr_text) if m_trailing: # Remove bare trailing unit number after suffix: "Lake St 1425" → "Lake St" addr_text = addr_text[:m_trailing.start(2)].strip() # Try with suffix first (e.g., "123 Main St"). # A street suffix IS a signal — no need for word-count or signal-phrase gates. has_suffix = ADDRESS_WITH_SUFFIX_RE.search(addr_text) # Bare address (e.g., "456 Market") still needs a gate. m = has_suffix or ( ADDRESS_BARE_RE.search(addr_text) if (has_address_signal or is_short) else None ) if m and (has_suffix or has_address_signal or is_short): street_number = m.group(1) street_name = m.group(2).strip() # Reject measurement words that look like street names if street_name.lower() in _NOT_STREET_NAMES: m = None if m and (has_suffix or has_address_signal or is_short): street_number = m.group(1) street_name = m.group(2).strip() # Append the street suffix (Dr, Ave, St, etc.) when matched by # ADDRESS_WITH_SUFFIX_RE so primary address saves the full name. # group(3) only exists on the suffix regex, not ADDRESS_BARE_RE. if has_suffix and m is has_suffix: suffix = m.group(3) if suffix: street_name = f"{street_name} {suffix}" # Clean trailing prepositions/articles street_name = re.sub( r'\s+(?:in|at|for|near|of|the|and|or|with)$', '', street_name, flags=re.IGNORECASE, ) street_name = street_name.strip() if street_name and len(street_name) >= 2: return IntentResult( intent="search_address", confidence=0.85, entities={"street_number": street_number, "street_name": street_name}, ) # --- Priority 4.5: Draft response fallback --- # Long questions (>150 chars with ?) that didn't match earlier patterns is_long_question = is_long and '?' 
in text if has_draft_signal or is_long_question: return IntentResult( intent="draft_response", confidence=0.75, entities={"query": text}, ) # --- Priority 5: Person search --- for pattern in PERSON_PATTERNS: m = pattern.search(text) if m: name = m.group(1).strip() # Extract optional role (check canonical + misspellings) role = None for r in PERSON_ROLES: if r in text_lower: role = r name = re.sub(r'\b' + r + r'\b', '', name, flags=re.IGNORECASE).strip() break if role is None: for typo, canonical in _ROLE_TYPOS.items(): if typo in text_lower: role = canonical name = re.sub(r'\b' + typo + r'\b', '', name, flags=re.IGNORECASE).strip() break # Strip leading/trailing noise words name = _NAME_NOISE_LEADING.sub('', name) name = _NAME_NOISE_TRAILING.sub('', name) # Clean trailing punctuation and possessives name = name.rstrip(".,;:!?") name = re.sub(r"'s$", '', name).strip() if name and len(name) >= 2: return IntentResult( intent="search_person", confidence=0.8, entities={"person_name": name, "role": role}, ) # --- Priority 6: Analyze project --- analyze_score = sum(1 for sig in ANALYZE_SIGNALS if sig in text_lower) if analyze_score >= 1 and len(text.split()) >= 4: entities = _extract_project_entities(text, neighborhoods) return IntentResult( intent="analyze_project", confidence=min(0.5 + analyze_score * 0.1, 0.9), entities=entities, ) # --- Priority 7: General question --- return IntentResult( intent="general_question", confidence=0.5, entities={"query": text}, ) # --------------------------------------------------------------------------- # Entity extraction helpers # --------------------------------------------------------------------------- def _extract_project_entities(text: str, neighborhoods: list[str] | None = None) -> dict: """Extract structured fields from a project description.""" entities: dict = {"description": text} # Cost m = COST_RE.search(text) if m: cost_str = m.group(1).replace(",", "") cost = float(cost_str) if m.group(2): # k/K suffix cost *= 1000 
entities["estimated_cost"] = cost # Square footage m = SQFT_RE.search(text) if m: entities["square_footage"] = float(m.group(1).replace(",", "")) # Neighborhood if neighborhoods: entities["neighborhood"] = _match_neighborhood(text, neighborhoods) return entities def _match_neighborhood(text: str, neighborhoods: list[str]) -> str | None: """Fuzzy match a neighborhood name from text. First tries direct substring match (case-insensitive), then falls back to difflib fuzzy matching on individual words. """ text_lower = text.lower() # Direct substring match (longest first to avoid "Mission" matching before "Mission Bay") sorted_hoods = sorted( [n for n in neighborhoods if n], key=len, reverse=True, ) for n in sorted_hoods: if n.lower() in text_lower: return n # Fuzzy match on words (for "Noe" -> "Noe Valley", etc.) words = text.split() hood_lowers = [n.lower() for n in neighborhoods if n] for word in words: if len(word) < 3: continue matches = get_close_matches(word.lower(), hood_lowers, n=1, cutoff=0.8) if matches: for n in neighborhoods: if n and n.lower() == matches[0]: return n return None

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tbrennem-source/sf-permits-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

intent_router.py•18.9 KiB