Canadian Building Code MCP Server

02_IMPLEMENTATION.md•9.73 KiB

# 02. Implementation - Implementation Guide

## Development Environment Setup

### Required Packages

```bash
# Developer side (map generation)
pip install marker-pdf pymupdf

# User side (MCP server)
pip install mcp pymupdf
```

### Directory Structure

```
canadian-building-code-mcp/
├── scripts/                    # Developer scripts
│   ├── generate_map.py         # structure_map generation
│   ├── validate_map.py         # Map validation
│   └── extract_checksums.py    # PDF hash extraction
│
├── maps/                       # Distribution map files
│   ├── obc_2024_map.json
│   └── checksums.json
│
├── src/                        # MCP server code
│   ├── __init__.py
│   ├── server.py               # MCP server main
│   ├── extractor.py            # PDF text extraction
│   ├── database.py             # SQLite management
│   └── config.py               # Configuration
│
└── tests/
    └── test_extraction.py
```

---

## Part 1: Map Generation (Developer Side)

### 1.1 Marker Execution and Coordinate Extraction

```python
# scripts/generate_map.py
import json
import fitz  # PyMuPDF
from pathlib import Path
from datetime import datetime
import hashlib

def calculate_pdf_hash(pdf_path: str) -> str:
    """Calculate MD5 hash of PDF file"""
    with open(pdf_path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()

def extract_structure_from_marker(marker_output_dir: str) -> dict:
    """
    Extract structure information from Marker output
    - Uses polygon info from meta.json table_of_contents
    """
    meta_path = Path(marker_output_dir) / "meta.json"

    with open(meta_path, 'r', encoding='utf-8') as f:
        meta = json.load(f)

    sections = []
    for item in meta.get('table_of_contents', []):
        # Convert polygon to bbox (4 points → [x1, y1, x2, y2])
        polygon = item.get('polygon', [])
        if len(polygon) >= 4:
            bbox = [
                polygon[0][0],  # x1
                polygon[0][1],  # y1
                polygon[2][0],  # x2
                polygon[2][1]   # y2
            ]
        else:
            bbox = None

        sections.append({
            "title": item.get('title', ''),
            "page": item.get('page_id', 1),
            "bbox": bbox,
            "heading_level": item.get('heading_level')
        })

    return sections
```

### 1.2 Map Validation

```python
# scripts/validate_map.py
import json
import fitz

def validate_map(map_path: str, pdf_path: str) -> dict:
    """
    Validate that structure_map matches the PDF
    """
    with open(map_path, 'r') as f:
        structure_map = json.load(f)

    doc = fitz.open(pdf_path)
    results = {
        "total": len(structure_map['sections']),
        "valid": 0,
        "invalid": 0,
        "errors": []
    }

    for section in structure_map['sections'][:50]:  # Sample validation
        try:
            page = doc[section['page'] - 1]  # 0-indexed

            if section['bbox']:
                rect = fitz.Rect(section['bbox'])
                text = page.get_text("text", clip=rect)

                # Check if ID is in extracted text
                if section['id'] in text or section.get('title', '') in text:
                    results['valid'] += 1
                else:
                    results['invalid'] += 1
                    results['errors'].append({
                        "id": section['id'],
                        "expected": section.get('title', ''),
                        "got": text[:100]
                    })
        except Exception as e:
            results['invalid'] += 1
            results['errors'].append({
                "id": section['id'],
                "error": str(e)
            })

    return results
```

---

## Part 2: MCP Server (User Side)

### 2.1 PDF Text Extractor

```python
# src/extractor.py
import fitz
import hashlib
from pathlib import Path
from typing import Optional

class PDFExtractor:
    def __init__(self, pdf_path: str, structure_map: dict):
        self.pdf_path = pdf_path
        self.structure_map = structure_map
        self.doc = None
        self.mode = "fast"  # or "slow"

    def verify_pdf(self) -> bool:
        """Verify PDF hash"""
        expected_hash = self.structure_map.get('pdf_hash')
        with open(self.pdf_path, 'rb') as f:
            actual_hash = hashlib.md5(f.read()).hexdigest()

        if actual_hash == expected_hash:
            self.mode = "fast"
            return True
        else:
            self.mode = "slow"
            print(f"Warning: PDF hash mismatch. Using slow mode.")
            return False

    def extract_section(self, section_id: str) -> Optional[str]:
        """Extract specific section text"""
        section = None
        for s in self.structure_map['sections']:
            if s['id'] == section_id:
                section = s
                break

        if not section:
            return None

        if self.mode == "fast" and section.get('bbox'):
            return self._extract_by_bbox(section)
        else:
            return self._extract_by_pattern(section)

    def _extract_by_bbox(self, section: dict) -> str:
        """Coordinate-based extraction (Fast Mode)"""
        page = self.doc[section['page'] - 1]
        rect = fitz.Rect(section['bbox'])
        return page.get_text("text", clip=rect).strip()
```

### 2.2 Local Database

```python
# src/database.py
import sqlite3
from pathlib import Path

class LocalDB:
    def __init__(self, db_path: str = "~/.building-code-mcp/codes.db"):
        self.db_path = Path(db_path).expanduser()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = None

    def connect(self):
        self.conn = sqlite3.connect(str(self.db_path))
        self._create_tables()

    def _create_tables(self):
        cursor = self.conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS sections (
                id TEXT PRIMARY KEY,
                code TEXT,
                type TEXT,
                title TEXT,
                content TEXT,
                page INTEGER,
                parent_id TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        cursor.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS sections_fts
            USING fts5(id, title, content, tokenize='porter')
        """)

        self.conn.commit()

    def search(self, query: str, code: str = None, limit: int = 10) -> list:
        cursor = self.conn.cursor()

        sql = """
            SELECT s.id, s.title, s.type, s.page,
                   snippet(sections_fts, 2, '<mark>', '</mark>', '...', 32)
            FROM sections_fts
            JOIN sections s ON sections_fts.id = s.id
            WHERE sections_fts MATCH ?
        """
        params = [query]

        if code:
            sql += " AND s.code = ?"
            params.append(code)

        sql += " LIMIT ?"
        params.append(limit)

        cursor.execute(sql, params)
        return cursor.fetchall()
```

### 2.3 MCP Server

```python
# src/server.py
from mcp.server import Server
from mcp.types import Tool, TextContent
import json
from pathlib import Path

from .database import LocalDB
from .extractor import PDFExtractor

server = Server("canadian-building-code")
db = LocalDB()

@server.tool()
async def setup_code(pdf_path: str, code: str = "OBC") -> str:
    """
    Set up and index the Building Code PDF.

    Args:
        pdf_path: PDF file path
        code: Code type (OBC, NBC, BCBC, etc.)
    """
    # Load map
    map_path = Path(__file__).parent.parent / f"maps/{code.lower()}_2024_map.json"
    if not map_path.exists():
        return f"Error: Map for {code} not found"

    with open(map_path, 'r') as f:
        structure_map = json.load(f)

    # Initialize extractor
    extractor = PDFExtractor(pdf_path, structure_map)
    extractor.verify_pdf()
    extractor.open()

    # Extract text and save to DB
    db.connect()
    sections = extractor.extract_all()

    for section_id, data in sections.items():
        db.insert_section({
            "id": section_id,
            **data
        }, code)

    extractor.close()

    return f"Successfully indexed {len(sections)} sections from {code}"

@server.tool()
async def search_code(query: str, code: str = None) -> str:
    """
    Search the Building Code.

    Args:
        query: Search terms
        code: Limit to specific code (optional)
    """
    db.connect()
    results = db.search(query, code)

    if not results:
        return "No results found"

    output = []
    for r in results:
        output.append(f"**{r[0]}** ({r[2]}, p.{r[3]})\n{r[4]}")

    return "\n\n---\n\n".join(output)

@server.tool()
async def get_section(section_id: str) -> str:
    """
    Get the full content of a specific section.

    Args:
        section_id: Section ID (e.g., 9.8.2.1)
    """
    db.connect()
    section = db.get_section(section_id)

    if not section:
        return f"Section {section_id} not found"

    return f"""# {section['id']} - {section['title']}

**Type:** {section['type']}
**Page:** {section['page']}
**Code:** {section['code']}

---

{section['content']}
"""
```

---

## Part 3: User Configuration

### Claude Desktop Configuration

```json
// claude_desktop_config.json
{
  "mcpServers": {
    "building-code": {
      "command": "python",
      "args": ["-m", "canadian_building_code_mcp.server"],
      "env": {
        "PDF_PATH": "C:/path/to/OBC_2024.pdf"
      }
    }
  }
}
```

### First Run

```
User: "Set up the OBC PDF"
Claude: [calls setup_code]
        "Successfully indexed 1,247 sections from OBC"

User: "Tell me the stair width requirements"
Claude: [calls search_code("stair width")]
        "According to 9.8.2.1..."
```

---

## Next Document

→ [03_LEGAL.md](./03_LEGAL.md) - Legal Analysis

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DavidCho1999/Canada-AEC-Code-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

02_IMPLEMENTATION.md•9.73 KiB