Canadian Building Code MCP Server

verifier.py•23 KiB

# verifier.py - GPT Building Code response verification tool
# Reuses extractor.py and adds verification-only helpers.

import re
import json
import difflib
from pathlib import Path

# Reuse the extractor.py helpers.
try:
    from extractor import search_json, extract_section, extract_sections_batch, extract_table
except ImportError:
    # Standalone execution: make this file's directory importable, then retry.
    import sys
    sys.path.insert(0, str(Path(__file__).parent))
    from extractor import search_json, extract_section, extract_sections_batch, extract_table


# ========== Regex patterns ==========

# **NBC Section 4.1.3.2** - Title (p.452)
# Accepts "-", en-dash or em-dash as separator and an optional space in "(p. 452)".
SECTION_PATTERN = re.compile(
    r'\*\*(\w+)\s+Section\s+([\d\.]+)\*\*\s*[-–—]\s*(.+?)\s*\(p\.?\s*(\d+)\)',
    re.IGNORECASE
)

# **NBC Table 4.1.5.3** - Live Loads (p.452)
TABLE_PATTERN = re.compile(
    r'\*\*(\w+)\s+Table\s+([\d\.]+)\*\*\s*[-–—]\s*(.+?)\s*\(p\.?\s*(\d+)\)',
    re.IGNORECASE
)

# > "quoted text..." or > 'quoted text...'
# The capture is greedy on purpose: a lazy match stops at the first quote
# character, which truncates quotes containing an apostrophe ("owner's").
# Without DOTALL the greedy match cannot run past the end of the line.
QUOTE_PATTERN = re.compile(r'>\s*["\'](.+)["\']')


# ========== 1. GPT response parsing ==========

def _build_reference(match, ref_type: str) -> dict:
    """Turn a SECTION_PATTERN / TABLE_PATTERN match into a reference dict."""
    raw_id = match.group(2)
    return {
        "type": ref_type,
        "code": match.group(1).upper(),
        # Table IDs carry a "Table-" prefix; section IDs are used verbatim.
        "id": f"Table-{raw_id}" if ref_type == "table" else raw_id,
        "title": match.group(3).strip(),
        "page_claimed": int(match.group(4)),
        "quote": None,
        "raw_match": match.group(0),
    }


def parse_gpt_response(text: str) -> list:
    """
    Parse section/table references out of a GPT response.

    Expected input format:
        **NBC Section 4.1.3.2** - Strength and Stability (p.452)
        > "A building and its structural components shall be designed..."

        **NBC Table 4.1.5.3** - Live Loads on Floors (p.452)

    Args:
        text: Raw GPT response. None or an empty string yields an empty list.

    Returns:
        A list of dicts in order of appearance, each shaped like:
        {
            "type": "section" | "table",
            "code": "NBC",
            "id": "4.1.3.2" (sections) or "Table-4.1.5.3" (tables),
            "title": "Strength and Stability",
            "page_claimed": 452,
            "quote": "A building and its structural components..." or None,
            "raw_match": "<the matched text>"
        }
        Only section references can carry a quote.
    """
    if not text:
        return []

    results = []
    lines = text.split('\n')
    i = 0

    while i < len(lines):
        line = lines[i]

        section_match = SECTION_PATTERN.search(line)
        if section_match:
            ref = _build_reference(section_match, "section")

            # The quote is expected on the next non-blank line; blank lines
            # between the header and its blockquote are skipped.
            j = i + 1
            while j < len(lines) and not lines[j].strip():
                j += 1
            if j < len(lines):
                quote_match = QUOTE_PATTERN.search(lines[j])
                if quote_match:
                    ref["quote"] = quote_match.group(1).strip()
                    i = j  # the quote line is consumed

            results.append(ref)

        # The table check always runs against the header line itself.
        table_match = TABLE_PATTERN.search(line)
        if table_match:
            results.append(_build_reference(table_match, "table"))

        i += 1

    return results
# ========== 2. JSON verification ==========

# Division prefixes tried when a bare section ID is not stored verbatim
# (e.g. "4.1.3.2" stored as "B-4.1.3.2"). Order matters: first hit wins.
_DIVISION_PREFIXES = ("B-", "A-", "C-")


def _find_by_id(entries: list, wanted_id: str):
    """Return the first dict in *entries* whose "id" equals *wanted_id*, else None."""
    for entry in entries:
        if isinstance(entry, dict) and entry.get("id") == wanted_id:
            return entry
    return None


def verify_reference_exists(json_data: dict, ref_type: str, ref_id: str) -> dict:
    """
    Check whether a section or table exists in the loaded JSON map.

    Args:
        json_data: Loaded JSON data (reads the "sections" / "tables" lists).
        ref_type: "section" | "table"; any other value reports "not found".
        ref_id: Section/table ID (e.g. "4.1.3.2" or "Table-4.1.5.3").
            Table IDs are normalized to carry the "Table-" prefix.

    Returns:
        {
            "exists": True/False,
            "data": {...} or None,
            "similar_ids": ["4.1.3.1", "4.1.3.3"],   # only filled when not found
            "matched_id": "B-4.1.3.2"                 # only present when found
        }
    """
    result = {
        "exists": False,
        "data": None,
        "similar_ids": []
    }

    if ref_type == "section":
        entries = json_data.get("sections", [])
        lookup_id = ref_id
        # Exact ID first, then each division-prefixed variant.
        candidates = [ref_id] + [f"{prefix}{ref_id}" for prefix in _DIVISION_PREFIXES]
    elif ref_type == "table":
        entries = json_data.get("tables", [])
        lookup_id = ref_id if ref_id.startswith("Table-") else f"Table-{ref_id}"
        candidates = [lookup_id]
    else:
        return result

    for candidate in candidates:
        found = _find_by_id(entries, candidate)
        if found is not None:
            result["exists"] = True
            result["data"] = found
            result["matched_id"] = candidate  # the ID that actually matched
            return result

    result["similar_ids"] = find_similar_ids(
        [entry.get("id", "") for entry in entries if isinstance(entry, dict)],
        lookup_id
    )
    return result


def verify_page_match(json_data: dict, ref_type: str, ref_id: str, claimed_page: int) -> dict:
    """
    Check whether the claimed page number matches the page stored in the JSON.

    Returns:
        {
            "match": True/False,
            "json_page": 450,        # None when the reference does not exist
            "claimed_page": 452,
            "difference": 2          # None when either page is not numeric
        }
    """
    result = {
        "match": False,
        "json_page": None,
        "claimed_page": claimed_page,
        "difference": None
    }

    # The reference has to exist before its page can be compared.
    ref_result = verify_reference_exists(json_data, ref_type, ref_id)
    if not ref_result["exists"]:
        return result

    json_page = ref_result["data"].get("page")
    result["json_page"] = json_page
    result["match"] = (json_page == claimed_page)

    # Only subtract real numbers: a missing or non-numeric page must not raise,
    # and page 0 is still a number (a plain truthiness test would drop it).
    if isinstance(json_page, (int, float)) and isinstance(claimed_page, (int, float)):
        result["difference"] = abs(json_page - claimed_page)

    return result


def find_similar_ids(all_ids: list, target_id: str, limit: int = 5) -> list:
    """Return up to *limit* IDs from *all_ids* that resemble *target_id* (difflib, cutoff 0.6)."""
    # difflib cannot compare against None or other non-string IDs.
    candidates = [item for item in all_ids if isinstance(item, str)]
    return difflib.get_close_matches(target_id, candidates, n=limit, cutoff=0.6)


# ========== 3. PDF text verification ==========

def _quote_similarity(claimed_quote: str, actual_text: str) -> float:
    """
    Score how well *claimed_quote* is supported by *actual_text* (0-1).

    A quote is normally a short excerpt of a much longer section, so a plain
    whole-text ratio is dominated by the length difference. This scores the
    quote against the best-aligned window of the same length as well, and
    never returns less than calculate_similarity() does.
    """
    quote = ' '.join(claimed_quote.lower().split())
    text = ' '.join(actual_text.lower().split())
    if not quote or not text:
        return 0.0
    if quote in text:
        return 1.0  # verbatim excerpt

    whole_ratio = calculate_similarity(claimed_quote, actual_text)
    if len(text) <= len(quote):
        return whole_ratio

    # Align a quote-sized window on the longest common run. autojunk is
    # disabled because its popularity heuristic discards most letters once
    # the second sequence reaches 200 characters.
    matcher = difflib.SequenceMatcher(None, quote, text, autojunk=False)
    block = matcher.find_longest_match(0, len(quote), 0, len(text))
    if block.size == 0:
        return whole_ratio

    start = max(0, min(block.b - block.a, len(text) - len(quote)))
    window = text[start:start + len(quote)]
    window_ratio = difflib.SequenceMatcher(None, quote, window, autojunk=False).ratio()
    return max(whole_ratio, window_ratio)


def verify_quote(pdf_path: str, page: int, section_id: str, claimed_quote: str) -> dict:
    """
    Extract the real section text from the PDF and compare it with the quote.

    Returns:
        {
            "match": True/False,           # True when match_ratio >= 0.7
            "match_ratio": 0.85,
            "actual_text": "extracted text (first 500 chars)",
            "claimed_quote": "text quoted by GPT",
            "extraction_success": True/False
        }
        An empty/None quote is skipped and reported as a match.
    """
    result = {
        "match": False,
        "match_ratio": 0.0,
        "actual_text": None,
        "claimed_quote": claimed_quote,
        "extraction_success": False
    }

    if not claimed_quote:
        result["match"] = True  # nothing to compare
        return result

    try:
        actual_text = extract_section(pdf_path, page, section_id)
        result["actual_text"] = actual_text[:500] if actual_text else None  # cap at 500 chars
        # NOTE(review): presumably extract_section signals failure with text
        # containing "Error"; verify against extractor.py.
        result["extraction_success"] = bool(actual_text and "Error" not in actual_text)

        if result["extraction_success"]:
            result["match_ratio"] = _quote_similarity(claimed_quote, actual_text)
            result["match"] = result["match_ratio"] >= 0.7  # 70% or more counts as a match
    except Exception as e:
        # Best effort: an extraction failure is reported, never raised.
        result["actual_text"] = f"Error: {e}"

    return result


def calculate_similarity(text1: str, text2: str) -> float:
    """Return the similarity of two texts (0-1) after lowercasing and collapsing whitespace."""
    if not text1 or not text2:
        return 0.0

    normalized_1 = ' '.join(text1.lower().split())
    normalized_2 = ' '.join(text2.lower().split())

    return difflib.SequenceMatcher(None, normalized_1, normalized_2).ratio()


# ========== 4. Combined verification ==========

def _load_code_json(code: str, maps_dir: str) -> dict:
    """
    Load the JSON map for *code*.

    Returns {"data", "error", "recommendation"}; "error" is None on success.
    """
    json_path = get_code_json_path(code, maps_dir)
    if not json_path:
        return {
            "data": None,
            "error": f"JSON not found for {code}",
            "recommendation": f"JSON file not found for code: {code}",
        }

    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:  # ValueError covers JSON and decoding errors
        return {
            "data": None,
            "error": str(e),
            "recommendation": f"Failed to load JSON for code {code}: {e}",
        }

    if not isinstance(data, dict):
        message = f"Top-level JSON value for {code} is not an object"
        return {"data": None, "error": message, "recommendation": message}

    return {"data": data, "error": None, "recommendation": None}


def verify_gpt_response(gpt_text: str, maps_dir: str, sources_dir: str = None) -> dict:
    """
    Verify every reference in a GPT response (main entry point).

    Args:
        gpt_text: GPT response text.
        maps_dir: Path of the maps/ folder holding the JSON maps.
        sources_dir: Path of the sources/ folder holding the PDFs;
            when None the quote comparison is skipped.

    Returns:
        {
            "summary": {
                "total_references": 5,
                "passed": 3,
                "issues": 2,
                "pass_rate": 0.6
            },
            "details": [...],          # one entry per reference, status PASS/PARTIAL/FAIL
            "recommendations": [...]
        }
    """
    result = {
        "summary": {
            "total_references": 0,
            "passed": 0,
            "issues": 0,
            "pass_rate": 0.0
        },
        "details": [],
        "recommendations": []
    }

    # 1. Parse the GPT response.
    references = parse_gpt_response(gpt_text)
    result["summary"]["total_references"] = len(references)

    if not references:
        result["recommendations"].append("No section/table references found in GPT response")
        return result

    # Each code's JSON map is read at most once per call.
    json_cache = {}

    # 2. Verify each reference.
    for ref in references:
        detail = {
            "type": ref["type"],
            "code": ref["code"],
            "id": ref["id"],
            "title": ref["title"],
            "checks": {},
            "status": "PASS"
        }

        code = ref["code"]
        if code not in json_cache:
            json_cache[code] = _load_code_json(code, maps_dir)
        loaded = json_cache[code]

        if loaded["error"] is not None:
            detail["checks"]["json_load"] = {"pass": False, "error": loaded["error"]}
            detail["status"] = "FAIL"
            result["details"].append(detail)
            result["summary"]["issues"] += 1
            result["recommendations"].append(loaded["recommendation"])
            continue

        json_data = loaded["data"]
        has_issue = False

        # 2a. Existence check.
        exists_result = verify_reference_exists(json_data, ref["type"], ref["id"])
        detail["checks"]["exists"] = {
            "pass": exists_result["exists"],
            "similar_ids": exists_result["similar_ids"]
        }

        if not exists_result["exists"]:
            has_issue = True
            if exists_result["similar_ids"]:
                result["recommendations"].append(
                    f"{ref['type'].title()} {ref['id']} not found. Did you mean: {', '.join(exists_result['similar_ids'][:3])}?"
                )
            else:
                result["recommendations"].append(f"{ref['type'].title()} {ref['id']} not found in {ref['code']}")

        # 2b. Page check.
        page_result = verify_page_match(json_data, ref["type"], ref["id"], ref["page_claimed"])
        detail["checks"]["page"] = {
            "pass": page_result["match"],
            "json_page": page_result["json_page"],
            "claimed_page": page_result["claimed_page"]
        }

        # A page mismatch is only reported when the JSON actually holds a page.
        if not page_result["match"] and page_result["json_page"]:
            has_issue = True
            result["recommendations"].append(
                f"{ref['type'].title()} {ref['id']}: Page should be {page_result['json_page']}, not {page_result['claimed_page']}"
            )

        # 2c. Quote comparison (sections only, and only when a PDF is available).
        if ref["type"] == "section" and sources_dir and ref.get("quote"):
            pdf_path = get_pdf_path(ref["code"], json_data, sources_dir)
            if pdf_path:
                quote_result = verify_quote(
                    pdf_path,
                    page_result["json_page"] or ref["page_claimed"],
                    ref["id"],
                    ref["quote"]
                )
                detail["checks"]["quote"] = {
                    "pass": quote_result["match"],
                    "similarity": round(quote_result["match_ratio"], 2),
                    "extraction_success": quote_result["extraction_success"]
                }

                # A failed extraction is not held against the quote.
                if not quote_result["match"] and quote_result["extraction_success"]:
                    has_issue = True
                    result["recommendations"].append(
                        f"Quote mismatch for {ref['id']} (similarity: {quote_result['match_ratio']:.0%})"
                    )

        # Status: FAIL when every check failed, PARTIAL when only some did.
        if has_issue:
            all_failed = all(not check.get("pass", True) for check in detail["checks"].values())
            detail["status"] = "FAIL" if all_failed else "PARTIAL"
            result["summary"]["issues"] += 1
        else:
            result["summary"]["passed"] += 1

        result["details"].append(detail)

    # Pass rate.
    total = result["summary"]["total_references"]
    if total > 0:
        result["summary"]["pass_rate"] = result["summary"]["passed"] / total

    return result
# ========== 5. JSON integrity validation ==========

def _check_entries(entries: list, label: str, required_field: str, sample_size: int) -> list:
    """Collect structure errors for the first *sample_size* entries of a list."""
    errors = []
    for index, entry in enumerate(entries[:sample_size]):
        if not isinstance(entry, dict):
            errors.append(f"{label} {index}: not an object")
            continue
        if not entry.get("id"):
            errors.append(f"{label} {index}: missing 'id'")
        if not entry.get(required_field):
            errors.append(f"{label} {index} ({entry.get('id', '?')}): missing '{required_field}'")
    return errors


def validate_json_structure(json_path: str) -> dict:
    """
    Validate the structure of a JSON map file.

    Only a sample is inspected: the first 100 sections (need "id" and "page")
    and the first 50 tables (need "id" and "markdown"). Structural problems
    are reported in the result; this function does not raise for them.

    Returns:
        {
            "valid": True/False,
            "checks": {
                "required_keys": {"pass": True, "missing": []},
                "sections_structure": {"pass": True, "errors": []},   # at most 10 errors
                "tables_structure": {"pass": True, "errors": []}      # at most 10 errors
            },
            "stats": {"sections": 3000, "tables": 230}
        }
        When the file cannot be loaded, or its top-level value is not an
        object, "checks" holds a single failed "load" entry instead.
    """
    result = {
        "valid": True,
        "checks": {},
        "stats": {}
    }

    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError, TypeError) as e:
        result["valid"] = False
        result["checks"]["load"] = {"pass": False, "error": str(e)}
        return result

    # A list/string/number at the top level is valid JSON but not a code map.
    if not isinstance(data, dict):
        result["valid"] = False
        result["checks"]["load"] = {
            "pass": False,
            "error": f"Top-level JSON value must be an object, got {type(data).__name__}"
        }
        return result

    # Required keys.
    required_keys = ["code", "version", "sections"]
    missing = [key for key in required_keys if key not in data]
    result["checks"]["required_keys"] = {
        "pass": not missing,
        "missing": missing
    }
    if missing:
        result["valid"] = False

    # Sections structure (sample of the first 100).
    sections = data.get("sections", [])
    if isinstance(sections, list):
        section_errors = _check_entries(sections, "Section", "page", 100)
        result["stats"]["sections"] = len(sections)
    else:
        section_errors = ["'sections' must be a list"]
        result["stats"]["sections"] = 0
    result["checks"]["sections_structure"] = {
        "pass": not section_errors,
        "errors": section_errors[:10]  # report at most 10
    }

    # Tables structure (sample of the first 50).
    tables = data.get("tables", [])
    if isinstance(tables, list):
        table_errors = _check_entries(tables, "Table", "markdown", 50)
        result["stats"]["tables"] = len(tables)
    else:
        table_errors = ["'tables' must be a list"]
        result["stats"]["tables"] = 0
    result["checks"]["tables_structure"] = {
        "pass": not table_errors,
        "errors": table_errors[:10]
    }

    if section_errors or table_errors:
        result["valid"] = False

    return result


def test_extractor_functions(json_path: str, pdf_path: str = None) -> dict:
    """
    Smoke-test the extractor.py functions against a JSON map (and optional PDF).

    Args:
        json_path: Path of the JSON map to load.
        pdf_path: Optional PDF path; extract_section is only exercised when
            the file exists.

    Returns:
        {
            "all_pass": True/False,     # False when any recorded test failed
            "tests": {
                "search_json": {"pass": True, "result": "5 results found"},
                "extract_section": {"pass": True, "result": "extracted 500 chars"},
                "extract_table": {"pass": True, "result": "markdown found"}
            }
        }
    """
    result = {
        "all_pass": True,
        "tests": {}
    }

    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError, TypeError) as e:
        result["all_pass"] = False
        result["tests"]["load"] = {"pass": False, "error": str(e)}
        return result

    tests = result["tests"]

    # search_json
    try:
        hits = search_json(data, "guard height")
        tests["search_json"] = {
            "pass": len(hits) > 0,
            "result": f"{len(hits)} results found"
        }
    except Exception as e:  # the extractor is the code under test; record any failure
        tests["search_json"] = {"pass": False, "error": str(e)}

    # extract_table (only when the map has tables)
    tables = data.get("tables", [])
    if tables:
        try:
            table = extract_table(data, tables[0].get("id", ""))
            has_markdown = bool(table.get("markdown"))
            tests["extract_table"] = {
                "pass": has_markdown,
                "result": f"markdown {'found' if has_markdown else 'missing'}"
            }
        except Exception as e:
            tests["extract_table"] = {"pass": False, "error": str(e)}

    # extract_section (only when the PDF is available)
    if pdf_path and Path(pdf_path).exists():
        sections = data.get("sections", [])
        if sections:
            first_section = sections[0]
            try:
                text = extract_section(pdf_path, first_section.get("page", 1), first_section.get("id", ""))
                has_text = bool(text and "Error" not in text)
                tests["extract_section"] = {
                    "pass": has_text,
                    "result": f"extracted {len(text) if text else 0} chars"
                }
            except Exception as e:
                tests["extract_section"] = {"pass": False, "error": str(e)}

    # A test that ran but reported pass=False must also clear all_pass,
    # not only a test that raised.
    result["all_pass"] = all(outcome.get("pass", False) for outcome in tests.values())

    return result


# The "test_" prefix would make pytest collect this helper as a test when it is
# imported into a test module; it is a diagnostic function, not a test case.
test_extractor_functions.__test__ = False
# ========== 6. Report generation ==========

def _or_unknown(value):
    """Return *value*, or "?" when it is None (so reports never print "None")."""
    return "?" if value is None else value


def generate_report(verification_result: dict, format: str = "markdown") -> str:
    """
    Render a verification result as a report.

    Args:
        verification_result: Dict with "summary", "details" and
            "recommendations" keys; missing keys are treated as empty.
        format: "json" returns the result as indented JSON; any other value
            produces the Markdown report.

    Returns:
        The report text.
    """
    if format == "json":
        return json.dumps(verification_result, indent=2, ensure_ascii=False)

    summary = verification_result.get("summary", {})
    details = verification_result.get("details", [])
    recommendations = verification_result.get("recommendations", [])

    lines = [
        "# GPT 응답 검증 리포트\n",
        "## 요약\n",
        "| 항목 | 결과 |",
        "|------|------|",
        f"| 총 참조 수 | {summary.get('total_references', 0)} |",
        f"| 검증 통과 | {summary.get('passed', 0)} |",
        f"| 문제 발견 | {summary.get('issues', 0)} |",
        f"| 통과율 | {summary.get('pass_rate', 0):.0%} |",
        "\n---\n",
        "## 상세 결과\n"
    ]

    status_emojis = {"PASS": "\u2705", "PARTIAL": "\u26a0\ufe0f", "FAIL": "\u274c"}

    for number, detail in enumerate(details, 1):
        status_emoji = status_emojis.get(detail.get("status"), "\u2753")

        lines.append(f"### {number}. {detail.get('code', '')} {detail.get('type', '').title()} {detail.get('id', '')}")
        lines.append(f"**{detail.get('title', '')}**\n")
        lines.append("| 검증 항목 | 결과 | 상세 |")
        lines.append("|----------|------|------|")

        checks = detail.get("checks", {})

        # Existence check.
        if "exists" in checks:
            check = checks["exists"]
            emoji = "\u2705" if check.get("pass") else "\u274c"
            extra = ""
            if not check.get("pass") and check.get("similar_ids"):
                extra = f" (유사: {', '.join(check['similar_ids'][:2])})"
            lines.append(f"| 존재 확인 | {emoji} | {'JSON 확인' if check.get('pass') else '찾을 수 없음'}{extra} |")

        # Page check. The keys are usually present but may hold None (the
        # reference was not found), so a dict.get default is not enough.
        if "page" in checks:
            check = checks["page"]
            emoji = "\u2705" if check.get("pass") else "\u26a0\ufe0f"
            json_page = _or_unknown(check.get("json_page"))
            if check.get("pass"):
                lines.append(f"| 페이지 | {emoji} | {json_page} |")
            else:
                claimed_page = _or_unknown(check.get("claimed_page"))
                lines.append(f"| 페이지 | {emoji} | JSON: {json_page}, GPT: {claimed_page} |")

        # Quote comparison.
        if "quote" in checks:
            check = checks["quote"]
            emoji = "\u2705" if check.get("pass") else "\u26a0\ufe0f"
            similarity = check.get("similarity") or 0  # None would break the % format
            lines.append(f"| 텍스트 비교 | {emoji} | 유사도: {similarity:.0%} |")

        lines.append(f"\n**상태:** {status_emoji} {detail.get('status', 'UNKNOWN')}\n")

    # Recommended actions.
    if recommendations:
        lines.append("---\n")
        lines.append("## 권장 조치\n")
        for number, recommendation in enumerate(recommendations, 1):
            lines.append(f"{number}. {recommendation}")

    lines.append("\n---\n")
    lines.append("*Generated by gpt-verify skill*")

    return '\n'.join(lines)


# ========== 7. Utilities ==========

# Code -> JSON file mapping
CODE_JSON_MAP = {
    "NBC": "NBC2025.json",
    "NFC": "NFC2025.json",
    "NPC": "NPC2025.json",
    "NECB": "NECB2025.json",
    "OBC": "OBC_Vol1.json",  # defaults to Vol1
    "OFC": "OFC.json",
    "BCBC": "BCBC2024.json",
    "ABC": "ABC2023.json",
    "QCC": "QCC2020.json",
    "QECB": "QECB2020.json",
    "QPC": "QPC2020.json",
    "QSC": "QSC2020.json",
}

# Code -> PDF file mapping
CODE_PDF_MAP = {
    "NBC": "NBC2025p1.pdf",
    "NFC": "NFC2025p1.pdf",
    "NPC": "NPC2025p1.pdf",
    "NECB": "NECB2025p1.pdf",
    "OBC": "obc volume 1.pdf",
    "OFC": "OFC_2024.pdf",
    "BCBC": "bcbc_2024_web_version_20240409.pdf",
    "ABC": "2023NBCAE-V1_National_Building_Code2023_Alberta_Edition.pdf",
}


def get_code_json_path(code_name: str, maps_dir: str) -> str:
    """
    Return the JSON map path for a code name (case-insensitive).

    Returns None when the code is not in CODE_JSON_MAP or the file does not
    exist under *maps_dir*.
    """
    json_file = CODE_JSON_MAP.get(code_name.upper())
    if not json_file:
        return None

    path = Path(maps_dir) / json_file
    return str(path) if path.exists() else None


def get_pdf_path(code_name: str, json_data: dict, sources_dir: str) -> str:
    """
    Return the source PDF path for a code name.

    The filename declared in json_data["source_pdf"]["filename"] is tried
    first, then the CODE_PDF_MAP entry. Returns None when neither file exists
    under *sources_dir*.
    """
    declared = json_data.get("source_pdf")
    if isinstance(declared, dict):
        declared_name = declared.get("filename")
    elif isinstance(declared, str):
        # Defensive: tolerate a bare filename instead of {"filename": ...}.
        declared_name = declared
    else:
        declared_name = None  # key missing or null

    for file_name in (declared_name, CODE_PDF_MAP.get(code_name.upper())):
        if not file_name:
            continue
        path = Path(sources_dir) / file_name
        if path.exists():
            return str(path)

    return None


# ========== CLI entry point ==========

def _main() -> None:
    """Run a small parse demo, then validate the JSON file given as argv[1] (if any)."""
    import sys

    # Simple smoke test.
    test_text = """
**NBC Section 4.1.3.2** - Strength and Stability (p.452)
> "A building and its structural components shall be designed to have sufficient structural capacity."

**NBC Table 4.1.5.3** - Live Loads on Floors (p.460)
"""

    print("=== Parse Test ===")
    refs = parse_gpt_response(test_text)
    for r in refs:
        print(f" {r['type']}: {r['code']} {r['id']} (p.{r['page_claimed']})")

    print("\n=== JSON Validation Test ===")
    if len(sys.argv) > 1:
        json_path = sys.argv[1]
        result = validate_json_structure(json_path)
        print(f" Valid: {result['valid']}")
        print(f" Sections: {result['stats'].get('sections', 0)}")
        print(f" Tables: {result['stats'].get('tables', 0)}")


if __name__ == "__main__":
    _main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DavidCho1999/Canada-AEC-Code-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

verifier.py•23 KiB