#!/usr/bin/env python3
"""
Verify numbers cited in paper/numbers_registry.md
V&V artifact: every number traced to source with reproducible computation.
SRS: Supports Section 8.9 V&V Registry
Usage: python -m src.eval.verify_registry_counts
"""
import json
import yaml
import sqlite3
import os
from pathlib import Path
from datetime import datetime
def main():
base = Path(__file__).parent.parent.parent # repo root
results = {}
# === SD-001: Query count ===
with open(base / 'src/eval/battery/queries.yaml') as f:
battery = yaml.safe_load(f)
queries = battery if isinstance(battery, list) else battery.get('queries', battery)
if isinstance(queries, dict):
query_count = len(queries)
categories = {}
for qid, q in queries.items():
cat = q.get('category', 'unknown')
categories[cat] = categories.get(cat, 0) + 1
else:
query_count = len(queries)
categories = {}
for q in queries:
cat = q.get('category', 'unknown')
categories[cat] = categories.get(cat, 0) + 1
results['SD-001'] = {'value': query_count, 'source': 'src/eval/battery/queries.yaml'}
# === SD-009: Normal/edge split ===
normal_count = categories.get('normal', 0)
edge_count = query_count - normal_count
results['SD-009'] = {
'normal': normal_count,
'edge': edge_count,
'normal_pct': f'{normal_count/query_count*100:.1f}%',
'edge_pct': f'{edge_count/query_count*100:.1f}%',
'categories': categories,
'source': 'src/eval/battery/queries.yaml'
}
# === SD-006: Stage 2 record count ===
stage2_dir = base / 'results/v2_redo/stage2'
stage2_total = 0
stage2_parse_fails = 0
stage2_per_file = {}
for jsonl_file in sorted(stage2_dir.glob('*.jsonl')):
with open(jsonl_file) as f:
records = [json.loads(line) for line in f if line.strip()]
count = len(records)
fails = sum(1 for r in records if r.get('preference') == 'parse_failed')
stage2_total += count
stage2_parse_fails += fails
stage2_per_file[jsonl_file.name] = {'records': count, 'parse_failures': fails}
results['SD-006'] = {
'value': stage2_total,
'parse_failures': stage2_parse_fails,
'per_file': stage2_per_file,
'source': 'results/v2_redo/stage2/*.jsonl'
}
# === SD-007: Records per comparison ===
results['SD-007'] = {
'value': stage2_total // 3 if stage2_total % 3 == 0 else 'UNEVEN',
'expected': query_count * 6, # 39 queries × 6 passes
'source': 'derived from SD-006'
}
# === PL-001: Compiled pragmatic items (ACS pack) ===
acs_db = base / 'packs/acs.db'
conn = sqlite3.connect(str(acs_db))
cur = conn.cursor()
cur.execute('SELECT COUNT(*) FROM context')
acs_context_count = cur.fetchone()[0]
cur.execute('SELECT COUNT(*) FROM threads')
acs_thread_count = cur.fetchone()[0]
cur.execute('SELECT pack_id, version FROM packs')
pack_info = cur.fetchone()
conn.close()
results['PL-001'] = {
'context_items': acs_context_count,
'threads': acs_thread_count,
'pack_version': pack_info[1] if pack_info else 'unknown',
'source': 'packs/acs.db (context table)'
}
# Also check census and general packs
pack_totals = {'acs': acs_context_count}
for pack_name, pack_file in [('census', 'census.db'), ('general', 'general_statistics.db')]:
db_path = base / f'packs/{pack_file}'
if db_path.exists():
conn = sqlite3.connect(str(db_path))
cur = conn.cursor()
cur.execute('SELECT COUNT(*) FROM context')
pack_totals[pack_name] = cur.fetchone()[0]
conn.close()
results['PL-001_inheritance'] = {
'per_pack': pack_totals,
'total_with_inheritance': sum(pack_totals.values()),
'note': 'ACS inherits from census inherits from general (FR-PC-005)'
}
# === PL-002: Staged pragmatic items ===
staging_dir = base / 'staging/acs'
staged_total = 0
staged_per_file = {}
for json_file in sorted(staging_dir.glob('*.json')):
if json_file.name in ('manifest.json', '.gitkeep'):
continue
with open(json_file) as f:
data = json.load(f)
if isinstance(data, list):
count = len(data)
elif isinstance(data, dict) and 'contexts' in data:
count = len(data['contexts'])
else:
count = 1 # single item
staged_total += count
staged_per_file[json_file.name] = count
results['PL-002'] = {
'value': staged_total,
'per_file': staged_per_file,
'source': 'staging/acs/*.json (excluding manifest.json)'
}
# === PL-004: Grounding compliance ===
# Check Stage 1 pragmatics responses for methodology_guidance calls
prag_file = base / 'results/v2_redo/stage1/pragmatics_responses_20260216_074817.jsonl'
if prag_file.exists():
with open(prag_file) as f:
prag_records = [json.loads(line) for line in f if line.strip()]
grounded = 0
for rec in prag_records:
tool_calls = rec.get('tool_calls', [])
has_methodology = any(
tc.get('tool_name', tc.get('name', '')) == 'get_methodology_guidance'
for tc in tool_calls
)
if has_methodology:
grounded += 1
results['PL-004'] = {
'grounded': grounded,
'total': len(prag_records),
'compliance': f'{grounded}/{len(prag_records)}',
'pct': f'{grounded/len(prag_records)*100:.1f}%' if prag_records else 'N/A',
'source': 'results/v2_redo/stage1/pragmatics_responses_20260216_074817.jsonl'
}
else:
results['PL-004'] = {'error': f'File not found: {prag_file}'}
# Also check control and RAG for grounding compliance
for cond, fname in [('control', 'control_responses_20260216_055354.jsonl'),
('rag', 'rag_responses_20260216_055354.jsonl')]:
cond_file = base / f'results/v2_redo/stage1/{fname}'
if cond_file.exists():
with open(cond_file) as f:
cond_records = [json.loads(line) for line in f if line.strip()]
grounded = sum(1 for rec in cond_records
if any(tc.get('tool_name', tc.get('name', '')) == 'get_methodology_guidance'
for tc in rec.get('tool_calls', [])))
results[f'PL-004_{cond}'] = {
'grounded': grounded,
'total': len(cond_records),
'compliance': f'{grounded}/{len(cond_records)}'
}
# === GAP-008: Bootstrap CI parameters ===
config_file = base / 'src/eval/judge_config.yaml'
with open(config_file) as f:
config = yaml.safe_load(f)
analysis_config = config.get('analysis', {})
results['GAP-008'] = {
'bootstrap_iterations': analysis_config.get('bootstrap_iterations', 'NOT FOUND'),
'bootstrap_seed': analysis_config.get('bootstrap_seed', 'NOT FOUND'),
'source': 'src/eval/judge_config.yaml (analysis section)'
}
# === GAP-009: RAG index parameters ===
rag_index_dir = base / 'results/rag_ablation/index'
if rag_index_dir.exists():
rag_files = list(rag_index_dir.iterdir())
results['GAP-009'] = {
'index_files': [f.name for f in rag_files],
'source': 'results/rag_ablation/index/'
}
# Try to read metadata if exists
for f in rag_files:
if f.suffix == '.json' and 'meta' in f.name.lower():
with open(f) as fh:
results['GAP-009']['metadata'] = json.load(fh)
else:
results['GAP-009'] = {'error': 'results/rag_ablation/index/ not found (V1 legacy path)'}
# === OUTPUT REPORT ===
timestamp = datetime.now().isoformat()
sd001_status = 'PASS' if results['SD-001']['value'] == 39 else 'FAIL'
sd006_status = 'PASS' if results['SD-006']['value'] == 2106 else f"DISCREPANCY (got {results['SD-006']['value']})"
sd007_val = results['SD-007']['value']
sd007_status = 'PASS' if sd007_val == 702 else f"DISCREPANCY (got {sd007_val})"
sd009_normal_frac = results['SD-009']['normal'] / query_count
sd009_status = 'PASS' if abs(sd009_normal_frac - 0.41) < 0.02 else 'DISCREPANCY'
report = f"""# Numbers Registry Verification Report
**Generated:** {timestamp}
**Script:** src/eval/verify_registry_counts.py
**Reproduce:** `python -m src.eval.verify_registry_counts`
## Study Design Parameters
| ID | Claimed | Verified | Source | Status |
|----|---------|----------|--------|--------|
| SD-001 | 39 queries | {results['SD-001']['value']} | {results['SD-001']['source']} | {sd001_status} |
| SD-006 | 2,106 records | {results['SD-006']['value']} | {results['SD-006']['source']} | {sd006_status} |
| SD-007 | 702/comparison | {sd007_val} (expected {results['SD-007']['expected']}) | {results['SD-007']['source']} | {sd007_status} |
| SD-009 | 41%/59% | {results['SD-009']['normal_pct']}/{results['SD-009']['edge_pct']} | {results['SD-009']['source']} | {sd009_status} |
### SD-009 Category Breakdown
"""
for cat, count in sorted(results['SD-009']['categories'].items()):
report += f"- {cat}: {count}\n"
report += f"""
### SD-006 Per-File Breakdown
"""
for fname, info in results['SD-006']['per_file'].items():
report += f"- {fname}: {info['records']} records ({info['parse_failures']} parse failures)\n"
pl001_status = 'PASS' if results['PL-001']['context_items'] == 36 else f"NOTE: expected 36, got {results['PL-001']['context_items']}"
report += f"""
## Pragmatics Layer
| ID | Claimed | Verified | Source | Status |
|----|---------|----------|--------|--------|
| PL-001 | 36 items | {results['PL-001']['context_items']} context, {results['PL-001']['threads']} threads | {results['PL-001']['source']} | {pl001_status} |
| PL-002 | 47 staged | {results['PL-002']['value']} | {results['PL-002']['source']} | {'PASS' if results['PL-002']['value'] == 47 else f"DISCREPANCY (got {results['PL-002']['value']})"} |
"""
if 'PL-004' in results and 'error' not in results['PL-004']:
pl004_status = 'PASS' if results['PL-004']['grounded'] == results['PL-004']['total'] else 'FAIL'
report += f"| PL-004 | 39/39 (100%) | {results['PL-004']['compliance']} ({results['PL-004']['pct']}) | {results['PL-004']['source']} | {pl004_status} |\n"
report += f"""
### PL-001 Pack Inheritance
"""
for pack, count in results['PL-001_inheritance']['per_pack'].items():
report += f"- {pack}: {count} items\n"
report += f"- Total with inheritance: {results['PL-001_inheritance']['total_with_inheritance']}\n"
report += f"""
### PL-002 Staged Items Per File
"""
for fname, count in results['PL-002']['per_file'].items():
report += f"- {fname}: {count}\n"
if 'PL-004' in results and 'error' not in results['PL-004']:
report += f"""
### PL-004 Grounding Compliance Per Condition
- Pragmatics: {results['PL-004']['compliance']}
"""
for cond in ['control', 'rag']:
key = f'PL-004_{cond}'
if key in results:
report += f"- {cond.title()}: {results[key]['compliance']}\n"
report += f"""
## Config Parameters
| Parameter | Value | Source |
|-----------|-------|--------|
| Bootstrap iterations | {results['GAP-008']['bootstrap_iterations']} | {results['GAP-008']['source']} |
| Bootstrap seed | {results['GAP-008']['bootstrap_seed']} | {results['GAP-008']['source']} |
"""
if 'error' in results.get('GAP-009', {}):
report += f"\n## GAP-009: RAG Index\n\n{results['GAP-009']['error']}\n"
elif 'GAP-009' in results:
report += f"\n## GAP-009: RAG Index Files\n\n"
for f in results['GAP-009']['index_files']:
report += f"- {f}\n"
if 'metadata' in results['GAP-009']:
report += f"\nMetadata: {json.dumps(results['GAP-009']['metadata'], indent=2)}\n"
# Write report
output_dir = base / 'paper'
output_dir.mkdir(exist_ok=True)
report_path = output_dir / 'registry_verification_report.md'
with open(report_path, 'w') as f:
f.write(report)
# Write JSON for programmatic consumption
json_path = output_dir / 'registry_verification.json'
with open(json_path, 'w') as f:
json.dump({'timestamp': timestamp, 'results': results}, f, indent=2, default=str)
print(report)
print(f"\nReport written to: {report_path}")
print(f"JSON written to: {json_path}")
# Script entry point (e.g. `python -m src.eval.verify_registry_counts`).
if __name__ == '__main__':
    main()