Skip to main content
Glama
brockwebb

Open Census MCP Server

by brockwebb
geoadvisor.py12 kB
# mcp/geoadvisor.py from pathlib import Path import json from typing import Dict, List, Optional, Union class GeoAdvisor: """ Three-layer geographic suitability engine for Census data. Layer 1: Table availability (deterministic - can we?) Layer 2: Semantic heterogeneity hints (should we? - based on spatial variance) Layer 3: Statistical validation (MOE checks - is it reliable?) """ def __init__(self, table_geo_file: str = "data/table_geos.json", geo_weight_file: Optional[str] = None, moe_resolver=None): """ Initialize GeoAdvisor with data files. Args: table_geo_file: Path to table→geography mapping JSON geo_weight_file: Path to variable→geography weight mapping JSON (optional) moe_resolver: Function for real-time MOE checking (optional) """ self.table_geo_file = table_geo_file self.geo_weight_file = geo_weight_file self.moe_resolver = moe_resolver # Load table availability data (Layer 1) try: with open(table_geo_file, 'r') as f: self.table_geo = json.load(f) print(f"✅ Loaded {len(self.table_geo)} table geography mappings") except FileNotFoundError: raise FileNotFoundError(f"Table geography file not found: {table_geo_file}") except json.JSONDecodeError: raise ValueError(f"Invalid JSON in table geography file: {table_geo_file}") # Load semantic geography weights (Layer 2) - optional self.geo_weights = {} if geo_weight_file: try: with open(geo_weight_file, 'r') as f: self.geo_weights = json.load(f) print(f"✅ Loaded {len(self.geo_weights)} variable geography weights") except FileNotFoundError: print(f"⚠️ Geography weight file not found: {geo_weight_file}") except json.JSONDecodeError: print(f"⚠️ Invalid JSON in geography weight file: {geo_weight_file}") # Geographic hierarchy for suggestions (ordered by specificity) self.geo_hierarchy = [ 'us', 'state', 'county', 'place', 'tract', 'block_group' ] # Geography display names self.geo_display_names = { 'us': 'National', 'state': 'State', 'county': 'County', 'place': 'Place', 'tract': 'Census Tract', 'block_group': 'Block Group', 'msa': 'Metropolitan Statistical Area', 'cd': 'Congressional District', 'zcta': 'ZIP Code Tabulation Area', 'puma': 'Public Use Microdata Area' } def extract_table_id(self, variable_id: str) -> str: """Extract table ID from variable ID (e.g., B01001_001E → B01001).""" if '_' in variable_id: return variable_id.split('_')[0] return variable_id[:6] if len(variable_id) >= 6 else variable_id def is_available(self, variable_id: str, geo_level: str) -> bool: """ Layer 1: Check if variable is available at geographic level. Args: variable_id: Census variable ID (e.g., B01001_001E) geo_level: Geographic level (e.g., 'tract', 'county') Returns: True if variable is published at this geographic level """ table_id = self.extract_table_id(variable_id) available_levels = self.table_geo.get(table_id, []) return geo_level in available_levels def get_available_levels(self, variable_id: str) -> List[str]: """Get all geographic levels where variable is available.""" table_id = self.extract_table_id(variable_id) return self.table_geo.get(table_id, []) def suggest_coarser_levels(self, variable_id: str, current_level: str) -> List[str]: """ Suggest coarser geographic levels that are available. Args: variable_id: Census variable ID current_level: Current geographic level that failed Returns: List of coarser levels available for this variable """ available = self.get_available_levels(variable_id) # Find coarser levels (earlier in hierarchy) try: current_idx = self.geo_hierarchy.index(current_level) coarser_available = [ level for level in self.geo_hierarchy[:current_idx] if level in available ] return coarser_available except ValueError: # Current level not in standard hierarchy, return all available return available def get_geographic_hint(self, variable_id: str) -> Optional[Dict]: """ Layer 2: Get semantic geographic hint for variable. Returns semantic analysis of how spatially variable this phenomenon is. High weights suggest more local variation, low weights suggest more uniformity. """ if not self.geo_weights: return None weight = self.geo_weights.get(variable_id) if weight is None: return None # Interpret weight levels (based on your sample analysis) if weight < 0.30: category = "nationally_stable" guidance = "Low spatial variation - county or state level may be sufficient" elif weight < 0.38: category = "regionally_variable" guidance = "Moderate spatial variation - county level recommended" else: category = "locally_clustered" guidance = "High spatial variation - tract or block group recommended" return { "geography_weight": weight, "spatial_category": category, "guidance": guidance } def check_moe_reliability(self, variable_id: str, geo_level: str, geoid: str) -> Optional[Dict]: """ Layer 3: Check margin of error for statistical reliability. Args: variable_id: Census variable ID geo_level: Geographic level geoid: Specific geographic identifier Returns: MOE analysis or None if MOE resolver not available """ if not self.moe_resolver: return None try: moe_ratio = self.moe_resolver(variable_id, geo_level, geoid) if moe_ratio is None: return None # Classify reliability if moe_ratio < 0.10: reliability = "high" guidance = "Reliable estimate" elif moe_ratio < 0.30: reliability = "medium" guidance = "Use with caution" else: reliability = "low" guidance = "Consider aggregating to larger geography" return { "moe_ratio": moe_ratio, "reliability": reliability, "guidance": guidance } except Exception as e: return {"error": f"MOE check failed: {str(e)}"} def recommend(self, variable_id: str, geo_level: str, geoid: Optional[str] = None) -> Dict: """ Main recommendation engine - combines all three layers. Args: variable_id: Census variable ID (e.g., B01001_001E) geo_level: Requested geographic level (e.g., 'tract') geoid: Specific geographic ID for MOE checking (optional) Returns: Recommendation dictionary with status, message, and suggestions """ result = { "variable_id": variable_id, "requested_level": geo_level, "table_id": self.extract_table_id(variable_id) } # Layer 1: Availability check if not self.is_available(variable_id, geo_level): coarser_levels = self.suggest_coarser_levels(variable_id, geo_level) result.update({ "status": "blocked", "reason": "not_published", "message": f"Variable {variable_id} not published at {self.geo_display_names.get(geo_level, geo_level)} level", "available_levels": coarser_levels, "suggestion": f"Try {', '.join(coarser_levels)} instead" if coarser_levels else "No alternative levels available" }) return result # Layer 3: MOE reliability check (if geoid provided) moe_analysis = None if geoid: moe_analysis = self.check_moe_reliability(variable_id, geo_level, geoid) if moe_analysis and moe_analysis.get("reliability") == "low": coarser_levels = self.suggest_coarser_levels(variable_id, geo_level) result.update({ "status": "warning", "reason": "high_moe", "message": f"High sampling error (MOE/estimate = {moe_analysis['moe_ratio']:.1%})", "moe_analysis": moe_analysis, "available_levels": coarser_levels, "suggestion": f"Consider using {coarser_levels[0] if coarser_levels else 'coarser geography'} for more reliable estimates" }) return result # Layer 2: Semantic geographic hint geo_hint = self.get_geographic_hint(variable_id) # Success case result.update({ "status": "ok", "message": f"Variable available at {self.geo_display_names.get(geo_level, geo_level)} level", "geographic_hint": geo_hint, "moe_analysis": moe_analysis }) # Add contextual guidance if geo_hint: if geo_hint["spatial_category"] == "locally_clustered" and geo_level in ['county', 'state']: result["message"] += f" | Note: {geo_hint['guidance']}" elif geo_hint["spatial_category"] == "nationally_stable" and geo_level in ['tract', 'block_group']: result["message"] += f" | Note: {geo_hint['guidance']}" return result def batch_recommend(self, requests: List[Dict]) -> List[Dict]: """ Process multiple recommendation requests efficiently. Args: requests: List of dicts with 'variable_id', 'geo_level', 'geoid' (optional) Returns: List of recommendation results """ results = [] for req in requests: variable_id = req.get('variable_id') geo_level = req.get('geo_level') geoid = req.get('geoid') if not variable_id or not geo_level: results.append({ "error": "Missing required fields: variable_id, geo_level", "request": req }) continue recommendation = self.recommend(variable_id, geo_level, geoid) results.append(recommendation) return results def get_stats(self) -> Dict: """Get statistics about loaded data.""" stats = { "tables_loaded": len(self.table_geo), "variables_with_weights": len(self.geo_weights), "moe_resolver_available": self.moe_resolver is not None, "supported_geographies": list(self.geo_display_names.keys()) } # Geography distribution geo_counts = {} for table_id, levels in self.table_geo.items(): for level in levels: geo_counts[level] = geo_counts.get(level, 0) + 1 stats["geography_distribution"] = geo_counts return stats

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/brockwebb/open-census-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server