Skip to main content
Glama
brockwebb

Open Census MCP Server

by brockwebb
python_census_api.py (15.6 kB)
#!/usr/bin/env python3
"""
Fixed Python Census API - Hybrid approach with Batch Query Support

Accepts either:
1. Pre-constructed Census API URLs (for Claude's direct construction)
2. Parameters (for internal URL construction)

NEW: Supports batch queries (county:*, place:*) returning multiple rows
"""

import os
import json
import logging
import re
from typing import Dict, List, Any, Optional, Tuple, Union
from urllib.parse import urlparse, parse_qs, urlencode

import requests

logger = logging.getLogger(__name__)


class PythonCensusAPI:
    """
    Hybrid Python Census API client with batch query support.

    Can handle both pre-constructed URLs and parameter-based construction.
    Supports batch queries like county:* returning multiple geographies.

    Every public and private entry point that talks to the API returns a
    plain dict; failures are reported as ``{"error": "<message>"}`` rather
    than raised.
    """

    # Only URLs under this prefix are ever requested in URL mode.
    _CENSUS_URL_PREFIX = "https://api.census.gov/"

    # Attribution string attached to every successful response.
    _SOURCE_LABEL = (
        "U.S. Census Bureau, American Community Survey 2023 5-Year Estimates"
    )

    # Seconds to wait for the Census API before giving up.
    _REQUEST_TIMEOUT = 30

    # Response columns that identify a geography rather than carry data.
    _GEO_ID_COLUMNS = frozenset({
        "us",
        "state",
        "county",
        "place",
        "zip code tabulation area",
        "metropolitan statistical area/micropolitan statistical area",
    })

    # Numeric "jam" values the ACS uses in place of an estimate or margin of
    # error that is unavailable. The API delivers them as JSON strings, so
    # they must be compared numerically (see _is_missing).
    _MISSING_SENTINELS = frozenset({
        -666666666,
        -999999999,
        -888888888,
        -555555555,
        -333333333,
        -222222222,
    })

    # geography_type -> (label used in error messages,
    #                    Census "for" geography name,
    #                    keyword argument carrying the code,
    #                    whether an "in=state:<state_fips>" clause is needed)
    _GEOGRAPHY_SPECS = {
        "state": ("State", "state", "state_fips", False),
        "county": ("County", "county", "county_fips", True),
        "place": ("Place", "place", "place_fips", True),
        "zcta": ("ZCTA", "zip code tabulation area", "zcta_code", False),
        "cbsa": (
            "CBSA",
            "metropolitan statistical area/micropolitan statistical area",
            "cbsa_code",
            False,
        ),
    }

    # Human-readable labels for commonly requested variables.
    _VARIABLE_LABELS = {
        "B01003_001E": "Total Population",
        "B19013_001E": "Median Household Income",
        "B25064_001E": "Median Gross Rent",
        "B17001_002E": "Income Below Poverty Level",
        "B23025_005E": "Unemployed Population",
        "B01002_001E": "Median Age",
        # B15003_022E counts bachelor's degrees only; graduate degrees are
        # reported in B15003_023E through B15003_025E.
        "B15003_022E": "Bachelor's Degree",
        "B25001_001E": "Total Housing Units",
    }

    # Matches the value of a "key" query parameter so it can be masked.
    _KEY_PARAM_RE = re.compile(r"([?&]key=)[^&#\s]*")

    def __init__(self):
        """Initialize Census API client (API key is read from CENSUS_API_KEY)."""
        self.base_url = "https://api.census.gov/data/2023/acs/acs5"
        self.api_key = os.getenv("CENSUS_API_KEY")
        logger.info("✅ PythonCensusAPI initialized (hybrid mode)")

    def get_acs_data(self, url: Optional[str] = None,
                     variables: Optional[List[str]] = None,
                     geography_type: Optional[str] = None,
                     **geo_params) -> Dict[str, Any]:
        """
        Get ACS data using either pre-constructed URL or parameters.

        Args:
            url: Pre-constructed Census API URL (takes priority)
            variables: List of variable IDs (for parameter mode)
            geography_type: Geography type (for parameter mode)
            **geo_params: FIPS codes (for parameter mode)

        Returns:
            Dict with data, location_name, source, or a dict with a single
            "error" key when the request cannot be made or fails.
        """
        # Mode 1: Pre-constructed URL
        if url:
            return self._execute_census_url(url)

        # Mode 2: Parameter construction
        if variables and geography_type:
            return self._execute_with_parameters(
                variables, geography_type, geo_params
            )

        return {"error": "Must provide either 'url' or 'variables + geography_type'"}

    def _execute_census_url(self, url: str) -> Dict[str, Any]:
        """Execute pre-constructed Census API URL."""
        # The caller-supplied URL may already carry a key; never log it raw.
        logger.info(
            "🌐 Executing pre-constructed URL: %s...",
            self._redact_api_key(url)[:100],
        )

        # Validate it's a Census API URL
        if not url.startswith(self._CENSUS_URL_PREFIX):
            return {"error": "Invalid Census API URL - must start with https://api.census.gov/"}

        # Add API key if available and not already present
        if self.api_key and not self._has_key_param(url):
            separator = "&" if "?" in url else "?"
            url = f"{url}{separator}key={self.api_key}"

        return self._request_and_format(url)

    def _execute_with_parameters(self, variables: List[str], geography_type: str,
                                 geo_params: Dict) -> Dict[str, Any]:
        """Execute using parameter-based URL construction."""
        logger.info(
            "📊 Constructing URL from parameters: %s for %s",
            variables, geography_type,
        )

        # Build Census API parameters
        params = {"get": ",".join(variables)}
        if self.api_key:
            params["key"] = self.api_key

        # Build geography clauses
        try:
            params.update(
                self._build_geography_params(geography_type, geo_params)
            )
        except ValueError as exc:
            return {"error": str(exc)}

        # Mask the key so the secret never reaches the log output.
        loggable = {**params, "key": "REDACTED"} if "key" in params else params
        logger.info(
            "🌐 Census API request: %s with params %s", self.base_url, loggable
        )
        return self._request_and_format(self.base_url, params)

    def _build_geography_params(self, geography_type: str,
                                geo_params: Dict) -> Dict[str, str]:
        """
        Translate a geography type plus codes into Census "for"/"in" clauses.

        Args:
            geography_type: One of "us", "state", "county", "place", "zcta",
                "cbsa".
            geo_params: Mapping holding the codes the geography needs
                (state_fips, county_fips, place_fips, zcta_code, cbsa_code).

        Returns:
            Dict with a "for" entry and, for county/place, an "in" entry.

        Raises:
            ValueError: If the geography type is unsupported or a required
                code is missing/empty.
        """
        if geography_type == "us":
            return {"for": "us:1"}

        spec = self._GEOGRAPHY_SPECS.get(geography_type)
        if spec is None:
            raise ValueError(f"Unsupported geography type: {geography_type}")

        label, census_name, code_param, needs_state = spec
        required = (["state_fips"] if needs_state else []) + [code_param]
        if not all(geo_params.get(name) for name in required):
            raise ValueError(
                f"{label} geography requires {' and '.join(required)}"
            )

        clauses = {"for": f"{census_name}:{geo_params[code_param]}"}
        if needs_state:
            clauses["in"] = f"state:{geo_params['state_fips']}"
        return clauses

    def _request_and_format(self, url: str,
                            params: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """
        Perform the HTTP GET shared by both modes and format the result.

        Args:
            url: Full URL (URL mode) or the dataset base URL (parameter mode).
            params: Query parameters for parameter mode; None in URL mode.

        Returns:
            The formatted response, or a dict with an "error" key.
        """
        try:
            response = requests.get(
                url, params=params, timeout=self._REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            # Transport errors usually embed the requested URL, key included.
            return {"error": f"Request failed: {self._redact_api_key(str(exc))}"}

        if response.status_code != 200:
            error_msg = f"Census API HTTP {response.status_code}: {response.text}"
            logger.error(error_msg)
            return {"error": error_msg}

        try:
            data = response.json()
        except ValueError as exc:
            # ValueError is the common base of the json, simplejson and
            # requests JSON decoding errors.
            return {"error": f"Invalid JSON response: {exc}"}

        if isinstance(data, list) and len(data) >= 2:
            # Normal Census API response: [headers, ...data_rows]
            final_url = url if params is None else response.url
            return self._format_census_response(
                data, self._redact_api_key(final_url)
            )

        return {"error": f"Unexpected Census API response format: {data}"}

    @staticmethod
    def _has_key_param(url: str) -> bool:
        """Return True when the URL's query string has a "key" parameter."""
        query = parse_qs(urlparse(url).query, keep_blank_values=True)
        return "key" in query

    @classmethod
    def _redact_api_key(cls, text: str) -> str:
        """Replace the value of any "key" query parameter with REDACTED."""
        return cls._KEY_PARAM_RE.sub(r"\1REDACTED", text)

    def _format_census_response(self, data: List, url: str) -> Dict[str, Any]:
        """
        Format Census API response into structured data.

        NEW: Handles both single and batch queries (multiple rows).

        Args:
            data: Raw API payload, ``[headers, row, row, ...]``.
            url: URL reported back to the caller as "api_url".
        """
        headers = data[0]
        rows = data[1:]

        if not rows:
            return {"error": "No data returned from Census API"}

        # Detect batch query (multiple rows = batch, single row = specific geography)
        if len(rows) > 1:
            return self._format_batch_response(headers, rows, url)
        return self._format_single_response(headers, rows[0], url)

    @classmethod
    def _is_missing(cls, value: Any) -> bool:
        """Return True for null values and ACS missing-data sentinels."""
        if value is None or value == "null":
            return True
        try:
            # Values arrive as strings ("-666666666"), so compare numerically.
            return float(value) in cls._MISSING_SENTINELS
        except (TypeError, ValueError):
            return False

    def _format_value(self, variable_id: str,
                      value: Any) -> Tuple[Dict[str, Any], bool]:
        """
        Build the estimate/moe/label entry for one cell of a response row.

        Returns:
            ``(entry, is_missing)`` where ``is_missing`` tells the caller the
            value was null or a missing-data sentinel.
        """
        label = self._get_variable_label(variable_id)

        if self._is_missing(value):
            entry = {"estimate": "Data not available", "moe": "N/A", "label": label}
            return entry, True

        # Convert to numeric if possible
        try:
            numeric_value = float(value)
        except (ValueError, TypeError):
            return {"estimate": value, "moe": "N/A", "label": label}, False

        estimate = int(numeric_value) if numeric_value.is_integer() else numeric_value
        entry = {"estimate": estimate, "moe": "See ACS documentation", "label": label}
        return entry, False

    def _format_single_response(self, headers: List, data_row: List,
                                url: str) -> Dict[str, Any]:
        """Format single geography response (original behavior)."""
        # Map headers to values
        row_data = dict(zip(headers, data_row))

        formatted_data = {}
        notes = []
        for header, value in row_data.items():
            # Skip geographic identifiers (state, county, place codes)
            if header in self._GEO_ID_COLUMNS:
                continue
            formatted_data[header], missing = self._format_value(header, value)
            if missing:
                notes.append(f"{header}: Data suppressed or not available")

        return {
            "data": formatted_data,
            "location_name": self._build_location_name(row_data),
            "notes": notes,
            "source": self._SOURCE_LABEL,
            "api_url": url,
            "raw_census_data": row_data,
        }

    def _format_batch_response(self, headers: List, data_rows: List[List],
                               url: str) -> Dict[str, Any]:
        """
        Format batch query response (NEW: handles multiple geographies).

        Returns all geographies with their data, sorted by first variable
        value (descending). When the response has no variable columns the
        rows keep the order the API returned them in.
        """
        # Identify variable columns (non-geographic identifiers)
        non_variable = self._GEO_ID_COLUMNS | {"NAME"}
        variable_columns = [h for h in headers if h not in non_variable]
        # A NAME-only query has nothing to rank by.
        sort_column = variable_columns[0] if variable_columns else None

        ranked = []
        for data_row in data_rows:
            row_data = dict(zip(headers, data_row))

            # Format variables for this geography
            formatted_data = {
                header: self._format_value(header, row_data.get(header))[0]
                for header in variable_columns
            }
            geography_result = {
                "data": formatted_data,
                "location_name": self._build_location_name(row_data),
                "geography_codes": {
                    k: v for k, v in row_data.items()
                    if k in self._GEO_ID_COLUMNS
                },
            }
            if sort_column is None:
                sort_value = 0
            else:
                sort_value = self._get_sort_value(formatted_data, sort_column)
            ranked.append((sort_value, geography_result))

        # Sort by first variable value (descending); the sort is stable, so
        # ties keep API order.
        ranked.sort(key=lambda pair: pair[0], reverse=True)
        batch_results = [result for _, result in ranked]

        return {
            "data": batch_results,
            "location_name": f"Batch query: {len(batch_results)} geographies",
            "source": self._SOURCE_LABEL,
            "api_url": url,
            "batch_query": True,
            "total_geographies": len(batch_results),
            "variables": variable_columns,
        }

    def _get_sort_value(self, formatted_data: Dict, variable_id: str) -> float:
        """Get numeric sort value for ranking geographies (0 when unavailable)."""
        try:
            estimate = formatted_data.get(variable_id, {}).get("estimate", 0)
            return float(estimate) if estimate != "Data not available" else 0
        except (ValueError, TypeError):
            return 0

    def _get_variable_label(self, variable_id: str) -> str:
        """Get human-readable label for Census variable (falls back to the ID)."""
        return self._VARIABLE_LABELS.get(variable_id, variable_id)

    def _build_location_name(self, row_data: Dict) -> str:
        """Build human-readable location name from Census response."""
        # Try to use the NAME field if available
        if "NAME" in row_data:
            return row_data["NAME"]

        # Fallback construction
        parts = []
        if "place" in row_data:
            parts.append(f"Place {row_data['place']}")
        elif "county" in row_data:
            parts.append(f"County {row_data['county']}")

        if "state" in row_data:
            parts.append(f"State {row_data['state']}")

        return ", ".join(parts) if parts else "Unknown Location"

    # Utility methods for external use
    def construct_census_url(self, variables: List[str], geography_type: str,
                             **geo_params) -> str:
        """
        Utility: Construct Census API URL from parameters (for Claude to use).

        Supports the same geography types as parameter mode. The configured
        API key, if any, is included in the returned URL.

        Args:
            variables: Variable IDs to request.
            geography_type: "us", "state", "county", "place", "zcta" or "cbsa".
            **geo_params: Codes required by the geography type.

        Returns:
            The full request URL.

        Raises:
            ValueError: If the geography type is unsupported or a required
                code is missing, instead of returning an unusable URL.
        """
        params = {"get": ",".join(variables)}
        if self.api_key:
            params["key"] = self.api_key
        params.update(self._build_geography_params(geography_type, geo_params))

        # Keep ":" "," and "*" literal so simple URLs stay readable; spaces
        # and other reserved characters are percent/plus-encoded.
        return f"{self.base_url}?{urlencode(params, safe=':,*')}"

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/brockwebb/open-census-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.