Skip to main content
Glama
brockwebb

Open Census MCP Server

by brockwebb
build_geo_reference_tables.py (11.4 kB)
#!/usr/bin/env python3
"""
Build official geographic reference tables from Census sources.

Creates clean CSV files for hierarchical database building.

Input:  source-docs/geographic-reference/*.txt
Output: knowledge-base/geo-reference-data/*.csv
"""

import csv
import logging
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# State FIPS -> (region_code, division_code).
# Official Census Region/Division mappings from hierarchy docs.
_STATE_REGION_DIVISION = {
    '01': ('3', '6'),  # Alabama -> South, East South Central
    '02': ('4', '9'),  # Alaska -> West, Pacific
    '04': ('4', '8'),  # Arizona -> West, Mountain
    '05': ('3', '7'),  # Arkansas -> South, West South Central
    '06': ('4', '9'),  # California -> West, Pacific
    '08': ('4', '8'),  # Colorado -> West, Mountain
    '09': ('1', '1'),  # Connecticut -> Northeast, New England
    '10': ('3', '5'),  # Delaware -> South, South Atlantic
    '11': ('3', '5'),  # DC -> South, South Atlantic
    '12': ('3', '5'),  # Florida -> South, South Atlantic
    '13': ('3', '5'),  # Georgia -> South, South Atlantic
    '15': ('4', '9'),  # Hawaii -> West, Pacific
    '16': ('4', '8'),  # Idaho -> West, Mountain
    '17': ('2', '3'),  # Illinois -> Midwest, East North Central
    '18': ('2', '3'),  # Indiana -> Midwest, East North Central
    '19': ('2', '4'),  # Iowa -> Midwest, West North Central
    '20': ('2', '4'),  # Kansas -> Midwest, West North Central
    '21': ('3', '6'),  # Kentucky -> South, East South Central
    '22': ('3', '7'),  # Louisiana -> South, West South Central
    '23': ('1', '1'),  # Maine -> Northeast, New England
    '24': ('3', '5'),  # Maryland -> South, South Atlantic
    '25': ('1', '1'),  # Massachusetts -> Northeast, New England
    '26': ('2', '3'),  # Michigan -> Midwest, East North Central
    '27': ('2', '4'),  # Minnesota -> Midwest, West North Central
    '28': ('3', '6'),  # Mississippi -> South, East South Central
    '29': ('2', '4'),  # Missouri -> Midwest, West North Central
    '30': ('4', '8'),  # Montana -> West, Mountain
    '31': ('2', '4'),  # Nebraska -> Midwest, West North Central
    '32': ('4', '8'),  # Nevada -> West, Mountain
    '33': ('1', '1'),  # New Hampshire -> Northeast, New England
    '34': ('1', '2'),  # New Jersey -> Northeast, Middle Atlantic
    '35': ('4', '8'),  # New Mexico -> West, Mountain
    '36': ('1', '2'),  # New York -> Northeast, Middle Atlantic
    '37': ('3', '5'),  # North Carolina -> South, South Atlantic
    '38': ('2', '4'),  # North Dakota -> Midwest, West North Central
    '39': ('2', '3'),  # Ohio -> Midwest, East North Central
    '40': ('3', '7'),  # Oklahoma -> South, West South Central
    '41': ('4', '9'),  # Oregon -> West, Pacific
    '42': ('1', '2'),  # Pennsylvania -> Northeast, Middle Atlantic
    '44': ('1', '1'),  # Rhode Island -> Northeast, New England
    '45': ('3', '5'),  # South Carolina -> South, South Atlantic
    '46': ('2', '4'),  # South Dakota -> Midwest, West North Central
    '47': ('3', '6'),  # Tennessee -> South, East South Central
    '48': ('3', '7'),  # Texas -> South, West South Central
    '49': ('4', '8'),  # Utah -> West, Mountain
    '50': ('1', '1'),  # Vermont -> Northeast, New England
    '51': ('3', '5'),  # Virginia -> South, South Atlantic
    '53': ('4', '9'),  # Washington -> West, Pacific
    '54': ('3', '5'),  # West Virginia -> South, South Atlantic
    '55': ('2', '3'),  # Wisconsin -> Midwest, East North Central
    '56': ('4', '8'),  # Wyoming -> West, Mountain
    # Territories (assign to closest region)
    '60': ('4', '9'),  # American Samoa -> West, Pacific
    '66': ('4', '9'),  # Guam -> West, Pacific
    '69': ('4', '9'),  # Northern Mariana Islands -> West, Pacific
    '72': ('3', '5'),  # Puerto Rico -> South, South Atlantic
    '74': ('4', '9'),  # Minor Outlying Islands -> West, Pacific
    '78': ('3', '5'),  # Virgin Islands -> South, South Atlantic
}

# Codes used for a state FIPS that has no entry in the mapping above.
# Division '9' is Pacific, so it must not double as the "unknown" marker;
# region 9 / division 0 are the Census "not in a region / division" codes.
_UNASSIGNED_REGION_DIVISION = ('9', '0')

# Inclusive AIANNHCE census-code ranges -> area type, following the code
# ranges published by the Census Bureau for AIANNH areas.
_AIANNH_CODE_RANGES = (
    (1, 4999, 'reservation'),
    (5000, 5499, 'hawaiian_home_land'),
    (5500, 5999, 'oklahoma_tribal_statistical_area'),
    (6000, 7999, 'alaska_native_village'),
    (8000, 8999, 'tribal_designated_statistical_area'),
    (9000, 9499, 'state_recognized_reservation'),
    (9500, 9998, 'state_designated_tribal_statistical_area'),
)


def _write_csv(output_path: Path, header, rows) -> None:
    """Write *header* followed by *rows* to *output_path* as UTF-8 CSV."""
    output_path = Path(output_path)
    # Allow the builders to be called directly, not only through main().
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)


def _classify_aiannh_code(aiannh_ce) -> str:
    """Return the area type for an AIANNHCE code, or 'other' if unrecognised."""
    code = (aiannh_ce or '').strip()
    if not (code.isascii() and code.isdigit()):
        return 'other'
    number = int(code)
    for low, high, area_type in _AIANNH_CODE_RANGES:
        if low <= number <= high:
            return area_type
    return 'other'


def build_states_table(source_dir: Path, output_path: Path) -> None:
    """Build states.csv from national_state2020.txt + region mappings.

    Reads the pipe-delimited ``national_state2020.txt`` in *source_dir*
    (columns ``STATEFP``, ``STATE``, ``STATE_NAME``) and writes one row per
    state with its region and division code.

    Args:
        source_dir: Directory containing ``national_state2020.txt``.
        output_path: Destination CSV file.

    Raises:
        FileNotFoundError: If the state file does not exist.
        KeyError: If an expected column is missing from the input.
    """
    state_file = source_dir / "national_state2020.txt"
    if not state_file.exists():
        raise FileNotFoundError(f"State file not found: {state_file}")

    logger.info("Reading states from %s", state_file)

    states_data = []
    # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, which would
    # otherwise corrupt the first header name.
    with open(state_file, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f, delimiter='|'):
            state_fips = row['STATEFP']
            state_abbrev = row['STATE']
            state_name = row['STATE_NAME']

            mapping = _STATE_REGION_DIVISION.get(state_fips)
            if mapping is None:
                mapping = _UNASSIGNED_REGION_DIVISION
                logger.warning(
                    "No region/division mapping for state FIPS %s (%s); "
                    "using region %s / division %s",
                    state_fips, state_name, mapping[0], mapping[1],
                )
            region_code, division_code = mapping

            states_data.append([
                state_fips,
                state_abbrev,
                state_name,
                region_code,
                division_code,
            ])

    _write_csv(
        output_path,
        ['state_fips', 'state_abbrev', 'state_name',
         'region_code', 'division_code'],
        states_data,
    )
    logger.info("✅ Created %s with %d states", output_path, len(states_data))


def build_regions_divisions_table(output_path: Path) -> None:
    """Build regions-divisions.csv from official Census hierarchy.

    Args:
        output_path: Destination CSV file.
    """
    regions_data = [
        # region_code, region_name, division_code, division_name
        ('1', 'Northeast', '1', 'New England'),
        ('1', 'Northeast', '2', 'Middle Atlantic'),
        ('2', 'Midwest', '3', 'East North Central'),
        ('2', 'Midwest', '4', 'West North Central'),
        ('3', 'South', '5', 'South Atlantic'),
        ('3', 'South', '6', 'East South Central'),
        ('3', 'South', '7', 'West South Central'),
        ('4', 'West', '8', 'Mountain'),
        ('4', 'West', '9', 'Pacific'),
    ]

    _write_csv(
        output_path,
        ['region_code', 'region_name', 'division_code', 'division_name'],
        regions_data,
    )
    logger.info(
        "✅ Created %s with %d region/division mappings",
        output_path, len(regions_data),
    )


def build_summary_levels_table(output_path: Path) -> None:
    """Build summary-levels.csv with official Census geography codes.

    Args:
        output_path: Destination CSV file.
    """
    # Official Census Summary Level codes from hierarchy docs
    summary_levels = [
        # summary_level, geography_type, description
        ('010', 'nation', 'United States'),
        ('020', 'region', 'Census Region'),
        ('030', 'division', 'Census Division'),
        ('040', 'state', 'State'),
        ('050', 'county', 'County'),
        ('060', 'county_subdivision', 'County Subdivision'),
        ('140', 'census_tract', 'Census Tract'),
        ('150', 'block_group', 'Block Group'),
        ('160', 'place', 'Incorporated Place'),
        ('170', 'consolidated_city', 'Consolidated City'),
        # 230 is the Alaska Native Regional Corporation level; the AIANNH
        # levels are 250 (all areas) and 252 (reservation/statistical only).
        ('230', 'anrc', 'Alaska Native Regional Corporation'),
        ('250', 'aiannh',
         'American Indian Area/Alaska Native Area/Hawaiian Home Land'),
        ('252', 'aiannh_onsv',
         'American Indian Area/Alaska Native Area '
         '(Reservation or Statistical Entity Only)'),
        ('310', 'cbsa', 'Core Based Statistical Area'),
        ('330', 'csa', 'Combined Statistical Area'),
        ('350', 'necta', 'New England City and Town Area'),
        ('400', 'ua', 'Urban Area'),
        ('500', 'congressional_district', 'Congressional District'),
        ('610', 'state_senate', 'State Legislative District (Upper Chamber)'),
        ('620', 'state_house', 'State Legislative District (Lower Chamber)'),
        ('860', 'zcta', 'ZIP Code Tabulation Area'),
        ('950', 'school_district_elementary', 'School District (Elementary)'),
        ('960', 'school_district_secondary', 'School District (Secondary)'),
        ('970', 'school_district_unified', 'School District (Unified)'),
    ]

    _write_csv(
        output_path,
        ['summary_level', 'geography_type', 'description'],
        summary_levels,
    )
    logger.info(
        "✅ Created %s with %d summary levels",
        output_path, len(summary_levels),
    )


def build_aiannh_reference_table(source_dir: Path, output_path: Path) -> None:
    """Build aiannh-areas.csv from national tribal area files.

    Reads the pipe-delimited ``national_aiannh2020.txt`` in *source_dir*
    (columns ``AIANNHCE``, ``AIANNHNS``, ``AIANNHNAME``, ``STATES``) and
    writes one row per area, adding an ``area_type`` derived from the
    numeric range of the ``AIANNHCE`` census code.

    Args:
        source_dir: Directory containing ``national_aiannh2020.txt``.
        output_path: Destination CSV file.

    Raises:
        FileNotFoundError: If the AIANNH file does not exist.
        KeyError: If an expected column is missing from the input.
    """
    aiannh_file = source_dir / "national_aiannh2020.txt"
    if not aiannh_file.exists():
        raise FileNotFoundError(f"AIANNH file not found: {aiannh_file}")

    logger.info("Reading AIANNH areas from %s", aiannh_file)

    aiannh_data = []
    # utf-8-sig: area names can contain non-ASCII characters, and a BOM must
    # not leak into the first header name.
    with open(aiannh_file, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f, delimiter='|'):
            aiannh_ce = row['AIANNHCE']
            aiannh_data.append([
                aiannh_ce,
                row['AIANNHNS'],
                row['AIANNHNAME'],
                _classify_aiannh_code(aiannh_ce),
                row['STATES'],
            ])

    _write_csv(
        output_path,
        ['aiannh_ce', 'aiannh_ns', 'aiannh_name', 'area_type', 'states'],
        aiannh_data,
    )
    logger.info(
        "✅ Created %s with %d tribal areas", output_path, len(aiannh_data)
    )


def main() -> None:
    """Build all geographic reference tables.

    Raises:
        FileNotFoundError: If the source directory or an input file is missing.
    """
    # Paths relative to knowledge-base/scripts/ location
    script_dir = Path(__file__).parent
    kb_dir = script_dir.parent
    source_dir = kb_dir / "source-docs/geographic-reference"
    output_dir = kb_dir / "geo-reference-data"

    # Validate source directory exists
    if not source_dir.exists():
        raise FileNotFoundError(f"Source directory not found: {source_dir}")

    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    logger.info("Source: %s", source_dir)
    logger.info("Output: %s", output_dir)

    # Build reference tables
    build_states_table(source_dir, output_dir / "states.csv")
    build_regions_divisions_table(output_dir / "regions-divisions.csv")
    build_summary_levels_table(output_dir / "summary-levels.csv")
    build_aiannh_reference_table(source_dir, output_dir / "aiannh-areas.csv")

    logger.info("🎉 All geographic reference tables built successfully!")
    logger.info("Reference data: %s", output_dir)
    logger.info("Ready for hierarchical database build.")


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/brockwebb/open-census-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.