Skip to main content
Glama

FedMCP - Federal Parliamentary Information

map_politician_ids.py8.15 kB
#!/usr/bin/env python3 """ Map OpenParliament politician IDs to Neo4j MP nodes. This script: 1. Queries all MPs from Neo4j (id and slug) 2. Queries all politicians from PostgreSQL (id and slug) 3. Matches by slug and adds openparliament_politician_id property to MP nodes 4. Reports statistics on matching success """ import os from pathlib import Path from dotenv import load_dotenv # Add parent directory to path for imports import sys sys.path.insert(0, str(Path(__file__).parent / "packages" / "data-pipeline")) from fedmcp_pipeline.utils.neo4j_client import Neo4jClient from fedmcp_pipeline.utils.postgres_client import PostgresClient from fedmcp_pipeline.utils.config import Config # Load environment variables env_file = Path(__file__).parent / "packages" / "data-pipeline" / ".env" load_dotenv(env_file) def get_postgres_politicians(pg_client): """Fetch all politicians with slugs from PostgreSQL.""" query = """ SELECT id, name, slug FROM core_politician WHERE slug IS NOT NULL AND slug <> '' ORDER BY slug """ results = pg_client.execute_query(query, dict_cursor=True) politicians = {} duplicates = [] for row in results: politician_id = row['id'] name = row['name'] slug = row['slug'] if slug in politicians: duplicates.append({ 'slug': slug, 'existing': politicians[slug], 'new': {'id': politician_id, 'name': name} }) # Keep the one with most recent ID (higher ID = more recent) if politician_id > politicians[slug]['id']: politicians[slug] = {'id': politician_id, 'name': name, 'slug': slug} else: politicians[slug] = {'id': politician_id, 'name': name, 'slug': slug} return politicians, duplicates def get_neo4j_mps(neo4j_client): """Fetch all MPs from Neo4j.""" query = """ MATCH (mp:MP) RETURN mp.id AS id, mp.name AS name, mp.slug AS slug ORDER BY mp.slug """ results = neo4j_client.run_query(query) mps = {} for record in results: mp_id = record['id'] name = record['name'] slug = record['slug'] if slug: mps[slug] = {'id': mp_id, 'name': name, 'slug': slug} return mps def map_and_update_mps(neo4j_client, mps, politicians): """ Map Neo4j MPs to PostgreSQL politicians and update with openparliament_politician_id. Returns: dict: Statistics on mapping results """ matched = [] unmatched = [] for slug, mp_data in mps.items(): if slug in politicians: politician = politicians[slug] matched.append({ 'mp_id': mp_data['id'], 'mp_name': mp_data['name'], 'politician_id': politician['id'], 'politician_name': politician['name'], 'slug': slug }) else: unmatched.append({ 'mp_id': mp_data['id'], 'mp_name': mp_data['name'], 'slug': slug }) # Update Neo4j MPs with OpenParliament politician IDs if matched: update_query = """ UNWIND $mappings AS mapping MATCH (mp:MP {id: mapping.mp_id}) SET mp.openparliament_politician_id = mapping.politician_id RETURN mp.id AS id, mp.name AS name, mp.openparliament_politician_id AS politician_id """ mappings = [ {'mp_id': m['mp_id'], 'politician_id': m['politician_id']} for m in matched ] neo4j_client.run_query(update_query, {'mappings': mappings}) stats = { 'total_mps': len(mps), 'total_politicians': len(politicians), 'matched': len(matched), 'unmatched': len(unmatched), 'match_rate': (len(matched) / len(mps) * 100) if mps else 0 } return stats, matched, unmatched def main(): """Main execution function.""" print("=" * 80) print("OPENPARLIAMENT POLITICIAN ID MAPPING") print("=" * 80) print() # Load configuration config = Config(env_file=env_file) # PostgreSQL connection parameters # Default to local OpenParliament database pg_host = os.getenv("POSTGRES_HOST", "localhost") pg_port = int(os.getenv("POSTGRES_PORT", "5432")) pg_db = os.getenv("POSTGRES_DB", "openparliament") pg_user = os.getenv("POSTGRES_USER", "fedmcp") pg_password = os.getenv("POSTGRES_PASSWORD", "fedmcp2024") # Connect to databases print("1. Connecting to databases...") pg_client = PostgresClient( dbname=pg_db, user=pg_user, password=pg_password, host=pg_host, port=pg_port ) neo4j_client = Neo4jClient( uri=config.neo4j_uri, user=config.neo4j_user, password=config.neo4j_password ) try: print(" ✅ Connected to PostgreSQL") print(" ✅ Connected to Neo4j") print() # Fetch politicians from PostgreSQL print("2. Fetching politicians from OpenParliament PostgreSQL...") politicians, duplicates = get_postgres_politicians(pg_client) print(f" ✅ Found {len(politicians)} politicians with slugs") if duplicates: print(f" ⚠️ Found {len(duplicates)} duplicate slugs (using most recent ID)") for dup in duplicates[:5]: # Show first 5 print(f" - {dup['slug']}: ID {dup['existing']['id']} vs {dup['new']['id']}") print() # Fetch MPs from Neo4j print("3. Fetching MPs from Neo4j...") mps = get_neo4j_mps(neo4j_client) print(f" ✅ Found {len(mps)} MPs with slugs") print() # Map and update print("4. Mapping MPs to politicians and updating Neo4j...") stats, matched, unmatched = map_and_update_mps(neo4j_client, mps, politicians) print(f" ✅ Updated {stats['matched']} MPs with openparliament_politician_id") print() # Print statistics print("=" * 80) print("MAPPING STATISTICS") print("=" * 80) print(f"Total MPs in Neo4j: {stats['total_mps']}") print(f"Total Politicians in PG: {stats['total_politicians']}") print(f"Successfully Matched: {stats['matched']}") print(f"Unmatched: {stats['unmatched']}") print(f"Match Rate: {stats['match_rate']:.1f}%") print() # Show sample matches if matched: print("Sample Matched MPs:") print("-" * 80) for match in matched[:10]: print(f" MP: {match['mp_name']:30} → Politician ID: {match['politician_id']:6} ({match['politician_name']})") if len(matched) > 10: print(f" ... and {len(matched) - 10} more") print() # Show unmatched if unmatched: print(f"⚠️ Unmatched MPs ({len(unmatched)}):") print("-" * 80) for um in unmatched[:20]: print(f" {um['mp_name']:30} (slug: {um['slug']})") if len(unmatched) > 20: print(f" ... and {len(unmatched) - 20} more") print() print("=" * 80) print("✅ MAPPING COMPLETE") print("=" * 80) # Verify a few high-profile MPs print() print("Verification - High-Profile MPs:") print("-" * 80) verify_query = """ MATCH (mp:MP) WHERE mp.name IN ['Pierre Poilievre', 'Justin Trudeau', 'Jagmeet Singh', 'Mark Holland'] RETURN mp.name AS name, mp.id AS mp_id, mp.openparliament_politician_id AS politician_id ORDER BY mp.name """ results = neo4j_client.run_query(verify_query) for record in results: print(f" {record['name']:25} → OpenParliament ID: {record['politician_id']}") finally: pg_client.close() neo4j_client.close() print() if __name__ == "__main__": main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server