Skip to main content
Glama

FedMCP - Federal Parliamentary Information

test_hansard_relationships.py (7.34 kB)
#!/usr/bin/env python3
"""
Test Hansard relationship creation for existing statements.

This script tests the link_statements_to_mps() and link_statements_to_bills()
functions on the existing 25K statements in Neo4j.
"""

from pathlib import Path
import sys

# Make the pipeline package importable: it lives under
# packages/data-pipeline relative to this script. The fedmcp_pipeline imports
# below must therefore come after this sys.path tweak.
sys.path.insert(0, str(Path(__file__).parent / "packages" / "data-pipeline"))

from fedmcp_pipeline.utils.neo4j_client import Neo4jClient
from fedmcp_pipeline.utils.config import Config
from fedmcp_pipeline.ingest.hansard import (
    link_statements_to_mps,
    link_statements_to_bills,
)

# Width of the "=" / "-" separator rules printed by this script.
_RULE_WIDTH = 80

# (result key, printed label, Cypher count query), in the order they are run
# and reported by check_current_state().
_STATE_COUNTS = (
    (
        "statements",
        "Total Statements",
        "MATCH (s:Statement) RETURN count(s) as count",
    ),
    (
        "with_politician_id",
        "Statements with politician_id",
        "MATCH (s:Statement) WHERE s.politician_id IS NOT NULL RETURN count(s) as count",
    ),
    (
        "with_bill_id",
        "Statements with bill_debated_id",
        "MATCH (s:Statement) WHERE s.bill_debated_id IS NOT NULL RETURN count(s) as count",
    ),
    (
        "made_by",
        "Existing MADE_BY relationships",
        "MATCH ()-[r:MADE_BY]->() RETURN count(r) as count",
    ),
    (
        "mentions",
        "Existing MENTIONS relationships",
        "MATCH ()-[r:MENTIONS]->() RETURN count(r) as count",
    ),
    (
        "mps_mapped",
        "MPs with openparliament_politician_id",
        "MATCH (mp:MP) WHERE mp.openparliament_politician_id IS NOT NULL RETURN count(mp) as count",
    ),
    (
        "bills_mapped",
        "Bills with openparliament_bill_id",
        "MATCH (b:Bill) WHERE b.openparliament_bill_id IS NOT NULL RETURN count(b) as count",
    ),
)


def _banner(title):
    """Print ``title`` framed above and below by a full-width "=" rule."""
    print("=" * _RULE_WIDTH)
    print(title)
    print("=" * _RULE_WIDTH)


def _count(neo4j_client, query):
    """Run a query exposing a ``count`` column; return it, or 0 if no rows."""
    result = neo4j_client.run_query(query)
    return result[0]["count"] if result else 0


def check_current_state(neo4j_client) -> dict:
    """Print and return counts of statements, relationships and mapped nodes.

    Args:
        neo4j_client: Client whose ``run_query(cypher)`` returns a sequence of
            rows indexable by column name.

    Returns:
        A dict with the keys ``statements``, ``with_politician_id``,
        ``with_bill_id``, ``made_by``, ``mentions``, ``mps_mapped`` and
        ``bills_mapped``, each mapped to the corresponding count.
    """
    _banner("CURRENT STATE")

    state = {}
    for key, label, query in _STATE_COUNTS:
        # Query then report one metric at a time so output appears as each
        # count completes.
        state[key] = _count(neo4j_client, query)
        print(f"{label}: {state[key]:,}")

    print()
    return state


def _print_summary(initial_state, final_state, made_by_created, mentions_created):
    """Print before/after relationship totals and coverage percentages."""
    _banner("SUMMARY")
    print(f"MADE_BY relationships: {initial_state['made_by']:,} → {final_state['made_by']:,} (+{made_by_created:,})")
    print(f"MENTIONS relationships: {initial_state['mentions']:,} → {final_state['mentions']:,} (+{mentions_created:,})")
    print()

    # Coverage is only reported when the denominator is non-zero.
    if final_state['with_politician_id'] > 0:
        mp_coverage = (final_state['made_by'] / final_state['with_politician_id']) * 100
        print(f"MP Coverage: {mp_coverage:.1f}% ({final_state['made_by']:,} / {final_state['with_politician_id']:,} statements)")

    if final_state['with_bill_id'] > 0:
        bill_coverage = (final_state['mentions'] / final_state['with_bill_id']) * 100
        print(f"Bill Coverage: {bill_coverage:.1f}% ({final_state['mentions']:,} / {final_state['with_bill_id']:,} statements)")

    print()


def _print_sample_speeches(neo4j_client):
    """Print up to five of one sample MP's most recent linked statements."""
    print("\nSample: Pierre Poilievre's recent speeches:")
    print("-" * _RULE_WIDTH)

    query = """
        MATCH (mp:MP {name: 'Pierre Poilievre'})<-[:MADE_BY]-(s:Statement)
        RETURN s.time as time, s.h2_en as context, s.wordcount as words
        ORDER BY s.time DESC
        LIMIT 5
    """
    rows = neo4j_client.run_query(query)
    if not rows:
        print(" No speeches found")
        return

    for row in rows:
        # Keep only the first 19 characters of the stringified timestamp.
        when = str(row['time'])[:19] if row['time'] else 'No time'
        context = row['context'][:50] if row['context'] else 'No context'
        words = row['words'] or 0
        print(f" {when} | {context:50} | {words:4} words")


def _print_sample_bill_debates(neo4j_client):
    """Print up to five statements linked to the sample bill C-12 (45-1)."""
    print("\nSample: Bill C-12 (45-1) debate statements:")
    print("-" * _RULE_WIDTH)

    query = """
        MATCH (b:Bill {number: 'C-12', session: '45-1'})<-[r:MENTIONS]-(s:Statement)
        RETURN s.time as time, r.debate_stage as stage, s.wordcount as words
        ORDER BY s.time
        LIMIT 5
    """
    rows = neo4j_client.run_query(query)
    if not rows:
        print(" No debates found")
        return

    for row in rows:
        when = str(row['time'])[:19] if row['time'] else 'No time'
        stage = row['stage'] or 'Unknown'
        words = row['words'] or 0
        print(f" {when} | Reading {stage:2} | {words:4} words")


def main() -> None:
    """Link statements to MPs and bills, reporting state before and after.

    Loads configuration from ``packages/data-pipeline/.env`` next to this
    script, connects to Neo4j, runs both linking functions with a batch size
    of 5000, then prints a summary and two sample verification queries. The
    Neo4j client is closed even if a step raises.
    """
    _banner("HANSARD RELATIONSHIP CREATION TEST")
    print()

    # Load configuration
    env_file = Path(__file__).parent / "packages" / "data-pipeline" / ".env"
    config = Config(env_file=env_file)

    # Connect to Neo4j
    print("1. Connecting to Neo4j...")
    neo4j_client = Neo4jClient(
        uri=config.neo4j_uri,
        user=config.neo4j_user,
        password=config.neo4j_password,
    )

    try:
        print(" ✅ Connected to Neo4j")
        print()

        # Check current state
        print("2. Checking current state...")
        initial_state = check_current_state(neo4j_client)

        # Create MADE_BY relationships
        print("3. Creating MADE_BY relationships (Statement → MP)...")
        print("-" * _RULE_WIDTH)
        made_by_created = link_statements_to_mps(neo4j_client, batch_size=5000)
        print(f"✅ Created {made_by_created:,} MADE_BY relationships")
        print()

        # Create MENTIONS relationships
        print("4. Creating MENTIONS relationships (Statement → Bill)...")
        print("-" * _RULE_WIDTH)
        mentions_created = link_statements_to_bills(neo4j_client, batch_size=5000)
        print(f"✅ Created {mentions_created:,} MENTIONS relationships")
        print()

        # Check final state
        print("5. Checking final state...")
        final_state = check_current_state(neo4j_client)

        _print_summary(initial_state, final_state, made_by_created, mentions_created)

        # Sample queries to verify
        _banner("VERIFICATION QUERIES")
        _print_sample_speeches(neo4j_client)
        _print_sample_bill_debates(neo4j_client)

        print()
        _banner("✅ RELATIONSHIP CREATION TEST COMPLETE")

    finally:
        neo4j_client.close()


if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.