Skip to main content
Glama
northernvariables

FedMCP - Federal Parliamentary Information

backfill_november_sittings.py (3.57 kB)
#!/usr/bin/env python3
"""Backfill specific sitting numbers for November 2025."""

import sys
import os
import traceback
from pathlib import Path
from datetime import datetime

# Add packages to path
sys.path.insert(0, str(Path(__file__).parent.parent / 'packages' / 'data-pipeline'))
sys.path.insert(0, str(Path(__file__).parent.parent / 'packages' / 'fedmcp' / 'src'))

# Import after path setup
from fedmcp_pipeline.utils.neo4j_client import Neo4jClient
from fedmcp_pipeline.utils.progress import logger
from fedmcp.clients.ourcommons import OurCommonsHansardClient
from fedmcp_pipeline.ingest.hansard import link_statements_to_mps_by_name

# Import functions from daily-hansard-import.py
sys.path.insert(0, str(Path(__file__).parent))
from daily_hansard_import import (
    parse_hansard_with_enhanced_metadata,
    import_hansard_to_neo4j,
    get_latest_document_id
)

# Inclusive range of sitting numbers this backfill attempts.
FIRST_SITTING = 50
LAST_SITTING = 57


def _import_sitting(neo4j, client, sitting, url):
    """Fetch, parse and import a single sitting.

    Args:
        neo4j: Open Neo4jClient used for the duplicate check and the import.
        client: OurCommonsHansardClient whose ``session`` performs the HTTP GET.
        sitting: Zero-padded sitting number (e.g. ``"050"``).
        url: Hansard XML URL for that sitting.

    Returns:
        True if the sitting was newly imported; False if it was skipped
        because the fetch did not return HTTP 200 or a Document with the
        same date already exists.
    """
    # Fetch XML
    response = client.session.get(url, timeout=30)
    if response.status_code != 200:
        logger.warning(f"✗ Sitting {sitting}: HTTP {response.status_code}")
        return False

    logger.success(f"✓ Found XML for sitting {sitting}")

    # Parse
    hansard_data = parse_hansard_with_enhanced_metadata(response.text, url)
    iso_date = hansard_data['date']

    # Check if already exists (a Document with the same date counts as a duplicate)
    result = neo4j.run_query(
        "MATCH (d:Document) WHERE d.date = $date RETURN d.id as id",
        {"date": iso_date}
    )
    if result:
        logger.info(f"⏭ Sitting {sitting} ({iso_date}) already exists, skipping")
        return False

    # Get next document ID
    latest_doc_id = get_latest_document_id(neo4j)
    document_id = latest_doc_id + 1

    # Import
    stmt_count, linked_count = import_hansard_to_neo4j(
        neo4j, hansard_data, iso_date, document_id, sitting
    )
    logger.success(
        f"✅ Imported sitting {sitting} ({iso_date}): "
        f"{stmt_count} statements, {linked_count} linked"
    )
    return True


def main():
    """Import sittings 050-057.

    Reads the Neo4j connection settings from the NEO4J_URI, NEO4J_USERNAME and
    NEO4J_PASSWORD environment variables, then tries each sitting in turn.
    A failure on one sitting is logged (with traceback) and does not stop the
    remaining sittings. The Neo4j client is always closed, even if the run is
    interrupted.

    Raises:
        SystemExit: with status 1 when NEO4J_PASSWORD is not set.
    """
    logger.info("=" * 80)
    logger.info("NOVEMBER 2025 HANSARD BACKFILL")
    logger.info(f"Started at: {datetime.now().isoformat()}")
    logger.info("=" * 80)

    # Get Neo4j connection
    neo4j_uri = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
    neo4j_user = os.getenv('NEO4J_USERNAME', 'neo4j')
    neo4j_password = os.getenv('NEO4J_PASSWORD')

    if not neo4j_password:
        logger.error("NEO4J_PASSWORD environment variable not set")
        sys.exit(1)

    neo4j = Neo4jClient(uri=neo4j_uri, user=neo4j_user, password=neo4j_password)

    imported_count = 0
    try:
        client = OurCommonsHansardClient()

        # Import sittings 050-057
        for sitting_num in range(FIRST_SITTING, LAST_SITTING + 1):
            sitting = f"{sitting_num:03d}"
            url = f"https://www.ourcommons.ca/Content/House/451/Debates/{sitting}/HAN{sitting}-E.XML"

            logger.info(f"Fetching sitting {sitting} from {url}...")

            try:
                if _import_sitting(neo4j, client, sitting, url):
                    imported_count += 1
            except Exception as e:
                # Best-effort backfill: report the failure and move on to the
                # next sitting rather than aborting the whole run.
                logger.error(f"Failed to import sitting {sitting}: {e}")
                traceback.print_exc()
    finally:
        # Release the connection even on KeyboardInterrupt / SystemExit or if
        # the Hansard client could not be constructed.
        neo4j.close()

    logger.info("=" * 80)
    if imported_count > 0:
        logger.success(f"✅ Successfully imported {imported_count} new debate(s)")
    else:
        logger.info("ℹ️ No new debates imported")
    logger.info("=" * 80)


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.