#!/usr/bin/env python3
"""
Detect bill amendments by comparing different versions of bills.
This script fetches all available versions of bills from Parliament.ca XML,
compares section text between versions, and stores the text history in Neo4j.
Usage:
# Detect amendments for a specific bill
python scripts/detect_bill_amendments.py --bill C-11 --session 45-1
# Detect amendments for all bills in a session
python scripts/detect_bill_amendments.py --session 45-1
# Detect amendments with limit
python scripts/detect_bill_amendments.py --session 45-1 --limit 10
"""
import argparse
import os
import sys
from pathlib import Path
# Add the pipeline package to the path
sys.path.insert(0, str(Path(__file__).parent.parent))
from fedmcp_pipeline.ingest.bill_amendments import (
detect_bill_amendments,
detect_amendments_for_session,
)
from fedmcp_pipeline.utils.neo4j_client import Neo4jClient
from fedmcp_pipeline.utils.progress import logger
def _parse_session(value: str) -> tuple:
    """Split a ``"<parliament>-<session>"`` string into two integers.

    Args:
        value: Session identifier as typed on the command line, e.g. ``"45-1"``.

    Returns:
        A ``(parliament, session)`` tuple of ints.

    Raises:
        ValueError: If ``value`` does not consist of exactly two ``-``-separated
            parts, or if either part is not a valid integer.
    """
    parts = value.split("-")
    if len(parts) != 2:
        raise ValueError(f"expected exactly two '-'-separated parts, got {len(parts)}")
    # int() raises ValueError itself for empty or non-numeric parts.
    return int(parts[0]), int(parts[1])


def main() -> None:
    """Command-line entry point: detect amendments for one bill or a whole session.

    Parses ``--bill``, ``--session``, ``--limit`` and ``--government-bill`` from
    the command line, opens a Neo4j connection configured through the
    ``NEO4J_URI``, ``NEO4J_USERNAME`` and ``NEO4J_PASSWORD`` environment
    variables, runs the requested detection and prints a summary to stdout.

    Exits with status 1 when the session string is malformed or when
    ``NEO4J_PASSWORD`` is unset/empty. Any exception raised during detection is
    logged and re-raised; the Neo4j client is closed in every case.
    """
    parser = argparse.ArgumentParser(
        description="Detect bill amendments by comparing versions"
    )
    parser.add_argument(
        "--bill",
        type=str,
        help="Bill number to check (e.g., C-11, S-234)",
    )
    parser.add_argument(
        "--session",
        type=str,
        default="45-1",
        help="Parliament session (e.g., 45-1). Default: 45-1",
    )
    parser.add_argument(
        "--limit",
        type=int,
        help="Limit number of bills to process (for testing)",
    )
    parser.add_argument(
        "--government-bill",
        action="store_true",
        help="Indicate this is a government bill (affects XML path)",
    )
    args = parser.parse_args()

    # Parse session. Both a wrong number of parts and non-numeric parts are
    # reported the same way instead of surfacing a raw ValueError traceback.
    try:
        parliament, session = _parse_session(args.session)
    except ValueError:
        logger.error(f"Invalid session format: {args.session}. Expected format: XX-Y (e.g., 45-1)")
        sys.exit(1)

    # Get Neo4j connection settings; only the password has no usable default.
    neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    neo4j_user = os.environ.get("NEO4J_USERNAME", "neo4j")
    neo4j_password = os.environ.get("NEO4J_PASSWORD", "")
    if not neo4j_password:
        logger.error("NEO4J_PASSWORD environment variable is required")
        sys.exit(1)

    neo4j_client = Neo4jClient(uri=neo4j_uri, user=neo4j_user, password=neo4j_password)
    try:
        if args.bill:
            # Single bill detection - auto-detect if government bill from DB
            is_gov = args.government_bill
            if not is_gov:
                # Check database for is_government_bill flag
                check_query = """
                    MATCH (b:Bill {number: $number, session: $session})
                    RETURN b.is_government_bill as is_gov
                """
                # NOTE(review): the lookup upper-cases the bill number, while
                # detect_bill_amendments() below receives it as typed - confirm
                # the callee normalises case itself.
                check_result = neo4j_client.run_query(check_query, {
                    "number": args.bill.upper(),
                    "session": args.session,
                })
                if check_result and check_result[0].get("is_gov"):
                    is_gov = True
                    logger.info(f" Auto-detected {args.bill} as government bill")

            logger.info(f"Detecting amendments for {args.bill} in session {args.session}")
            result = detect_bill_amendments(
                neo4j_client,
                parliament=parliament,
                session=session,
                bill_number=args.bill,
                is_government=is_gov,
            )

            # Print results
            print("\n" + "=" * 50)
            print(f"Bill: {result['bill']}")
            print(f"Versions found: {result['versions_found']}")
            print(f"Has amendments: {result['has_amendments']}")
            # The diff counters are optional in the result, hence .get().
            if result.get("total_diffs"):
                print(f"Total changes: {result['total_diffs']}")
            if result.get("diffs_by_type"):
                print("Changes by type:")
                for change_type, count in result["diffs_by_type"].items():
                    print(f" - {change_type}: {count}")
        else:
            # Session-wide detection
            logger.info(f"Detecting amendments for all bills in session {args.session}")
            result = detect_amendments_for_session(
                neo4j_client,
                parliament=parliament,
                session=session,
                limit=args.limit,
            )

            # Print summary
            print("\n" + "=" * 50)
            print(f"Session: {result['session']}")
            print(f"Bills processed: {result['bills_processed']}")
            print(f"Bills with amendments: {result['bills_with_amendments']}")
            print(f"Total diffs detected: {result['total_diffs']}")
    except Exception as e:
        # Log for the operator, then re-raise so the failure is not swallowed.
        logger.error(f"Error detecting amendments: {e}")
        raise
    finally:
        # Always release the connection, on success and on failure alike.
        neo4j_client.close()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()