"""
Political Entities Ingestion Module.
Standalone module for ingesting Party and Riding nodes from MP data.
This is separate from parliament.py to avoid fedmcp dependencies in jobs that don't need them.
"""
import json
from datetime import datetime
from pathlib import Path
from typing import Optional
from ..utils.neo4j_client import Neo4jClient
from ..utils.progress import logger
def detect_province(riding_name: str) -> Optional[str]:
    """
    Detect the province/territory for a riding using keyword matching.

    The lookup tables are read from ``data/riding_provinces.json``, located
    two directory levels above this file. Two sections of that JSON are used:

    * ``manual_overrides``: maps a riding-name fragment to a province code;
      checked first (for ridings like "Nunavut", "Northwest Territories").
    * ``keyword_patterns``: maps a province code to a list of keywords.

    Matching is a case-insensitive substring test, and the first hit in file
    order wins.

    Args:
        riding_name: Name of the riding (e.g., "Calgary Centre", "Toronto—Danforth")

    Returns:
        Two-letter province/territory code (e.g., "ON", "QC", "BC") or None if
        not detected, if the name is empty, or if the mapping file is missing.
    """
    # Nothing to match against; also avoids calling .lower() on None.
    if not riding_name:
        return None

    data_file = Path(__file__).parent.parent / "data" / "riding_provinces.json"

    try:
        # Explicit UTF-8: the mapping contains accented names and em dashes,
        # which must not be decoded with a platform-dependent default codec.
        with open(data_file, "r", encoding="utf-8") as f:
            mapping = json.load(f)
    except FileNotFoundError:
        logger.warning(f"Province mapping file not found: {data_file}")
        return None

    # Lowercase the riding once instead of once per candidate keyword.
    riding_lower = riding_name.lower()

    # Manual overrides take precedence over the generic keyword patterns.
    for override_name, province_code in mapping.get("manual_overrides", {}).items():
        if override_name.lower() in riding_lower:
            return province_code

    for province_code, keywords in mapping.get("keyword_patterns", {}).items():
        if any(keyword.lower() in riding_lower for keyword in keywords):
            return province_code

    # No match found
    return None
def ingest_parties(neo4j_client: Neo4jClient) -> int:
    """
    Ingest political parties extracted from MP data.

    Note: OpenParliament doesn't have a parties endpoint, so we extract from MPs.

    Distinct non-null ``party`` values are read from existing ``MP`` nodes and
    turned into ``Party`` node properties (``code``, ``name``, ``short_name``,
    ``updated_at``), then merged on ``code``.

    Args:
        neo4j_client: Neo4j client instance

    Returns:
        Number of parties created/updated (0 when no MP has a party value)
    """
    # Local import: only this function needs ``timezone``.
    from datetime import timezone

    logger.info("Extracting parties from MPs...")

    # Query existing MPs to get unique parties
    result = neo4j_client.run_query(
        """
        MATCH (m:MP)
        WHERE m.party IS NOT NULL
        WITH DISTINCT m.party AS party_name
        RETURN party_name
        ORDER BY party_name
        """
    )
    parties = [record["party_name"] for record in result]
    logger.info(f"Found {len(parties)} unique parties")

    if not parties:
        logger.warning("No parties found in MP data - MPs may not have party property")
        return 0

    # Manually map party short names to codes and full names
    party_mapping = {
        "Conservative": {"code": "CPC", "name": "Conservative Party of Canada"},
        "Liberal": {"code": "LPC", "name": "Liberal Party of Canada"},
        "NDP": {"code": "NDP", "name": "New Democratic Party"},
        "Bloc Québécois": {"code": "BQ", "name": "Bloc Québécois"},
        "Green": {"code": "GPC", "name": "Green Party of Canada"},
        "Independent": {"code": "IND", "name": "Independent"},
        "People's Party": {"code": "PPC", "name": "People's Party of Canada"},
    }

    # One timestamp for the whole batch so every node from this run agrees.
    # datetime.utcnow() is deprecated; this is its documented equivalent and
    # keeps the stored value a naive-UTC ISO string, exactly as before.
    updated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    parties_data = []
    for party_short in parties:
        known = party_mapping.get(party_short)
        if known is not None:
            code, name = known["code"], known["name"]
        else:
            # Fallback for unmapped parties: derive a code from the short name
            code, name = party_short.upper().replace(" ", "_"), party_short
        parties_data.append({
            "code": code,
            "name": name,
            "short_name": party_short,
            "updated_at": updated_at,
        })

    # Create/update parties using MERGE (idempotent)
    created = neo4j_client.batch_merge_nodes("Party", parties_data, merge_keys=["code"])
    logger.success(f"✅ Created/updated {created} parties")
    return created
def ingest_ridings(neo4j_client: Neo4jClient) -> int:
    """
    Build ``Riding`` nodes from the riding names stored on ``MP`` nodes.

    Each distinct non-null ``riding`` value becomes one node whose ``id`` is a
    slug of the name and whose ``province`` comes from ``detect_province``
    (``None`` when no province could be detected). Nodes are merged on ``id``.

    Args:
        neo4j_client: Neo4j client instance

    Returns:
        Number of ridings created/updated (0 when no MP has a riding value)
    """
    logger.info("Extracting ridings from MPs...")

    rows = neo4j_client.run_query(
        """
        MATCH (m:MP)
        WHERE m.riding IS NOT NULL
        WITH DISTINCT m.riding AS riding_name
        RETURN riding_name
        ORDER BY riding_name
        """
    )
    riding_names = [row["riding_name"] for row in rows]
    logger.info(f"Found {len(riding_names)} unique ridings")

    if not riding_names:
        logger.warning("No ridings found in MP data - MPs may not have riding property")
        return 0

    # Slug rules, applied in order after lowercasing:
    # spaces -> hyphen, apostrophes dropped, em dash -> hyphen.
    slug_rules = ((" ", "-"), ("'", ""), ("—", "-"))

    riding_nodes = []
    for riding_name in riding_names:
        detected = detect_province(riding_name)

        slug = riding_name.lower()
        for old, new in slug_rules:
            slug = slug.replace(old, new)

        riding_nodes.append({
            "id": slug,
            "name": riding_name,
            "province": detected,
            # Every riding here was read off an MP node, so it has an MP.
            "is_vacant": False,
        })

    matched = sum(1 for node in riding_nodes if node["province"])
    logger.info(f"Detected provinces for {matched}/{len(riding_names)} ridings")

    merged = neo4j_client.batch_merge_nodes("Riding", riding_nodes, merge_keys=["id"])
    logger.success(f"✅ Created/updated {merged} ridings")
    return merged