#!/usr/bin/env python3
"""
ATIP Request Summaries Ingestion Cloud Run Job Entry Point.
This script imports Access to Information request summaries from Open Canada
into Neo4j for querying via GraphQL and MCP tools.
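Environment variables:
- NEO4J_URI: Neo4j connection URI (optional; defaults to bolt://localhost:7687, e.g., bolt://10.128.0.3:7687)
- NEO4J_USERNAME: Neo4j username (optional; defaults to neo4j)
- NEO4J_PASSWORD: Neo4j password (required; the job exits with status 1 if it is unset or empty)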
Usage:
python run_atip_ingestion.py
"""
import os
import sys
# Add the package to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from fedmcp_pipeline.utils.neo4j_client import Neo4jClient
from fedmcp_pipeline.utils.progress import logger
from fedmcp_pipeline.ingest.atip import ingest_atip_data
def main() -> None:
    """Main entry point for ATIP ingestion Cloud Run job.

    Reads the Neo4j connection settings from the environment
    (``NEO4J_URI``, ``NEO4J_USERNAME``, ``NEO4J_PASSWORD``), creates a
    ``Neo4jClient`` and runs ``ingest_atip_data`` with a batch size of 5000,
    then logs the counts reported in the returned stats mapping.

    Exits the process with status 1 when ``NEO4J_PASSWORD`` is unset or
    empty, or when creating the client or running the ingestion raises.
    The client is closed on every path on which it was created.
    """
    banner = "=" * 80

    logger.info(banner)
    logger.info("ATIP REQUEST SUMMARIES INGESTION CLOUD RUN JOB")
    logger.info(banner)
    logger.info("")
    logger.info("This job will:")
    logger.info(" - Download ATIP request summaries from Open Canada (~50MB)")
    logger.info(" - Import ~227K request summaries into Neo4j")
    logger.info(" - Create Department relationships")
    logger.info(" - Full refresh of all ATIP data")
    logger.info("")

    # Get Neo4j connection details from environment. Only the password has
    # no usable default, so it is the only value that is validated.
    neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    neo4j_user = os.environ.get("NEO4J_USERNAME", "neo4j")
    neo4j_password = os.environ.get("NEO4J_PASSWORD", "")

    if not neo4j_password:
        logger.error("NEO4J_PASSWORD environment variable is required")
        sys.exit(1)

    logger.info(f"Connecting to Neo4j at {neo4j_uri}...")

    # Bind the name before the try block so the cleanup in `finally` can test
    # it directly instead of probing locals() for a possibly-unbound name.
    neo4j_client = None
    try:
        neo4j_client = Neo4jClient(
            uri=neo4j_uri,
            user=neo4j_user,
            password=neo4j_password,
        )

        # Run ingestion
        stats = ingest_atip_data(neo4j_client, batch_size=5000)

        logger.info("")
        logger.info(banner)
        logger.info("ATIP REQUEST SUMMARIES INGESTION CLOUD RUN JOB - COMPLETED")
        logger.info(banner)
        logger.success(f"✅ Successfully imported {stats.get('atip_requests', 0):,} ATIP requests")
        logger.success(f"✅ Merged {stats.get('departments_merged', 0):,} department nodes")
    except Exception as e:
        # Top-level boundary of the job: report the failure, dump the
        # traceback to stderr, and signal failure through the exit status.
        logger.error(f"ATIP ingestion failed: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Runs on success and on the sys.exit(1) path above; the client is
        # still None if its constructor raised, so there is nothing to close.
        if neo4j_client is not None:
            neo4j_client.close()
# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()