#!/bin/bash
# Deploy the News Ingestion Cloud Run job.
#
# The job runs every 30 minutes to fetch Canadian political news from RSS feeds,
# extract entity mentions (MPs, bills, committees), and create activity feed items.
#
# Steps performed by this script:
#   1. Build and push the job's Docker image.
#   2. Create or update the Cloud Run job.
#   3. Create or update the Cloud Scheduler trigger that starts the job.
#
# Requires `docker` and `gcloud` on PATH, and must be run from the directory that
# contains Dockerfile.news-ingestion (the build uses the current directory as context).
# NOTE(review): both tools are presumably already authenticated against the
# project; this script does not log in — confirm before running in CI.

# Strict mode:
#   -e           abort on the first failing command
#   -u           treat an unset variable as an error instead of expanding it to ""
#   -o pipefail  a pipeline fails when any stage fails, not just the last one
set -euo pipefail

# Deployment constants. REGION is used both as the Cloud Run region and as the
# Cloud Scheduler location.
readonly PROJECT_ID="canada-gpt-ca"
readonly REGION="us-central1"
readonly JOB_NAME="news-ingestion"
# Image repository path without a tag; the tag (":latest") is appended at each use.
readonly IMAGE_NAME="gcr.io/${PROJECT_ID}/${JOB_NAME}"
echo "================================================================================"
echo "DEPLOYING NEWS INGESTION JOB"
echo "================================================================================"

# Build the job image from Dockerfile.news-ingestion. Both the Dockerfile path and
# the build context (".") are relative to the current working directory.
# --platform pins the image to linux/amd64 regardless of the build host's architecture.
echo "Building Docker image..."
docker build \
  --platform linux/amd64 \
  -f Dockerfile.news-ingestion \
  -t "${IMAGE_NAME}:latest" \
  .

# Publish the freshly built tag. The image reference is quoted so it always reaches
# docker as a single argument (no word splitting or glob expansion).
echo "Pushing image to Container Registry..."
docker push "${IMAGE_NAME}:latest"
# Deploy the Cloud Run job: update it in place when it already exists, otherwise
# create it. Both paths share this single flag list so the "create" and "update"
# configurations cannot drift apart.
job_flags=(
  "--region=${REGION}"
  "--image=${IMAGE_NAME}:latest"
  # Plain (non-secret) environment for the job. Neo4j is addressed by a private IP,
  # which is why the VPC connector flags below are needed.
  "--set-env-vars=NEO4J_URI=bolt://10.128.0.3:7687,NEO4J_USERNAME=neo4j"
  # Each entry is ENV_VAR=secret-name:version; ":latest" tracks the newest version.
  "--set-secrets=NEO4J_PASSWORD=neo4j-password:latest,SUPABASE_URL=supabase-url:latest,SUPABASE_SERVICE_ROLE_KEY=supabase-service-role-key:latest"
  "--max-retries=2"
  "--task-timeout=15m"
  "--memory=1Gi"
  "--cpu=1"
  "--vpc-connector=canadagpt-connector"
  # Only traffic to private address ranges goes through the connector.
  "--vpc-egress=private-ranges-only"
  "--project=${PROJECT_ID}"
)

# Only the exit status of `describe` matters, so its output is discarded.
# NOTE: any describe failure (missing job, but also auth or network problems) selects
# the create path; in the latter cases `create` then fails and reports the real error.
if gcloud run jobs describe "${JOB_NAME}" --region="${REGION}" --project="${PROJECT_ID}" &>/dev/null; then
  echo "Updating existing Cloud Run job..."
  job_action="update"
else
  echo "Creating new Cloud Run job..."
  job_action="create"
fi

gcloud run jobs "${job_action}" "${JOB_NAME}" "${job_flags[@]}"
echo "Cloud Run job deployed successfully"
# Create or update the Cloud Scheduler job that starts the Cloud Run job every
# 30 minutes.
SCHEDULER_JOB_NAME="news-ingestion-trigger"
echo "Setting up Cloud Scheduler..."

# One flag list for both the create and the update path, so the two cannot diverge.
scheduler_flags=(
  "--location=${REGION}"
  # Cron expression: minutes 0 and 30 of every hour, in the time zone given below.
  "--schedule=*/30 * * * *"
  "--time-zone=America/Toronto"
  # The scheduler POSTs to the Cloud Run jobs ":run" endpoint for this job.
  "--uri=https://${REGION}-run.googleapis.com/apis/run.googleapis.com/v1/namespaces/${PROJECT_ID}/jobs/${JOB_NAME}:run"
  "--http-method=POST"
  # The request carries an OAuth token for this service account.
  # NOTE(review): the account must exist and be permitted to run the job — confirm IAM.
  "--oauth-service-account-email=${PROJECT_ID}@appspot.gserviceaccount.com"
  "--project=${PROJECT_ID}"
)

# Only the exit status of `describe` matters, so its output is discarded; any
# failure (including auth or network errors) selects the create path.
if gcloud scheduler jobs describe "${SCHEDULER_JOB_NAME}" --location="${REGION}" --project="${PROJECT_ID}" &>/dev/null; then
  echo "Updating existing scheduler job..."
  scheduler_action="update"
else
  echo "Creating new scheduler job..."
  scheduler_action="create"
fi

gcloud scheduler jobs "${scheduler_action}" http "${SCHEDULER_JOB_NAME}" "${scheduler_flags[@]}"
echo "Cloud Scheduler configured to run every 30 minutes"
# Final report: what was deployed, which feeds the job covers, and copy-paste
# commands for running the job by hand and reading its logs. Emitted as a single
# here-document; variables are expanded, double quotes are printed literally.
cat <<EOF
================================================================================
DEPLOYMENT COMPLETE
================================================================================

Job Name: ${JOB_NAME}
Image: ${IMAGE_NAME}:latest
Schedule: Every 30 minutes

News Sources:
 - CBC Politics
 - Globe & Mail
 - National Post
 - CTV News
 - iPolitics
 - LEGISinfo Bill Events

To manually trigger the job:
 gcloud run jobs execute ${JOB_NAME} --region=${REGION}

To view logs:
 gcloud logging read "resource.type=cloud_run_job AND resource.labels.job_name=${JOB_NAME}" --limit=50 --format=json

EOF