FedMCP - Federal Parliamentary Information

FedMCP
scripts

run-news-ingestion.py•3.6 KiB

#!/usr/bin/env python3
"""
Entry point script for the news ingestion Cloud Run job.

Fetches Canadian political news from RSS feeds, extracts entity mentions
(MPs, bills, committees), and creates activity feed items for bookmarked
entities.
"""

import os
import sys
import argparse
import logging

# The pipeline packages live outside the default import path, so they are
# prepended to sys.path before the project import below.
sys.path.insert(0, '/app/packages/data-pipeline')
sys.path.insert(0, '/app/packages/fedmcp/src')

from fedmcp_pipeline.ingest.news import NewsIngestionPipeline

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def _log_banner(title):
    """Log *title* framed above and below by a 60-character rule."""
    logger.info("=" * 60)
    logger.info(title)
    logger.info("=" * 60)


def main():
    """Run the news ingestion pipeline from the command line.

    Command-line options:
        --source   Restrict the run to one news source ID.
        --limit    Maximum number of articles per source (default 50).
        --dry-run  Parse and extract entities without storing anything.

    Connection settings are read from the environment: NEO4J_URI,
    NEO4J_USERNAME, NEO4J_PASSWORD, SUPABASE_URL and
    SUPABASE_SERVICE_ROLE_KEY.

    The process exits with status 1 when NEO4J_PASSWORD is missing, when the
    returned stats report one or more errors, or when any exception is raised
    while building or running the pipeline.
    """
    cli = argparse.ArgumentParser(description='Ingest Canadian political news from RSS feeds')
    cli.add_argument(
        '--source',
        type=str,
        help='Specific news source ID to fetch (e.g., cbc, globe). If not specified, fetches all active sources.'
    )
    cli.add_argument(
        '--limit',
        type=int,
        default=50,
        help='Maximum articles to process per source (default: 50)'
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Parse and extract entities without storing to database'
    )
    args = cli.parse_args()

    # Connection settings come from the environment; only the Neo4j URI and
    # username have fallbacks.
    neo4j_uri = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
    graph_user = os.getenv('NEO4J_USERNAME', 'neo4j')
    graph_password = os.getenv('NEO4J_PASSWORD')
    feed_url = os.getenv('SUPABASE_URL')
    feed_key = os.getenv('SUPABASE_SERVICE_ROLE_KEY')

    # The Neo4j password is the only hard requirement.
    if not graph_password:
        logger.error("NEO4J_PASSWORD environment variable is required")
        sys.exit(1)

    # Missing Supabase credentials are tolerated; the run continues with a warning.
    if not (feed_url and feed_key):
        logger.warning("Supabase credentials not set - activity items will not be created")

    _log_banner("NEWS INGESTION PIPELINE")
    logger.info(f"Neo4j URI: {neo4j_uri}")
    logger.info(f"Source filter: {args.source or 'all active sources'}")
    logger.info(f"Article limit per source: {args.limit}")
    logger.info(f"Dry run: {args.dry_run}")
    logger.info("=" * 60)

    try:
        # Initialize pipeline
        pipeline = NewsIngestionPipeline(
            neo4j_uri=neo4j_uri,
            neo4j_username=graph_user,
            neo4j_password=graph_password,
            supabase_url=feed_url,
            supabase_key=feed_key
        )

        # Run ingestion
        stats = pipeline.run(
            source_id=args.source,
            limit_per_source=args.limit,
            dry_run=args.dry_run
        )

        # Log results
        _log_banner("INGESTION COMPLETE")
        logger.info(f"Sources processed: {stats.get('sources_processed', 0)}")
        logger.info(f"Articles fetched: {stats.get('articles_fetched', 0)}")
        logger.info(f"New articles: {stats.get('new_articles', 0)}")
        logger.info(f"Entities extracted: {stats.get('entities_extracted', 0)}")
        logger.info(f"Activity items created: {stats.get('activity_items_created', 0)}")
        logger.info(f"Errors: {stats.get('errors', 0)}")
        logger.info("=" * 60)

        # Any reported error makes the run exit non-zero. SystemExit is not an
        # Exception subclass, so it is not caught by the handler below.
        had_errors = stats.get('errors', 0) > 0
        if had_errors:
            logger.warning(f"Completed with {stats['errors']} errors")
            sys.exit(1)

        logger.info("News ingestion completed successfully")

    except Exception as e:
        logger.exception(f"Fatal error during news ingestion: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/northernvariables/FedMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

run-news-ingestion.py•3.6 KiB