OpenDiscourse MCP

config.py•2.81 KiB

"""Configuration for govinfo.gov bulk data ingestion.""" import logging from pathlib import Path import os # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Base URL for govinfo bulk data BASE_URL = "https://www.govinfo.gov/bulkdata" # Congress sessions to process (113th to 119th) CONGRESS_SESSIONS = list(range(113, 120)) # Document types to download DOCUMENT_TYPES = [ "BILLS", "BILLSTATUS", "PLAW", # Public and Private Laws "STATUTE", "FR", # Federal Register "CREC", # Congressional Record ] # Output directory (can be overridden by environment variable) OUTPUT_DIR = Path(os.getenv("GOVINFO_DATA_DIR", "govinfo_data")) # Performance settings WORKERS = int(os.getenv("GOVINFO_WORKERS", "10")) # Number of parallel downloads RATE_LIMIT = int(os.getenv("GOVINFO_RATE_LIMIT", "10")) # Requests per second CHUNK_SIZE = 1024 * 1024 # 1MB chunks for streaming downloads # Timeout for HTTP requests (in seconds) REQUEST_TIMEOUT = 30 # Retry settings MAX_RETRIES = 3 RETRY_DELAY = 5 # seconds # Validation settings VALIDATE_XML = os.getenv("GOVINFO_VALIDATE_XML", "true").lower() == "true" # Logging configuration LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") LOG_FILE = "ingestion.log" # File patterns for different document types FILE_PATTERNS = { "BILLS": "BILLS-{congress}{bill_type}{bill_number}{version}.xml", "BILLSTATUS": "BILLSTATUS-{congress}{bill_type}{bill_number}.xml", "PLAW": "PLAW-{congress}publ{pub_number}.xml", "STATUTE": "STATUTE-{volume}pg{page}.xml", "FR": "FR-{year}-{month}-{day}.xml", "CREC": "CREC-{year}-{month}-{day}.xml", } # XSD schemas for validation (relative to project root) SCHEMA_PATHS = { "BILLS": "schemas/bill.xsd", "BILLSTATUS": "schemas/billstatus.xsd", "PLAW": "schemas/plaw.xsd", "STATUTE": "schemas/statute.xsd", "FR": "schemas/fr.xsd", "CREC": "schemas/crec.xsd", } def get_document_path(congress: int, doc_type: str, **kwargs) -> Path: """ Get the output path for a document. Args: congress: The congress number (e.g., 115) doc_type: Document type (e.g., 'BILLS', 'PLAW') **kwargs: Additional format arguments for the filename pattern Returns: Path where the document should be saved """ doc_dir = OUTPUT_DIR / str(congress) / doc_type doc_dir.mkdir(parents=True, exist_ok=True) if doc_type in FILE_PATTERNS: try: filename = FILE_PATTERNS[doc_type].format(congress=congress, **kwargs) return doc_dir / filename except (KeyError, IndexError) as e: logger.warning(f"Error formatting filename for {doc_type}: {e}") # Fallback naming if pattern formatting fails or no pattern exists return doc_dir / f"{doc_type}_{kwargs.get('id', 'unknown')}.xml"

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cbwinslow/opendiscourse_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

config.py•2.81 KiB