Skip to main content
Glama
enums.py3.14 kB
"""Enums and constants for govinfo.gov data ingestion.""" from enum import Enum from pathlib import Path class DocumentType(Enum): """Document types available from govinfo.gov bulk data.""" BILLS = "BILLS" BILLSTATUS = "BILLSTATUS" PLAW = "PLAW" STATUTE = "STATUTE" FR = "FR" CREC = "CREC" @classmethod def all_types(cls) -> list[str]: """Get all document types as strings.""" return [doc_type.value for doc_type in cls] @classmethod def get_type(cls, value: str) -> "DocumentType": """Get DocumentType enum from string value.""" for doc_type in cls: if doc_type.value == value: return doc_type raise ValueError(f"Unknown document type: {value}") class CongressSession: """Congress session information and utilities.""" # Current available congress sessions (113th to 123rd) AVAILABLE_SESSIONS = list(range(113, 124)) # Historical congress sessions for reference HISTORICAL_SESSIONS = list(range(93, 124)) # 93rd to 123rd Congress @classmethod def get_available_sessions(cls) -> list[int]: """Get list of currently available congress sessions.""" return cls.AVAILABLE_SESSIONS.copy() @classmethod def get_historical_sessions(cls) -> list[int]: """Get list of all historical congress sessions.""" return cls.HISTORICAL_SESSIONS.copy() @classmethod def is_valid_session(cls, session: int) -> bool: """Check if a congress session is valid.""" return session in cls.AVAILABLE_SESSIONS @classmethod def get_session_name(cls, session: int) -> str: """Get formatted congress session name.""" return f"{session}th Congress" @classmethod def get_year_range(cls, session: int) -> tuple[int, int]: """Get approximate year range for a congress session.""" # Congress sessions typically span 2 years # 113th Congress: 2013-2014, 114th: 2015-2016, etc. 
base_year = 2011 + (session - 112) * 2 return (base_year, base_year + 1) class IngestionConfig: """Configuration constants for data ingestion.""" # Worker configuration DEFAULT_WORKERS = 50 MAX_WORKERS = 100 MIN_WORKERS = 1 # Rate limiting DEFAULT_RATE_LIMIT = 10 # requests per second MAX_RATE_LIMIT = 50 # Timeouts and retries DEFAULT_TIMEOUT = 30 # seconds MAX_RETRIES = 3 RETRY_DELAY = 5 # seconds # File handling CHUNK_SIZE = 1024 * 1024 # 1MB chunks # Validation VALIDATE_XML_BY_DEFAULT = True # Logging DEFAULT_LOG_LEVEL = "INFO" LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] # Commonly used combinations DEFAULT_CONGRESSES = CongressSession.get_available_sessions() DEFAULT_DOCUMENT_TYPES = DocumentType.all_types() # Test combinations (smaller subset for testing) TEST_CONGRESSES = [118, 119] TEST_DOCUMENT_TYPES = ["STATUTE", "BILLS"] # Production combinations (full dataset) PRODUCTION_CONGRESSES = CongressSession.get_available_sessions() PRODUCTION_DOCUMENT_TYPES = DocumentType.all_types()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cbwinslow/opendiscourse_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.