GovInfo MCP Server

packages.py•13.1 kB

"""Packages tools for GovInfo MCP server.""" import base64 from datetime import UTC, datetime, timedelta import os from typing import Annotated from dotenv import load_dotenv from fastmcp import Context, FastMCP import httpx from loguru import logger from pydantic import Field # Load environment variables load_dotenv() # Get API key from environment API_KEY = os.getenv("GOVINFO_API_KEY") # Create the packages server packages: FastMCP = FastMCP( "PackagesServer", instructions="""The PackagesServer provides access to government document packages through the GovInfo API. Key capabilities: - Search and list packages from specific collections (BILLS, PLAW, CFR, etc.) - Retrieve package summaries with metadata - Download package content in multiple formats (HTML, XML, PDF, text) Common collections: - BILLS: Congressional bills - PLAW: Public laws - CFR: Code of Federal Regulations - FR: Federal Register - STATUTE: Statutes at Large - USCOURTS: Court opinions - CZIC: Coastal Zone Information Center When searching packages: - Use appropriate date ranges to limit results - Congress numbers are only applicable to certain collections (BILLS, PLAW) - Page size can be adjusted (1-100) for pagination - Use offset_mark from response for pagination Content formats and availability: - HTML: Best for reading and displaying (most widely available) - XML: Best for parsing and data extraction (most widely available) - PDF: Original formatted document (limited availability) - Text: Plain text version (limited availability) IMPORTANT for get_package_content: - Not all content types are available for every package - The API returns 400 Bad Request if format is not available - Always use get_package_summary() first to check available formats in 'download' field - Default to 'html' or 'xml' formats for maximum compatibility - Handle HTTPStatusError gracefully when requesting unavailable formats Best practice workflow: 1. Use get_package_summary() to check available formats 2. Choose format based on availability and needs 3. Use get_package_content() with error handling Note: PDF content is returned as base64 encoded data.""", ) @packages.tool() async def get_packages_by_collection( collection: Annotated[ str, Field(description="Collection code (e.g., 'BILLS', 'PLAW', 'CFR')") ], congress: Annotated[ int | None, Field(description="Congress number for filtering", ge=1, le=200) ] = None, doc_class: Annotated[str, Field(description="Document class for filtering")] = "", start_date: Annotated[ str, Field(description="Start date for filtering (YYYY-MM-DD)") ] = "", end_date: Annotated[ str, Field(description="End date for filtering (YYYY-MM-DD)") ] = "", page_size: Annotated[ int, Field(description="Number of results per page", ge=1, le=100) ] = 50, offset_mark: Annotated[ str, Field(description="Pagination offset mark for next page") ] = "*", ctx: Context | None = None, ) -> dict: """Get a list of packages from a specific collection. Returns package summaries including package IDs, titles, and metadata for packages within the specified collection. Returns: dict: A dictionary containing the packages list and pagination information. Raises: ValueError: If the GOVINFO_API_KEY environment variable is not set. """ if ctx: await ctx.info(f"Fetching packages from collection: {collection}") else: logger.info(f"Fetching packages from collection: {collection}") if not API_KEY: error_msg = "GOVINFO_API_KEY not found in environment variables" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise ValueError(error_msg) # Use a default lastModifiedStartDate if not provided (1 year ago) if not start_date: default_date = datetime.now(UTC) - timedelta(days=365) last_modified_start = default_date.strftime("%Y-%m-%dT%H:%M:%SZ") else: # Convert YYYY-MM-DD to ISO 8601 format last_modified_start = f"{start_date}T00:00:00Z" params = { "pageSize": page_size, "offsetMark": offset_mark, } # Add optional filters if congress is not None: params["congress"] = congress if doc_class: params["docClass"] = doc_class headers = {"X-Api-Key": API_KEY} try: async with httpx.AsyncClient() as client: if end_date: # Use date range endpoint last_modified_end = f"{end_date}T23:59:59Z" url = f"https://api.govinfo.gov/collections/{collection}/{last_modified_start}/{last_modified_end}" else: # Use single date endpoint url = f"https://api.govinfo.gov/collections/{collection}/{last_modified_start}" response = await client.get( url, params=params, headers=headers, timeout=30.0, ) response.raise_for_status() data = response.json() if ctx: await ctx.info(f"Found {len(data.get('packages', []))} packages") else: logger.info(f"Found {len(data.get('packages', []))} packages") return data except httpx.HTTPStatusError as e: error_msg = f"HTTP error: {e}" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise e except Exception as e: error_msg = f"Packages fetch error: {e}" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise e @packages.tool() async def get_package_summary( package_id: Annotated[ str, Field(description="Package ID (e.g., 'BILLS-116hr1-ih')") ], ctx: Context | None = None, ) -> dict: """Get summary information for a specific package. Returns metadata about a package including its title, dates, collection, and available download formats. Returns: dict: A dictionary containing package metadata such as title, dates, collection information, and available download formats. Raises: ValueError: If the GOVINFO_API_KEY environment variable is not set. """ if ctx: await ctx.info(f"Fetching summary for package: {package_id}") else: logger.info(f"Fetching summary for package: {package_id}") if not API_KEY: error_msg = "GOVINFO_API_KEY not found in environment variables" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise ValueError(error_msg) headers = {"X-Api-Key": API_KEY} try: async with httpx.AsyncClient() as client: response = await client.get( f"https://api.govinfo.gov/packages/{package_id}/summary", headers=headers, timeout=30.0, ) response.raise_for_status() data = response.json() if ctx: await ctx.info(f"Retrieved summary for package: {package_id}") else: logger.info(f"Retrieved summary for package: {package_id}") return data except httpx.HTTPStatusError as e: error_msg = f"HTTP error: {e}" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise e except Exception as e: error_msg = f"Package summary fetch error: {e}" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise e @packages.tool() async def get_package_content( package_id: Annotated[ str, Field(description="Package ID (e.g., 'BILLS-116hr1-ih')") ], content_type: Annotated[ str, Field(description="Content type: 'html', 'xml', 'pdf', or 'text'") ] = "html", ctx: Context | None = None, ) -> dict[str, str] | str: """Get the content of a specific package in the requested format. Returns the actual document content in the specified format (HTML, XML, PDF, or text). IMPORTANT: Not all content types are available for every package. The GovInfo API returns a 400 Bad Request error if the requested format is not available for a specific package. To avoid errors: 1. First call get_package_summary() to check available formats in the 'download' field 2. Start with 'html' or 'xml' as these are most commonly available 3. Handle HTTPStatusError exceptions gracefully for unsupported formats Content Type Availability by Collection: - BILLS: html, xml commonly available; text, pdf may be limited - PLAW: html, xml commonly available; text, pdf availability varies - CFR: html, xml typically available; text format often not available - FR: html, xml commonly available; pdf often available Best Practices: - Use get_package_summary() first to check the 'download' field for available formats - Always handle HTTPStatusError (400) which indicates format not available - Default to 'html' format for maximum compatibility - Use 'xml' for structured data parsing - Only request 'pdf' or 'text' after verifying availability Args: package_id: Package ID (e.g., 'BILLS-116hr1-ih') content_type: Content format - 'html' (default), 'xml', 'pdf', or 'text' ctx: Optional context for logging Returns: For text-based formats (HTML, XML, text): The raw content as a string. For binary formats (PDF): A dictionary with 'content_type' and 'base64_content' keys. Raises: ValueError: If GOVINFO_API_KEY is not found in environment variables. Example: # Safe approach - check available formats first summary = await get_package_summary("BILLS-116hr1-ih") available_formats = [dl.get('type') for dl in summary.get('download', [])] if 'pdf' in available_formats: content = await get_package_content("BILLS-116hr1-ih", "pdf") else: content = await get_package_content("BILLS-116hr1-ih", "html") """ if ctx: await ctx.info(f"Fetching {content_type} content for package: {package_id}") else: logger.info(f"Fetching {content_type} content for package: {package_id}") if not API_KEY: error_msg = "GOVINFO_API_KEY not found in environment variables" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise ValueError(error_msg) headers = {"X-Api-Key": API_KEY} # Map content type to appropriate endpoint content_endpoints = {"html": "htm", "xml": "xml", "pdf": "pdf", "text": "txt"} endpoint = content_endpoints.get(content_type.lower(), "htm") try: async with httpx.AsyncClient() as client: response = await client.get( f"https://api.govinfo.gov/packages/{package_id}/{endpoint}", headers=headers, timeout=60.0, # Longer timeout for content downloads ) response.raise_for_status() # Return raw content for text-based formats if content_type.lower() in {"html", "xml", "text"}: content = response.text if ctx: await ctx.info( f"Retrieved {content_type} content for package: {package_id}" ) else: logger.info( f"Retrieved {content_type} content for package: {package_id}" ) return content # For binary formats like PDF, return as base64 encoded content = base64.b64encode(response.content).decode("utf-8") if ctx: await ctx.info( f"Retrieved {content_type} content for package: {package_id}" ) else: logger.info( f"Retrieved {content_type} content for package: {package_id}" ) return {"content_type": content_type, "base64_content": content} except httpx.HTTPStatusError as e: if e.response.status_code == 400: error_msg = ( f"Content type '{content_type}' is not available for package '{package_id}'. " f"Use get_package_summary() to check available formats in the 'download' field. " f"HTTP error: {e}" ) else: error_msg = f"HTTP error: {e}" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise e except Exception as e: error_msg = f"Package content fetch error: {e}" if ctx: await ctx.error(error_msg) else: logger.error(error_msg) raise e

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Travis-Prall/govinfo-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server