OpenDiscourse MCP

commands.py•5.18 kB

"""Command utilities for the ingestion system.""" import asyncio import logging from pathlib import Path import aiohttp from .xml_validator import XMLValidator logger = logging.getLogger(__name__) SCHEMA_URLS = { "bill": "https://www.govinfo.gov/bulkdata/xml/BILLS.xsd", "billstatus": "https://www.govinfo.gov/bulkdata/xml/BILLSTATUS.xsd", "plaw": "https://www.govinfo.gov/bulkdata/xml/PLAW.xsd", "statute": "https://www.govinfo.gov/bulkdata/xml/STATUTE.xsd", "fr": "https://www.govinfo.gov/bulkdata/xml/FR.xsd", "crec": "https://www.govinfo.gov/bulkdata/xml/CREC.xsd", } async def download_schemas(schema_dir: str | None = None) -> None: """Download all required XSD schemas from govinfo.gov. This function downloads XML schema definitions for all supported document types (BILLS, BILLSTATUS, PLAW, etc.) and caches them locally for validation. Args: schema_dir: Directory to save schemas. If None, defaults to 'schemas' in the ingestion module directory. Raises: Exception: If schema download fails for multiple schemas. Example: >>> await download_schemas("/path/to/schemas") # Downloads all schemas to the specified directory """ validator = XMLValidator(schema_dir) async with aiohttp.ClientSession() as session: tasks = [] for name, url in SCHEMA_URLS.items(): tasks.append(validator.download_schema(session, url, name)) results = await asyncio.gather(*tasks, return_exceptions=True) success = sum(1 for r in results if r is True) logger.info(f"Downloaded {success}/{len(SCHEMA_URLS)} schemas successfully") # Log any failures for (name, url), result in zip(SCHEMA_URLS.items(), results, strict=False): if result is not True: logger.error(f"Failed to download schema {name} from {url}: {result}") async def validate_xml_files( xml_dir: str, schema_name: str, schema_dir: str | None = None ) -> dict[str, int]: """Validate all XML files in a directory against a specific schema. This function scans a directory for XML files and validates each one against the specified XSD schema. 
It returns statistics about the validation process including counts of valid, invalid, and error files. Args: xml_dir: Directory containing XML files to validate. schema_name: Name of the schema to validate against (e.g., 'bill'). schema_dir: Directory containing schemas. If None, defaults to 'schemas' in the ingestion module directory. Returns: Dictionary with validation statistics: - 'total': Total number of XML files found - 'valid': Number of files that passed validation - 'invalid': Number of files that failed validation - 'errors': Number of files that had processing errors Raises: ValueError: If xml_dir does not exist. Example: >>> stats = await validate_xml_files("/data/xmls", "bill") >>> print(f"Validated {stats['total']} files, {stats['valid']} valid") """ validator = XMLValidator(schema_dir) xml_path = Path(xml_dir) if not xml_path.exists(): logger.error(f"XML directory does not exist: {xml_dir}") return {"total": 0, "valid": 0, "invalid": 0, "errors": 0} xml_files = list(xml_path.glob("*.xml")) if not xml_files: logger.warning(f"No XML files found in {xml_dir}") return {"total": 0, "valid": 0, "invalid": 0, "errors": 0} stats = {"total": len(xml_files), "valid": 0, "invalid": 0, "errors": 0} logger.info(f"Validating {len(xml_files)} XML files against schema: {schema_name}") for xml_file in xml_files: try: content = xml_file.read_text(encoding="utf-8") is_valid, errors = validator.validate_xml(content, schema_name) if is_valid: stats["valid"] += 1 else: stats["invalid"] += 1 logger.error(f"Invalid XML: {xml_file.name} - {errors[:1]}") except Exception as e: stats["errors"] += 1 logger.error(f"Error validating {xml_file.name}: {str(e)}") logger.info( f"Validation complete: {stats['valid']} valid, {stats['invalid']} invalid, {stats['errors']} errors" ) return stats def list_available_schemas(schema_dir: str | None = None) -> list[str]: """List all available XSD schemas in the schema directory. 
This function scans the schema directory and returns a list of available schema names (without the .xsd extension). Useful for checking which schemas are downloaded and ready for validation. Args: schema_dir: Directory containing schemas. If None, defaults to 'schemas' in the ingestion module directory. Returns: List of schema names available for validation. Example: >>> schemas = list_available_schemas() >>> print(f"Available schemas: {schemas}") ['bill', 'billstatus', 'plaw'] """ validator = XMLValidator(schema_dir) return list(validator.schemas.keys())

Latest Blog Posts

Model Context Protocol Proxies: Enabling Enterprise Control with Virtual MCPs
By Om-Shree-0709 on December 9, 2025.
AI Security
Virtual MCP
Kubernetes Operator
The State of MCP in 2025: Who's Building What and Why It Matters
By punkpeye on December 7, 2025.
mcp
startups
MCP hosting with persistent storage
By punkpeye on December 6, 2025.
changelog

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/cbwinslow/opendiscourse_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.