Document Parser MCP

batch_processing.py•2.22 KiB

#!/usr/bin/env python3 """ Example: Batch document processing. This demonstrates how to process multiple documents concurrently. """ import asyncio import logging from pathlib import Path from document_parser.config.settings import get_settings from document_parser.engine.processor import DocumentProcessor async def process_single_document( processor: DocumentProcessor, file_path: str, output_dir: Path, ) -> None: """ Process a single document and save the result. Args: processor: Document processor instance file_path: Path to input file output_dir: Directory for output files """ logger = logging.getLogger(__name__) logger.info(f"Processing: {file_path}") try: # Process document result = await processor.process_document(source=file_path) # Save result output_file = output_dir / f"{Path(file_path).stem}.md" output_file.write_text(result, encoding="utf-8") logger.info(f"Saved to: {output_file}") except Exception as e: logger.error(f"Error processing {file_path}: {e}") async def main(): """Main batch processing function.""" # Setup logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Load configuration settings = get_settings() # Create processor processor = DocumentProcessor(settings) # Setup directories input_dir = Path("input_documents") output_dir = Path("output_markdown") output_dir.mkdir(exist_ok=True) # Find all supported documents patterns = ["*.pdf", "*.docx", "*.pptx"] documents = [] for pattern in patterns: documents.extend(input_dir.glob(pattern)) logger.info(f"Found {len(documents)} documents to process") # Process documents concurrently (limited by config) max_concurrent = settings.server.max_concurrent_jobs for i in range(0, len(documents), max_concurrent): batch = documents[i : i + max_concurrent] tasks = [ process_single_document(processor, str(doc), output_dir) for doc in batch ] await asyncio.gather(*tasks) logger.info("Batch processing complete!") if __name__ == "__main__": asyncio.run(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kgand/document-parser-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

batch_processing.py•2.22 KiB