# server.py (5.22 kB)
"""
MCP Invoice Server implementation.
Provides tools for OCR processing and PDF merging.
"""
import os
import asyncio
from typing import List, Tuple, Dict, Any
import logging
from mcp.server.fastmcp import FastMCP
from .ocr import VisionOCR
from .pdf import PDFProcessor
class InvoiceServer:
    """MCP server that provides invoice and receipt processing capabilities.

    Three MCP tools are registered (``process_file``, ``process_directory``
    and ``merge_pdfs``). Each one delegates to a coroutine method of the same
    name on this class, which runs the blocking OCR / PDF engine call in the
    event loop's default executor.
    """

    def __init__(self, debug_mode: bool = False):
        """Initialize the Invoice server.

        Args:
            debug_mode: If True, include position information in OCR results.
                The flag is forwarded to the OCR engine as ``detail``.
        """
        self.debug_mode = debug_mode
        self.mcp = FastMCP("invoice-server")

        # Initialize processing engines
        self.ocr = VisionOCR()
        self.pdf_processor = PDFProcessor()

        # Configure logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        self._register_tools()

    def _register_tools(self):
        """Register all tools with the MCP server.

        Each inner function is a thin adapter: it awaits the matching method
        on this instance and reshapes the tuple result into a dictionary.
        """
        # NOTE: the docstrings of the inner functions are kept verbatim; the
        # MCP SDK may publish them as the tool descriptions shown to clients.

        # OCR tools
        @self.mcp.tool()
        async def process_file(file_path: str) -> Dict[str, Any]:
            """Extract text from a PDF or image file (JPEG, PNG) using OCR.
            Args:
            file_path: Absolute path to the file to process
            Returns:
            Dictionary with file path and extracted text
            """
            result = await self.process_file(file_path)
            return {"file_path": result[0], "text": result[1]}

        @self.mcp.tool()
        async def process_directory(directory_path: str) -> List[Dict[str, Any]]:
            """Extract text from all PDF and image files (JPEG, PNG) in a directory using OCR.
            Args:
            directory_path: Absolute path to the directory containing files to process
            Returns:
            List of dictionaries, each with file path and extracted text
            """
            results = await self.process_directory(directory_path)
            return [{"file_path": path, "text": text} for path, text in results]

        # PDF merging tool
        @self.mcp.tool()
        async def merge_pdfs(file_paths: List[str], output_path: str) -> Dict[str, Any]:
            """Merge multiple files (PDFs and images) into a single PDF file.
            Args:
            file_paths: List of absolute paths to files to merge
            output_path: Absolute path where the merged PDF should be saved
            Returns:
            Dictionary with the output file path
            """
            result = await self.merge_pdfs(file_paths, output_path)
            return {"output_file": result}

    async def _run_blocking(self, func: Any, *args: Any, **kwargs: Any) -> Any:
        """Run a blocking callable in the default executor and await its result.

        Args:
            func: Synchronous callable to execute.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns.
        """
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is discouraged there.
        loop = asyncio.get_running_loop()
        # run_in_executor() only forwards positional arguments, so the call
        # is wrapped in a closure to carry the keyword arguments along.
        return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

    async def process_file(self, file_path: str) -> Tuple[str, Any]:
        """Process a single file with OCR.

        Args:
            file_path: Absolute path to the file to process.

        Returns:
            Tuple containing file path and extracted text.
        """
        # Run OCR in a thread pool to not block the event loop
        return await self._run_blocking(
            self.ocr.process, file_path, detail=self.debug_mode
        )

    async def process_directory(self, directory_path: str) -> List[Tuple[str, Any]]:
        """Process all files in a directory with OCR.

        Args:
            directory_path: Absolute path to the directory containing files
                to process.

        Returns:
            List of tuples, each containing file path and extracted text.
        """
        # Run OCR in a thread pool to not block the event loop
        return await self._run_blocking(
            self.ocr.process_directory, directory_path, detail=self.debug_mode
        )

    async def merge_pdfs(self, file_paths: List[str], output_path: str) -> str:
        """Merge multiple files (PDFs and images) into a single PDF.

        Args:
            file_paths: List of absolute paths to files to merge.
            output_path: Absolute path where the merged PDF should be saved.

        Returns:
            Path to the merged PDF file.
        """
        # Run PDF merging in a thread pool to not block the event loop
        return await self._run_blocking(
            self.pdf_processor.merge_pdfs, file_paths, output_path
        )

    async def run(self):
        """Start the MCP server on the stdio transport.

        ``FastMCP.run()`` is a synchronous entry point that starts its own
        event loop, so it can neither be awaited nor called from a coroutine
        that is already running inside a loop. The coroutine variant
        ``run_stdio_async()`` is awaited instead.
        """
        await self.mcp.run_stdio_async()
async def main():
    """Build an InvoiceServer from the environment and run it.

    Debug mode is switched on when the ``MCP_INVOICE_DEBUG`` environment
    variable, compared case-insensitively, is one of ``true``, ``1`` or
    ``yes``. An unset variable leaves debug mode off.
    """
    # Normalise the flag once so the membership test is case-insensitive.
    raw_flag = os.environ.get("MCP_INVOICE_DEBUG", "").lower()
    debug_mode = raw_flag in ["true", "1", "yes"]

    # Construct the server, then hand control to it until it finishes.
    await InvoiceServer(debug_mode=debug_mode).run()
if __name__ == "__main__":
    # Script entry: create the top-level coroutine and drive it to
    # completion on a fresh event loop.
    entry_point = main()
    asyncio.run(entry_point)