Skip to main content
Glama
test_pdf_ocr.py4.82 kB
#!/usr/bin/env python3
"""
Test script for PDF OCR functionality.

Runs two checks against the ``mcp`` server object exported by
``nanonets_mcp.server``:

1. ``test_pdf_support`` - PDF dependencies are present, the
   ``ocr_pdf_to_markdown`` tool is registered and ``get_supported_formats``
   can be called.
2. ``test_with_sample_pdf`` - if a ``*.pdf`` file exists in the project's
   ``tests`` directory, run ``ocr_pdf_to_markdown`` on it and print a preview.

The process exits with status 0 when every check passes and 1 otherwise.
"""

import asyncio
import json
import sys
import time
import traceback
from pathlib import Path

# Location of the project checkout. Defined once so that the import path and
# the sample-PDF directory cannot drift apart.
PROJECT_ROOT = Path('/home/arne/src/nanonets_mcp')
TESTS_DIR = PROJECT_ROOT / 'tests'

# Add project to path
sys.path.append(str(PROJECT_ROOT))


def _parse_supported_formats(formats_result):
    """Extract the ``supported_formats`` list from a tool call result.

    Args:
        formats_result: Whatever ``mcp.call_tool('get_supported_formats', {})``
            returned. Handled shapes: a plain dict, or a non-empty sequence
            whose first item either carries JSON in a ``text`` attribute or
            exposes its fields through ``__dict__``.

    Returns:
        The value stored under ``'supported_formats'`` (``[]`` if the key is
        missing), ``["Unable to parse"]`` when the ``text`` payload is not
        valid JSON, or ``["Unknown"]`` when no dict could be obtained.
    """
    if isinstance(formats_result, dict):
        # Indexing a dict with [0] would raise KeyError, so use it directly.
        formats = formats_result
    elif hasattr(formats_result, '__iter__') and len(formats_result) >= 1:
        formats = formats_result[0]
        if hasattr(formats, 'text'):
            # If it's wrapped in a TextContent object
            try:
                formats = json.loads(formats.text)
            except (ValueError, TypeError):
                # ValueError covers json.JSONDecodeError; TypeError covers a
                # ``text`` attribute that is not str/bytes.
                formats = {"supported_formats": ["Unable to parse"]}
        elif hasattr(formats, '__dict__'):
            formats = formats.__dict__
    else:
        formats = formats_result

    if isinstance(formats, dict):
        return formats.get('supported_formats', [])
    return ["Unknown"]


def _result_text(result):
    """Return a printable string for a tool call result.

    Uses the ``text`` attribute of the first item when ``result`` is a
    non-empty sequence, and falls back to ``str()`` otherwise.
    """
    if hasattr(result, '__iter__') and len(result) >= 1:
        content = result[0]
        return content.text if hasattr(content, 'text') else str(content)
    return str(result)


async def test_pdf_support():
    """Test PDF processing capabilities.

    Returns:
        True when PDF support is enabled, the ``ocr_pdf_to_markdown`` tool is
        registered and ``get_supported_formats`` could be called; False when
        any of these fail or an exception is raised (the traceback is printed).
    """
    try:
        from nanonets_mcp.server import mcp, PDF_SUPPORT

        print("🧪 Testing PDF OCR functionality")
        print("=" * 50)

        # Test 1: Check PDF support
        print(f"PDF Support Available: {PDF_SUPPORT}")

        if not PDF_SUPPORT:
            print("❌ PDF dependencies not installed")
            print("Install with: pip install pdf2image PyMuPDF")
            return False

        # Test 2: List available tools
        tools = await mcp.list_tools()
        tool_names = [tool.name for tool in tools]
        print(f"Available tools: {tool_names}")

        pdf_tool_available = 'ocr_pdf_to_markdown' in tool_names
        print(f"PDF OCR tool available: {pdf_tool_available}")

        if not pdf_tool_available:
            # Without the tool the feature cannot work, so do not report
            # success just because the dependencies are installed.
            print("❌ 'ocr_pdf_to_markdown' tool is not registered")
            return False

        # Test 3: Get supported formats
        formats_result = await mcp.call_tool('get_supported_formats', {})
        supported_formats = _parse_supported_formats(formats_result)
        print(f"Supported formats: {supported_formats}")

        # Test 4: Create a simple test PDF (if we had one)
        # For now, just verify the tool can be called

        print("\n✅ PDF OCR functionality is properly configured!")
        return True

    except Exception as e:
        # Top-level boundary of this check: report the failure and keep going.
        print(f"❌ Error testing PDF functionality: {e}")
        traceback.print_exc()
        return False


async def test_with_sample_pdf():
    """Test with a sample PDF file if available.

    Returns:
        True when no sample PDF exists (nothing to test) or when the OCR call
        completed; False when an exception was raised (the traceback is
        printed).
    """
    try:
        from nanonets_mcp.server import mcp

        # Look for any PDF files in tests directory; sorted so that the file
        # picked is the same on every run.
        pdf_files = sorted(TESTS_DIR.glob('*.pdf'))

        if not pdf_files:
            print("ℹ️ No PDF files found in tests directory for testing")
            print(" To test with a real PDF, add a PDF file to the tests/ directory")
            return True

        print(f"\n📄 Found PDF file: {pdf_files[0]}")
        print("🤖 Starting PDF OCR processing...")

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        # The file path itself is passed as 'pdf_data'; nothing is read or
        # base64-encoded here.
        # NOTE(review): confirm that the tool accepts a path in 'pdf_data'.
        pdf_path = str(pdf_files[0])
        result = await mcp.call_tool('ocr_pdf_to_markdown', {
            'pdf_data': pdf_path
        })

        processing_time = time.perf_counter() - start_time

        print(f"✅ PDF OCR completed in {processing_time:.2f} seconds")

        # Show first 300 characters of result
        text = _result_text(result)

        print("📄 PDF OCR Result (first 300 chars):")
        print("=" * 50)
        print(text[:300] + "..." if len(text) > 300 else text)
        print("=" * 50)

        return True

    except Exception as e:
        # Top-level boundary of this check: report the failure and keep going.
        print(f"❌ Error testing with sample PDF: {e}")
        traceback.print_exc()
        return False


async def main():
    """Main test function.

    Runs the basic support check and, only if that passes, the sample-PDF
    check (a skipped sample check is reported as passed).

    Returns:
        True when all checks passed, False otherwise.
    """
    print("🧪 Nanonets MCP Server PDF Test Suite")
    print("=" * 50)

    # Test 1: Basic PDF support
    basic_ok = await test_pdf_support()

    # Test 2: Sample PDF processing (if available)
    sample_ok = await test_with_sample_pdf() if basic_ok else True

    print("\n📊 Test Results:")
    print("=" * 50)
    print(f"PDF Support: {'✅ PASS' if basic_ok else '❌ FAIL'}")
    print(f"Sample PDF Test: {'✅ PASS' if sample_ok else '❌ FAIL'}")

    all_ok = bool(basic_ok and sample_ok)
    if all_ok:
        print("\n🎉 ALL TESTS PASSED! PDF OCR functionality is ready!")
    else:
        print("\n⚠️ Some tests failed. Check the logs above.")
    return all_ok


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a failed run.
    sys.exit(0 if asyncio.run(main()) else 1)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ArneJanning/nanonets-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.