Nanonets MCP Server

test_pdf_ocr.py•4.7 KiB

#!/usr/bin/env python3
"""
Test script for PDF OCR functionality.

Runs two checks against the Nanonets MCP server object:

1. ``test_pdf_support`` - PDF dependencies are installed, the
   ``ocr_pdf_to_markdown`` tool is registered and ``get_supported_formats``
   can be called.
2. ``test_with_sample_pdf`` - if a ``*.pdf`` file exists in the tests
   directory, run it through ``ocr_pdf_to_markdown`` and print a preview.

The process exits with status 0 when every check passes and 1 otherwise.
"""

import asyncio
import json
import os
import sys
import time
import traceback
from pathlib import Path

# Project checkout location. Defaults to the original developer path; set the
# NANONETS_MCP_ROOT environment variable to run against another checkout.
PROJECT_ROOT = Path(os.environ.get("NANONETS_MCP_ROOT", "/home/arne/src/nanonets_mcp"))
TESTS_DIR = PROJECT_ROOT / "tests"

# Add project to path
sys.path.append(str(PROJECT_ROOT))


def _first_content(result):
    """Return the first item of a (possibly nested) list/tuple tool result.

    Only real sequences are unwrapped. Dicts and strings also define
    ``__iter__`` and ``__len__``, but indexing them with ``[0]`` either raises
    or yields a single character, so they are returned unchanged. An empty
    sequence is returned as-is.
    """
    while isinstance(result, (list, tuple)) and result:
        result = result[0]
    return result


def _parse_formats(item):
    """Turn one tool-result item into a plain Python object.

    - An object with a ``text`` attribute is treated as JSON text; if it does
      not parse, a placeholder dict is returned instead of failing.
    - Any other object with a ``__dict__`` is converted to that dict.
    - Everything else is returned unchanged.
    """
    if hasattr(item, 'text'):
        try:
            return json.loads(item.text)
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # non-string ``text`` attribute.
            return {"supported_formats": ["Unable to parse"]}
    if hasattr(item, '__dict__'):
        return vars(item)
    return item


async def test_pdf_support():
    """Check that PDF OCR support is installed and exposed by the server.

    Returns:
        True when PDF dependencies are available, the ``ocr_pdf_to_markdown``
        tool is registered and ``get_supported_formats`` could be called;
        False otherwise (including on any exception, which is printed).
    """
    try:
        from nanonets_mcp.server import mcp, PDF_SUPPORT

        print("🧪 Testing PDF OCR functionality")
        print("=" * 50)

        # Test 1: Check PDF support
        print(f"PDF Support Available: {PDF_SUPPORT}")
        if not PDF_SUPPORT:
            print("❌ PDF dependencies not installed")
            print("Install with: pip install pdf2image PyMuPDF")
            return False

        # Test 2: List available tools
        tools = await mcp.list_tools()
        tool_names = [tool.name for tool in tools]
        print(f"Available tools: {tool_names}")

        pdf_tool_available = 'ocr_pdf_to_markdown' in tool_names
        print(f"PDF OCR tool available: {pdf_tool_available}")
        if not pdf_tool_available:
            # Without the tool the setup is not "properly configured", so do
            # not report success.
            print("❌ ocr_pdf_to_markdown tool is not registered")
            return False

        # Test 3: Get supported formats
        formats_result = await mcp.call_tool('get_supported_formats', {})
        formats = _parse_formats(_first_content(formats_result))

        if isinstance(formats, dict):
            supported_formats = formats.get('supported_formats', [])
        else:
            supported_formats = ["Unknown"]
        print(f"Supported formats: {supported_formats}")

        # Test 4: Create a simple test PDF (if we had one)
        # For now, just verify the tool can be called
        print("\n✅ PDF OCR functionality is properly configured!")
        return True

    except Exception as e:
        # Top-level boundary of this check: report and fail, never crash.
        print(f"❌ Error testing PDF functionality: {e}")
        traceback.print_exc()
        return False


async def test_with_sample_pdf():
    """Run OCR on the first PDF found in the tests directory, if any.

    Returns:
        True when no PDF is available (nothing to test) or when the OCR call
        completed; False when an exception occurred (it is printed).
    """
    try:
        from nanonets_mcp.server import mcp

        # Look for any PDF files in tests directory. Sorted so that the file
        # picked is the same on every run (glob order is not guaranteed).
        pdf_files = sorted(TESTS_DIR.glob('*.pdf'))

        if not pdf_files:
            print("ℹ️ No PDF files found in tests directory for testing")
            print(" To test with a real PDF, add a PDF file to the tests/ directory")
            return True

        print(f"\n📄 Found PDF file: {pdf_files[0]}")
        print("🤖 Starting PDF OCR processing...")

        start_time = time.time()

        # The file path itself is passed as 'pdf_data'; the file is not read
        # or base64-encoded here.
        pdf_path = str(pdf_files[0])
        result = await mcp.call_tool('ocr_pdf_to_markdown', {
            'pdf_data': pdf_path,
        })

        processing_time = time.time() - start_time
        print(f"✅ PDF OCR completed in {processing_time:.2f} seconds")

        # Show first 300 characters of result
        content = _first_content(result)
        text = content.text if hasattr(content, 'text') else str(content)
        preview = text[:300] + "..." if len(text) > 300 else text

        print("📄 PDF OCR Result (first 300 chars):")
        print("=" * 50)
        print(preview)
        print("=" * 50)

        return True

    except Exception as e:
        # Top-level boundary of this check: report and fail, never crash.
        print(f"❌ Error testing with sample PDF: {e}")
        traceback.print_exc()
        return False


async def main():
    """Run all checks and print a summary.

    Returns:
        True when every check passed, False otherwise, so that the caller can
        turn the outcome into a process exit status.
    """
    print("🧪 Nanonets MCP Server PDF Test Suite")
    print("=" * 50)

    # Test 1: Basic PDF support
    basic_ok = await test_pdf_support()

    # Test 2: Sample PDF processing (if available). Skipped, and counted as
    # passed, when the basic check already failed.
    sample_ok = await test_with_sample_pdf() if basic_ok else True

    print("\n📊 Test Results:")
    print("=" * 50)
    print(f"PDF Support: {'✅ PASS' if basic_ok else '❌ FAIL'}")
    print(f"Sample PDF Test: {'✅ PASS' if sample_ok else '❌ FAIL'}")

    all_ok = bool(basic_ok and sample_ok)
    if all_ok:
        print("\n🎉 ALL TESTS PASSED! PDF OCR functionality is ready!")
    else:
        print("\n⚠️ Some tests failed. Check the logs above.")
    return all_ok


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a failed run.
    sys.exit(0 if asyncio.run(main()) else 1)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ArneJanning/nanonets-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_pdf_ocr.py•4.7 KiB