MCP Orchestration Server

test_image_to_text.py•11 kB

#!/usr/bin/env python3 """ Test Image-to-Text Functionality Test all available image processing and OCR capabilities """ import os import sys import requests from pathlib import Path def test_document_processor_image_capability(): """Test document processor's image processing capability.""" print("📄 TESTING DOCUMENT PROCESSOR IMAGE CAPABILITY") print("=" * 60) try: # Import document processor directly sys.path.append('.') from agents.core.document_processor import DocumentProcessorAgent from agents.base_agent import MCPMessage from datetime import datetime # Create document processor doc_processor = DocumentProcessorAgent() print("✅ Document processor created successfully") # Check capabilities info = doc_processor.get_info() print(f"📋 Agent: {info['name']}") print(f"📝 Description: {info['description']}") capabilities = [cap.name for cap in doc_processor.capabilities] print(f"⚡ Capabilities: {', '.join(capabilities)}") # Check if image processing is supported doc_cap = doc_processor.capabilities[0] input_types = doc_cap.input_types print(f"📥 Input types: {', '.join(input_types)}") if "image" in input_types: print("🖼️ ✅ IMAGE PROCESSING SUPPORTED!") return True else: print("🖼️ ❌ Image processing not explicitly listed") return False except Exception as e: print(f"❌ Error testing document processor: {e}") return False def test_ocr_module(): """Test the OCR module directly.""" print("\n🔧 TESTING OCR MODULE") print("=" * 60) try: # Import OCR module sys.path.append('data/multimodal') from image_ocr import extract_text_from_image print("✅ OCR module imported successfully") # Check if Tesseract is available try: import pytesseract print("✅ Tesseract OCR available") # Try to get Tesseract version version = pytesseract.get_tesseract_version() print(f"📋 Tesseract version: {version}") return True except Exception as e: print(f"⚠️ Tesseract not available: {e}") print("💡 Install Tesseract: https://github.com/tesseract-ocr/tesseract") return False except Exception as e: print(f"❌ Error testing OCR module: {e}") return False def test_javascript_image_agent(): """Test JavaScript image OCR agent.""" print("\n🟨 TESTING JAVASCRIPT IMAGE AGENT") print("=" * 60) js_agent_path = Path("agents/image/image_ocr_agent.js") if js_agent_path.exists(): print(f"✅ JavaScript image agent found: {js_agent_path}") # Read the file to check capabilities try: with open(js_agent_path, 'r') as f: content = f.read() # Check for key features features = [] if 'OCR' in content: features.append("OCR functionality") if 'supportedFormats' in content: features.append("Multiple image formats") if 'preProcessImage' in content: features.append("Image preprocessing") if 'analyzeText' in content: features.append("Text analysis") print(f"⚡ Features found: {', '.join(features)}") # Extract supported formats import re formats_match = re.search(r"supportedFormats.*?\[(.*?)\]", content, re.DOTALL) if formats_match: formats_str = formats_match.group(1) formats = re.findall(r"'([^']+)'", formats_str) print(f"📁 Supported formats: {', '.join(formats)}") return True except Exception as e: print(f"❌ Error reading JavaScript agent: {e}") return False else: print(f"❌ JavaScript image agent not found: {js_agent_path}") return False def test_image_upload_capability(): """Test image upload capability through MCP server.""" print("\n🌐 TESTING IMAGE UPLOAD CAPABILITY") print("=" * 60) try: # Test document analysis endpoint response = requests.post( "http://localhost:8000/api/mcp/analyze", json={ "documents": [ { "filename": "test_image.png", "content": "This is a test image content", "type": "image" } ], "query": "Extract text from this image", "rag_mode": True }, timeout=10 ) if response.status_code == 200: result = response.json() print(f"✅ Server response: {result.get('status', 'unknown')}") print(f"💬 Message: {result.get('message', 'No message')}") if result.get('status') == 'success': print("🎉 Image analysis endpoint working!") return True else: print("⚠️ Image analysis endpoint responded but may need configuration") return False else: print(f"❌ Server error: {response.status_code}") return False except Exception as e: print(f"❌ Error testing image upload: {e}") return False def show_image_processing_summary(): """Show summary of image processing capabilities.""" print("\n📊 IMAGE-TO-TEXT CAPABILITIES SUMMARY") print("=" * 60) capabilities = { "🖼️ Image Formats Supported": [ "PNG - Portable Network Graphics", "JPG/JPEG - Joint Photographic Experts Group", "BMP - Bitmap Image File", "TIFF - Tagged Image File Format", "GIF - Graphics Interchange Format" ], "🔧 OCR Technologies": [ "Tesseract OCR - Open source OCR engine", "OpenCV - Computer vision preprocessing", "PIL/Pillow - Image processing library", "Custom preprocessing algorithms" ], "⚡ Processing Features": [ "Text extraction from images", "Image preprocessing for better accuracy", "Multiple OCR methods for fallback", "Confidence scoring", "Text analysis and summarization", "Language detection" ], "🤖 Available Agents": [ "Document Processor Agent (Python) - Active", "Image OCR Agent (JavaScript) - Available", "OCR Module (Python) - Direct access" ], "🌐 Integration Methods": [ "MCP Server API endpoints", "Direct agent calls", "Document analysis workflow", "File upload processing" ] } for category, items in capabilities.items(): print(f"\n{category}:") for item in items: print(f" • {item}") def show_usage_examples(): """Show usage examples for image-to-text.""" print("\n💡 USAGE EXAMPLES") print("=" * 60) examples = [ { "method": "MCP Server Command", "example": "python mcp_client.py -c 'Extract text from image.png'", "description": "Process image through MCP server" }, { "method": "Document Analysis API", "example": "POST /api/mcp/analyze with image document", "description": "Upload image via web interface" }, { "method": "Direct OCR Module", "example": "from image_ocr import extract_text_from_image", "description": "Direct Python module usage" }, { "method": "Natural Language", "example": "'Read the text from this screenshot'", "description": "Natural language image processing" } ] for i, example in enumerate(examples, 1): print(f"\n{i}. {example['method']}:") print(f" 📝 Example: {example['example']}") print(f" 💡 Description: {example['description']}") def main(): """Main test function.""" print("🖼️ IMAGE-TO-TEXT FUNCTIONALITY TEST") print("=" * 80) print("🎯 Testing all available image processing capabilities") print("=" * 80) test_results = {} # Test 1: Document Processor Image Capability test_results["document_processor"] = test_document_processor_image_capability() # Test 2: OCR Module test_results["ocr_module"] = test_ocr_module() # Test 3: JavaScript Image Agent test_results["javascript_agent"] = test_javascript_image_agent() # Test 4: Image Upload Capability test_results["upload_capability"] = test_image_upload_capability() # Summary print("\n" + "=" * 80) print("📊 IMAGE-TO-TEXT TEST RESULTS") print("=" * 80) passed_tests = sum(test_results.values()) total_tests = len(test_results) for test_name, result in test_results.items(): status = "✅ AVAILABLE" if result else "❌ NEEDS SETUP" print(f"{status} {test_name.replace('_', ' ').title()}") print(f"\n📈 Availability Score: {passed_tests}/{total_tests} ({(passed_tests/total_tests)*100:.1f}%)") if passed_tests >= 2: print("\n🎉 IMAGE-TO-TEXT FUNCTIONALITY AVAILABLE!") print("🖼️ You have working image processing capabilities") # Show capabilities summary show_image_processing_summary() # Show usage examples show_usage_examples() print("\n🚀 READY TO USE:") print(" 📸 Upload images for text extraction") print(" 🔍 Process screenshots and documents") print(" 📝 Extract text from photos") print(" 🤖 Automated image analysis") else: print("\n🔧 IMAGE-TO-TEXT NEEDS SETUP") print("💡 Install Tesseract OCR for full functionality") print("🔗 https://github.com/tesseract-ocr/tesseract") return passed_tests >= 2 if __name__ == "__main__": try: success = main() if success: print("\n🎉 Image-to-text functionality confirmed!") else: print("\n🔧 Image-to-text needs setup. Install Tesseract OCR.") except Exception as e: print(f"\n❌ Test failed: {e}") print("💡 Make sure all dependencies are available")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Nisarg-123-web/MCP2'

If you have feedback or need assistance with the MCP directory API, please join our Discord server