OpenRouter MCP Multimodal Server

mcp-ocr-fallback
src
tool-handlers

multi-image-analysis.ts•15.9 kB

import fetch from 'node-fetch';
// Remove the sharp import to avoid conflicts with our dynamic import
// import sharp from 'sharp';
import { McpError, ErrorCode } from '@modelcontextprotocol/sdk/types.js';
import OpenAI from 'openai';
import path from 'path';
import { promises as fs } from 'fs';
import { tmpdir } from 'os';
import { fileURLToPath } from 'url';
// uuid is intentionally not imported
// import { v4 as uuidv4 } from 'uuid';

/** Minimal subset of the sharp pipeline API that this module relies on. */
interface SharpPipeline {
  metadata(): Promise<{ width?: number; height?: number }>;
  resize(options: { width?: number; height?: number }): SharpPipeline;
  jpeg(options?: { quality?: number }): SharpPipeline;
  toBuffer(): Promise<Buffer>;
}

type SharpFactory = (input: Buffer) => SharpPipeline;

// Setup sharp with fallback. When sharp cannot be loaded it stays `undefined`
// and images are forwarded untouched (see processImage).
// NOTE(review): if this file is emitted as a native ES module, `require` may
// not be defined and this will always take the fallback path - confirm the
// project's module target.
let sharp: SharpFactory | undefined;
try {
  sharp = require('sharp') as SharpFactory;
} catch {
  console.error('Warning: sharp module not available, using fallback image processing');
  sharp = undefined;
}

// Default model for image analysis
const DEFAULT_FREE_MODEL = 'qwen/qwen2.5-vl-32b-instruct:free';

// Image processing constants
const MAX_DIMENSION = 800;
const JPEG_QUALITY = 80;
const MAX_RETRY_ATTEMPTS = 3;
const RETRY_DELAY = 1000; // ms
const FETCH_TIMEOUT_MS = 15000;

const FALLBACK_MIME_TYPE = 'application/octet-stream';

/** File extension (lower-case, with leading dot) to MIME type. */
const MIME_TYPES_BY_EXTENSION: Readonly<Record<string, string>> = {
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.webp': 'image/webp',
  '.gif': 'image/gif',
  '.bmp': 'image/bmp',
  '.svg': 'image/svg+xml',
};

/** Substrings of a model id that are treated as a hint of vision support. */
const VISION_MODEL_HINTS: readonly string[] = ['vl', 'vision', 'claude', 'gemini', 'gpt-4', 'qwen'];

/**
 * Matches inputs that should be read from the local file system: POSIX
 * absolute paths, `./` and `../` relative paths (either slash style) and
 * Windows drive-letter paths.
 */
const LOCAL_PATH_PATTERN = /^(?:\/|\.{1,2}[\\/]|[A-Za-z]:[\\/])/;

export interface MultiImageAnalysisToolRequest {
  images: Array<{
    url: string;
    alt?: string;
  }>;
  prompt: string;
  markdown_response?: boolean;
  model?: string;
}

/** Base64 payload together with the MIME type that actually describes it. */
interface ProcessedImage {
  base64: string;
  mimeType: string;
}

/**
 * Sleep function for retry mechanisms
 */
const sleep = (ms: number): Promise<void> =>
  new Promise<void>(resolve => {
    setTimeout(resolve, ms);
  });

/**
 * Extracts a type name and message from an arbitrary thrown value without
 * assuming it is an Error instance.
 */
function describeError(error: unknown): { type: string; message: string } {
  if (error instanceof Error) {
    return { type: error.constructor.name, message: error.message };
  }
  return { type: typeof error, message: String(error) };
}

/**
 * Normalizes a file path to be OS-neutral
 * Handles Windows backslashes, drive letters, etc.
 *
 * URLs (http, https, data and file) are returned untouched: path.normalize
 * collapses the `//` after the scheme and would corrupt them.
 */
function normalizePath(filePath: string): string {
  const isUrl = ['http://', 'https://', 'data:', 'file://'].some(prefix =>
    filePath.startsWith(prefix)
  );
  if (isUrl) {
    return filePath;
  }

  // Normalize according to the host OS, then force forward slashes.
  return path.normalize(filePath).replace(/\\/g, '/');
}

/**
 * Get MIME type from file extension or data URL
 *
 * @returns The detected MIME type, or `application/octet-stream` when the
 *          extension is unknown or the data URL declares no media type.
 */
function getMimeType(url: string): string {
  if (url.startsWith('data:')) {
    const match = /^data:([^;,]+)[;,]/.exec(url);
    return match?.[1] ?? FALLBACK_MIME_TYPE;
  }

  // Drop the query string and fragment before looking at the extension.
  const pathPart = url.split(/[?#]/, 1)[0] ?? '';
  const extension = path.extname(pathPart).toLowerCase();
  return MIME_TYPES_BY_EXTENSION[extension] ?? FALLBACK_MIME_TYPE;
}

/**
 * Decodes a base64 data URL into its raw bytes.
 *
 * Any media type and any number of `;key=value` parameters are accepted as
 * long as the header ends in `;base64` and the payload is not empty.
 *
 * @throws Error('Invalid data URL format') otherwise.
 */
function decodeDataUrl(dataUrl: string): Buffer {
  const commaIndex = dataUrl.indexOf(',');
  const header = commaIndex === -1 ? '' : dataUrl.slice('data:'.length, commaIndex);
  const payload = commaIndex === -1 ? '' : dataUrl.slice(commaIndex + 1);

  if (!header.toLowerCase().endsWith(';base64') || payload.length === 0) {
    throw new Error('Invalid data URL format');
  }
  return Buffer.from(payload, 'base64');
}

/**
 * Converts a `file://` URL to a local path. Falls back to stripping the
 * scheme when the URL is not a well-formed file URL (e.g. `file://rel/a.png`).
 */
function fileUrlToLocalPath(fileUrl: string): string {
  try {
    return fileURLToPath(fileUrl);
  } catch {
    return fileUrl.replace('file://', '');
  }
}

/**
 * Reads a local file, optionally retrying with a second spelling of the path
 * when the first one cannot be read.
 *
 * @throws Error naming the path that could not be read.
 */
async function readLocalFile(primaryPath: string, fallbackPath?: string): Promise<Buffer> {
  try {
    return await fs.readFile(primaryPath);
  } catch (error) {
    if (fallbackPath !== undefined && fallbackPath !== primaryPath) {
      try {
        return await fs.readFile(fallbackPath);
      } catch {
        console.error(`Failed to read file with both normalized path (${primaryPath}) and original path (${fallbackPath})`);
        throw new Error(`Failed to read file: ${fallbackPath}`);
      }
    }
    console.error(`Error reading file at ${primaryPath}:`, error);
    throw new Error(`Failed to read file: ${primaryPath}`);
  }
}

/**
 * Downloads a remote resource, retrying with exponential backoff and jitter.
 *
 * Each attempt is aborted after FETCH_TIMEOUT_MS; the timer covers both the
 * response headers and the body, and is always cleared when the attempt ends.
 *
 * @throws The error of the last failed attempt.
 */
async function downloadWithRetry(url: string): Promise<Buffer> {
  let lastError: unknown;

  for (let attempt = 1; attempt <= MAX_RETRY_ATTEMPTS; attempt++) {
    // Use AbortController for timeout instead of timeout option
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

    try {
      const response = await fetch(url, {
        signal: controller.signal,
        headers: { 'User-Agent': 'OpenRouter-MCP-Server/1.0' },
      });
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return Buffer.from(await response.arrayBuffer());
    } catch (error) {
      lastError = error;
      console.error(`Error fetching URL (attempt ${attempt}/${MAX_RETRY_ATTEMPTS}): ${url}`, error);
    } finally {
      // Clear on success and on failure so no timer outlives its attempt.
      clearTimeout(timeoutId);
    }

    if (attempt < MAX_RETRY_ATTEMPTS) {
      // Exponential backoff with jitter
      const delay = RETRY_DELAY * Math.pow(2, attempt - 1) * (0.5 + Math.random() * 0.5);
      await sleep(delay);
    }
  }

  throw lastError instanceof Error ? lastError : new Error(`Failed to fetch ${url}`);
}

/**
 * Fetch image from various sources: data URLs, file paths, or remote URLs
 *
 * @param url A base64 data URL, a `file://` URL, a local path (absolute,
 *            `./`/`../` relative or Windows drive-letter) or an http(s) URL.
 * @throws Error when the source cannot be read or the format is unsupported.
 */
async function fetchImageAsBuffer(url: string): Promise<Buffer> {
  try {
    if (url.startsWith('data:')) {
      return decodeDataUrl(url);
    }

    // file:// must be handled before any path normalization touches it.
    if (url.startsWith('file://')) {
      return await readLocalFile(fileUrlToLocalPath(url));
    }

    const normalizedUrl = normalizePath(url);

    // Classify on the original spelling as well: normalization strips a
    // leading "./", which would otherwise hide relative paths.
    if (LOCAL_PATH_PATTERN.test(url) || LOCAL_PATH_PATTERN.test(normalizedUrl)) {
      return await readLocalFile(normalizedUrl, url);
    }

    if (normalizedUrl.startsWith('http://') || normalizedUrl.startsWith('https://')) {
      return await downloadWithRetry(normalizedUrl);
    }

    // If we get here, the URL format is unsupported
    throw new Error(`Unsupported URL format: ${url}`);
  } catch (error) {
    console.error(`Error fetching image from ${url}:`, error);
    throw error;
  }
}

/**
 * Fallback image processing when sharp isn't available
 *
 * Forwards the bytes unchanged, so the original MIME type still applies.
 */
function processImageFallback(buffer: Buffer, mimeType: string): ProcessedImage {
  return { base64: buffer.toString('base64'), mimeType };
}

/**
 * Process and optimize image for API consumption
 *
 * Small JPEG/WebP images are forwarded as-is. Everything else is re-encoded
 * as JPEG (downscaled so the longer side is at most MAX_DIMENSION). Any
 * failure degrades to forwarding the original bytes.
 *
 * @returns The base64 payload and the MIME type describing that payload:
 *          `image/jpeg` after re-encoding, otherwise the input `mimeType`.
 */
async function processImage(buffer: Buffer, mimeType: string): Promise<ProcessedImage> {
  try {
    if (typeof sharp !== 'function') {
      console.warn('Using fallback image processing (sharp not available)');
      return processImageFallback(buffer, mimeType);
    }

    let pipeline = sharp(buffer);

    let metadata: { width?: number; height?: number };
    try {
      metadata = await pipeline.metadata();
    } catch (error) {
      console.warn('Error getting image metadata, using fallback:', error);
      return processImageFallback(buffer, mimeType);
    }

    const { width, height } = metadata;
    if (width && height) {
      // Skip processing for small images already in a compact format
      const isCompactFormat = mimeType === 'image/jpeg' || mimeType === 'image/webp';
      if (isCompactFormat && width <= MAX_DIMENSION && height <= MAX_DIMENSION) {
        return processImageFallback(buffer, mimeType);
      }

      // Resize larger images along their longer side
      if (Math.max(width, height) > MAX_DIMENSION) {
        pipeline = pipeline.resize(
          width > height ? { width: MAX_DIMENSION } : { height: MAX_DIMENSION }
        );
      }
    }

    try {
      // Convert to JPEG for consistency and small size
      const jpegBuffer = await pipeline.jpeg({ quality: JPEG_QUALITY }).toBuffer();
      return { base64: jpegBuffer.toString('base64'), mimeType: 'image/jpeg' };
    } catch (error) {
      console.warn('Error in final image processing, using fallback:', error);
      return processImageFallback(buffer, mimeType);
    }
  } catch (error) {
    console.error('Error processing image, using fallback:', error);
    return processImageFallback(buffer, mimeType);
  }
}

/**
 * Reads the context length advertised by a model entry, if any.
 * Returns 0 when neither `context_length` nor `context_window` yields a number.
 */
function readContextLength(model: object): number {
  const { context_length: contextLength, context_window: contextWindow } = model as {
    context_length?: unknown;
    context_window?: unknown;
  };

  if (typeof contextLength === 'number') {
    return contextLength || 0;
  }
  if (contextWindow) {
    // `|| 0` turns an unparsable value (NaN) into 0.
    return Number.parseInt(String(contextWindow), 10) || 0;
  }
  return 0;
}

/**
 * Find a suitable free model with vision capabilities, defaulting to Qwen
 *
 * Tries DEFAULT_FREE_MODEL first; otherwise lists the available models, keeps
 * those whose id contains "free" plus one of VISION_MODEL_HINTS, and picks the
 * one with the largest context length. Never throws: every failure resolves
 * to DEFAULT_FREE_MODEL.
 */
export async function findSuitableFreeModel(openai: OpenAI): Promise<string> {
  try {
    // First try with an exact match for our preferred model
    const preferredModel = DEFAULT_FREE_MODEL;
    try {
      const modelInfo = await openai.models.retrieve(preferredModel);
      if (modelInfo && modelInfo.id) {
        console.error(`Using preferred model: ${preferredModel}`);
        return preferredModel;
      }
    } catch {
      console.error(`Preferred model ${preferredModel} not available, searching for alternatives...`);
    }

    // Query available models
    const modelsResponse = await openai.models.list();
    if (!modelsResponse?.data || modelsResponse.data.length === 0) {
      console.error('No models found, using default fallback model');
      return DEFAULT_FREE_MODEL;
    }

    const candidates = modelsResponse.data
      .filter(model => {
        const modelId = model.id.toLowerCase();
        return modelId.includes('free') && VISION_MODEL_HINTS.some(hint => modelId.includes(hint));
      })
      .map(model => ({ id: model.id, contextLength: readContextLength(model) }));

    const [firstCandidate, ...otherCandidates] = candidates;
    if (firstCandidate) {
      // Largest context window wins; ties keep the earliest listed model.
      const best = otherCandidates.reduce(
        (winner, candidate) => (candidate.contextLength > winner.contextLength ? candidate : winner),
        firstCandidate
      );
      console.error(`Selected free vision model: ${best.id} with context length: ${best.contextLength}`);
      return best.id;
    }

    // If no free vision models found, fallback to our default
    console.error('No free vision models found, using default fallback model');
    return DEFAULT_FREE_MODEL;
  } catch (error) {
    console.error('Error finding suitable model:', error);
    return DEFAULT_FREE_MODEL;
  }
}

/**
 * Splits a matched URL into the URL proper and any trailing sentence
 * punctuation (or an unmatched closing parenthesis) that was captured with it.
 */
function splitTrailingPunctuation(rawUrl: string): [url: string, trailing: string] {
  let end = rawUrl.length;
  while (end > 0) {
    const lastChar = rawUrl.charAt(end - 1);
    const isPunctuation = '.,;:!?'.includes(lastChar);
    const isUnmatchedParen = lastChar === ')' && !rawUrl.slice(0, end - 1).includes('(');
    if (!isPunctuation && !isUnmatchedParen) {
      break;
    }
    end--;
  }
  return [rawUrl.slice(0, end), rawUrl.slice(end)];
}

/**
 * Applies light markdown touch-ups to a model response: a horizontal rule
 * after level 1-3 headings, a blank line before list items, and markdown
 * links for bare URLs.
 */
function formatAsMarkdown(text: string): string {
  return text
    // Add horizontal rule after sections
    .replace(/^(#{1,3}.*)/gm, '$1\n\n---')
    // Ensure proper spacing for lists
    .replace(/^(\s*[-*•]\s.+)$/gm, '\n$1')
    // Convert plain URLs to markdown links. The lookbehind skips URLs that
    // are already a link target "](...", an autolink "<...>" or a link label.
    .replace(/(?<!\]\(|[<\[])https?:\/\/[^\s<>\]]+/g, match => {
      const [url, trailing] = splitTrailingPunctuation(match);
      return `[${url}](${url})${trailing}`;
    });
}

/**
 * Main handler for multi-image analysis
 *
 * Converts every image to a base64 data URL (data URLs pass through
 * untouched), picks a model, sends the prompt and images in one chat
 * completion request and returns the response text.
 *
 * @param request      Tool request carrying the images, prompt and options.
 * @param openai       Client used for model lookup and the completion call.
 * @param defaultModel Model used when the request does not specify one.
 * @returns A tool result with the response text and `metadata` (model, usage),
 *          or a result flagged `isError: true` when processing fails.
 * @throws McpError (InvalidParams) when images, an image url or the prompt
 *         is missing.
 */
export async function handleMultiImageAnalysis(
  request: { params: { arguments: MultiImageAnalysisToolRequest } },
  openai: OpenAI,
  defaultModel?: string
) {
  const args = request.params.arguments;

  try {
    // Validate inputs
    if (!args.images || !Array.isArray(args.images) || args.images.length === 0) {
      throw new McpError(ErrorCode.InvalidParams, 'At least one image is required');
    }
    args.images.forEach((image, index) => {
      if (!image || typeof image.url !== 'string' || image.url.length === 0) {
        throw new McpError(ErrorCode.InvalidParams, `Image ${index + 1} is missing a url`);
      }
    });
    if (!args.prompt) {
      throw new McpError(ErrorCode.InvalidParams, 'A prompt for analyzing the images is required');
    }

    console.error(`Processing ${args.images.length} images`);

    // Process each image and convert to base64 if needed
    const processedImages = await Promise.all(
      args.images.map(async (image, index) => {
        try {
          // Skip processing if already a data URL
          if (image.url.startsWith('data:')) {
            console.error(`Image ${index + 1} is already in base64 format`);
            return image;
          }

          console.error(`Processing image ${index + 1}: ${image.url.substring(0, 100)}${image.url.length > 100 ? '...' : ''}`);

          const buffer = await fetchImageAsBuffer(image.url);
          const processed = await processImage(buffer, getMimeType(image.url));

          // Label the payload with the type of the bytes actually sent;
          // unknown types are presented as JPEG.
          const dataUrlMimeType =
            processed.mimeType === FALLBACK_MIME_TYPE ? 'image/jpeg' : processed.mimeType;

          return {
            url: `data:${dataUrlMimeType};base64,${processed.base64}`,
            alt: image.alt,
          };
        } catch (error: unknown) {
          console.error(`Error processing image ${index + 1}:`, error);
          throw new Error(`Failed to process image ${index + 1}: ${image.url}. Error: ${describeError(error).message}`);
        }
      })
    );

    // Select model with priority:
    // 1. User-specified model
    // 2. Default model from environment
    // 3. Default free vision model
    // `||` is deliberate: an empty string counts as "not specified".
    let model = args.model || defaultModel || DEFAULT_FREE_MODEL;

    // If a model is specified but not our default free model, verify it exists
    if (model !== DEFAULT_FREE_MODEL) {
      try {
        await openai.models.retrieve(model);
      } catch {
        console.error(`Specified model ${model} not found, falling back to auto-selection`);
        model = await findSuitableFreeModel(openai);
      }
    }

    console.error(`Making API call with model: ${model}`);

    // Build content array for the API call: the prompt first, then each image
    const content: Array<{ type: string; text?: string; image_url?: { url: string } }> = [
      { type: 'text', text: args.prompt },
      ...processedImages.map(image => ({ type: 'image_url', image_url: { url: image.url } })),
    ];

    // Make the API call
    const completion = await openai.chat.completions.create({
      model,
      // NOTE(review): cast kept from the original code; presumably the loose
      // local content type does not satisfy the SDK's message types - confirm.
      messages: [{ role: 'user', content }] as any,
    });

    // A response without choices yields an empty string instead of a TypeError
    let responseText = completion.choices?.[0]?.message?.content ?? '';

    // Format as markdown if requested
    if (args.markdown_response) {
      responseText = formatAsMarkdown(responseText);
    }

    // Return the analysis result
    return {
      content: [
        {
          type: 'text',
          text: responseText,
        },
      ],
      metadata: {
        model: completion.model,
        usage: completion.usage,
      },
    };
  } catch (error: unknown) {
    console.error('Error in multi-image analysis:', error);

    if (error instanceof McpError) {
      throw error;
    }

    const { type, message } = describeError(error);
    return {
      content: [
        {
          type: 'text',
          text: `Error analyzing images: ${message}`,
        },
      ],
      isError: true,
      metadata: {
        error_type: type,
        error_message: message,
      },
    };
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hoangdn3/mcp-ocr-fallback'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.