analyze_image
Analyze images using AI vision models to extract information from screenshots, photos, diagrams, and web content. Supports base64, file paths, and URL inputs with customizable prompts and output formats.
Instructions
Analyze images using OpenRouter's vision models. Supports various input formats including base64, file paths, and URLs.
Input Schema
TableJSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| data | Yes | The image data (base64 string, file path, or URL) | |
| format | No | Output format (default: text) | |
| maxTokens | No | Maximum tokens in response (default: 4000) | |
| mimeType | No | MIME type of the image (required for base64 input) | |
| prompt | No | Custom prompt for image analysis (optional) | |
| temperature | No | Sampling temperature (default: 0.1) | |
| type | Yes | The type of image input (one of: base64, file, url) | |
Implementation Reference
- src/tools/analyze-image.ts:7-112 (handler)Main handler function for the 'analyze_image' tool. Processes input arguments, handles image processing with timeouts, validation, and delegates to OpenRouterClient for analysis.export async function handleAnalyzeImage( args: any, config: Config, openRouterClient: OpenRouterClient, logger: Logger ) { const imageProcessor = ImageProcessor.getInstance(); try { const imageInput: ImageInput = { type: args.type as 'base64' | 'file' | 'url', data: args.data as string, mimeType: args.mimeType as string, }; const options: ImageAnalysisOptions = { prompt: args.prompt as string, format: args.format as 'text' | 'json', maxTokens: args.maxTokens as number, temperature: args.temperature as number, }; logger.info(`Starting image analysis for type: ${imageInput.type}`); // Add timeout for image processing (30 seconds) const processTimeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error('Image processing timed out after 30 seconds')), 30000); }); // Process the image with timeout const processedImage = await Promise.race([ imageProcessor.processImage(imageInput), processTimeoutPromise ]) as { data: string; mimeType: string; size: number }; // Validate image type if (!imageProcessor.isValidImageType(processedImage.mimeType)) { throw new Error(`Unsupported image type: ${processedImage.mimeType}`); } // Check file size const serverConfig = config.getServerConfig(); const maxImageSize = serverConfig.maxImageSize || 10485760; if (processedImage.size > maxImageSize) { throw new Error(`Image size ${processedImage.size} exceeds maximum allowed size ${maxImageSize}`); } // Add timeout for the API call (120 seconds) const apiTimeoutPromise = new Promise<never>((_, reject) => { setTimeout(() => reject(new Error('Image analysis timed out after 2 minutes')), 120000); }); // Analyze the image with timeout const analysisPromise = openRouterClient.analyzeImage( processedImage.data, processedImage.mimeType, options.prompt || 
'Analyze this image in detail. Describe what you see, including objects, people, text, and any notable features.', options ); const result = await Promise.race([analysisPromise, apiTimeoutPromise]); if (!result.success) { throw new Error(result.error || 'Failed to analyze image'); } logger.info(`Image analysis completed successfully`, { model: result.model, usage: result.usage, }); return { content: [ { type: 'text', text: result.analysis || 'No analysis available', }, ], }; } catch (error) { logger.error('Image analysis failed', error); // Check if it's a timeout error if (error instanceof Error && error.message.includes('timed out')) { return { content: [ { type: 'text', text: `Error: ${error.message}. The image may be too large or the server is experiencing delays.`, }, ], isError: true, }; } return { content: [ { type: 'text', text: `Error: ${(error as Error).message}`, }, ], isError: true, }; } }
- src/index.ts:48-89 (schema)Tool schema definition for 'analyze_image' including input schema, properties, and requirements, provided in the ListTools response.{ name: 'analyze_image', description: 'Analyze images using OpenRouter\'s vision models. Supports various input formats including base64, file paths, and URLs.', inputSchema: { type: 'object', properties: { type: { type: 'string', enum: ['base64', 'file', 'url'], description: 'The type of image input', }, data: { type: 'string', description: 'The image data (base64 string, file path, or URL)', }, mimeType: { type: 'string', description: 'MIME type of the image (required for base64 input)', }, prompt: { type: 'string', description: 'Custom prompt for image analysis (optional)', }, format: { type: 'string', enum: ['text', 'json'], description: 'Output format (default: text)', }, maxTokens: { type: 'number', description: 'Maximum tokens in response (default: 4000)', }, temperature: { type: 'number', minimum: 0, maximum: 2, description: 'Sampling temperature (default: 0.1)', }, }, required: ['type', 'data'], }, },
- src/index.ts:192-193 (registration)Registration of the handler in the centralized CallToolRequestHandler switch statement.case 'analyze_image': return await handleAnalyzeImage(args, config, openRouterClient, logger);
- Core helper method in OpenRouterClient that performs the actual image analysis API request to OpenRouter's chat completions endpoint with vision support.public async analyzeImage( imageData: string, mimeType: string, prompt: string, options: { format?: 'text' | 'json'; maxTokens?: number; temperature?: number; } = {} ): Promise<ImageAnalysisResult> { try { this.logger.debug(`Analyzing image with model: ${this.config.model}`); // Validate inputs if (!imageData || imageData.length === 0) { throw new Error('No image data provided'); } if (!mimeType) { throw new Error('No MIME type provided'); } // Check image data size (base64 encoded) if (imageData.length > 20 * 1024 * 1024) { // 20MB base64 limit throw new Error(`Image data too large: ${imageData.length} characters. Maximum allowed is 20MB.`); } // Validate prompt length const promptText = prompt || 'Analyze this image in detail. Describe what you see, including objects, people, text, and any notable features.'; if (promptText.length > 10000) { throw new Error(`Prompt too long: ${promptText.length} characters. Maximum allowed is 10000.`); } const requestBody = { model: this.config.model, messages: [ { role: 'user', content: [ { type: 'text', text: promptText, }, { type: 'image_url', image_url: { url: `data:${mimeType};base64,${imageData}`, }, }, ], }, ], max_tokens: Math.min(options.maxTokens || 4000, 8000), // Cap at 8000 tokens temperature: options.temperature || 0.1, response_format: options.format === 'json' ? 
{ type: 'json_object' } : undefined, }; this.logger.debug(`Sending request to OpenRouter API`, { model: this.config.model, imageSize: imageData.length, promptLength: promptText.length, maxTokens: requestBody.max_tokens, }); const response = await this.client.post('/chat/completions', requestBody); const choice = response.data.choices?.[0]; if (!choice) { throw new Error('No response from model'); } const content = choice.message?.content; if (!content) { throw new Error('Empty response from model'); } let analysis: string; let structuredData: any; if (options.format === 'json') { try { structuredData = JSON.parse(content); analysis = JSON.stringify(structuredData, null, 2); } catch { // If JSON parsing fails, treat as text analysis = content; structuredData = { analysis: content }; } } else { analysis = content; structuredData = { analysis }; } const usage = response.data.usage; this.logger.info(`Image analysis completed successfully`, { model: this.config.model, usage, }); return { success: true, analysis, structuredData, model: this.config.model, usage: usage ? { promptTokens: usage.prompt_tokens, completionTokens: usage.completion_tokens, totalTokens: usage.total_tokens, } : undefined, }; } catch (error) { this.logger.error('Failed to analyze image', error); const errorMessage = this.extractErrorMessage(error); return { success: false, error: errorMessage, }; } }