analyze_image
Analyze images from base64, files, or URLs using vision models to extract information, answer questions, or process visual content.
Instructions
Analyze images using OpenRouter's vision models. Supports various input formats including base64, file paths, and URLs.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| type | Yes | The type of image input | |
| data | Yes | The image data (base64 string, file path, or URL) | |
| mimeType | No | MIME type of the image (required for base64 input) | |
| prompt | No | Custom prompt for image analysis (optional) | |
| format | No | Output format (default: text) | |
| maxTokens | No | Maximum tokens in response (default: 4000) | |
| temperature | No | Sampling temperature (default: 0.1) | |
Implementation Reference
/**
 * Races `work` against a timer that rejects with `new Error(message)` after
 * `ms` milliseconds. The timer is cleared as soon as the race settles, so a
 * finished call never leaves a pending timeout behind.
 */
function raceWithTimeout<T>(work: Promise<T>, ms: number, message: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });
  return Promise.race([work, timeout]).finally(() => {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  });
}

/**
 * src/tools/analyze-image.ts:7-112 (handler)
 *
 * The main execution function for the 'analyze_image' tool. It processes image
 * inputs from various sources, validates them, applies timeouts, and delegates
 * to OpenRouterClient for AI analysis.
 *
 * Steps:
 *  1. Build the image input and analysis options from the raw tool arguments.
 *  2. Run `ImageProcessor.processImage` with a 30 second limit.
 *  3. Reject unsupported MIME types and images larger than the configured
 *     `maxImageSize` (falls back to 10485760 bytes when unset or 0).
 *  4. Call `openRouterClient.analyzeImage` with a 120 second limit.
 *
 * @param args - Raw tool arguments (`type`, `data`, `mimeType`, `prompt`,
 *   `format`, `maxTokens`, `temperature`).
 * @param config - Provides `getServerConfig()` for the size limit.
 * @param openRouterClient - Client that performs the vision-model request.
 * @param logger - Receives progress and failure messages.
 * @returns A tool result whose `content` holds one text item. Failures never
 *   throw: they are reported as a text item prefixed with `Error: ` together
 *   with `isError: true`.
 */
export async function handleAnalyzeImage(
  args: any,
  config: Config,
  openRouterClient: OpenRouterClient,
  logger: Logger
) {
  const imageProcessor = ImageProcessor.getInstance();

  try {
    const imageInput: ImageInput = {
      type: args.type as 'base64' | 'file' | 'url',
      data: args.data as string,
      mimeType: args.mimeType as string,
    };

    const options: ImageAnalysisOptions = {
      prompt: args.prompt as string,
      format: args.format as 'text' | 'json',
      maxTokens: args.maxTokens as number,
      temperature: args.temperature as number,
    };

    logger.info(`Starting image analysis for type: ${imageInput.type}`);

    // Process the image, giving up after 30 seconds.
    const processedImage = (await raceWithTimeout(
      imageProcessor.processImage(imageInput),
      30000,
      'Image processing timed out after 30 seconds'
    )) as { data: string; mimeType: string; size: number };

    // Validate image type
    if (!imageProcessor.isValidImageType(processedImage.mimeType)) {
      throw new Error(`Unsupported image type: ${processedImage.mimeType}`);
    }

    // Check file size
    const serverConfig = config.getServerConfig();
    const maxImageSize = serverConfig.maxImageSize || 10485760;
    if (processedImage.size > maxImageSize) {
      throw new Error(
        `Image size ${processedImage.size} exceeds maximum allowed size ${maxImageSize}`
      );
    }

    // Analyze the image, giving up after 120 seconds.
    const result = await raceWithTimeout(
      openRouterClient.analyzeImage(
        processedImage.data,
        processedImage.mimeType,
        options.prompt ||
          'Analyze this image in detail. Describe what you see, including objects, people, text, and any notable features.',
        options
      ),
      120000,
      'Image analysis timed out after 2 minutes'
    );

    if (!result.success) {
      throw new Error(result.error || 'Failed to analyze image');
    }

    logger.info(`Image analysis completed successfully`, {
      model: result.model,
      usage: result.usage,
    });

    return {
      content: [
        {
          type: 'text',
          text: result.analysis || 'No analysis available',
        },
      ],
    };
  } catch (error) {
    logger.error('Image analysis failed', error);

    // Anything can be thrown; only Error instances carry a `.message`.
    const message = error instanceof Error ? error.message : String(error);

    // Timeouts get an extra hint about the likely cause.
    const text =
      error instanceof Error && message.includes('timed out')
        ? `Error: ${message}. The image may be too large or the server is experiencing delays.`
        : `Error: ${message}`;

    return {
      content: [
        {
          type: 'text',
          text,
        },
      ],
      isError: true,
    };
  }
}
- src/index.ts:48-89 (registration) Registration of the 'analyze_image' tool in the MCP server's listTools handler, including name, description, and input schema definition. { name: 'analyze_image', description: 'Analyze images using OpenRouter\'s vision models. Supports various input formats including base64, file paths, and URLs.', inputSchema: { type: 'object', properties: { type: { type: 'string', enum: ['base64', 'file', 'url'], description: 'The type of image input', }, data: { type: 'string', description: 'The image data (base64 string, file path, or URL)', }, mimeType: { type: 'string', description: 'MIME type of the image (required for base64 input)', }, prompt: { type: 'string', description: 'Custom prompt for image analysis (optional)', }, format: { type: 'string', enum: ['text', 'json'], description: 'Output format (default: text)', }, maxTokens: { type: 'number', description: 'Maximum tokens in response (default: 4000)', }, temperature: { type: 'number', minimum: 0, maximum: 2, description: 'Sampling temperature (default: 0.1)', }, }, required: ['type', 'data'], }, },
- src/index.ts:51-88 (schema) Input schema definition for the 'analyze_image' tool used in MCP protocol for validation. inputSchema: { type: 'object', properties: { type: { type: 'string', enum: ['base64', 'file', 'url'], description: 'The type of image input', }, data: { type: 'string', description: 'The image data (base64 string, file path, or URL)', }, mimeType: { type: 'string', description: 'MIME type of the image (required for base64 input)', }, prompt: { type: 'string', description: 'Custom prompt for image analysis (optional)', }, format: { type: 'string', enum: ['text', 'json'], description: 'Output format (default: text)', }, maxTokens: { type: 'number', description: 'Maximum tokens in response (default: 4000)', }, temperature: { type: 'number', minimum: 0, maximum: 2, description: 'Sampling temperature (default: 0.1)', }, }, required: ['type', 'data'], },
/**
 * Helper method in OpenRouterClient that performs the actual API call to
 * analyze the image using vision models.
 *
 * The image is sent as a `data:` URL inside a single user message, together
 * with the prompt text, to the `/chat/completions` endpoint.
 *
 * @param imageData - Base64-encoded image payload; must be non-empty and at
 *   most 20 * 1024 * 1024 characters long.
 * @param mimeType - MIME type placed in the data URL; must be non-empty.
 * @param prompt - Instruction for the model; an empty value falls back to a
 *   generic "describe the image" prompt. At most 10000 characters.
 * @param options - Optional tuning: `format` ('json' requests a JSON object
 *   response), `maxTokens` (default 4000, capped at 8000) and `temperature`
 *   (default 0.1; an explicit 0 is honoured).
 * @returns A result with `success: true` plus the analysis, structured data,
 *   model name and token usage, or `success: false` with an error message.
 *   Errors are logged and reported in the result rather than thrown.
 */
public async analyzeImage(
  imageData: string,
  mimeType: string,
  prompt: string,
  options: {
    format?: 'text' | 'json';
    maxTokens?: number;
    temperature?: number;
  } = {}
): Promise<ImageAnalysisResult> {
  try {
    this.logger.debug(`Analyzing image with model: ${this.config.model}`);

    // Validate inputs
    if (!imageData || imageData.length === 0) {
      throw new Error('No image data provided');
    }

    if (!mimeType) {
      throw new Error('No MIME type provided');
    }

    // Check image data size (base64 encoded): 20MB base64 limit
    if (imageData.length > 20 * 1024 * 1024) {
      throw new Error(
        `Image data too large: ${imageData.length} characters. Maximum allowed is 20MB.`
      );
    }

    // Validate prompt length
    const promptText =
      prompt ||
      'Analyze this image in detail. Describe what you see, including objects, people, text, and any notable features.';
    if (promptText.length > 10000) {
      throw new Error(
        `Prompt too long: ${promptText.length} characters. Maximum allowed is 10000.`
      );
    }

    const requestBody = {
      model: this.config.model,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: promptText,
            },
            {
              type: 'image_url',
              image_url: {
                url: `data:${mimeType};base64,${imageData}`,
              },
            },
          ],
        },
      ],
      // `||` is deliberate here: a maxTokens of 0 is not a usable limit.
      max_tokens: Math.min(options.maxTokens || 4000, 8000), // Cap at 8000 tokens
      // `??` rather than `||`: 0 is a valid temperature and must not be
      // silently replaced by the 0.1 default.
      temperature: options.temperature ?? 0.1,
      response_format: options.format === 'json' ? { type: 'json_object' } : undefined,
    };

    this.logger.debug(`Sending request to OpenRouter API`, {
      model: this.config.model,
      imageSize: imageData.length,
      promptLength: promptText.length,
      maxTokens: requestBody.max_tokens,
    });

    const response = await this.client.post('/chat/completions', requestBody);

    const choice = response.data.choices?.[0];
    if (!choice) {
      throw new Error('No response from model');
    }

    const content = choice.message?.content;
    if (!content) {
      throw new Error('Empty response from model');
    }

    let analysis: string;
    let structuredData: any;

    if (options.format === 'json') {
      try {
        structuredData = JSON.parse(content);
        analysis = JSON.stringify(structuredData, null, 2);
      } catch {
        // If JSON parsing fails, treat as text
        analysis = content;
        structuredData = { analysis: content };
      }
    } else {
      analysis = content;
      structuredData = { analysis };
    }

    const usage = response.data.usage;

    this.logger.info(`Image analysis completed successfully`, {
      model: this.config.model,
      usage,
    });

    return {
      success: true,
      analysis,
      structuredData,
      model: this.config.model,
      usage: usage
        ? {
            promptTokens: usage.prompt_tokens,
            completionTokens: usage.completion_tokens,
            totalTokens: usage.total_tokens,
          }
        : undefined,
    };
  } catch (error) {
    this.logger.error('Failed to analyze image', error);

    const errorMessage = this.extractErrorMessage(error);

    return {
      success: false,
      error: errorMessage,
    };
  }
}
/**
 * src/types/index.ts:1-12 (schema)
 *
 * TypeScript type definitions for image input and analysis options used
 * throughout the tool implementation.
 */

/** Optional tuning knobs for a single image analysis request. */
export interface ImageAnalysisOptions {
  /** Desired output format: plain text or a JSON object. */
  format?: 'text' | 'json';
  /** Custom instruction for the model; a generic prompt is used when omitted. */
  prompt?: string;
  /** Upper bound on tokens in the model response. */
  maxTokens?: number;
  /** Sampling temperature passed to the model. */
  temperature?: number;
}

/** An image to analyze, identified by how its `data` should be interpreted. */
export interface ImageInput {
  /** How `data` is encoded: inline base64, a file path, or a URL. */
  type: 'base64' | 'file' | 'url';
  /** The base64 string, file path, or URL, depending on `type`. */
  data: string;
  /** MIME type of the image; the tool schema describes it as required for base64 input. */
  mimeType?: string;
}