/**
* Main MCP Server implementation
*/
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';
import { ConfigService } from './services/ConfigService.js';
import { FileService } from './services/FileService.js';
import { VisionProviderFactory } from './providers/factory/ProviderFactory.js';
import {
analyze_image,
compare_images,
analyze_video,
detect_objects_in_image,
} from './tools/index.js';
import { VisionError } from './types/Errors.js';
// Create MCP server
const server = new McpServer({
name: 'ai-vision-mcp',
version: '0.0.5',
});
// Helper function to initialize services (lazy loading)
function getServices() {
try {
// Initialize configuration
const configService = ConfigService.getInstance();
const config = configService.getConfig();
// Create providers using factory
const imageProvider = VisionProviderFactory.createProviderWithValidation(
config,
'image'
);
const videoProvider = VisionProviderFactory.createProviderWithValidation(
config,
'video'
);
// Create file services for handling file uploads
const imageFileService = new FileService(
configService,
'image',
imageProvider as any
);
const videoFileService = new FileService(
configService,
'video',
videoProvider as any
);
return {
config,
configService,
imageProvider,
videoProvider,
imageFileService,
videoFileService,
};
} catch (error) {
console.error('Failed to initialize services:', error);
throw error;
}
}
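// Note: ConfigService.getInstance() is presumably a singleton, but the providers and
// file services above are re-created on every getServices() call. Each tool handler
// destructures only the services it needs, e.g.:
//   const { config, imageProvider, imageFileService } = getServices();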
// Register analyze_image tool
server.registerTool(
'analyze_image',
{
title: 'Analyze Image',
description:
'Analyze an image using AI vision models. Supports URLs, base64 data, and local file paths.',
inputSchema: {
imageSource: z
.string()
.describe(
'Image source - can be a URL, base64 data (data:image/...), or local file path'
),
prompt: z
.string()
.describe(
'The prompt describing what you want to analyze in the image. If the task is **front-end or UI analysis**, the prompt you provide must be: "Analyze the given screenshot and describe its layout structure, component arrangement, color scheme, typography, and visual hierarchy. Pay attention to common sections such as the navbar, header, footer, and main content areas." + your additional requirements. For **other tasks**, the prompt you provide must clearly describe what to identify, extract, or analyze in the image.'
),
options: z
.object({
temperature: z
.number()
.min(0)
.max(2)
.optional()
.describe(
'Controls randomness in the response (0.0 = deterministic, 2.0 = very random)'
),
topP: z
.number()
.min(0)
.max(1)
.optional()
.describe('Nucleus sampling parameter (0.0-1.0)'),
topK: z
.number()
.int()
.min(1)
.max(100)
.optional()
.describe('Top-k sampling parameter (1-100)'),
maxTokens: z
.number()
.int()
.min(1)
.max(8192)
.optional()
.describe(
'Maximum number of tokens to generate in the response. For detailed image analysis, 1000-2000 tokens are typically sufficient.'
),
})
.optional(),
},
},
async ({ imageSource, prompt, options }) => {
try {
const validatedArgs = {
imageSource,
prompt,
options,
};
// Initialize services on-demand
const { config, imageProvider, imageFileService } = getServices();
const result = await analyze_image(
validatedArgs,
config,
imageProvider,
imageFileService
);
return {
content: [
{
type: 'text',
text: JSON.stringify(result, null, 2),
},
],
};
} catch (error) {
console.error('Error executing analyze_image tool:', error);
let errorMessage = 'An unknown error occurred';
if (error instanceof VisionError) {
errorMessage = `${error.name}: ${error.message}`;
if (error.provider) {
errorMessage += ` (Provider: ${error.provider})`;
}
} else if (error instanceof Error) {
errorMessage = error.message;
}
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
error: true,
message: errorMessage,
tool: 'analyze_image',
},
null,
2
),
},
],
isError: true,
};
}
}
);
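// Example analyze_image request (illustrative values):
//   { "imageSource": "https://example.com/photo.png", "prompt": "Describe the main objects in this image" }
// The result is returned as pretty-printed JSON in a single text content block; failures
// are reported as an { error, message, tool } payload with isError set to true.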
// Register compare_images tool
server.registerTool(
'compare_images',
{
title: 'Compare Images',
description:
'Compare multiple images using AI vision models. Supports URLs, base64 data, and local file paths.',
inputSchema: {
imageSources: z
.array(z.string())
.min(2)
.describe(
'Array of image sources (URLs, base64 data, or file paths) - minimum 2 images. Maximum determined by MAX_IMAGES_FOR_COMPARISON environment variable (default: 4)'
),
prompt: z
.string()
.describe(
'The prompt describing how you want to compare the images. If the task is **front-end or UI consistency**, the prompt you provide must specify what to evaluate — such as layout alignment, component structure, spacing, typography, color consistency, and visual hierarchy. Pay special attention to shared sections like the **navbar**, **header**, **footer**, and **main content areas** to identify layout shifts or inconsistent styles between versions. For **other tasks**, the prompt you provide must clearly describe what aspects to compare or analyze — such as visual differences, content changes, design variations, or quality degradation.'
),
options: z
.object({
temperature: z
.number()
.min(0)
.max(2)
.optional()
.describe(
'Controls randomness in the response (0.0 = deterministic, 2.0 = very random)'
),
topP: z
.number()
.min(0)
.max(1)
.optional()
.describe('Nucleus sampling parameter (0.0-1.0)'),
topK: z
.number()
.int()
.min(1)
.max(100)
.optional()
.describe('Top-k sampling parameter (1-100)'),
maxTokens: z
.number()
.int()
.min(1)
.max(8192)
.optional()
.describe(
'Maximum number of tokens to generate in the response. For comparing multiple images, 1500-3000 tokens are recommended for a comprehensive analysis.'
),
})
.optional(),
},
},
async ({ imageSources, prompt, options }) => {
try {
// Initialize services on-demand to get config
const { config, imageProvider, imageFileService } = getServices();
// Dynamic validation using config
const maxImages = config.MAX_IMAGES_FOR_COMPARISON || 4;
if (imageSources.length > maxImages) {
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
error: true,
message: `Maximum ${maxImages} images allowed for comparison, received ${imageSources.length}. Configure MAX_IMAGES_FOR_COMPARISON environment variable to change this limit.`,
tool: 'compare_images',
},
null,
2
),
},
],
isError: true,
};
}
const validatedArgs = {
imageSources,
prompt,
options,
};
const result = await compare_images(
validatedArgs,
config,
imageProvider,
imageFileService
);
return {
content: [
{
type: 'text',
text: JSON.stringify(result, null, 2),
},
],
};
} catch (error) {
console.error('Error executing compare_images tool:', error);
let errorMessage = 'An unknown error occurred';
if (error instanceof VisionError) {
errorMessage = `${error.name}: ${error.message}`;
if (error.provider) {
errorMessage += ` (Provider: ${error.provider})`;
}
} else if (error instanceof Error) {
errorMessage = error.message;
}
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
error: true,
message: errorMessage,
tool: 'compare_images',
},
null,
2
),
},
],
isError: true,
};
}
}
);
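// Example compare_images request (illustrative values):
//   { "imageSources": ["https://example.com/v1.png", "https://example.com/v2.png"],
//     "prompt": "Compare layout alignment, spacing, and typography between the two versions" }
// Requests exceeding MAX_IMAGES_FOR_COMPARISON (default 4) are rejected before the
// provider is called, so oversized batches fail fast with a descriptive error.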
// Register detect_objects_in_image tool
server.registerTool(
'detect_objects_in_image',
{
title: 'Detect Objects in Image',
description:
'Detect objects in an image using AI vision models and generate annotated images with bounding boxes. Supports URLs, base64 data, and local file paths. File handling: if outputFilePath is provided, the annotated image is saved to that exact path; otherwise it is saved to a temp directory. Uses optimized default parameters for object detection.',
inputSchema: {
imageSource: z
.string()
.describe(
'Image source - can be a URL, base64 data (data:image/...), or local file path'
),
prompt: z
.string()
.describe(
'Text prompt describing what to detect or recognize in the image. Avoid including any instructions about output structure or formatting — these are automatically managed by the workflow.'
),
outputFilePath: z
.string()
.optional()
.describe(
"Optional explicit output path for the annotated image. If provided, the image is saved to this exact path. Relative paths are resolved against the MCP server's current working directory."
),
},
},
async ({ imageSource, prompt, outputFilePath }) => {
try {
const validatedArgs = {
imageSource,
prompt,
outputFilePath,
// No options parameter here; generation settings come from environment variable configuration instead
};
// Initialize services on-demand
const { config, imageProvider, imageFileService } = getServices();
const result = await detect_objects_in_image(
validatedArgs,
config,
imageProvider,
imageFileService
);
// Handle different response types
if ('file' in result) {
// Case 1: Explicit file path provided
return {
content: [
{
type: 'text' as const,
text: JSON.stringify(
{
detections: result.detections,
file: result.file,
image_metadata: result.image_metadata,
summary: result.summary,
metadata: result.metadata,
},
null,
2
),
},
],
};
} else if ('tempFile' in result) {
// Case 2: Auto-saved to temp directory
return {
content: [
{
type: 'text' as const,
text: JSON.stringify(
{
detections: result.detections,
tempFile: result.tempFile,
image_metadata: result.image_metadata,
summary: result.summary,
metadata: result.metadata,
},
null,
2
),
},
],
};
} else {
// Case 3: File saving skipped due to permission error
return {
content: [
{
type: 'text' as const,
text: JSON.stringify(
{
detections: result.detections,
image_metadata: result.image_metadata,
summary: result.summary,
metadata: result.metadata,
},
null,
2
),
},
],
};
}
} catch (error) {
console.error('Error executing detect_objects_in_image tool:', error);
let errorMessage = 'An unknown error occurred';
if (error instanceof VisionError) {
errorMessage = `${error.name}: ${error.message}`;
if (error.provider) {
errorMessage += ` (Provider: ${error.provider})`;
}
} else if (error instanceof Error) {
errorMessage = error.message;
}
return {
content: [
{
type: 'text' as const,
text: JSON.stringify(
{
error: true,
message: errorMessage,
tool: 'detect_objects_in_image',
},
null,
2
),
},
],
isError: true,
};
}
}
);
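// Example detect_objects_in_image request (illustrative values):
//   { "imageSource": "./screenshots/page.png", "prompt": "Detect all buttons and input fields" }
// Depending on the tool result, the response contains `file` (explicit outputFilePath),
// `tempFile` (annotated image auto-saved to the temp directory), or neither (file saving
// skipped; detections, metadata, and summary are still returned).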
// Register analyze_video tool
server.registerTool(
'analyze_video',
{
title: 'Analyze Video',
description:
'Analyze a video using AI vision models. Supports URLs and local file paths.',
inputSchema: {
videoSource: z
.string()
.describe('Video source - can be a URL or local file path'),
prompt: z
.string()
.describe(
'The prompt describing what you want to know about the video.'
),
options: z
.object({
temperature: z
.number()
.min(0)
.max(2)
.optional()
.describe(
'Controls randomness in the response (0.0 = deterministic, 2.0 = very random)'
),
topP: z
.number()
.min(0)
.max(1)
.optional()
.describe('Nucleus sampling parameter (0.0-1.0)'),
topK: z
.number()
.int()
.min(1)
.max(100)
.optional()
.describe('Top-k sampling parameter (1-100)'),
maxTokens: z
.number()
.int()
.min(1)
.max(8192)
.optional()
.describe(
'Maximum number of tokens to generate in the response. For video analysis, 2000-4000 tokens are recommended for comprehensive temporal understanding.'
),
})
.optional(),
},
},
async ({ videoSource, prompt, options }) => {
try {
const validatedArgs = {
videoSource,
prompt,
options,
};
// Initialize services on-demand
const { config, videoProvider, videoFileService } = getServices();
const result = await analyze_video(
validatedArgs,
config,
videoProvider,
videoFileService
);
return {
content: [
{
type: 'text',
text: JSON.stringify(result, null, 2),
},
],
};
} catch (error) {
console.error('Error executing analyze_video tool:', error);
let errorMessage = 'An unknown error occurred';
if (error instanceof VisionError) {
errorMessage = `${error.name}: ${error.message}`;
if (error.provider) {
errorMessage += ` (Provider: ${error.provider})`;
}
} else if (error instanceof Error) {
errorMessage = error.message;
}
return {
content: [
{
type: 'text',
text: JSON.stringify(
{
error: true,
message: errorMessage,
tool: 'analyze_video',
},
null,
2
),
},
],
isError: true,
};
}
}
);
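// Example analyze_video request (illustrative values):
//   { "videoSource": "https://example.com/demo.mp4", "prompt": "Summarize the key events in this video" }
// Unlike the image tools, only URLs and local file paths are supported for video sources.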
// Graceful shutdown
process.on('SIGINT', async () => {
// Log to stderr so shutdown messages do not interfere with the stdio protocol on stdout
console.error('Shutting down MCP server...');
await server.close();
process.exit(0);
});
process.on('SIGTERM', async () => {
console.error('Shutting down MCP server...');
await server.close();
process.exit(0);
});
// Start server
async function main() {
try {
const transport = new StdioServerTransport();
await server.connect(transport);
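// Log startup to stderr: stdout is reserved for MCP protocol messages on the stdio transport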
console.error('AI Vision MCP Server started successfully');
} catch (error) {
console.error('Failed to start MCP server:', error);
process.exit(1);
}
}
main();