M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
VLService.ts (5.88 kB)
/**
 * @file src/indexing/VLService.ts
 * @description Vision-Language model service for generating image descriptions
 *
 * Supports:
 * - llama.cpp (qwen2.5-vl via OpenAI-compatible API)
 * - Ollama (future support)
 */

import { createSecureFetchOptions } from '../utils/fetch-helper.js';

export interface VLConfig {
  provider: string;
  api: string;
  apiPath: string;
  apiKey: string;
  model: string;
  contextSize: number;
  maxTokens: number;
  temperature: number;
}

export interface VLDescriptionResult {
  description: string;
  model: string;
  tokensUsed: number;
  processingTimeMs: number;
}

export class VLService {
  private config: VLConfig;
  private enabled: boolean = false;

  constructor(config: VLConfig) {
    this.config = config;
    this.enabled = true;
  }

  /**
   * Generate a text description of an image using a vision-language model
   *
   * Sends the image to a VL model (e.g., qwen2.5-vl) to generate a natural language
   * description. Used for making images searchable via text embeddings.
   *
   * @param imageDataURL - Image as a data URL (data:image/jpeg;base64,...)
   * @param prompt - Instruction prompt for the VL model
   * @returns Description result with text, model info, and timing
   * @throws {Error} If the VL service is disabled or the API call fails
   *
   * @example
   * const vlService = new VLService({
   *   provider: 'llama.cpp',
   *   api: 'http://localhost:8080',
   *   apiPath: '/v1/chat/completions',
   *   apiKey: 'none',
   *   model: 'qwen2.5-vl',
   *   contextSize: 4096,
   *   maxTokens: 500,
   *   temperature: 0.7
   * });
   *
   * const result = await vlService.describeImage(dataURL);
   * console.log('Description:', result.description);
   * console.log('Processing time:', result.processingTimeMs, 'ms');
   *
   * @example
   * // Custom prompt for specific analysis
   * const result = await vlService.describeImage(
   *   imageDataURL,
   *   'Describe the architecture diagram. What components are shown?'
   * );
   * console.log('Analysis:', result.description);
   *
   * @example
   * // Use the description for embedding
   * const vlResult = await vlService.describeImage(imageDataURL);
   * const embedding = await embeddingsService.generateEmbedding(vlResult.description);
   * await storeImageEmbedding(imagePath, embedding);
   */
  async describeImage(
    imageDataURL: string,
    prompt: string = "Describe this image in detail. What do you see?"
  ): Promise<VLDescriptionResult> {
    if (!this.enabled) {
      throw new Error('VL Service is not enabled');
    }

    const startTime = Date.now();

    try {
      const description = await this.callVLAPI(imageDataURL, prompt);
      const processingTimeMs = Date.now() - startTime;

      return {
        description,
        model: this.config.model,
        tokensUsed: 0, // Will be populated from the API response if available
        processingTimeMs
      };
    } catch (error) {
      console.error('❌ VL Service error:', error);
      throw new Error(`Failed to generate image description: ${error}`);
    }
  }

  /**
   * Call the VL API (OpenAI-compatible format)
   */
  private async callVLAPI(imageDataURL: string, prompt: string): Promise<string> {
    const url = `${this.config.api}${this.config.apiPath}`;

    const requestBody = {
      model: this.config.model,
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: prompt },
            { type: 'image_url', image_url: { url: imageDataURL } }
          ]
        }
      ],
      max_tokens: this.config.maxTokens,
      temperature: this.config.temperature
    };

    // VL image processing can take 30-60 seconds, so use a longer timeout
    const timeoutMs = parseInt(process.env.MIMIR_EMBEDDINGS_VL_TIMEOUT || '120000', 10); // 2 minutes default

    const fetchOptions = createSecureFetchOptions(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.config.apiKey}`
      },
      body: JSON.stringify(requestBody),
      signal: AbortSignal.timeout(timeoutMs)
    });

    const response = await fetch(url, fetchOptions);

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`VL API error (${response.status}): ${errorText}`);
    }

    const data = await response.json();

    // Extract the description from the OpenAI-compatible response
    if (data.choices && data.choices[0] && data.choices[0].message) {
      return data.choices[0].message.content;
    }

    throw new Error('Invalid response format from VL API');
  }

  /**
   * Test VL service connectivity and availability
   *
   * Sends a minimal test image to verify the VL API is accessible
   * and responding correctly. Use during initialization.
   *
   * @returns true if the connection succeeds, false otherwise
   *
   * @example
   * const vlService = new VLService(config);
   * const isAvailable = await vlService.testConnection();
   * if (isAvailable) {
   *   console.log('VL service ready');
   * } else {
   *   console.warn('VL service unavailable');
   * }
   *
   * @example
   * // Check before enabling image indexing
   * if (await vlService.testConnection()) {
   *   await indexImagesWithDescriptions();
   * } else {
   *   console.log('Skipping image indexing - VL service offline');
   * }
   */
  async testConnection(): Promise<boolean> {
    try {
      // Create a tiny 1x1 test image
      const testImageBase64 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
      const testDataURL = `data:image/png;base64,${testImageBase64}`;

      await this.describeImage(testDataURL, 'What color is this?');
      return true;
    } catch (error) {
      console.error('VL Service connection test failed:', error);
      return false;
    }
  }
}
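A minimal end-to-end usage sketch, assuming the llama.cpp config values from the JSDoc example above; the import path and the 'diagram.png' file path are hypothetical placeholders, and the image is assumed to be a PNG read from disk with Node's fs module.

import { readFile } from 'node:fs/promises';
import { VLService } from './src/indexing/VLService.js';

async function main() {
  const vlService = new VLService({
    provider: 'llama.cpp',
    api: 'http://localhost:8080',
    apiPath: '/v1/chat/completions',
    apiKey: 'none',
    model: 'qwen2.5-vl',
    contextSize: 4096,
    maxTokens: 500,
    temperature: 0.7
  });

  // Skip image description entirely if the VL endpoint is not reachable
  if (!(await vlService.testConnection())) {
    console.warn('VL service unavailable - skipping image description');
    return;
  }

  // Build the data URL the service expects from a local image file
  // ('diagram.png' is a placeholder path)
  const bytes = await readFile('diagram.png');
  const dataURL = `data:image/png;base64,${bytes.toString('base64')}`;

  const result = await vlService.describeImage(dataURL);
  console.log(result.description);
  console.log(`Generated by ${result.model} in ${result.processingTimeMs} ms`);
}

main().catch(console.error);

Checking testConnection() first mirrors the pattern in the JSDoc: image indexing degrades gracefully instead of failing when the VL endpoint is offline.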
