/**
 * Real Ollama Embedding Service
 *
 * Provides actual API integration with Ollama, generating embeddings through
 * real HTTP calls to the embeddings endpoint (http://localhost:11434/api/embeddings
 * by default; the base URL and model are configurable).
 */
import type { TextChunk } from '../../types/index.js';
import type { EmbeddingOperations, EmbeddingVector, EmbeddingResult, BatchEmbeddingOperations } from '../../domain/embeddings/index.js';
/**
* Ollama API configuration
*/
interface OllamaConfig {
baseUrl: string;
timeout: number;
retries: number;
model: string;
}
/**
* Ollama embedding API request
*/
interface OllamaEmbeddingRequest {
model: string;
prompt: string;
}
/**
* Ollama embedding API response
*/
interface OllamaEmbeddingResponse {
embedding: number[];
}
/**
* Real Ollama Embedding Service that makes actual API calls
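 *
 * A minimal usage sketch (assumes a local Ollama server with the default
 * embedding model already pulled; the logged dimension count is illustrative):
 *
 * @example
 * ```ts
 * const service = new OllamaEmbeddingService();
 * await service.initialize();
 * const embedding = await service.generateSingleEmbedding('hello world');
 * console.log(embedding.dimensions); // e.g. 1024 for mxbai-embed-large
 * ```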
*/
export class OllamaEmbeddingService implements EmbeddingOperations, BatchEmbeddingOperations {
private readonly config: OllamaConfig;
private initialized = false;
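  /**
   * @param config - Optional overrides for base URL, timeout, retries, and model.
   *
   * A configuration sketch (host and model name are illustrative, not defaults):
   *
   * @example
   * ```ts
   * const service = new OllamaEmbeddingService({
   *   baseUrl: 'http://ollama.internal:11434', // hypothetical remote host
   *   model: 'nomic-embed-text'
   * });
   * ```
   */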
  constructor(config?: Partial<OllamaConfig>) {
    // Prefer an explicitly supplied base URL, then OLLAMA_HOST, then the default
    const rawBaseUrl = config?.baseUrl || process.env.OLLAMA_HOST || 'http://localhost:11434';
    this.config = {
      timeout: 30000, // 30 seconds for embedding generation
      retries: 3,
      model: 'mxbai-embed-large', // Default to a popular embedding model
      ...config,
      // Normalize the base URL: trim whitespace, strip trailing slashes and a trailing /api
      baseUrl: rawBaseUrl.trim().replace(/\/+$/, '').replace(/\/api$/, '')
    };
  }
/**
* Initialize the service and verify Ollama connectivity
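   *
   * Connectivity is checked via GET /api/tags which, per the Ollama API,
   * responds with a shape like:
   * `{ "models": [{ "name": "mxbai-embed-large:latest", ... }] }`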
*/
async initialize(): Promise<void> {
if (this.initialized) {
return;
}
// Check if Ollama is accessible
try {
const response = await fetch(`${this.config.baseUrl}/api/tags`, {
method: 'GET',
signal: AbortSignal.timeout(this.config.timeout)
});
if (!response.ok) {
throw new Error(`Ollama API not accessible: HTTP ${response.status}`);
}
      // Verify the embedding model is available. Ollama reports model names
      // with tags (e.g. 'mxbai-embed-large:latest'), so compare base names too.
      const data = await response.json() as { models?: { name: string }[] };
      const availableModels = data.models?.map((m) => m.name) ?? [];
      const modelAvailable = availableModels.some(
        (name) => name === this.config.model || name.split(':')[0] === this.config.model
      );
      if (!modelAvailable) {
        console.warn(`Warning: embedding model '${this.config.model}' not found in Ollama. Available models:`, availableModels);
        // Continue anyway - the model may still be pulled before first use
      }
this.initialized = true;
} catch (error) {
throw new Error(`Failed to initialize Ollama embedding service: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Generate a single embedding using real Ollama API
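   *
   * A usage sketch (assumes the service can reach Ollama):
   *
   * @example
   * ```ts
   * const vec = await service.generateSingleEmbedding('Ollama runs models locally');
   * // vec.vector is a number[] of length vec.dimensions
   * ```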
*/
async generateSingleEmbedding(text: string): Promise<EmbeddingVector> {
if (!this.initialized) {
await this.initialize();
}
    // Reject empty input: there is nothing meaningful to embed
if (!text || text.trim().length === 0) {
throw new Error('Cannot generate embedding for empty text');
}
const startTime = Date.now();
try {
const response = await this.callOllamaEmbeddingAPI(text);
const processingTime = Date.now() - startTime;
return {
vector: response.embedding,
dimensions: response.embedding.length,
model: this.config.model,
createdAt: new Date().toISOString(),
metadata: {
generatedAt: new Date().toISOString(),
modelVersion: this.config.model,
          tokensUsed: Math.ceil(text.length / 4), // Rough estimate: ~4 characters per token
confidence: 1.0 // Real embeddings have full confidence
}
};
} catch (error) {
throw new Error(`Failed to generate embedding: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Generate embeddings for multiple chunks using real Ollama API
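   *
   * Chunks are processed sequentially; a failed chunk yields a zero-vector
   * placeholder rather than aborting the whole batch.
   *
   * @example
   * ```ts
   * // A sketch; `chunks` is an existing TextChunk[]
   * const vectors = await service.generateEmbeddings(chunks);
   * // One vector per defined chunk; failures appear as zero vectors
   * ```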
*/
async generateEmbeddings(chunks: TextChunk[]): Promise<EmbeddingVector[]> {
if (!this.initialized) {
await this.initialize();
}
const results: EmbeddingVector[] = [];
// Process chunks sequentially to avoid overwhelming the API
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
if (!chunk) continue; // Skip undefined chunks
try {
const embedding = await this.generateSingleEmbedding(chunk.content);
// Use chunk index as identifier since TextChunk doesn't have id property
embedding.chunkId = `chunk_${i}_${chunk.chunkIndex}`;
results.push(embedding);
      } catch (error) {
        // For batch processing, continue with other chunks even if one fails
        console.warn(`Failed to generate embedding for chunk ${i}:`, error);
        // Fall back to a zero vector so every processed chunk still yields a
        // result. Reuse the dimensionality of an earlier embedding when one
        // exists; 384 is an arbitrary last-resort default that may not match
        // the model (mxbai-embed-large, for example, produces 1024 dimensions).
        const fallbackDimensions = results[0]?.dimensions ?? 384;
        results.push({
          vector: new Array(fallbackDimensions).fill(0),
          dimensions: fallbackDimensions,
          model: this.config.model,
          createdAt: new Date().toISOString(),
          chunkId: `chunk_${i}_${chunk.chunkIndex}`,
          metadata: {
            generatedAt: new Date().toISOString(),
            modelVersion: this.config.model,
            tokensUsed: 0,
            confidence: 0
          }
        });
      }
}
return results;
}
/**
* Calculate cosine similarity between two embedding vectors
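   *
   * Returns a value in [-1, 1]: 1 for identical direction, 0 for orthogonal
   * vectors, and 0 by convention when either vector is all zeros. A worked
   * example with 3-dimensional vectors for brevity:
   *
   * @example
   * ```ts
   * // a = [1, 2, 3], b = [2, 4, 6]
   * // dot = 2 + 8 + 18 = 28; |a| = sqrt(14); |b| = sqrt(56)
   * // similarity = 28 / (sqrt(14) * sqrt(56)) = 28 / 28 = 1
   * ```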
*/
calculateSimilarity(vector1: EmbeddingVector, vector2: EmbeddingVector): number {
if (vector1.vector.length !== vector2.vector.length) {
throw new Error('Vectors must have the same dimensions for similarity calculation');
}
// Calculate cosine similarity
let dotProduct = 0;
let norm1 = 0;
let norm2 = 0;
for (let i = 0; i < vector1.vector.length; i++) {
const v1 = vector1.vector[i] ?? 0;
const v2 = vector2.vector[i] ?? 0;
dotProduct += v1 * v2;
norm1 += v1 * v1;
norm2 += v2 * v2;
}
if (norm1 === 0 || norm2 === 0) {
return 0; // No similarity if either vector is zero
}
return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
}
/**
* Process embeddings in batches with configurable batch size
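   *
   * A usage sketch (the batch size of 8 is illustrative):
   *
   * @example
   * ```ts
   * const results = await service.processBatch(chunks, 8);
   * const failed = results.filter(r => !r.success);
   * ```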
*/
async processBatch(chunks: TextChunk[], batchSize: number = 16): Promise<EmbeddingResult[]> {
if (!this.initialized) {
await this.initialize();
}
const results: EmbeddingResult[] = [];
// Process in batches to manage memory and API load
for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize);
      // Process batch items concurrently. Map over the raw batch (rather than
      // filtering first) so batchIndex stays aligned with each chunk's position.
      const batchPromises = batch.map(async (chunk, batchIndex): Promise<EmbeddingResult | null> => {
        if (!chunk) return null; // Skip undefined chunks
        const globalIndex = i + batchIndex;
        const startTime = Date.now();
try {
const embedding = await this.generateSingleEmbedding(chunk.content);
embedding.chunkId = `chunk_${globalIndex}_${chunk.chunkIndex}`;
return {
chunk,
embedding,
processingTime: Date.now() - startTime,
success: true
};
} catch (error) {
return {
chunk,
embedding: {
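              // NOTE: 384 is an arbitrary placeholder dimension and may not
              // match the model's real output (mxbai-embed-large produces 1024)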
vector: new Array(384).fill(0),
dimensions: 384,
model: this.config.model,
createdAt: new Date().toISOString(),
chunkId: `chunk_${globalIndex}_${chunk.chunkIndex}`,
metadata: {
generatedAt: new Date().toISOString(),
modelVersion: this.config.model,
tokensUsed: 0,
confidence: 0
}
},
processingTime: Date.now() - startTime,
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
};
}
});
      const batchResults = (await Promise.all(batchPromises)).filter(
        (result): result is EmbeddingResult => result !== null
      );
results.push(...batchResults);
// Small delay between batches to be API-friendly
if (i + batchSize < chunks.length) {
await new Promise(resolve => setTimeout(resolve, 100));
}
}
return results;
}
/**
* Estimate processing time based on chunk count
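   *
   * For example, 120 chunks estimate to 120 * 500ms = 60 seconds.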
*/
estimateProcessingTime(chunkCount: number): number {
    // Rough heuristic: assume ~500ms per chunk for real API calls
    // (actual latency varies with model size and hardware)
return chunkCount * 500;
}
/**
* Check if the service is initialized
*/
isInitialized(): boolean {
return this.initialized;
}
  /**
   * Get a copy of the current model configuration
   */
  getModelConfig(): OllamaConfig {
    return { ...this.config };
  }
/**
* Make actual HTTP call to Ollama embedding API
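   *
   * Wire format, per the Ollama embeddings API (values illustrative):
   *   request:  POST /api/embeddings  { "model": "...", "prompt": "..." }
   *   response: { "embedding": [0.13, -0.02, ...] }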
*/
private async callOllamaEmbeddingAPI(text: string): Promise<OllamaEmbeddingResponse> {
const request: OllamaEmbeddingRequest = {
model: this.config.model,
prompt: text
};
let lastError: Error | null = null;
// Retry logic
for (let attempt = 1; attempt <= this.config.retries; attempt++) {
try {
const response = await fetch(`${this.config.baseUrl}/api/embeddings`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(request),
signal: AbortSignal.timeout(this.config.timeout)
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const data = await response.json() as unknown;
        if (!data || typeof data !== 'object' || !Array.isArray((data as Partial<OllamaEmbeddingResponse>).embedding)) {
throw new Error('Invalid response format from Ollama API');
}
return data as OllamaEmbeddingResponse;
} catch (error) {
lastError = error instanceof Error ? error : new Error(String(error));
if (attempt === this.config.retries) {
break; // Don't wait after the final attempt
}
// Exponential backoff
const delay = Math.pow(2, attempt - 1) * 1000; // 1s, 2s, 4s, etc.
await new Promise(resolve => setTimeout(resolve, delay));
}
}
throw lastError || new Error('Failed to call Ollama API after retries');
}
}