embedding-manager.js
import { pipeline, env } from '@xenova/transformers';
import crypto from 'crypto';
import { join } from 'path';
import { EmbeddingCacheManager } from './cache-manager.js';
import { TIME_CONSTANTS, LIMITS } from './constants.js';
import { VectorOperations, getRustInfo } from '../rust-bindings/index.js';

// Store process reference at module level to avoid conflicts
const nodeProcess = process;

// Configure Transformers.js for local model storage
env.localURL = join(nodeProcess.cwd(), '.models');
env.allowRemoteModels = true;

/**
 * Manages local embedding generation using Transformers.js
 * Provides privacy-first semantic vector generation
 *
 * Uses Xenova/all-mpnet-base-v2 model for optimal accuracy:
 * - 87-88% accuracy on semantic similarity tasks
 * - 512 token context window
 * - WebGPU acceleration for 3.5x speedup
 * - Q8 quantization maintains 98% accuracy
 */
export class LocalEmbeddingManager {
  constructor(options = {}) {
    this.modelName = options.modelName || 'Xenova/all-mpnet-base-v2';
    this.pipeline = null;
    this.initPromise = null;
    this.cacheManager = new EmbeddingCacheManager({
      cacheDir: options.cacheDir || join(nodeProcess.cwd(), '.embedding-cache'),
      maxMemoryEntries: options.maxCacheSize || 1000,
      ttl: options.ttl || 7 * TIME_CONSTANTS.HOURS_24, // 7 days default
    });

    // Initialize high-performance vector operations
    this.vectorOps = new VectorOperations({
      useSimd: options.useSimd !== false,
      useParallel: options.useParallel !== false,
      similarityThreshold: options.similarityThreshold || 0.7,
    });

    // Get Rust performance info
    this.rustInfo = getRustInfo();

    // WebGPU configuration for better performance
    this.pipelineConfig = {
      quantized: true,
      quantization: options.quantization || 'q8', // Q8 quantization for 3.5x speedup with 98% accuracy retention
      device: options.device || 'webgpu', // Enable WebGPU acceleration when available
    };
  }

  /**
   * Initialize the embedding pipeline
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initPromise) {
      return this.initPromise;
    }
    this.initPromise = this._initialize();
    return this.initPromise;
  }

  async _initialize() {
    // Initializing embedding model silently for production

    // Initialize cache manager
    await this.cacheManager.initialize();

    // Initialize the feature extraction pipeline with enhanced configuration
    this.pipeline = await pipeline('feature-extraction', this.modelName, this.pipelineConfig);

    // Embedding model loaded successfully
  }

  /**
   * Generate embedding for text content
   * @param {string} content - Text to embed
   * @param {string} filePath - File path for caching
   * @returns {Promise<{vector: number[], cached: boolean}>}
   */
  async generateEmbedding(content, filePath) {
    if (!this.pipeline) {
      await this.initialize();
    }

    // Generate content hash for caching
    const contentHash = this.hashContent(content);

    // Check cache
    const cachedData = await this.cacheManager.getEmbedding(contentHash, filePath);
    if (cachedData && cachedData.vector) {
      return {
        vector: cachedData.vector,
        cached: true,
      };
    }

    try {
      // Generate embedding
      const output = await this.pipeline(content, {
        pooling: 'mean',
        normalize: true,
      });

      // Convert to array
      const vector = Array.from(output.data);

      // Cache the result
      await this.cacheManager.setEmbedding(contentHash, filePath, vector);

      return {
        vector,
        cached: false,
      };
    } catch (error) {
      // Error generating embedding - using fallback
      // Return a fallback empty embedding
      return {
        vector: new Array(LIMITS.EMBEDDING_DIMENSION).fill(0),
        cached: false,
        error: error.message,
      };
    }
  }

  /**
   * Generate embeddings for multiple texts
   * @param {Array<{content: string, path: string}>} items - Items to embed
   * @returns {Promise<Array<{path: string, vector: number[], cached: boolean}>>}
   */
  async generateBatchEmbeddings(items) {
    if (!this.pipeline) {
      await this.initialize();
    }

    const results = [];
    const uncachedItems = [];

    // Check cache for each item
    const cacheCheckItems = items.map((item) => ({
      contentHash: this.hashContent(item.content),
      filePath: item.path,
    }));

    const cachedResults = await this.cacheManager.getBatch(cacheCheckItems);

    for (const item of items) {
      const contentHash = this.hashContent(item.content);
      const cacheKey = `${item.path}:${contentHash}`;
      const cachedData = cachedResults.get(cacheKey);

      if (cachedData && cachedData.vector) {
        results.push({
          path: item.path,
          vector: cachedData.vector,
          cached: true,
        });
      } else {
        uncachedItems.push({ ...item, contentHash });
      }
    }

    // Process uncached items
    if (uncachedItems.length > 0) {
      try {
        // Batch process for efficiency
        const contents = uncachedItems.map((item) => item.content);
        const outputs = await this.pipeline(contents, {
          pooling: 'mean',
          normalize: true,
        });

        // Process results
        for (let i = 0; i < uncachedItems.length; i++) {
          const item = uncachedItems[i];
          const start = i * LIMITS.EMBEDDING_DIMENSION;
          const end = (i + 1) * LIMITS.EMBEDDING_DIMENSION;
          const vector = Array.from(outputs.data.slice(start, end));

          await this.cacheManager.setEmbedding(item.contentHash, item.path, vector);

          results.push({
            path: item.path,
            vector,
            cached: false,
          });
        }
      } catch (error) {
        // Error generating batch embeddings - using fallback
        // Add fallback embeddings for failed items
        for (const item of uncachedItems) {
          results.push({
            path: item.path,
            vector: new Array(LIMITS.EMBEDDING_DIMENSION).fill(0),
            cached: false,
            error: error.message,
          });
        }
      }
    }

    return results;
  }

  /**
   * Calculate cosine similarity between two vectors
   * @param {number[]} vecA - First vector
   * @param {number[]} vecB - Second vector
   * @returns {Promise<number>} Similarity score (0-1)
   */
  async cosineSimilarity(vecA, vecB) {
    try {
      return await this.vectorOps.cosineSimilarity(vecA, vecB);
    } catch {
      // Fallback to original JavaScript implementation
      let dotProduct = 0;
      let normA = 0;
      let normB = 0;

      for (let i = 0; i < vecA.length; i++) {
        dotProduct += vecA[i] * vecB[i];
        normA += vecA[i] * vecA[i];
        normB += vecB[i] * vecB[i];
      }

      normA = Math.sqrt(normA);
      normB = Math.sqrt(normB);

      if (normA === 0 || normB === 0) {
        return 0;
      }

      return dotProduct / (normA * normB);
    }
  }

  /**
   * Find similar vectors from a collection
   * @param {number[]} queryVector - Query vector
   * @param {Array<{path: string, vector: number[]}>} vectors - Collection to search
   * @param {number} topK - Number of results to return
   * @param {number} threshold - Minimum similarity threshold
   * @returns {Promise<Array<{path: string, similarity: number}>>}
   */
  async findSimilar(
    queryVector,
    vectors,
    topK = LIMITS.SIMILAR_FILES_DEFAULT,
    threshold = LIMITS.SIMILARITY_THRESHOLD,
  ) {
    try {
      // Use high-performance Rust implementation
      const vectorData = vectors.map((item) => item.vector);
      const paths = vectors.map((item) => item.path);

      const results = await this.vectorOps.findSimilarVectors(queryVector, vectorData, paths, topK);

      // Filter by threshold and format results
      return results
        .filter((result) => result.similarity >= threshold)
        .map((result) => ({
          path: result.path,
          similarity: result.similarity,
        }));
    } catch {
      // Fallback to original JavaScript implementation
      const similarities = [];

      for (const item of vectors) {
        try {
          const similarity = await this.cosineSimilarity(queryVector, item.vector);
          similarities.push({
            path: item.path,
            similarity,
          });
        } catch {
          // Skip items that cause errors
          continue;
        }
      }

      return similarities
        .filter((item) => item.similarity >= threshold)
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, topK);
    }
  }

  /**
   * Hash content for caching
   * @param {string} content - Content to hash
   * @returns {string} SHA-256 hash
   */
  hashContent(content) {
    return crypto.createHash('sha256').update(content).digest('hex');
  }

  /**
   * Clear the cache
   */
  async clearCache() {
    await this.cacheManager.clear(true);
  }

  /**
   * Get cache statistics
   * @returns {Object} Cache stats
   */
  getCacheStats() {
    return this.cacheManager.getStats();
  }

  /**
   * Cleanup resources
   */
  async cleanup() {
    // Cache manager handles its own cleanup
    this.pipeline = null;
  }
}
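A minimal usage sketch, not part of the source file: it assumes the sibling modules (./cache-manager.js, ./constants.js, ../rust-bindings/index.js) resolve as imported above, that LIMITS.EMBEDDING_DIMENSION matches the model's 768-dimensional output, and that the model files are available locally or downloadable on first run. The sample contents, file paths, and the 'query' cache key are illustrative only.

// usage-example.mjs (hypothetical, ESM with top-level await)
import { LocalEmbeddingManager } from './embedding-manager.js';

const manager = new LocalEmbeddingManager({
  device: 'webgpu',           // falls back to pipelineConfig defaults above
  similarityThreshold: 0.7,
});
await manager.initialize();

// Embed a small batch of files (cached by SHA-256 content hash + path)
const items = [
  { content: 'export function add(a, b) { return a + b; }', path: 'src/add.js' },
  { content: 'export const sum = (xs) => xs.reduce((a, b) => a + b, 0);', path: 'src/sum.js' },
];
const embeddings = await manager.generateBatchEmbeddings(items);

// Embed a natural-language query and search the collection
const { vector: queryVector } = await manager.generateEmbedding(
  'function that adds two numbers',
  'query', // illustrative cache key, not a real file path
);
const matches = await manager.findSimilar(queryVector, embeddings, 5);
console.log(matches); // [{ path, similarity }, ...] sorted by similarity

await manager.cleanup();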
