Skip to main content
Glama

DocuMCP

by YannickTM
embeddings-buildin.ts (3.27 kB)
import "dotenv/config";
import { pipeline } from "@huggingface/transformers";
import { logger } from "../logger.js";

/**
 * Configuration for built-in (locally executed) embedding generation.
 */
export interface BuildinEmbeddingConfig {
  /** Length of the vectors produced for the configured model. */
  dimension: number;
  /** Model identifier passed to the feature-extraction pipeline. */
  model: string;
}

/**
 * Result from embedding generation.
 */
export interface EmbeddingResult {
  /** The embedding vector; a zero vector when generation failed. */
  embedding: number[];
  /** Failure message; only set when generation failed. */
  error?: string;
}

/** Model used when EMBEDDING_MODEL is unset or empty. */
const DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2";

/** Dimension used when the model is unknown and EMBEDDING_DIMENSION is unset or invalid. */
const DEFAULT_EMBEDDING_DIMENSION = 1024;

/**
 * Normalize an arbitrary thrown value into an Error so that `.message` is
 * always a meaningful string (a thrown string or object is not an Error).
 */
function toError(error: unknown): Error {
  return error instanceof Error ? error : new Error(String(error));
}

/**
 * Parse the EMBEDDING_DIMENSION environment value.
 *
 * @param raw - Raw environment value, possibly undefined or empty.
 * @returns A positive integer dimension; falls back to
 *   DEFAULT_EMBEDDING_DIMENSION when the value is missing or unusable, so the
 *   result is always safe to use as an array length.
 */
function parseEmbeddingDimension(raw: string | undefined): number {
  if (!raw) {
    return DEFAULT_EMBEDDING_DIMENSION;
  }

  // Explicit radix: the value is always meant to be decimal.
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isInteger(parsed) || parsed <= 0) {
    logger.warn(
      `Invalid EMBEDDING_DIMENSION "${raw}", falling back to ${DEFAULT_EMBEDDING_DIMENSION}`,
    );
    return DEFAULT_EMBEDDING_DIMENSION;
  }
  return parsed;
}

/**
 * Load built-in embedding configuration from environment variables.
 *
 * - `EMBEDDING_MODEL` selects the model (default: all-MiniLM-L6-v2).
 * - The dimension is fixed for the known models (all-MiniLM-L6-v2: 384,
 *   Qwen3-Embedding-0.6B: 1024); for any other model it is read from
 *   `EMBEDDING_DIMENSION`, defaulting to 1024.
 *
 * @returns The resolved model name and embedding dimension.
 */
export function loadEmbeddingConfig(): BuildinEmbeddingConfig {
  // `||` rather than `??` on purpose: an empty EMBEDDING_MODEL also falls back.
  const model = process.env.EMBEDDING_MODEL || DEFAULT_EMBEDDING_MODEL;

  let dimension: number;
  if (model.includes("all-MiniLM-L6-v2")) {
    dimension = 384;
  } else if (model.includes("Qwen3-Embedding-0.6B")) {
    dimension = 1024;
  } else {
    dimension = parseEmbeddingDimension(process.env.EMBEDDING_DIMENSION);
  }

  return { dimension, model };
}

/**
 * Cache of embedding pipelines, keyed by model name.
 *
 * The in-flight promise is stored (not the resolved pipeline) so that
 * concurrent first calls share a single model load instead of each starting
 * their own.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- pipeline() has no practical static type here
const pipelineCache = new Map<string, Promise<any>>();

/**
 * Get the built-in embedding pipeline for a model, creating it on first use.
 *
 * @param model - Model identifier to load.
 * @returns The feature-extraction pipeline for `model`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- see pipelineCache
async function getEmbeddingPipeline(model: string): Promise<any> {
  const cached = pipelineCache.get(model);
  if (cached) {
    return cached;
  }

  logger.warn(`Initializing buildin embedding pipeline with model: ${model}`);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see pipelineCache
  const created: Promise<any> = pipeline("feature-extraction", model);
  pipelineCache.set(model, created);

  // Do not keep a failed load cached, otherwise every later call would
  // re-observe the same rejection and never retry.
  created.catch(() => {
    if (pipelineCache.get(model) === created) {
      pipelineCache.delete(model);
    }
  });

  return created;
}

/**
 * Generate an embedding using a built-in model via @huggingface/transformers.
 *
 * @param text - Text to embed.
 * @param model - Model identifier to use.
 * @returns The embedding as a plain number array.
 * @throws Error wrapping the underlying failure when the pipeline cannot be
 *   loaded or inference fails.
 */
async function generateEmbedding(
  text: string,
  model: string,
): Promise<number[]> {
  try {
    const pipe = await getEmbeddingPipeline(model);

    // Ask the pipeline for a single mean-pooled, normalized vector.
    const result = await pipe(text, {
      pooling: "mean",
      normalize: true,
    });

    // NOTE(review): result.data is presumably a numeric typed array; it is
    // copied into a plain array for callers.
    return Array.from(result.data as ArrayLike<number>);
  } catch (error) {
    const cause = toError(error);
    logger.error("Error generating buildin embedding:", cause);
    throw new Error(`Failed to generate buildin embedding: ${cause.message}`);
  }
}

/**
 * Create a text embedding using the built-in model.
 *
 * Never throws: on failure the result carries a zero vector of the configured
 * dimension together with the error message.
 *
 * @param text - Text to embed.
 * @returns The embedding, plus `error` when generation failed.
 */
export async function createEmbedding(text: string): Promise<EmbeddingResult> {
  // Resolved once so the success path and the fallback agree on the config.
  const config = loadEmbeddingConfig();

  try {
    const embedding = await generateEmbedding(text, config.model);
    return { embedding };
  } catch (error) {
    const cause = toError(error);
    logger.error("Error creating buildin embedding:", cause);
    return {
      embedding: new Array<number>(config.dimension).fill(0),
      error: cause.message,
    };
  }
}

/**
 * Create text embeddings for a batch of texts using the built-in model.
 *
 * Texts are processed one at a time, in order; a failure for one text yields
 * an error result for that entry and does not abort the batch.
 *
 * @param texts - Texts to embed.
 * @returns One result per input text, in the same order.
 */
export async function createEmbeddings(
  texts: string[],
): Promise<EmbeddingResult[]> {
  const results: EmbeddingResult[] = [];

  for (const text of texts) {
    results.push(await createEmbedding(text));
  }

  return results;
}

/**
 * Get the current embedding dimension.
 *
 * @returns The dimension resolved from the current environment configuration.
 */
export function getEmbeddingDimension(): number {
  return loadEmbeddingConfig().dimension;
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/YannickTM/docu-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.