Skip to main content
Glama
EmbeddingConfig.ts (14.1 kB)
/**
 * Shared embedding model configuration service.
 * Provides synchronous parsing of embedding model configuration and known dimensions lookup.
 * Eliminates code duplication between DocumentStore and telemetry systems.
 *
 * All model lookups are case-insensitive to handle variations in model name capitalization.
 * Uses class-based approach to avoid mutable global state and improve testability.
 */

/**
 * Supported embedding model providers.
 */
export type EmbeddingProvider =
  | "openai"
  | "vertex"
  | "gemini"
  | "aws"
  | "microsoft"
  | "sagemaker";

/**
 * Embedding model configuration parsed from a model specification string.
 */
export interface EmbeddingModelConfig {
  /** The provider (e.g., "openai", "gemini") */
  provider: EmbeddingProvider;
  /** The model name (e.g., "text-embedding-3-small") */
  model: string;
  /** Known dimensions for this model, or null if unknown */
  dimensions: number | null;
  /** The full model specification string (e.g., "openai:text-embedding-3-small") */
  modelSpec: string;
}

/**
 * Embedding configuration manager that handles model parsing and dimension caching.
 * Encapsulates state to avoid global variable issues and improve testability.
 *
 * Each instance owns its own copy of the dimension table, so dimensions cached
 * through `setKnownDimensions` are discarded when the singleton is reset.
 */
export class EmbeddingConfig {
  private static instance: EmbeddingConfig | null = null;

  /**
   * Provider prefixes recognised by `parse`. Mirrors the `EmbeddingProvider` union
   * and is only used to tell a "provider:model" spec apart from a bare model name
   * that happens to contain a colon.
   */
  private static readonly supportedProviders: ReadonlySet<string> =
    new Set<EmbeddingProvider>([
      "openai",
      "vertex",
      "gemini",
      "aws",
      "microsoft",
      "sagemaker",
    ]);

  /**
   * Get the singleton instance of EmbeddingConfig.
   * Creates the instance if it doesn't exist.
   */
  static getInstance(): EmbeddingConfig {
    if (EmbeddingConfig.instance === null) {
      EmbeddingConfig.instance = new EmbeddingConfig();
    }
    return EmbeddingConfig.instance;
  }

  /**
   * Reset the singleton instance (useful for testing).
   * The next `getInstance()` call builds a fresh instance with the built-in table only.
   */
  static resetInstance(): void {
    EmbeddingConfig.instance = null;
  }

  /**
   * Known dimensions for common embedding models.
   * This avoids expensive API calls for dimension detection in telemetry.
   *
   * Note: The "openai" provider also supports OpenAI-compatible APIs like:
   * - Ollama (local models)
   * - LMStudio (local models)
   * - Any service implementing OpenAI's embedding API
   */
  private readonly knownModelDimensions: Record<string, number> = {
    // OpenAI models (also works with Ollama, LMStudio, and other OpenAI-compatible APIs)
    "text-embedding-3-small": 1536,
    "text-embedding-3-large": 3072,
    "text-embedding-ada-002": 1536,

    // Google Vertex AI models
    "text-embedding-004": 768,
    "textembedding-gecko@003": 768,
    "textembedding-gecko@002": 768,
    "textembedding-gecko@001": 768,

    // Google Gemini models (with MRL support)
    "text-embedding-preview-0409": 768,
    "embedding-001": 768,

    // AWS Bedrock models
    // Amazon Titan models
    "amazon.titan-embed-text-v1": 1536,
    "amazon.titan-embed-text-v2:0": 1024,
    "amazon.titan-embed-image-v1": 1024, // Image embedding model

    // Cohere models
    "cohere.embed-english-v3": 1024,
    "cohere.embed-multilingual-v3": 1024,

    // SageMaker models (hosted on AWS SageMaker)
    "intfloat/multilingual-e5-large": 1024,

    // Additional AWS models that might be supported
    // Note: Some of these might be placeholders - verify dimensions before use
    // "amazon.nova-embed-multilingual-v1:0": 4096, // Commented out as noted in source

    // MTEB Leaderboard models (source: https://huggingface.co/spaces/mteb/leaderboard)
    // Top performing models from Massive Text Embedding Benchmark
    // NOTE(review): a few entries below (e.g. "BAAI/bge-small-en-v1.5": 512 and
    // "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768) look
    // inconsistent with the 384 used by similarly named siblings in this table.
    // Values are left untouched here - TODO confirm against the model cards.
    "sentence-transformers/all-MiniLM-L6-v2": 384,
    "gemini-embedding-001": 3072,
    "Qwen/Qwen3-Embedding-8B": 4096,
    "Qwen/Qwen3-Embedding-4B": 2560,
    "Qwen/Qwen3-Embedding-0.6B": 1024,
    "Linq-AI-Research/Linq-Embed-Mistral": 4096,
    "Alibaba-NLP/gte-Qwen2-7B-instruct": 3584,
    "intfloat/multilingual-e5-large-instruct": 1024,
    "Salesforce/SFR-Embedding-Mistral": 4096,
    "text-multilingual-embedding-002": 768,
    "GritLM/GritLM-7B": 4096,
    "GritLM/GritLM-8x7B": 4096,
    "intfloat/e5-mistral-7b-instruct": 4096,
    "Cohere/Cohere-embed-multilingual-v3.0": 1024,
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct": 8960,
    "Lajavaness/bilingual-embedding-large": 1024,
    "Salesforce/SFR-Embedding-2_R": 4096,
    "NovaSearch/stella_en_1.5B_v5": 8960,
    "NovaSearch/jasper_en_vision_language_v1": 8960,
    "nvidia/NV-Embed-v2": 4096,
    "OrdalieTech/Solon-embeddings-large-0.1": 1024,
    "BAAI/bge-m3": 1024,
    "HIT-TMG/KaLM-embedding-multilingual-mini-v1": 896,
    "jinaai/jina-embeddings-v3": 1024,
    "Alibaba-NLP/gte-multilingual-base": 768,
    "Lajavaness/bilingual-embedding-base": 768,
    "HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1": 896,
    "nvidia/NV-Embed-v1": 4096,
    "Cohere/Cohere-embed-multilingual-light-v3.0": 384,
    "manu/bge-m3-custom-fr": 1024,
    "Lajavaness/bilingual-embedding-small": 384,
    "Snowflake/snowflake-arctic-embed-l-v2.0": 1024,
    "intfloat/multilingual-e5-base": 768,
    "voyage-3-lite": 512,
    "voyage-3": 1024,
    "intfloat/multilingual-e5-small": 384,
    "Alibaba-NLP/gte-Qwen1.5-7B-instruct": 4096,
    "Snowflake/snowflake-arctic-embed-m-v2.0": 768,
    "deepvk/USER-bge-m3": 1024,
    "Cohere/Cohere-embed-english-v3.0": 1024,
    "Omartificial-Intelligence-Space/Arabic-labse-Matryoshka": 768,
    "ibm-granite/granite-embedding-278m-multilingual": 768,
    "NovaSearch/stella_en_400M_v5": 4096,
    "omarelshehy/arabic-english-sts-matryoshka": 1024,
    "sentence-transformers/paraphrase-multilingual-mpnet-base-v2": 768,
    "Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka": 768,
    "Haon-Chen/speed-embedding-7b-instruct": 4096,
    "sentence-transformers/LaBSE": 768,
    "WhereIsAI/UAE-Large-V1": 1024,
    "ibm-granite/granite-embedding-107m-multilingual": 384,
    "mixedbread-ai/mxbai-embed-large-v1": 1024,
    "intfloat/e5-large-v2": 1024,
    "avsolatorio/GIST-large-Embedding-v0": 1024,
    "sdadas/mmlw-e5-large": 1024,
    "nomic-ai/nomic-embed-text-v1": 768,
    "nomic-ai/nomic-embed-text-v1-ablated": 768,
    "intfloat/e5-base-v2": 768,
    "BAAI/bge-large-en-v1.5": 1024,
    "intfloat/e5-large": 1024,
    "Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet": 384,
    "Cohere/Cohere-embed-english-light-v3.0": 384,
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2": 768,
    "Gameselo/STS-multilingual-mpnet-base-v2": 768,
    "thenlper/gte-large": 1024,
    "avsolatorio/GIST-Embedding-v0": 768,
    "nomic-ai/nomic-embed-text-v1-unsupervised": 768,
    "infgrad/stella-base-en-v2": 768,
    "avsolatorio/NoInstruct-small-Embedding-v0": 384,
    "dwzhu/e5-base-4k": 768,
    "sdadas/mmlw-e5-base": 768,
    "voyage-multilingual-2": 1024,
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised": 4096,
    "BAAI/bge-base-en-v1.5": 768,
    "avsolatorio/GIST-small-Embedding-v0": 384,
    "sdadas/mmlw-roberta-large": 1024,
    "nomic-ai/nomic-embed-text-v1.5": 768,
    "minishlab/potion-multilingual-128M": 256,
    "shibing624/text2vec-base-multilingual": 384,
    "thenlper/gte-base": 768,
    "intfloat/e5-small-v2": 384,
    "intfloat/e5-base": 768,
    "sentence-transformers/static-similarity-mrl-multilingual-v1": 1024,
    "manu/sentence_croissant_alpha_v0.3": 2048,
    "BAAI/bge-small-en-v1.5": 512,
    "thenlper/gte-small": 384,
    "sdadas/mmlw-e5-small": 384,
    "manu/sentence_croissant_alpha_v0.4": 2048,
    "manu/sentence_croissant_alpha_v0.2": 2048,
    "abhinand/MedEmbed-small-v0.1": 384,
    "ibm-granite/granite-embedding-125m-english": 768,
    "intfloat/e5-small": 384,
    "voyage-large-2-instruct": 1024,
    "sdadas/mmlw-roberta-base": 768,
    "Snowflake/snowflake-arctic-embed-l": 1024,
    "Mihaiii/Ivysaur": 384,
    "Snowflake/snowflake-arctic-embed-m-long": 768,
    "bigscience/sgpt-bloom-7b1-msmarco": 4096,
    "avsolatorio/GIST-all-MiniLM-L6-v2": 384,
    "sergeyzh/LaBSE-ru-turbo": 768,
    "sentence-transformers/all-mpnet-base-v2": 768,
    "Snowflake/snowflake-arctic-embed-m": 768,
    "Snowflake/snowflake-arctic-embed-s": 384,
    "sentence-transformers/all-MiniLM-L12-v2": 384,
    "Mihaiii/gte-micro-v4": 384,
    "Snowflake/snowflake-arctic-embed-m-v1.5": 768,
    "cointegrated/LaBSE-en-ru": 768,
    "Mihaiii/Bulbasaur": 384,
    "ibm-granite/granite-embedding-30m-english": 384,
    "deepfile/embedder-100p": 768,
    "Jaume/gemma-2b-embeddings": 2048,
    "OrlikB/KartonBERT-USE-base-v1": 768,
    "izhx/udever-bloom-7b1": 4096,
    "izhx/udever-bloom-1b1": 1024,
    "brahmairesearch/slx-v0.1": 384,
    "Mihaiii/Wartortle": 384,
    "izhx/udever-bloom-3b": 2048,
    "deepvk/USER-base": 768,
    "ai-forever/ru-en-RoSBERTa": 1024,
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse": 4096,
    "Mihaiii/Venusaur": 384,
    "Snowflake/snowflake-arctic-embed-xs": 384,
    "jinaai/jina-embedding-b-en-v1": 768,
    "Mihaiii/gte-micro": 384,
    "aari1995/German_Semantic_STS_V2": 1024,
    "Mihaiii/Squirtle": 384,
    "OrlikB/st-polish-kartonberta-base-alpha-v1": 768,
    "sergeyzh/rubert-tiny-turbo": 312,
    "minishlab/potion-base-8M": 256,
    "minishlab/M2V_base_glove_subword": 256,
    "jinaai/jina-embedding-s-en-v1": 512,
    "minishlab/potion-base-4M": 128,
    "minishlab/M2V_base_output": 256,
    "DeepPavlov/rubert-base-cased-sentence": 768,
    "jinaai/jina-embeddings-v2-small-en": 512,
    "cointegrated/rubert-tiny2": 312,
    "minishlab/M2V_base_glove": 256,
    "cointegrated/rubert-tiny": 312,
    "silma-ai/silma-embeddding-matryoshka-v0.1": 768,
    "DeepPavlov/rubert-base-cased": 768,
    "Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet": 768,
    "izhx/udever-bloom-560m": 1024,
    "minishlab/potion-base-2M": 64,
    "DeepPavlov/distilrubert-small-cased-conversational": 768,
    "consciousAI/cai-lunaris-text-embeddings": 1024,
    "deepvk/deberta-v1-base": 768,
    "Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka": 768,
    "Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka": 768,
    "ai-forever/sbert_large_mt_nlu_ru": 1024,
    "ai-forever/sbert_large_nlu_ru": 1024,
    "malenia1/ternary-weight-embedding": 1024,
    "jinaai/jina-embeddings-v2-base-en": 768,
    "VPLabs/SearchMap_Preview": 4096,
    "Hum-Works/lodestone-base-4096-v1": 768,
    "jinaai/jina-embeddings-v4": 2048,
  };

  /**
   * Lowercase lookup map for case-insensitive model dimension queries.
   * Built eagerly in the constructor from knownModelDimensions and kept in sync
   * by setKnownDimensions.
   */
  private readonly modelLookup: Map<string, number>;

  constructor() {
    this.modelLookup = new Map();
    for (const [model, dimensions] of Object.entries(this.knownModelDimensions)) {
      this.modelLookup.set(model.toLowerCase(), dimensions);
    }
  }

  /**
   * Parse embedding model configuration from a provided model specification.
   * This is a synchronous operation that extracts provider, model, and known dimensions.
   *
   * Supports various providers:
   * - openai: OpenAI models and OpenAI-compatible APIs (Ollama, LMStudio, etc.)
   * - vertex: Google Cloud Vertex AI
   * - gemini: Google Generative AI
   * - aws: AWS Bedrock models
   * - microsoft: Azure OpenAI
   * - sagemaker: AWS SageMaker hosted models
   *
   * Parsing rules:
   * - No colon: the whole spec is the model and the provider defaults to "openai".
   * - Colon present: the spec is split on the FIRST colon only, so model names that
   *   contain colons (e.g., "aws:amazon.titan-embed-text-v2:0") stay intact.
   * - Exception: when the text before the first colon is not a supported provider and
   *   the whole spec is itself a known model (e.g., "amazon.titan-embed-text-v2:0"),
   *   the spec is treated as a bare model name, exactly like a spec without a colon.
   *
   * The provider prefix is not validated: an unrecognised prefix is returned as-is.
   *
   * @param modelSpec Model specification (e.g., "openai:text-embedding-3-small"), defaults to "text-embedding-3-small"
   * @returns Parsed embedding model configuration
   */
  parse(modelSpec?: string): EmbeddingModelConfig {
    // `||` rather than `??` on purpose: an empty string also falls back to the default.
    const spec = modelSpec || "text-embedding-3-small";

    // Defaults cover the "bare model name" case.
    let provider: EmbeddingProvider = "openai";
    let model = spec;

    const colonIndex = spec.indexOf(":");
    if (colonIndex !== -1) {
      const prefix = spec.substring(0, colonIndex);

      // A known model whose own name contains a colon, given without a provider
      // prefix, must not be split into a bogus provider/model pair.
      const isBareKnownModel =
        !EmbeddingConfig.supportedProviders.has(prefix) &&
        this.modelLookup.has(spec.toLowerCase());

      if (!isBareKnownModel) {
        // Split only on the first colon. The prefix is passed through unchecked so
        // that callers keep seeing whatever provider string was supplied.
        provider = prefix as EmbeddingProvider;
        model = spec.substring(colonIndex + 1);
      }
    }

    // Look up known dimensions (case-insensitive).
    const dimensions = this.modelLookup.get(model.toLowerCase()) ?? null;

    return {
      provider,
      model,
      dimensions,
      modelSpec: spec,
    };
  }

  /**
   * Get the known dimensions for a specific model.
   * Returns null if the model dimensions are not known.
   * Uses case-insensitive lookup.
   *
   * @param model The model name (e.g., "text-embedding-3-small")
   * @returns Known dimensions or null
   */
  getKnownDimensions(model: string): number | null {
    // `??` so that only a missing entry maps to null; a stored value is returned as-is.
    return this.modelLookup.get(model.toLowerCase()) ?? null;
  }

  /**
   * Add or update known dimensions for a model.
   * This can be used to cache discovered dimensions.
   * The original-case name is written to the dimension table and the lowercased
   * name to the lookup map, so later lookups are case-insensitive.
   *
   * @param model The model name
   * @param dimensions The dimensions to cache
   */
  setKnownDimensions(model: string, dimensions: number): void {
    this.knownModelDimensions[model] = dimensions;
    this.modelLookup.set(model.toLowerCase(), dimensions);
  }

  /**
   * Static method to parse embedding model configuration using the singleton instance.
   * This maintains backward compatibility while using the class-based approach.
   */
  static parseEmbeddingConfig(modelSpec?: string): EmbeddingModelConfig {
    return EmbeddingConfig.getInstance().parse(modelSpec);
  }

  /**
   * Static method to get known model dimensions using the singleton instance.
   * This maintains backward compatibility while using the class-based approach.
   */
  static getKnownModelDimensions(model: string): number | null {
    return EmbeddingConfig.getInstance().getKnownDimensions(model);
  }

  /**
   * Static method to set known model dimensions using the singleton instance.
   * This maintains backward compatibility while using the class-based approach.
   */
  static setKnownModelDimensions(model: string, dimensions: number): void {
    EmbeddingConfig.getInstance().setKnownDimensions(model, dimensions);
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arabold/docs-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.