M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
embed.go
// Package embed provides embedding generation clients for vector search.
//
// This package supports multiple embedding providers:
//   - Ollama: Local open-source models (mxbai-embed-large, nomic-embed-text)
//   - OpenAI: Cloud API (text-embedding-3-small, text-embedding-3-large)
//
// Embeddings convert text into high-dimensional vectors that capture semantic
// meaning. Similar texts have similar vectors, enabling semantic search.
//
// Example Usage:
//
//	// Use local Ollama
//	config := embed.DefaultOllamaConfig()
//	embedder := embed.NewOllama(config)
//
//	embedding, err := embedder.Embed(ctx, "graph database")
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("Embedding dimensions: %d\n", len(embedding))
//	// Output: 1024 (for mxbai-embed-large)
//
//	// Or use OpenAI
//	config := embed.DefaultOpenAIConfig("sk-...")
//	embedder := embed.NewOpenAI(config)
//
//	// Batch processing for efficiency
//	texts := []string{"memory", "storage", "database"}
//	embeddings, err := embedder.EmbedBatch(ctx, texts)
//
// ELI12 (Explain Like I'm 12):
//
// Embeddings are like a "smell" or "vibe" for text. Similar things have
// similar smells. "Cat" and "kitten" smell similar. "Cat" and "car" smell
// different.
//
// The computer represents each text as a list of 1024 numbers (a vector).
// When you search, it finds texts with similar number patterns (similar vibes).
//
// Think of it like this:
//   - "Happy" might be [0.8, 0.2, 0.1, ...] (lots of positive vibes)
//   - "Joyful" might be [0.7, 0.3, 0.1, ...] (similar vibes!)
//   - "Sad" might be [0.1, 0.1, 0.9, ...] (very different vibes)
//
// The search system measures how close these number lists are to find
// similar meanings!
package embed

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

// Embedder generates vector embeddings from text.
//
// Implementations must be safe for concurrent use from multiple goroutines.
//
// Example:
//
//	var embedder embed.Embedder
//	embedder = embed.NewOllama(nil) // Uses defaults
//
//	// Single embedding
//	vec, err := embedder.Embed(ctx, "hello world")
//
//	// Batch for efficiency
//	vecs, err := embedder.EmbedBatch(ctx, []string{"one", "two", "three"})
type Embedder interface {
	// Embed generates an embedding for a single text.
	Embed(ctx context.Context, text string) ([]float32, error)

	// EmbedBatch generates embeddings for multiple texts.
	EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

	// Dimensions returns the embedding vector dimension.
	Dimensions() int

	// Model returns the model name.
	Model() string
}
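// The package doc above says search "measures how close these number lists
// are". The usual metric is cosine similarity. The helper below is an
// illustrative sketch, not part of this package's API (the name
// cosineSimilarity is hypothetical; it assumes "math" is imported and that
// both vectors have the same length):
//
//	func cosineSimilarity(a, b []float32) float64 {
//		var dot, na, nb float64
//		for i := range a {
//			dot += float64(a[i]) * float64(b[i])
//			na += float64(a[i]) * float64(a[i])
//			nb += float64(b[i]) * float64(b[i])
//		}
//		if na == 0 || nb == 0 {
//			return 0 // Degenerate zero vector; nothing to compare.
//		}
//		return dot / (math.Sqrt(na) * math.Sqrt(nb)) // 1.0 = same direction
//	}
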
// Config holds embedding provider configuration.
//
// Fields:
//   - Provider: "ollama" or "openai"
//   - APIURL: Base URL for API (e.g., http://localhost:11434)
//   - APIPath: Endpoint path (e.g., /api/embeddings)
//   - APIKey: Authentication key (OpenAI only)
//   - Model: Model name (e.g., mxbai-embed-large)
//   - Dimensions: Expected vector size for validation
//   - Timeout: HTTP request timeout
//
// Example:
//
//	// Custom Ollama config
//	config := &embed.Config{
//		Provider:   "ollama",
//		APIURL:     "http://192.168.1.100:11434",
//		Model:      "nomic-embed-text",
//		Dimensions: 768,
//		Timeout:    60 * time.Second,
//	}
type Config struct {
	Provider   string        // ollama, openai
	APIURL     string        // e.g., http://localhost:11434
	APIPath    string        // e.g., /api/embeddings or /v1/embeddings
	APIKey     string        // For OpenAI
	Model      string        // e.g., mxbai-embed-large
	Dimensions int           // Expected dimensions (for validation)
	Timeout    time.Duration // Request timeout
}
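// Config.Dimensions is documented as the expected vector size "for
// validation", but no validator ships in this file. A minimal sketch of what
// a caller might do (validateDims is a hypothetical helper, not part of this
// package):
//
//	func validateDims(e Embedder, vec []float32) error {
//		if want := e.Dimensions(); len(vec) != want {
//			return fmt.Errorf("model %s: expected %d dimensions, got %d",
//				e.Model(), want, len(vec))
//		}
//		return nil
//	}
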
// DefaultOllamaConfig returns configuration for local Ollama with mxbai-embed-large.
//
// Default settings:
//   - Provider: ollama
//   - API URL: http://localhost:11434
//   - Model: mxbai-embed-large (1024 dimensions)
//   - Timeout: 30 seconds
//
// This assumes Ollama is running locally. To start Ollama:
//
//	$ ollama pull mxbai-embed-large
//	$ ollama serve
//
// Example:
//
//	config := embed.DefaultOllamaConfig()
//	embedder := embed.NewOllama(config)
//
//	embedding, err := embedder.Embed(ctx, "test")
//	if err != nil {
//		log.Fatal(err)
//	}
func DefaultOllamaConfig() *Config {
	return &Config{
		Provider:   "ollama",
		APIURL:     "http://localhost:11434",
		APIPath:    "/api/embeddings",
		Model:      "mxbai-embed-large",
		Dimensions: 1024,
		Timeout:    30 * time.Second,
	}
}

// DefaultOpenAIConfig returns configuration for OpenAI's text-embedding-3-small.
//
// Default settings:
//   - Provider: openai
//   - API URL: https://api.openai.com
//   - Model: text-embedding-3-small (1536 dimensions)
//   - Timeout: 30 seconds
//
// Requires an OpenAI API key. Get one at: https://platform.openai.com/api-keys
//
// Example:
//
//	apiKey := os.Getenv("OPENAI_API_KEY")
//	config := embed.DefaultOpenAIConfig(apiKey)
//	embedder := embed.NewOpenAI(config)
//
//	embedding, err := embedder.Embed(ctx, "test")
//	if err != nil {
//		log.Fatal(err)
//	}
//
// Cost:
//   - text-embedding-3-small: $0.02 per 1M tokens (~750k words)
//   - text-embedding-3-large: $0.13 per 1M tokens (3072 dimensions)
func DefaultOpenAIConfig(apiKey string) *Config {
	return &Config{
		Provider:   "openai",
		APIURL:     "https://api.openai.com",
		APIPath:    "/v1/embeddings",
		APIKey:     apiKey,
		Model:      "text-embedding-3-small",
		Dimensions: 1536,
		Timeout:    30 * time.Second,
	}
}

// OllamaEmbedder implements Embedder for local Ollama models.
//
// Ollama runs open-source embedding models locally:
//   - mxbai-embed-large: 1024 dimensions, excellent quality
//   - nomic-embed-text: 768 dimensions, faster
//   - all-minilm: 384 dimensions, very fast but lower quality
//
// Install models:
//
//	$ ollama pull mxbai-embed-large
//	$ ollama pull nomic-embed-text
//
// Thread-safe: Can be used concurrently from multiple goroutines.
//
// Example:
//
//	embedder := embed.NewOllama(nil) // Uses defaults
//
//	vec, err := embedder.Embed(ctx, "hello world")
//	if err != nil {
//		return err
//	}
//
//	fmt.Printf("Model: %s, Dimensions: %d\n",
//		embedder.Model(), embedder.Dimensions())
type OllamaEmbedder struct {
	config *Config
	client *http.Client
}

// NewOllama creates a new Ollama embedder.
//
// If config is nil, DefaultOllamaConfig() is used.
//
// Example:
//
//	// Use defaults (localhost:11434, mxbai-embed-large)
//	embedder := embed.NewOllama(nil)
//
//	// Custom config
//	config := &embed.Config{
//		Provider:   "ollama",
//		APIURL:     "http://192.168.1.100:11434",
//		Model:      "nomic-embed-text",
//		Dimensions: 768,
//	}
//	embedder = embed.NewOllama(config)
//
// Returns an embedder ready to generate embeddings.
//
// Example 1 - Default Configuration (mxbai-embed-large):
//
//	// Uses localhost:11434 by default
//	embedder := embed.NewOllama(nil)
//
//	vec, err := embedder.Embed(ctx, "Hello world")
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	fmt.Printf("Generated %d-dimensional embedding\n", len(vec))
//	// Output: Generated 1024-dimensional embedding
//
// Example 2 - Custom Model (nomic-embed-text):
//
//	config := embed.DefaultOllamaConfig()
//	config.Model = "nomic-embed-text"
//	config.Dimensions = 768
//
//	embedder := embed.NewOllama(config)
//
//	// Good for English text
//	vec, _ := embedder.Embed(ctx, "The quick brown fox")
//	fmt.Printf("Nomic embedding: %d dims\n", len(vec)) // 768
//
// Example 3 - Remote Ollama Server:
//
//	config := embed.DefaultOllamaConfig()
//	config.APIURL = "http://ollama-server.internal:11434"
//	config.Timeout = 60 * time.Second
//
//	embedder := embed.NewOllama(config)
//
//	// Connect to remote Ollama instance
//	vec, err := embedder.Embed(ctx, "distributed embeddings")
//	if err != nil {
//		log.Printf("Remote Ollama error: %v", err)
//	}
//
// Example 4 - Batch Processing for Efficiency:
//
//	embedder := embed.NewOllama(nil)
//
//	documents := []string{
//		"Document 1 about AI",
//		"Document 2 about ML",
//		"Document 3 about NLP",
//	}
//
//	// Process in batch
//	embeddings, err := embedder.EmbedBatch(ctx, documents)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	// Store embeddings in database
//	for i, emb := range embeddings {
//		storeEmbedding(documents[i], emb)
//	}
//
// ELI12:
//
// NewOllama is like hiring a translator who works at a local translation office:
//
//   - Ollama is the office (running on your computer or server)
//   - You give them text: "Hello world"
//   - They give back a list of 1024 numbers (the "meaning" as numbers)
//   - These numbers help computers understand if texts are similar
//
// Why use Ollama?
//   - FREE (no API costs like OpenAI)
//   - PRIVATE (data stays on your server)
//   - FAST (no internet latency)
//   - OFFLINE (works without internet)
//
// How it works:
//  1. Install the model: `ollama pull mxbai-embed-large`
//  2. Ollama runs on localhost:11434
//  3. Send text, get back 1024 numbers
//  4. Use numbers to find similar text
//
// Models Available:
//   - mxbai-embed-large: 1024 dims, best quality (default)
//   - nomic-embed-text: 768 dims, good balance
//   - all-minilm: 384 dims, fast & small
//
// Use Cases:
//   - Document similarity search
//   - Semantic search engines
//   - Recommendation systems
//   - Duplicate detection
//   - Content clustering
//
// Performance:
//   - ~50-200ms per embedding (depends on model)
//   - Batch processing is more efficient
//   - GPU acceleration if available
//   - Memory: ~500MB-2GB for model
//
// Thread Safety:
// Safe to call from multiple goroutines.
func NewOllama(config *Config) *OllamaEmbedder {
	if config == nil {
		config = DefaultOllamaConfig()
	}

	return &OllamaEmbedder{
		config: config,
		client: &http.Client{
			Timeout: config.Timeout,
		},
	}
}
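// The examples above assume the Ollama server is reachable. A hedged sketch
// of a pre-flight check (pingOllama is a hypothetical helper, not part of
// this package; it assumes Ollama's GET /api/tags endpoint, which lists
// installed models):
//
//	func pingOllama(ctx context.Context, cfg *Config) error {
//		req, err := http.NewRequestWithContext(ctx, "GET", cfg.APIURL+"/api/tags", nil)
//		if err != nil {
//			return err
//		}
//		resp, err := http.DefaultClient.Do(req)
//		if err != nil {
//			return fmt.Errorf("ollama not reachable at %s: %w", cfg.APIURL, err)
//		}
//		defer resp.Body.Close()
//		if resp.StatusCode != http.StatusOK {
//			return fmt.Errorf("ollama returned %d", resp.StatusCode)
//		}
//		return nil
//	}
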
// ollamaRequest is the request format for Ollama.
type ollamaRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
}

// ollamaResponse is the response format from Ollama.
type ollamaResponse struct {
	Embedding []float32 `json:"embedding"`
}

// Embed generates a vector embedding for a single text string.
//
// The embedding is a float32 slice of length specified by Dimensions().
// Empty or very short text may produce low-quality embeddings.
//
// Example:
//
//	embedder := embed.NewOllama(nil)
//
//	vec, err := embedder.Embed(ctx, "machine learning")
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	fmt.Printf("Generated %d-dimensional vector\n", len(vec))
//	// Output: Generated 1024-dimensional vector
//
// Returns the embedding vector, or an error if the API request fails.
func (e *OllamaEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
	req := ollamaRequest{
		Model:  e.config.Model,
		Prompt: text,
	}

	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	url := e.config.APIURL + e.config.APIPath
	httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		bodyBytes, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("ollama returned %d: %s", resp.StatusCode, string(bodyBytes))
	}

	var ollamaResp ollamaResponse
	if err := json.NewDecoder(resp.Body).Decode(&ollamaResp); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	return ollamaResp.Embedding, nil
}

// EmbedBatch generates embeddings for multiple texts.
//
// For Ollama, this currently makes one request per text. Future versions
// may support true batch processing for better performance.
//
// Example:
//
//	embedder := embed.NewOllama(nil)
//
//	texts := []string{
//		"graph database",
//		"vector search",
//		"semantic similarity",
//	}
//
//	vecs, err := embedder.EmbedBatch(ctx, texts)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	for i, vec := range vecs {
//		fmt.Printf("Text %d: %d dimensions\n", i, len(vec))
//	}
//
// Returns a slice of embeddings (one per input text), or an error if any request fails.
func (e *OllamaEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	results := make([][]float32, len(texts))

	for i, text := range texts {
		embedding, err := e.Embed(ctx, text)
		if err != nil {
			return nil, fmt.Errorf("failed to embed text %d: %w", i, err)
		}
		results[i] = embedding
	}

	return results, nil
}
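// Since the Ollama path issues one HTTP request per text, large batches can
// be parallelized client-side. A minimal sketch, assuming a bounded worker
// pool is acceptable to your Ollama server (embedConcurrent is hypothetical,
// not part of this package; it needs "sync" imported):
//
//	func embedConcurrent(ctx context.Context, e Embedder, texts []string, workers int) ([][]float32, error) {
//		results := make([][]float32, len(texts))
//		sem := make(chan struct{}, workers) // Limits in-flight requests.
//		var wg sync.WaitGroup
//		var mu sync.Mutex
//		var firstErr error
//		for i, t := range texts {
//			wg.Add(1)
//			sem <- struct{}{}
//			go func(i int, t string) {
//				defer wg.Done()
//				defer func() { <-sem }()
//				vec, err := e.Embed(ctx, t)
//				mu.Lock()
//				defer mu.Unlock()
//				if err != nil {
//					if firstErr == nil {
//						firstErr = err
//					}
//					return
//				}
//				results[i] = vec
//			}(i, t)
//		}
//		wg.Wait()
//		if firstErr != nil {
//			return nil, firstErr
//		}
//		return results, nil
//	}
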
// Dimensions returns the expected embedding dimensions.
func (e *OllamaEmbedder) Dimensions() int {
	return e.config.Dimensions
}

// Model returns the model name.
func (e *OllamaEmbedder) Model() string {
	return e.config.Model
}

// OpenAIEmbedder implements Embedder for OpenAI's embedding API.
//
// Supported models:
//   - text-embedding-3-small: 1536 dimensions, $0.02/1M tokens
//   - text-embedding-3-large: 3072 dimensions, $0.13/1M tokens
//   - text-embedding-ada-002: 1536 dimensions (legacy)
//
// Thread-safe: Can be used concurrently from multiple goroutines.
//
// Example:
//
//	apiKey := os.Getenv("OPENAI_API_KEY")
//	embedder := embed.NewOpenAI(embed.DefaultOpenAIConfig(apiKey))
//
//	vec, err := embedder.Embed(ctx, "hello world")
//	if err != nil {
//		return err
//	}
type OpenAIEmbedder struct {
	config *Config
	client *http.Client
}

// NewOpenAI creates a new OpenAI embedder.
//
// If config is nil, DefaultOpenAIConfig("") is used (will fail without an API key).
//
// Example:
//
//	apiKey := os.Getenv("OPENAI_API_KEY")
//	config := embed.DefaultOpenAIConfig(apiKey)
//	embedder := embed.NewOpenAI(config)
//
//	// Or use custom model
//	config.Model = "text-embedding-3-large"
//	config.Dimensions = 3072
//	embedder = embed.NewOpenAI(config)
//
// Returns an embedder ready to generate embeddings.
//
// Example 1 - Basic Setup with API Key:
//
//	apiKey := os.Getenv("OPENAI_API_KEY") // sk-...
//	embedder := embed.NewOpenAI(embed.DefaultOpenAIConfig(apiKey))
//
//	vec, err := embedder.Embed(ctx, "artificial intelligence")
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	fmt.Printf("Generated %d-dimensional embedding\n", len(vec))
//	// Output: Generated 1536-dimensional embedding
//
// Example 2 - High-Dimensional Model (text-embedding-3-large):
//
//	config := embed.DefaultOpenAIConfig(apiKey)
//	config.Model = "text-embedding-3-large"
//	config.Dimensions = 3072 // Maximum quality
//
//	embedder := embed.NewOpenAI(config)
//
//	// Higher quality embeddings for critical applications
//	vec, _ := embedder.Embed(ctx, "complex semantic meaning")
//	fmt.Printf("High-quality: %d dims\n", len(vec)) // 3072
//
// Example 3 - Cost-Optimized (text-embedding-3-small):
//
//	config := embed.DefaultOpenAIConfig(apiKey)
//	config.Model = "text-embedding-3-small"
//	config.Dimensions = 1536
//
//	embedder := embed.NewOpenAI(config)
//
//	// ~6x cheaper than text-embedding-3-large:
//	// $0.02 per 1M tokens vs $0.13 per 1M tokens
//	vec, _ := embedder.Embed(ctx, "cost effective")
//
// Example 4 - Production with Error Handling:
//
//	config := embed.DefaultOpenAIConfig(apiKey)
//	config.Timeout = 30 * time.Second
//
//	embedder := embed.NewOpenAI(config)
//
//	texts := []string{"doc1", "doc2", "doc3"}
//	embeddings, err := embedder.EmbedBatch(ctx, texts)
//	if err != nil {
//		// Handle rate limits, quota errors, etc.
//		if strings.Contains(err.Error(), "rate_limit") {
//			time.Sleep(1 * time.Second)
//			embeddings, err = embedder.EmbedBatch(ctx, texts) // Retry
//		}
//	}
//
// Example 5 - Multilingual with Azure OpenAI:
//
//	config := &embed.Config{
//		Provider:   "openai",
//		APIURL:     "https://your-resource.openai.azure.com",
//		APIPath:    "/openai/deployments/embedding/embeddings?api-version=2023-05-15",
//		APIKey:     azureAPIKey,
//		Model:      "text-embedding-ada-002",
//		Dimensions: 1536,
//	}
//
//	embedder := embed.NewOpenAI(config)
//
//	// Works with multiple languages
//	embeddings, _ := embedder.EmbedBatch(ctx, []string{
//		"Hello world",      // English
//		"Bonjour le monde", // French
//		"Hola mundo",       // Spanish
//		"こんにちは世界",    // Japanese
//	})
//
// ELI12:
//
// NewOpenAI is like hiring a professional translator from a famous company:
//
//   - OpenAI is like Google Translate (but specifically for AI)
//   - You send text to their servers
//   - They send back numbers that represent the "meaning"
//   - You pay a tiny amount per text ($0.02 per 1 million tokens!)
//
// Why use OpenAI instead of Ollama?
//   - QUALITY: Often more accurate
//   - CONVENIENCE: No setup required
//   - LANGUAGES: Supports 100+ languages well
//   - UPDATES: Always latest models
//
// BUT:
//   - COST: You pay for each request (though it's cheap)
//   - PRIVACY: Your text goes to OpenAI servers
//   - INTERNET: Needs internet connection
//   - RATE LIMITS: Max requests per minute
//
// Models & Pricing (2024):
//
// text-embedding-3-small:
//   - 1536 dimensions
//   - $0.02 per 1M tokens (~750k words)
//   - Best for: Cost-sensitive applications
//
// text-embedding-3-large:
//   - 3072 dimensions (can truncate to 256-3072)
//   - $0.13 per 1M tokens
//   - Best for: Maximum quality
//
// text-embedding-ada-002 (legacy):
//   - 1536 dimensions
//   - $0.10 per 1M tokens
//   - Still works but use v3 instead
//
// Rate Limits:
//   - Free tier: 3 RPM (requests per minute)
//   - Paid tier 1: 3,000 RPM
//   - Paid tier 2+: 5,000+ RPM
//
// Use Cases:
//   - Same as Ollama: similarity search, recommendations, etc.
//   - Multilingual applications
//   - When you need the absolute best quality
//   - When you don't want to run your own infrastructure
//
// Performance:
//   - ~100-300ms per request (network latency)
//   - Batch up to 2048 texts per request
//   - Use batch processing to reduce costs
//
// Thread Safety:
// Safe to call from multiple goroutines.
func NewOpenAI(config *Config) *OpenAIEmbedder {
	if config == nil {
		config = DefaultOpenAIConfig("")
	}

	return &OpenAIEmbedder{
		config: config,
		client: &http.Client{
			Timeout: config.Timeout,
		},
	}
}

// openaiRequest is the request format for OpenAI.
type openaiRequest struct {
	Model string   `json:"model"`
	Input []string `json:"input"`
}

// openaiResponse is the response format from OpenAI.
type openaiResponse struct {
	Data []struct {
		Embedding []float32 `json:"embedding"`
		Index     int       `json:"index"`
	} `json:"data"`
}

// Embed generates a vector embedding for a single text string.
//
// Internally calls EmbedBatch with a single-element slice for API consistency.
//
// Example:
//
//	apiKey := os.Getenv("OPENAI_API_KEY")
//	embedder := embed.NewOpenAI(embed.DefaultOpenAIConfig(apiKey))
//
//	vec, err := embedder.Embed(ctx, "artificial intelligence")
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	fmt.Printf("Generated %d-dimensional OpenAI embedding\n", len(vec))
//	// Output: Generated 1536-dimensional OpenAI embedding
//
// Returns the embedding vector, or an error if the API request fails.
func (e *OpenAIEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
	embeddings, err := e.EmbedBatch(ctx, []string{text})
	if err != nil {
		return nil, err
	}

	if len(embeddings) == 0 {
		return nil, fmt.Errorf("no embedding returned")
	}

	return embeddings[0], nil
}
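// Example 4 above retries once on a rate-limit error. Production code usually
// wants exponential backoff. A minimal sketch, assuming any error is worth
// retrying (embedWithRetry is a hypothetical helper, not part of this package):
//
//	func embedWithRetry(ctx context.Context, e Embedder, text string, attempts int) ([]float32, error) {
//		var lastErr error
//		for i := 0; i < attempts; i++ {
//			vec, err := e.Embed(ctx, text)
//			if err == nil {
//				return vec, nil
//			}
//			lastErr = err
//			select {
//			case <-ctx.Done():
//				return nil, ctx.Err()
//			case <-time.After(time.Duration(1<<i) * time.Second): // 1s, 2s, 4s, ...
//			}
//		}
//		return nil, lastErr
//	}
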
// EmbedBatch generates embeddings for multiple texts in a single API call.
//
// OpenAI's API supports true batch processing, making this more efficient
// than calling Embed() multiple times.
//
// Maximum batch size: 2048 texts per request.
//
// Example:
//
//	apiKey := os.Getenv("OPENAI_API_KEY")
//	embedder := embed.NewOpenAI(embed.DefaultOpenAIConfig(apiKey))
//
//	texts := []string{
//		"First document about machine learning",
//		"Second document about neural networks",
//		"Third document about deep learning",
//	}
//
//	vecs, err := embedder.EmbedBatch(ctx, texts)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	for i, vec := range vecs {
//		fmt.Printf("Document %d: %d dimensions\n", i+1, len(vec))
//	}
//
// Returns a slice of embeddings (one per input text), or an error if the API request fails.
func (e *OpenAIEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	req := openaiRequest{
		Model: e.config.Model,
		Input: texts,
	}

	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request: %w", err)
	}

	url := e.config.APIURL + e.config.APIPath
	httpReq, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Authorization", "Bearer "+e.config.APIKey)

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		bodyBytes, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("openai returned %d: %s", resp.StatusCode, string(bodyBytes))
	}

	var openaiResp openaiResponse
	if err := json.NewDecoder(resp.Body).Decode(&openaiResp); err != nil {
		return nil, fmt.Errorf("failed to decode response: %w", err)
	}

	// Place each embedding by its reported index, since the API identifies
	// results by position rather than guaranteeing response order.
	results := make([][]float32, len(openaiResp.Data))
	for _, data := range openaiResp.Data {
		results[data.Index] = data.Embedding
	}

	return results, nil
}

// Dimensions returns the expected embedding dimensions.
func (e *OpenAIEmbedder) Dimensions() int {
	return e.config.Dimensions
}

// Model returns the model name.
func (e *OpenAIEmbedder) Model() string {
	return e.config.Model
}

// NewEmbedder creates an embedder based on the provider specified in config.
//
// Supported providers:
//   - "local": Local GGUF models via llama.cpp (GPU-accelerated)
//   - "ollama": External Ollama server
//   - "openai": OpenAI cloud API
//
// This is a convenience function for dynamic provider selection.
//
// Example:
//
//	// Dynamic provider selection
//	provider := os.Getenv("EMBEDDING_PROVIDER") // "local", "ollama", or "openai"
//
//	var config *embed.Config
//	switch provider {
//	case "local":
//		config = &embed.Config{
//			Provider:   "local",
//			Model:      "bge-m3",
//			Dimensions: 1024,
//		}
//	case "openai":
//		apiKey := os.Getenv("OPENAI_API_KEY")
//		config = embed.DefaultOpenAIConfig(apiKey)
//	default:
//		config = embed.DefaultOllamaConfig()
//	}
//
//	embedder, err := embed.NewEmbedder(config)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	// Use embedder regardless of provider
//	vec, err := embedder.Embed(ctx, "test")
//
// Returns an Embedder interface, or an error if the provider is unknown or
// the configuration is invalid (e.g., OpenAI without an API key, local without a model).
func NewEmbedder(config *Config) (Embedder, error) {
	switch config.Provider {
	case "local":
		return NewLocalGGUF(config)
	case "ollama":
		return NewOllama(config), nil
	case "openai":
		if config.APIKey == "" {
			return nil, fmt.Errorf("OpenAI requires an API key")
		}
		return NewOpenAI(config), nil
	default:
		return nil, fmt.Errorf("unknown provider: %s (supported: local, ollama, openai)", config.Provider)
	}
}
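// The OpenAI endpoint caps a batch at 2048 inputs, so larger corpora are
// typically embedded in chunks. A minimal sketch (embedChunked and maxBatch
// are hypothetical, not part of this package):
//
//	const maxBatch = 2048 // OpenAI's documented per-request input limit.
//
//	func embedChunked(ctx context.Context, e Embedder, texts []string) ([][]float32, error) {
//		all := make([][]float32, 0, len(texts))
//		for start := 0; start < len(texts); start += maxBatch {
//			end := start + maxBatch
//			if end > len(texts) {
//				end = len(texts)
//			}
//			vecs, err := e.EmbedBatch(ctx, texts[start:end])
//			if err != nil {
//				return nil, fmt.Errorf("chunk starting at %d: %w", start, err)
//			}
//			all = append(all, vecs...)
//		}
//		return all, nil
//	}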
