M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
cached_embedder.go (6.79 kB)
// Package embed provides embedding generation with caching support.
//
// CachedEmbedder wraps any Embedder with an LRU cache to avoid redundant
// embedding computations. This provides significant performance improvements
// for repeated queries without any changes to existing code.
//
// Example:
//
//	// Wrap any embedder with caching
//	base := embed.NewOllama(nil)
//	cached := embed.NewCachedEmbedder(base, 10000) // Cache 10K embeddings
//
//	// Use exactly like the original - caching is transparent
//	vec, err := cached.Embed(ctx, "hello world")
//	vec2, err := cached.Embed(ctx, "hello world") // Cache hit!
//
// Performance:
//   - Cache hit: ~1µs (vs 50-200ms for actual embedding)
//   - Memory: ~4KB per cached embedding (1024 dims × 4 bytes)
//   - 10K cache = ~40MB memory
package embed

import (
	"container/list"
	"context"
	"hash/fnv"
	"strconv"
	"sync"
	"sync/atomic"
)

// CachedEmbedder wraps an Embedder with LRU caching.
//
// The cache is keyed by FNV-1a hash of the input text, providing:
//   - Exact match caching (same text = same embedding)
//   - Efficient lookup (O(1) for cache hits)
//   - Bounded memory usage (LRU eviction)
//   - Fast hashing (FNV-1a is non-cryptographic but fast)
//
// Thread-safe: All methods can be called from multiple goroutines.
type CachedEmbedder struct {
	base Embedder

	mu      sync.RWMutex
	cache   map[string]*list.Element
	lru     *list.List
	maxSize int

	// Statistics
	hits   uint64
	misses uint64
}

// cacheEntry holds a cached embedding with its key
type cacheEntry struct {
	key       string
	embedding []float32
}

// NewCachedEmbedder wraps an existing embedder with LRU caching.
//
// Parameters:
//   - base: The underlying embedder to wrap
//   - maxSize: Maximum number of embeddings to cache (0 = 10000 default)
//
// Example:
//
//	// Wrap Ollama with 10K cache
//	ollama := embed.NewOllama(nil)
//	cached := embed.NewCachedEmbedder(ollama, 10000)
//
//	// Or use default cache size
//	cached = embed.NewCachedEmbedder(ollama, 0)
func NewCachedEmbedder(base Embedder, maxSize int) *CachedEmbedder {
	if maxSize <= 0 {
		maxSize = 10000 // Default: 10K embeddings (~40MB for 1024-dim)
	}
	return &CachedEmbedder{
		base:    base,
		cache:   make(map[string]*list.Element, maxSize),
		lru:     list.New(),
		maxSize: maxSize,
	}
}

// hashText creates a cache key from text content using FNV-1a.
// FNV-1a is a fast non-cryptographic hash suitable for cache keys.
func hashText(text string) string {
	h := fnv.New64a()
	h.Write([]byte(text))
	return strconv.FormatUint(h.Sum64(), 36)
}

// Embed generates or retrieves a cached embedding for the text.
//
// On cache hit, returns immediately without calling the underlying embedder.
// On cache miss, calls the base embedder and caches the result.
func (c *CachedEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
	key := hashText(text)

	// Check cache first (read lock)
	c.mu.RLock()
	if elem, ok := c.cache[key]; ok {
		c.mu.RUnlock()
		atomic.AddUint64(&c.hits, 1)

		// Move to front (promote in LRU)
		c.mu.Lock()
		c.lru.MoveToFront(elem)
		entry := elem.Value.(*cacheEntry)
		c.mu.Unlock()

		return entry.embedding, nil
	}
	c.mu.RUnlock()

	atomic.AddUint64(&c.misses, 1)

	// Cache miss - generate embedding
	embedding, err := c.base.Embed(ctx, text)
	if err != nil {
		return nil, err
	}

	// Add to cache
	c.mu.Lock()
	defer c.mu.Unlock()

	// Double-check (another goroutine might have added it)
	if elem, ok := c.cache[key]; ok {
		c.lru.MoveToFront(elem)
		return elem.Value.(*cacheEntry).embedding, nil
	}

	// Evict if at capacity
	for c.lru.Len() >= c.maxSize {
		c.evictOldest()
	}

	// Add new entry
	entry := &cacheEntry{key: key, embedding: embedding}
	elem := c.lru.PushFront(entry)
	c.cache[key] = elem

	return embedding, nil
}

// EmbedBatch generates embeddings for multiple texts with caching.
//
// Each text is checked against the cache individually. Only cache misses
// are sent to the underlying embedder.
func (c *CachedEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	results := make([][]float32, len(texts))
	var misses []int
	var missTexts []string

	// Check cache for each text
	for i, text := range texts {
		key := hashText(text)
		c.mu.RLock()
		if elem, ok := c.cache[key]; ok {
			entry := elem.Value.(*cacheEntry)
			results[i] = entry.embedding
			atomic.AddUint64(&c.hits, 1)
			c.mu.RUnlock()

			// Promote in LRU
			c.mu.Lock()
			c.lru.MoveToFront(elem)
			c.mu.Unlock()
		} else {
			c.mu.RUnlock()
			atomic.AddUint64(&c.misses, 1)
			misses = append(misses, i)
			missTexts = append(missTexts, text)
		}
	}

	// Generate embeddings for cache misses
	if len(missTexts) > 0 {
		embeddings, err := c.base.EmbedBatch(ctx, missTexts)
		if err != nil {
			return nil, err
		}

		// Add to results and cache
		c.mu.Lock()
		for j, embedding := range embeddings {
			i := misses[j]
			results[i] = embedding

			// Cache the result
			key := hashText(missTexts[j])
			if _, ok := c.cache[key]; !ok {
				for c.lru.Len() >= c.maxSize {
					c.evictOldest()
				}
				entry := &cacheEntry{key: key, embedding: embedding}
				elem := c.lru.PushFront(entry)
				c.cache[key] = elem
			}
		}
		c.mu.Unlock()
	}

	return results, nil
}

// Dimensions returns the embedding vector dimension.
func (c *CachedEmbedder) Dimensions() int {
	return c.base.Dimensions()
}

// Model returns the model name.
func (c *CachedEmbedder) Model() string {
	return c.base.Model()
}

// Stats returns cache statistics.
func (c *CachedEmbedder) Stats() CacheStats {
	hits := atomic.LoadUint64(&c.hits)
	misses := atomic.LoadUint64(&c.misses)

	c.mu.RLock()
	size := c.lru.Len()
	c.mu.RUnlock()

	total := hits + misses
	var hitRate float64
	if total > 0 {
		hitRate = float64(hits) / float64(total) * 100
	}

	return CacheStats{
		Size:    size,
		MaxSize: c.maxSize,
		Hits:    hits,
		Misses:  misses,
		HitRate: hitRate,
	}
}

// CacheStats holds cache performance statistics.
type CacheStats struct {
	Size    int     `json:"size"`     // Current number of cached embeddings
	MaxSize int     `json:"max_size"` // Maximum cache capacity
	Hits    uint64  `json:"hits"`     // Number of cache hits
	Misses  uint64  `json:"misses"`   // Number of cache misses
	HitRate float64 `json:"hit_rate"` // Hit rate percentage (0-100)
}

// Clear removes all cached embeddings.
func (c *CachedEmbedder) Clear() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.cache = make(map[string]*list.Element, c.maxSize)
	c.lru.Init()
}

// evictOldest removes the least recently used entry.
// Caller must hold the write lock.
func (c *CachedEmbedder) evictOldest() {
	elem := c.lru.Back()
	if elem != nil {
		entry := elem.Value.(*cacheEntry)
		delete(c.cache, entry.key)
		c.lru.Remove(elem)
	}
}
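A minimal usage sketch follows, assuming the package's Embedder interface matches the four methods CachedEmbedder calls and forwards above (Embed, EmbedBatch, Dimensions, Model). The stubEmbedder type and the example file name are hypothetical, for illustration only; a real caller would wrap a backend such as the Ollama embedder shown in the doc comment.

// cached_embedder_example_test.go - hypothetical example file placed
// alongside cached_embedder.go in package embed; run with `go test`.
package embed

import (
	"context"
	"fmt"
)

// stubEmbedder is a hypothetical stand-in for a real backend (e.g. Ollama).
type stubEmbedder struct{}

func (s *stubEmbedder) Embed(ctx context.Context, text string) ([]float32, error) {
	return make([]float32, 1024), nil // a real backend would spend 50-200ms here
}

func (s *stubEmbedder) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
	out := make([][]float32, len(texts))
	for i := range texts {
		out[i] = make([]float32, 1024)
	}
	return out, nil
}

func (s *stubEmbedder) Dimensions() int { return 1024 }
func (s *stubEmbedder) Model() string   { return "stub" }

func ExampleCachedEmbedder() {
	ctx := context.Background()
	cached := NewCachedEmbedder(&stubEmbedder{}, 100)

	cached.Embed(ctx, "hello world") // miss: forwarded to the stub, then cached
	cached.Embed(ctx, "hello world") // hit: served straight from the LRU cache

	stats := cached.Stats()
	fmt.Printf("size=%d hits=%d misses=%d rate=%.0f%%\n",
		stats.Size, stats.Hits, stats.Misses, stats.HitRate)
	// Output: size=1 hits=1 misses=1 rate=50%
}

One design note: because keys are 64-bit FNV-1a hashes rather than the full text, two distinct inputs could in principle collide and share a cached vector. By the birthday bound, at the default 10K entries the collision probability is on the order of 10^-12, a trade the code accepts in exchange for small, fixed-size keys.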
