M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
search.go
// Package search provides unified hybrid search with Reciprocal Rank Fusion (RRF).
//
// This package implements the same hybrid search approach used by production systems
// like Azure AI Search, Elasticsearch, and Weaviate: combining vector similarity search
// with BM25 full-text search using Reciprocal Rank Fusion.
//
// Search Capabilities:
//   - Vector similarity search (cosine similarity with HNSW index)
//   - BM25 full-text search (keyword matching with TF-IDF)
//   - RRF hybrid search (fuses vector + BM25 results)
//   - Adaptive weighting based on query characteristics
//   - Automatic fallback when one method fails
//
// Example Usage:
//
//	// Create search service
//	svc := search.NewService(storageEngine)
//
//	// Build indexes from existing nodes
//	if err := svc.BuildIndexes(ctx); err != nil {
//		log.Fatal(err)
//	}
//
//	// Perform hybrid search
//	query := "machine learning algorithms"
//	embedding := embedder.Embed(ctx, query) // Get from embed package
//	opts := search.DefaultSearchOptions()
//
//	response, err := svc.Search(ctx, query, embedding, opts)
//	if err != nil {
//		log.Fatal(err)
//	}
//
//	for _, result := range response.Results {
//		fmt.Printf("[%.3f] %s\n", result.RRFScore, result.Title)
//	}
//
// How RRF Works:
//
// RRF (Reciprocal Rank Fusion) combines rankings from multiple search methods.
// Instead of merging scores directly (which can be incomparable), RRF uses rank
// positions to create a unified ranking.
//
// Formula: RRF_score = Σ (weight / (k + rank))
//
// Where:
//   - k is a constant (typically 60) to reduce the impact of high ranks
//   - rank is the position in the result list (1-indexed)
//   - weight allows emphasizing one method over another
//
// Example: A document ranked #1 in vector search and #3 in BM25:
//
//	RRF = (1.0 / (60 + 1)) + (1.0 / (60 + 3))
//	    = (1.0 / 61) + (1.0 / 63)
//	    = 0.0164 + 0.0159
//	    = 0.0323
//
// Documents that appear in both result sets get boosted scores.
//
// ELI12 (Explain Like I'm 12):
//
// Imagine two friends ranking pizza places:
//   - Friend A (vector search) ranks by taste similarity to your favorite
//   - Friend B (BM25) ranks by matching your description "spicy pepperoni"
//
// They might disagree! Friend A says place X is #1 (tastes similar), while
// Friend B says it's #5 (doesn't match keywords well).
//
// RRF solves this by:
//  1. If a place appears in BOTH lists, it gets bonus points
//  2. Higher ranks (being at the top) give more points
//  3. The magic number 60 prevents #1 from completely dominating
//
// This way, a place that's #2 in both lists beats a place that's #1 in one
// but missing from the other!
package search

import (
	"context"
	"fmt"
	"log"
	"math"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/orneryd/nornicdb/pkg/gpu"
	"github.com/orneryd/nornicdb/pkg/math/vector"
	"github.com/orneryd/nornicdb/pkg/storage"
)

// SearchableProperties defines PRIORITY properties for full-text search ranking.
// These properties are indexed first for better BM25 ranking.
// Note: ALL node properties are indexed, but these get priority weighting.
// These match Mimir's Neo4j fulltext index configuration.
var SearchableProperties = []string{
	"content",
	"text",
	"title",
	"name",
	"description",
	"path",
	"workerRole",
	"requirements",
}
// SearchResult represents a unified search result.
type SearchResult struct {
	ID             string         `json:"id"`
	NodeID         storage.NodeID `json:"nodeId"`
	Type           string         `json:"type"`
	Labels         []string       `json:"labels"`
	Title          string         `json:"title,omitempty"`
	Description    string         `json:"description,omitempty"`
	ContentPreview string         `json:"content_preview,omitempty"`
	Properties     map[string]any `json:"properties,omitempty"`

	// Scoring
	Score      float64 `json:"score"`
	Similarity float64 `json:"similarity,omitempty"`

	// RRF metadata
	RRFScore   float64 `json:"rrf_score,omitempty"`
	VectorRank int     `json:"vector_rank,omitempty"`
	BM25Rank   int     `json:"bm25_rank,omitempty"`
}

// SearchResponse is the response from a search operation.
type SearchResponse struct {
	Status            string         `json:"status"`
	Query             string         `json:"query"`
	Results           []SearchResult `json:"results"`
	TotalCandidates   int            `json:"total_candidates"`
	Returned          int            `json:"returned"`
	SearchMethod      string         `json:"search_method"`
	FallbackTriggered bool           `json:"fallback_triggered"`
	Message           string         `json:"message,omitempty"`
	Metrics           *SearchMetrics `json:"metrics,omitempty"`
}

// SearchMetrics contains timing and statistics.
type SearchMetrics struct {
	VectorSearchTimeMs int `json:"vector_search_time_ms"`
	BM25SearchTimeMs   int `json:"bm25_search_time_ms"`
	FusionTimeMs       int `json:"fusion_time_ms"`
	TotalTimeMs        int `json:"total_time_ms"`
	VectorCandidates   int `json:"vector_candidates"`
	BM25Candidates     int `json:"bm25_candidates"`
	FusedCandidates    int `json:"fused_candidates"`
}

// SearchOptions configures the search behavior.
type SearchOptions struct {
	// Limit is the maximum number of results to return
	Limit int

	// MinSimilarity is the minimum similarity threshold for vector search
	MinSimilarity float64

	// Types filters results by node type (labels)
	Types []string

	// RRF configuration
	RRFK         float64 // RRF constant (default: 60)
	VectorWeight float64 // Weight for vector results (default: 1.0)
	BM25Weight   float64 // Weight for BM25 results (default: 1.0)
	MinRRFScore  float64 // Minimum RRF score threshold (default: 0.01)

	// MMR (Maximal Marginal Relevance) diversification
	// When enabled, results are re-ranked to balance relevance with diversity
	MMREnabled bool    // Enable MMR diversification (default: false)
	MMRLambda  float64 // Balance: 1.0 = pure relevance, 0.0 = pure diversity (default: 0.7)

	// Cross-encoder reranking (Stage 2)
	// When enabled, top candidates are re-scored using a cross-encoder model
	// for higher accuracy at the cost of latency
	RerankEnabled  bool    // Enable cross-encoder reranking (default: false)
	RerankTopK     int     // How many candidates to rerank (default: 100)
	RerankMinScore float64 // Minimum cross-encoder score to include (default: 0)
}

// DefaultSearchOptions returns sensible defaults.
func DefaultSearchOptions() *SearchOptions {
	return &SearchOptions{
		Limit:          50,
		MinSimilarity:  0.5,
		RRFK:           60,
		VectorWeight:   1.0,
		BM25Weight:     1.0,
		MinRRFScore:    0.01,
		MMREnabled:     false,
		MMRLambda:      0.7, // Balanced: 70% relevance, 30% diversity
		RerankEnabled:  false,
		RerankTopK:     100,
		RerankMinScore: 0.0,
	}
}
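// A minimal tuning sketch (not part of the package API): start from
// DefaultSearchOptions and override only what you need. The field names
// below are the ones defined on SearchOptions above; the specific values
// are illustrative, not recommendations.
//
//	opts := DefaultSearchOptions()
//	opts.Limit = 10           // return fewer, higher-quality results
//	opts.MMREnabled = true    // diversify near-duplicate hits
//	opts.MMRLambda = 0.5      // weight relevance and diversity equally
//	opts.RerankEnabled = true // requires SetCrossEncoder(), see below
//	opts.RerankTopK = 50      // rerank only the top 50 candidates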
// Service provides unified hybrid search with automatic index management.
//
// The Service maintains:
//   - Vector index (HNSW for fast approximate nearest neighbor search)
//   - Full-text index (BM25 with inverted index)
//   - Connection to storage engine for node data enrichment
//
// Thread-safe: Multiple goroutines can call Search() concurrently.
//
// Example:
//
//	svc := search.NewService(engine)
//	defer svc.Close()
//
//	// Index existing data
//	if err := svc.BuildIndexes(ctx); err != nil {
//		log.Fatal(err)
//	}
//
//	// Index new nodes as they're created
//	node := &storage.Node{...}
//	if err := svc.IndexNode(node); err != nil {
//		log.Printf("Failed to index: %v", err)
//	}
type Service struct {
	engine        storage.Engine
	vectorIndex   *VectorIndex
	fulltextIndex *FulltextIndex
	crossEncoder  *CrossEncoder
	mu            sync.RWMutex

	// GPU k-means clustering for accelerated search (optional)
	clusterIndex   *gpu.ClusterIndex
	clusterEnabled bool
}

// NewService creates a new search Service with empty indexes.
//
// The service is created with:
//   - 1024-dimensional vector index (default for mxbai-embed-large)
//   - Empty full-text index
//   - Reference to storage engine for data enrichment
//
// Call BuildIndexes() after creation to populate indexes from existing data.
//
// Returns a new Service ready for indexing and searching.
//
// Example 1 - Basic Setup:
//
//	engine := storage.NewMemoryEngine()
//	svc := search.NewService(engine)
//	defer svc.Close()
//
//	// Build indexes from existing nodes
//	if err := svc.BuildIndexes(ctx); err != nil {
//		log.Fatal(err)
//	}
//
//	// Now ready to search
//	results, _ := svc.Search(ctx, "machine learning", nil, nil)
//
// Example 2 - With Embedder Integration:
//
//	engine := storage.NewBadgerEngine("./data")
//	svc := search.NewService(engine)
//
//	// Create embedder
//	embedder := embed.NewOllama(embed.DefaultOllamaConfig())
//
//	// Index documents with embeddings
//	for _, doc := range documents {
//		node := &storage.Node{
//			ID:     storage.NodeID(doc.ID),
//			Labels: []string{"Document"},
//			Properties: map[string]any{
//				"title":   doc.Title,
//				"content": doc.Content,
//			},
//			Embedding: embedder.Embed(ctx, doc.Content),
//		}
//		engine.CreateNode(node)
//		svc.IndexNode(node)
//	}
//
// Example 3 - Real-time Indexing:
//
//	svc := search.NewService(engine)
//
//	// Index as nodes are created
//	onCreate := func(node *storage.Node) {
//		if err := svc.IndexNode(node); err != nil {
//			log.Printf("Index failed: %v", err)
//		}
//	}
//
//	// Hook into storage engine
//	engine.OnNodeCreate(onCreate)
//
// ELI12:
//
// Think of NewService like building a library with two special catalogs:
//  1. A "similarity catalog" (vector index) - finds books that are LIKE what you want
//  2. A "keyword catalog" (fulltext index) - finds books with specific words
//
// When you search, the library assistant checks BOTH catalogs and shows you
// the books that appear in both lists first. That's hybrid search!
//
// Performance:
//   - Vector index: HNSW algorithm, O(log n) search
//   - Fulltext index: Inverted index, O(k + m) where k = unique terms, m = matches
//   - Memory: ~4KB per 1024-dim embedding + ~500 bytes per document
//
// Thread Safety:
//
// Safe for concurrent searches from multiple goroutines.
func NewService(engine storage.Engine) *Service {
	return &Service{
		engine:        engine,
		vectorIndex:   NewVectorIndex(1024), // Default to 1024 dimensions (mxbai-embed-large)
		fulltextIndex: NewFulltextIndex(),
	}
}
// EnableClustering enables GPU k-means clustering for accelerated vector search.
// This provides 10-50x speedup on large datasets (10K+ embeddings).
//
// Parameters:
//   - gpuManager: GPU manager for acceleration (can be nil for CPU-only)
//   - numClusters: Number of k-means clusters (0 for auto based on dataset size)
//
// Call this BEFORE BuildIndexes(), then call TriggerClustering() after indexing.
//
// Example:
//
//	gpuManager, _ := gpu.NewManager(nil)
//	svc.EnableClustering(gpuManager, 100) // 100 clusters
//	svc.BuildIndexes(ctx)
//	svc.TriggerClustering() // Run k-means
func (s *Service) EnableClustering(gpuManager *gpu.Manager, numClusters int) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if numClusters <= 0 {
		numClusters = 100 // Default
	}

	kmeansConfig := &gpu.KMeansConfig{
		NumClusters:   numClusters,
		MaxIterations: 50,
		Tolerance:     0.001,
		InitMethod:    "kmeans++",
	}
	embConfig := gpu.DefaultEmbeddingIndexConfig(1024) // Match vector index dimensions
	s.clusterIndex = gpu.NewClusterIndex(gpuManager, embConfig, kmeansConfig)
	s.clusterEnabled = true

	mode := "CPU"
	if gpuManager != nil {
		mode = "GPU"
	}
	log.Printf("[K-MEANS] ✅ Clustering ENABLED | mode=%s clusters=%d max_iter=%d init=%s",
		mode, numClusters, kmeansConfig.MaxIterations, kmeansConfig.InitMethod)
}

// MinEmbeddingsForClustering is the minimum number of embeddings needed
// before k-means clustering provides any benefit. Below this threshold,
// brute-force search is faster than cluster overhead.
const MinEmbeddingsForClustering = 1000

// TriggerClustering runs k-means clustering on all indexed embeddings.
// Call this after BuildIndexes() completes to organize embeddings into clusters.
//
// Returns nil (not error) if there are too few embeddings - clustering will
// be skipped silently as brute-force search is faster for small datasets.
// Returns error only if clustering is not enabled or fails unexpectedly.
func (s *Service) TriggerClustering() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.clusterIndex == nil {
		log.Printf("[K-MEANS] ❌ SKIPPED | reason=not_enabled")
		return fmt.Errorf("clustering not enabled - call EnableClustering() first")
	}

	embeddingCount := s.clusterIndex.Count()

	// Skip clustering if too few embeddings - not worth the overhead
	if embeddingCount < MinEmbeddingsForClustering {
		log.Printf("[K-MEANS] ⏭️ SKIPPED | embeddings=%d threshold=%d reason=too_few_embeddings",
			embeddingCount, MinEmbeddingsForClustering)
		return nil
	}

	log.Printf("[K-MEANS] 🔄 STARTING | embeddings=%d", embeddingCount)
	startTime := time.Now()

	if err := s.clusterIndex.Cluster(); err != nil {
		log.Printf("[K-MEANS] ❌ FAILED | embeddings=%d error=%v", embeddingCount, err)
		return fmt.Errorf("clustering failed: %w", err)
	}

	elapsed := time.Since(startTime)
	stats := s.clusterIndex.ClusterStats()
	log.Printf("[K-MEANS] ✅ COMPLETE | clusters=%d embeddings=%d iterations=%d duration=%v avg_cluster_size=%.1f",
		stats.NumClusters, stats.EmbeddingCount, stats.Iterations, elapsed, stats.AvgClusterSize)

	return nil
}

// IsClusteringEnabled returns true if GPU clustering is enabled.
func (s *Service) IsClusteringEnabled() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.clusterEnabled && s.clusterIndex != nil
}

// ClusterStats returns k-means clustering statistics.
func (s *Service) ClusterStats() *gpu.ClusterStats {
	s.mu.RLock()
	defer s.mu.RUnlock()
	if s.clusterIndex == nil {
		return nil
	}
	stats := s.clusterIndex.ClusterStats()
	return &stats
}

// EmbeddingCount returns the total number of nodes with embeddings in the vector index.
func (s *Service) EmbeddingCount() int {
	s.mu.RLock()
	defer s.mu.RUnlock()
	if s.vectorIndex == nil {
		return 0
	}
	return s.vectorIndex.Count()
}
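// A minimal end-to-end clustering sketch using only methods defined in this
// file (EnableClustering, BuildIndexes, TriggerClustering, ClusterStats).
// Note that TriggerClustering returns nil when fewer than
// MinEmbeddingsForClustering embeddings are indexed, so a nil error does not
// by itself guarantee clusters exist; inspecting ClusterStats() afterwards is
// one way to tell (an assumption about how the stats reflect cluster state).
//
//	svc.EnableClustering(gpuManager, 0) // 0 = default cluster count
//	if err := svc.BuildIndexes(ctx); err != nil {
//		log.Fatal(err)
//	}
//	if err := svc.TriggerClustering(); err != nil {
//		log.Printf("clustering unavailable: %v", err)
//	}
//	if stats := svc.ClusterStats(); stats != nil {
//		log.Printf("clusters=%d embeddings=%d", stats.NumClusters, stats.EmbeddingCount)
//	}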
// IndexNode adds a node to all search indexes.
func (s *Service) IndexNode(node *storage.Node) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Add to vector index if node has embedding
	if len(node.Embedding) > 0 {
		// DEBUG: Print embedding info
		// fmt.Printf("DEBUG: IndexNode %s has %d-dim embedding\n", node.ID, len(node.Embedding))
		if err := s.vectorIndex.Add(string(node.ID), node.Embedding); err != nil {
			return err
		}

		// Also add to cluster index if enabled
		if s.clusterIndex != nil {
			if err := s.clusterIndex.Add(string(node.ID), node.Embedding); err != nil {
				// Log but don't fail - cluster index is optional
				log.Printf("Warning: failed to add to cluster index: %v", err)
			}
		}
	}

	// Add to fulltext index
	text := s.extractSearchableText(node)
	if text != "" {
		s.fulltextIndex.Index(string(node.ID), text)
	}

	return nil
}

// RemoveNode removes a node from all search indexes.
func (s *Service) RemoveNode(nodeID storage.NodeID) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.vectorIndex.Remove(string(nodeID))
	s.fulltextIndex.Remove(string(nodeID))

	return nil
}

// NodeIterator is an interface for streaming node iteration.
type NodeIterator interface {
	IterateNodes(fn func(*storage.Node) bool) error
}

// BuildIndexes builds search indexes from all nodes in the engine.
// Prefers streaming iteration to avoid loading all nodes into memory.
func (s *Service) BuildIndexes(ctx context.Context) error {
	// Try streaming iterator first (memory efficient)
	if iterator, ok := s.engine.(NodeIterator); ok {
		count := 0
		err := iterator.IterateNodes(func(node *storage.Node) bool {
			select {
			case <-ctx.Done():
				return false // Stop iteration
			default:
				_ = s.IndexNode(node)
				count++
				if count%100 == 0 {
					fmt.Printf("📊 Indexed %d nodes...\n", count)
				}
				return true // Continue
			}
		})
		if err != nil {
			return err
		}
		fmt.Printf("📊 Indexed %d total nodes\n", count)
		return nil
	}

	// Use streaming fallback with chunked processing
	count := 0
	err := storage.StreamNodesWithFallback(ctx, s.engine, 1000, func(node *storage.Node) error {
		if err := s.IndexNode(node); err != nil {
			return nil // Continue on indexing errors
		}
		count++
		if count%100 == 0 {
			fmt.Printf("📊 Indexed %d nodes...\n", count)
		}
		return nil
	})
	if err != nil {
		return err
	}
	fmt.Printf("📊 Indexed %d total nodes\n", count)
	return nil
}
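// A sketch of satisfying NodeIterator so BuildIndexes can stream instead of
// falling back to chunked loading. The sliceEngine type and its nodes field
// are hypothetical - any storage.Engine that adds this method qualifies.
//
//	type sliceEngine struct {
//		storage.Engine
//		nodes []*storage.Node
//	}
//
//	// IterateNodes calls fn for each node, stopping early if fn returns false.
//	func (e *sliceEngine) IterateNodes(fn func(*storage.Node) bool) error {
//		for _, n := range e.nodes {
//			if !fn(n) {
//				break
//			}
//		}
//		return nil
//	}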
// Search performs hybrid search with automatic fallback.
//
// Search strategy:
//  1. Try RRF hybrid search (vector + BM25) if embedding provided
//  2. Fall back to vector-only if RRF returns no results
//  3. Fall back to BM25-only if vector search fails or no embedding
//
// This ensures you always get results even if one index is empty or fails.
//
// Parameters:
//   - ctx: Context for cancellation
//   - query: Text query for BM25 search
//   - embedding: Vector embedding for similarity search (can be nil)
//   - opts: Search options (use DefaultSearchOptions() if unsure)
//
// Example:
//
//	svc := search.NewService(engine)
//
//	// Hybrid search (best results)
//	query := "graph database memory"
//	embedding, _ := embedder.Embed(ctx, query)
//	opts := search.DefaultSearchOptions()
//	opts.Limit = 10
//
//	resp, err := svc.Search(ctx, query, embedding, opts)
//	if err != nil {
//		return err
//	}
//
//	fmt.Printf("Found %d results using %s\n",
//		resp.Returned, resp.SearchMethod)
//
//	for i, result := range resp.Results {
//		fmt.Printf("%d. [RRF: %.4f] %s\n",
//			i+1, result.RRFScore, result.Title)
//		fmt.Printf("   Vector rank: #%d, BM25 rank: #%d\n",
//			result.VectorRank, result.BM25Rank)
//	}
//
// Returns a SearchResponse with ranked results and metadata about the search method used.
func (s *Service) Search(ctx context.Context, query string, embedding []float32, opts *SearchOptions) (*SearchResponse, error) {
	if opts == nil {
		opts = DefaultSearchOptions()
	}

	// If no embedding provided, fall back to full-text only
	if len(embedding) == 0 {
		return s.fullTextSearchOnly(ctx, query, opts)
	}

	// Try RRF hybrid search
	response, err := s.rrfHybridSearch(ctx, query, embedding, opts)
	if err == nil && len(response.Results) > 0 {
		return response, nil
	}

	// Fallback to vector-only
	response, err = s.vectorSearchOnly(ctx, embedding, opts)
	if err == nil && len(response.Results) > 0 {
		response.FallbackTriggered = true
		response.Message = "RRF search returned no results, fell back to vector search"
		return response, nil
	}

	// Final fallback to full-text
	return s.fullTextSearchOnly(ctx, query, opts)
}

// rrfHybridSearch performs Reciprocal Rank Fusion combining vector and BM25 results.
func (s *Service) rrfHybridSearch(ctx context.Context, query string, embedding []float32, opts *SearchOptions) (*SearchResponse, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Get more candidates for better fusion
	candidateLimit := opts.Limit * 2

	// Step 1: Vector search
	vectorResults, err := s.vectorIndex.Search(ctx, embedding, candidateLimit, opts.MinSimilarity)
	if err != nil {
		return nil, err
	}

	// Step 2: BM25 full-text search
	bm25Results := s.fulltextIndex.Search(query, candidateLimit)

	// Step 3: Filter by type if specified
	if len(opts.Types) > 0 {
		vectorResults = s.filterByType(vectorResults, opts.Types)
		bm25Results = s.filterByType(bm25Results, opts.Types)
	}

	// Step 4: Fuse with RRF
	fusedResults := s.fuseRRF(vectorResults, bm25Results, opts)

	// Step 5: Apply MMR diversification if enabled
	searchMethod := "rrf_hybrid"
	message := "Reciprocal Rank Fusion (Vector + BM25)"
	if opts.MMREnabled && len(embedding) > 0 {
		fusedResults = s.applyMMR(fusedResults, embedding, opts.Limit, opts.MMRLambda)
		searchMethod = "rrf_hybrid+mmr"
		message = fmt.Sprintf("RRF + MMR diversification (λ=%.2f)", opts.MMRLambda)
	}

	// Step 6: Cross-encoder reranking (optional Stage 2)
	if opts.RerankEnabled && s.crossEncoder != nil && s.crossEncoder.Config().Enabled {
		fusedResults = s.applyCrossEncoderRerank(ctx, query, fusedResults, opts)
		if searchMethod == "rrf_hybrid" {
			searchMethod = "rrf_hybrid+rerank"
			message = "RRF + Cross-Encoder Reranking"
		} else {
			searchMethod += "+rerank"
			message += " + Cross-Encoder Reranking"
		}
	}

	// Step 7: Convert to SearchResult and enrich with node data
	results := s.enrichResults(fusedResults, opts.Limit)

	return &SearchResponse{
		Status:          "success",
		Query:           query,
		Results:         results,
		TotalCandidates: len(fusedResults),
		Returned:        len(results),
		SearchMethod:    searchMethod,
		Message:         message,
		Metrics: &SearchMetrics{
			VectorCandidates: len(vectorResults),
			BM25Candidates:   len(bm25Results),
			FusedCandidates:  len(fusedResults),
		},
	}, nil
}
// fuseRRF implements the Reciprocal Rank Fusion (RRF) algorithm.
//
// RRF combines multiple ranked lists without requiring score normalization.
// Each ranking method votes for documents using their rank positions.
//
// Formula: RRF_score(doc) = Σ (weight_i / (k + rank_i))
//
// Where:
//   - k = constant (default 60) to smooth rank differences
//   - rank_i = position in list i (1-indexed: 1st place = rank 1)
//   - weight_i = importance weight for list i (default 1.0)
//
// Why k=60?
//   - From research by Cormack et al. (2009)
//   - Balances between giving too much weight to top results vs treating all ranks equally
//   - k=60 means rank #1 gets score 1/61≈0.0164, rank #2 gets 1/62≈0.0161
//   - Difference is small, but rank #1 is still slightly better
//
// Example calculation:
//
//	Document appears in:
//	- Vector results at rank #2
//	- BM25 results at rank #5
//
//	RRF_score = (1.0 / (60 + 2)) + (1.0 / (60 + 5))
//	          = (1.0 / 62) + (1.0 / 65)
//	          = 0.01613 + 0.01538
//	          = 0.03151
//
//	Document only in vector at rank #1:
//	RRF_score = (1.0 / (60 + 1)) + 0
//	          = 0.01639
//
// First document wins! Being in both lists beats being #1 in just one.
//
// ELI12:
//
// Think of it like American Idol with two judges:
//   - Judge A ranks singers by vocal technique
//   - Judge B ranks by stage presence
//
// A singer ranked #2 by both judges should beat one ranked #1 by only one judge.
// RRF does this math automatically!
//
// Reference: Cormack, Clarke & Buettcher (2009)
// "Reciprocal Rank Fusion outperforms the best known automatic evaluation
// measures in combining results from multiple text retrieval systems."
func (s *Service) fuseRRF(vectorResults, bm25Results []indexResult, opts *SearchOptions) []rrfResult {
	// Create rank maps (1-indexed per RRF formula)
	vectorRanks := make(map[string]int)
	for i, r := range vectorResults {
		vectorRanks[r.ID] = i + 1
	}

	bm25Ranks := make(map[string]int)
	for i, r := range bm25Results {
		bm25Ranks[r.ID] = i + 1
	}

	// Get all unique document IDs
	allIDs := make(map[string]struct{})
	for _, r := range vectorResults {
		allIDs[r.ID] = struct{}{}
	}
	for _, r := range bm25Results {
		allIDs[r.ID] = struct{}{}
	}

	// Calculate RRF scores
	var results []rrfResult
	k := opts.RRFK
	if k == 0 {
		k = 60 // Default
	}

	for id := range allIDs {
		var vectorComponent, bm25Component float64

		if rank, ok := vectorRanks[id]; ok {
			vectorComponent = opts.VectorWeight / (k + float64(rank))
		}
		if rank, ok := bm25Ranks[id]; ok {
			bm25Component = opts.BM25Weight / (k + float64(rank))
		}

		rrfScore := vectorComponent + bm25Component

		// Skip below threshold
		if rrfScore < opts.MinRRFScore {
			continue
		}

		// Get original score (prefer vector if available)
		var originalScore float64
		if idx := findResultIndex(vectorResults, id); idx >= 0 {
			originalScore = vectorResults[idx].Score
		} else if idx := findResultIndex(bm25Results, id); idx >= 0 {
			originalScore = bm25Results[idx].Score
		}

		results = append(results, rrfResult{
			ID:            id,
			RRFScore:      rrfScore,
			VectorRank:    vectorRanks[id],
			BM25Rank:      bm25Ranks[id],
			OriginalScore: originalScore,
		})
	}

	// Sort by RRF score descending
	sort.Slice(results, func(i, j int) bool {
		return results[i].RRFScore > results[j].RRFScore
	})

	return results
}
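// A self-contained sketch of the fusion math above (plain Go, independent of
// this package). It reproduces the doc-comment example where a document
// ranked #2 by vector search and #5 by BM25 beats one ranked #1 in a single
// list:
//
//	package main
//
//	import "fmt"
//
//	// rrf sums weight/(k+rank) over every list the document appears in.
//	func rrf(k float64, weights map[string]float64, ranks map[string]int) float64 {
//		score := 0.0
//		for list, rank := range ranks {
//			score += weights[list] / (k + float64(rank))
//		}
//		return score
//	}
//
//	func main() {
//		w := map[string]float64{"vector": 1.0, "bm25": 1.0}
//		both := rrf(60, w, map[string]int{"vector": 2, "bm25": 5}) // ≈0.03151
//		single := rrf(60, w, map[string]int{"vector": 1})          // ≈0.01639
//		fmt.Println(both > single)                                 // true
//	}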
// applyMMR applies Maximal Marginal Relevance diversification to search results.
//
// MMR re-ranks results to balance relevance with diversity, preventing redundant
// results that are too similar to each other.
//
// Formula: MMR(d) = λ * Sim(d, query) - (1-λ) * max(Sim(d, d_i))
//
// Where:
//   - λ (lambda) controls relevance vs diversity balance (0.0 to 1.0)
//   - λ = 1.0: Pure relevance (no diversity)
//   - λ = 0.0: Pure diversity (ignore relevance)
//   - λ = 0.7: Balanced (default, 70% relevance, 30% diversity)
//   - Sim(d, query) = similarity to the query (RRF score)
//   - max(Sim(d, d_i)) = max similarity to already selected results
//
// Algorithm:
//  1. Select the most relevant document first
//  2. For each remaining position:
//     - Calculate MMR score for all remaining docs
//     - Select doc with highest MMR (balancing relevance + diversity)
//  3. Repeat until limit reached
//
// ELI12:
//
// Imagine picking a playlist from your library. You don't want 5 songs that
// all sound the same! MMR is like saying:
//   - "I want songs I like (relevance)"
//   - "But also songs that are different from what I already picked (diversity)"
//
// Lambda controls how much you care about variety vs. your favorites.
//
// Reference: Carbonell & Goldstein (1998)
// "The Use of MMR, Diversity-Based Reranking for Reordering Documents
// and Producing Summaries"
func (s *Service) applyMMR(results []rrfResult, queryEmbedding []float32, limit int, lambda float64) []rrfResult {
	if len(results) <= 1 || lambda >= 1.0 {
		// No diversification needed
		return results
	}

	// Get embeddings for all candidate documents
	type docWithEmbed struct {
		result    rrfResult
		embedding []float32
	}

	candidates := make([]docWithEmbed, 0, len(results))
	for _, r := range results {
		// Get embedding from storage
		node, err := s.engine.GetNode(storage.NodeID(r.ID))
		if err != nil || node == nil || len(node.Embedding) == 0 {
			// No embedding - use original score only
			candidates = append(candidates, docWithEmbed{
				result:    r,
				embedding: nil,
			})
		} else {
			candidates = append(candidates, docWithEmbed{
				result:    r,
				embedding: node.Embedding,
			})
		}
	}

	// MMR selection
	selected := make([]rrfResult, 0, limit)
	remaining := candidates

	for len(selected) < limit && len(remaining) > 0 {
		bestIdx := -1
		bestMMR := math.Inf(-1)

		for i, cand := range remaining {
			// Relevance component: similarity to query (using RRF score as proxy)
			relevance := cand.result.RRFScore

			// Diversity component: max similarity to already selected docs
			maxSimToSelected := 0.0
			if cand.embedding != nil && len(selected) > 0 {
				for _, sel := range selected {
					// Find embedding for selected doc
					for _, c := range candidates {
						if c.result.ID == sel.ID && c.embedding != nil {
							sim := vector.CosineSimilarity(cand.embedding, c.embedding)
							if sim > maxSimToSelected {
								maxSimToSelected = sim
							}
							break
						}
					}
				}
			}

			// MMR formula
			mmrScore := lambda*relevance - (1-lambda)*maxSimToSelected

			if mmrScore > bestMMR {
				bestMMR = mmrScore
				bestIdx = i
			}
		}

		if bestIdx >= 0 {
			selected = append(selected, remaining[bestIdx].result)
			// Remove selected from remaining
			remaining = append(remaining[:bestIdx], remaining[bestIdx+1:]...)
		} else {
			break
		}
	}

	return selected
}
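// A worked example of the MMR trade-off above (illustrative numbers, not from
// a real index). With λ = 0.7, candidate A (RRF 0.030) that is nearly a
// duplicate of an already-selected result (cosine 0.95) loses to a slightly
// less relevant but novel candidate B (RRF 0.025, cosine 0.20):
//
//	MMR(A) = 0.7*0.030 - 0.3*0.95 = 0.0210 - 0.2850 = -0.2640
//	MMR(B) = 0.7*0.025 - 0.3*0.20 = 0.0175 - 0.0600 = -0.0425
//
// B wins the slot, so the result list stays diverse.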
// applyCrossEncoderRerank applies cross-encoder reranking to RRF results.
//
// This is Stage 2 of a two-stage retrieval system:
//   - Stage 1 (fast): Bi-encoder retrieval (vector + BM25 with RRF)
//   - Stage 2 (accurate): Cross-encoder reranking of top-K candidates
//
// Cross-encoders see query and document together, capturing fine-grained
// semantic relationships that bi-encoders miss. However, they're slower
// since they can't be pre-computed.
//
// ELI12:
//
// Stage 1 is like using a library catalog to find 100 potentially relevant books.
// Stage 2 is like reading each book's summary to pick the best 10.
func (s *Service) applyCrossEncoderRerank(ctx context.Context, query string, results []rrfResult, opts *SearchOptions) []rrfResult {
	if len(results) == 0 {
		return results
	}

	// Build candidates with content from storage
	candidates := make([]RerankCandidate, 0, len(results))
	for _, r := range results {
		node, err := s.engine.GetNode(storage.NodeID(r.ID))
		if err != nil || node == nil {
			continue
		}

		// Extract searchable content
		content := s.extractSearchableText(node)
		if content == "" {
			continue
		}

		candidates = append(candidates, RerankCandidate{
			ID:      r.ID,
			Content: content,
			Score:   r.RRFScore,
		})
	}

	if len(candidates) == 0 {
		return results
	}

	// Apply cross-encoder reranking
	reranked, err := s.crossEncoder.Rerank(ctx, query, candidates)
	if err != nil {
		// Fallback to original results on error
		return results
	}

	// Convert back to rrfResult format
	rerankedResults := make([]rrfResult, 0, len(reranked))
	for _, r := range reranked {
		// Find original result to preserve VectorRank/BM25Rank
		var original *rrfResult
		for i := range results {
			if results[i].ID == r.ID {
				original = &results[i]
				break
			}
		}

		if original != nil {
			rerankedResults = append(rerankedResults, rrfResult{
				ID:            r.ID,
				RRFScore:      r.FinalScore, // Use cross-encoder score
				VectorRank:    original.VectorRank,
				BM25Rank:      original.BM25Rank,
				OriginalScore: r.BiScore,
			})
		}
	}

	return rerankedResults
}

// SetCrossEncoder configures the cross-encoder reranker.
//
// Example:
//
//	svc := search.NewService(engine)
//	svc.SetCrossEncoder(search.NewCrossEncoder(&search.CrossEncoderConfig{
//		Enabled: true,
//		APIURL:  "http://localhost:8081/rerank",
//		Model:   "cross-encoder/ms-marco-MiniLM-L-6-v2",
//	}))
func (s *Service) SetCrossEncoder(ce *CrossEncoder) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.crossEncoder = ce
}

// CrossEncoderAvailable returns true if cross-encoder reranking is configured and available.
func (s *Service) CrossEncoderAvailable(ctx context.Context) bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.crossEncoder != nil && s.crossEncoder.IsAvailable(ctx)
}
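// A small sketch tying the pieces together: only enable the Stage 2 rerank
// when the cross-encoder endpoint actually responds, so queries never pay
// the latency cost against a dead reranker. Uses only CrossEncoderAvailable
// and the SearchOptions fields defined in this file.
//
//	opts := search.DefaultSearchOptions()
//	if svc.CrossEncoderAvailable(ctx) {
//		opts.RerankEnabled = true
//		opts.RerankTopK = 50
//	}
//	resp, err := svc.Search(ctx, query, embedding, opts)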
// vectorSearchOnly performs vector-only search.
func (s *Service) vectorSearchOnly(ctx context.Context, embedding []float32, opts *SearchOptions) (*SearchResponse, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var results []indexResult
	var err error
	searchMethod := "vector"
	message := "Vector similarity search (cosine)"
	searchStart := time.Now()

	// Use cluster-accelerated search if available and has been clustered
	if s.clusterIndex != nil && s.clusterIndex.IsClustered() {
		// Search using k-means clusters (much faster for large datasets)
		numClustersToSearch := 3
		clusterResults, clusterErr := s.clusterIndex.SearchWithClusters(embedding, opts.Limit*2, numClustersToSearch)
		if clusterErr == nil && len(clusterResults) > 0 {
			// Convert gpu.SearchResult to indexResult
			for _, r := range clusterResults {
				results = append(results, indexResult{
					ID:    r.ID,
					Score: float64(r.Score),
				})
			}
			searchMethod = "vector_clustered"
			message = "GPU k-means cluster-accelerated vector search"
			log.Printf("[K-MEANS] 🔍 SEARCH | mode=clustered clusters_searched=%d candidates=%d duration=%v",
				numClustersToSearch, len(results), time.Since(searchStart))
		} else {
			// Fall back to brute force
			results, err = s.vectorIndex.Search(ctx, embedding, opts.Limit*2, opts.MinSimilarity)
			log.Printf("[K-MEANS] 🔍 SEARCH | mode=brute_force_fallback reason=%v candidates=%d duration=%v",
				clusterErr, len(results), time.Since(searchStart))
		}
	} else {
		// Standard brute-force vector search
		results, err = s.vectorIndex.Search(ctx, embedding, opts.Limit*2, opts.MinSimilarity)
		// Only log if clustering is enabled but not yet clustered (helps debugging)
		if s.clusterEnabled && s.clusterIndex != nil {
			log.Printf("[K-MEANS] 🔍 SEARCH | mode=brute_force reason=not_yet_clustered candidates=%d duration=%v",
				len(results), time.Since(searchStart))
		}
	}
	if err != nil {
		return nil, err
	}

	if len(opts.Types) > 0 {
		results = s.filterByType(results, opts.Types)
	}

	searchResults := s.enrichIndexResults(results, opts.Limit)

	return &SearchResponse{
		Status:          "success",
		Results:         searchResults,
		TotalCandidates: len(results),
		Returned:        len(searchResults),
		SearchMethod:    searchMethod,
		Message:         message,
	}, nil
}

// fullTextSearchOnly performs full-text BM25 search only.
func (s *Service) fullTextSearchOnly(ctx context.Context, query string, opts *SearchOptions) (*SearchResponse, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	results := s.fulltextIndex.Search(query, opts.Limit*2)

	if len(opts.Types) > 0 {
		results = s.filterByType(results, opts.Types)
	}

	searchResults := s.enrichIndexResults(results, opts.Limit)

	return &SearchResponse{
		Status:            "success",
		Query:             query,
		Results:           searchResults,
		TotalCandidates:   len(results),
		Returned:          len(searchResults),
		SearchMethod:      "fulltext",
		FallbackTriggered: true,
		Message:           "Full-text BM25 search (vector search unavailable or returned no results)",
	}, nil
}
// extractSearchableText extracts text from ALL node properties for full-text indexing.
// This includes:
//   - Node labels (for searching by type)
//   - All string properties
//   - String representations of other property types
//   - Priority properties (content, title, etc.) are included first for better ranking
func (s *Service) extractSearchableText(node *storage.Node) string {
	var parts []string

	// 1. Add labels first (important for type-based search)
	for _, label := range node.Labels {
		parts = append(parts, label)
	}

	// 2. Add priority searchable properties first (better ranking for these)
	for _, prop := range SearchableProperties {
		if val, ok := node.Properties[prop]; ok {
			if str := propertyToString(val); str != "" {
				parts = append(parts, str)
			}
		}
	}

	// 3. Add ALL other properties (for comprehensive search)
	prioritySet := make(map[string]bool)
	for _, p := range SearchableProperties {
		prioritySet[p] = true
	}

	for key, val := range node.Properties {
		// Skip if already added as priority property
		if prioritySet[key] {
			continue
		}
		// Add property name and value for searchability
		if str := propertyToString(val); str != "" {
			// Include property name to enable searches like "genre:action"
			parts = append(parts, key, str)
		}
	}

	return strings.Join(parts, " ")
}

// propertyToString converts any property value to a searchable string.
func propertyToString(val interface{}) string {
	switch v := val.(type) {
	case string:
		return v
	case []string:
		return strings.Join(v, " ")
	case int, int64, int32, float64, float32:
		return fmt.Sprintf("%v", v)
	case bool:
		if v {
			return "true"
		}
		return "false"
	case []interface{}:
		var strs []string
		for _, item := range v {
			if s := propertyToString(item); s != "" {
				strs = append(strs, s)
			}
		}
		return strings.Join(strs, " ")
	default:
		// For complex types, try to get string representation
		if v != nil {
			return fmt.Sprintf("%v", v)
		}
		return ""
	}
}
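// To illustrate the flattening above (hypothetical node, output assembled by
// hand): labels come first, then priority properties, then the remaining
// properties as "key value" pairs, all joined with spaces.
//
//	node := &storage.Node{
//		Labels: []string{"Movie"},
//		Properties: map[string]any{
//			"title": "Heat",
//			"genre": "action",
//			"year":  1995,
//		},
//	}
//	// extractSearchableText(node) yields something like:
//	// "Movie Heat genre action year 1995"
//	// (ordering of the non-priority properties is map-iteration order)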
// filterByType filters results to only include specified node types.
func (s *Service) filterByType(results []indexResult, types []string) []indexResult {
	if len(types) == 0 {
		return results
	}

	typeSet := make(map[string]struct{})
	for _, t := range types {
		typeSet[strings.ToLower(t)] = struct{}{}
	}

	var filtered []indexResult
	for _, r := range results {
		node, err := s.engine.GetNode(storage.NodeID(r.ID))
		if err != nil {
			continue
		}

		// Check labels first, then the type property; append at most once
		matched := false
		for _, label := range node.Labels {
			if _, ok := typeSet[strings.ToLower(label)]; ok {
				matched = true
				break
			}
		}
		if !matched {
			if nodeType, ok := node.Properties["type"].(string); ok {
				if _, ok := typeSet[strings.ToLower(nodeType)]; ok {
					matched = true
				}
			}
		}
		if matched {
			filtered = append(filtered, r)
		}
	}

	return filtered
}

// enrichResults converts RRF results to SearchResult with full node data.
func (s *Service) enrichResults(rrfResults []rrfResult, limit int) []SearchResult {
	var results []SearchResult

	for i, rrf := range rrfResults {
		if i >= limit {
			break
		}

		node, err := s.engine.GetNode(storage.NodeID(rrf.ID))
		if err != nil {
			continue
		}

		result := SearchResult{
			ID:         rrf.ID,
			NodeID:     node.ID,
			Labels:     node.Labels,
			Properties: node.Properties,
			Score:      rrf.RRFScore,
			Similarity: rrf.OriginalScore,
			RRFScore:   rrf.RRFScore,
			VectorRank: rrf.VectorRank,
			BM25Rank:   rrf.BM25Rank,
		}

		// Extract common fields
		if t, ok := node.Properties["type"].(string); ok {
			result.Type = t
		}
		if title, ok := node.Properties["title"].(string); ok {
			result.Title = title
		}
		if desc, ok := node.Properties["description"].(string); ok {
			result.Description = desc
		}
		if content, ok := node.Properties["content"].(string); ok {
			result.ContentPreview = truncate(content, 200)
		} else if text, ok := node.Properties["text"].(string); ok {
			result.ContentPreview = truncate(text, 200)
		}

		results = append(results, result)
	}

	return results
}

// enrichIndexResults converts raw index results to SearchResult.
func (s *Service) enrichIndexResults(indexResults []indexResult, limit int) []SearchResult {
	var results []SearchResult

	for i, ir := range indexResults {
		if i >= limit {
			break
		}

		node, err := s.engine.GetNode(storage.NodeID(ir.ID))
		if err != nil {
			continue
		}

		result := SearchResult{
			ID:         ir.ID,
			NodeID:     node.ID,
			Labels:     node.Labels,
			Properties: node.Properties,
			Score:      ir.Score,
			Similarity: ir.Score,
		}

		// Extract common fields
		if t, ok := node.Properties["type"].(string); ok {
			result.Type = t
		}
		if title, ok := node.Properties["title"].(string); ok {
			result.Title = title
		}
		if desc, ok := node.Properties["description"].(string); ok {
			result.Description = desc
		}
		if content, ok := node.Properties["content"].(string); ok {
			result.ContentPreview = truncate(content, 200)
		} else if text, ok := node.Properties["text"].(string); ok {
			result.ContentPreview = truncate(text, 200)
		}

		results = append(results, result)
	}

	return results
}

// GetAdaptiveRRFConfig returns optimized RRF weights based on query characteristics.
//
// This function analyzes the query and adjusts weights to favor the search method
// most likely to perform well:
//
//   - Short queries (1-2 words): Favor BM25 keyword matching
//     Example: "python" or "graph database"
//     Weights: Vector=0.5, BM25=1.5
//
//   - Long queries (6+ words): Favor vector semantic understanding
//     Example: "How do I implement a distributed consensus algorithm?"
//     Weights: Vector=1.5, BM25=0.5
//
//   - Medium queries (3-5 words): Balanced approach
//     Example: "machine learning algorithms"
//     Weights: Vector=1.0, BM25=1.0
//
// Why this works:
//   - Short queries lack context → keywords more reliable
//   - Long queries have semantic meaning → embeddings capture intent better
//
// Example:
//
//	// Automatic adaptation
//	query1 := "database"
//	opts1 := search.GetAdaptiveRRFConfig(query1)
//	fmt.Printf("Short query weights: V=%.1f, B=%.1f\n",
//		opts1.VectorWeight, opts1.BM25Weight)
//	// Output: V=0.5, B=1.5 (favors keywords)
//
//	query2 := "What are the best practices for scaling graph databases?"
//	opts2 := search.GetAdaptiveRRFConfig(query2)
//	fmt.Printf("Long query weights: V=%.1f, B=%.1f\n",
//		opts2.VectorWeight, opts2.BM25Weight)
//	// Output: V=1.5, B=0.5 (favors semantics)
//
// Returns SearchOptions with adapted weights. Other options (Limit, MinSimilarity)
// are set to defaults.
func GetAdaptiveRRFConfig(query string) *SearchOptions {
	words := strings.Fields(query)
	wordCount := len(words)

	opts := DefaultSearchOptions()

	// Short queries (1-2 words): Emphasize keyword matching
	if wordCount <= 2 {
		opts.VectorWeight = 0.5
		opts.BM25Weight = 1.5
		return opts
	}

	// Long queries (6+ words): Emphasize semantic understanding
	if wordCount >= 6 {
		opts.VectorWeight = 1.5
		opts.BM25Weight = 0.5
		return opts
	}

	// Medium queries: Balanced
	return opts
}

// Helper types

type indexResult struct {
	ID    string
	Score float64
}

type rrfResult struct {
	ID            string
	RRFScore      float64
	VectorRank    int
	BM25Rank      int
	OriginalScore float64
}

func findResultIndex(results []indexResult, id string) int {
	for i, r := range results {
		if r.ID == id {
			return i
		}
	}
	return -1
}

func truncate(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	// Handle edge cases where maxLen is too small for ellipsis
	if maxLen <= 3 {
		if maxLen <= 0 {
			return ""
		}
		return s[:maxLen]
	}
	return s[:maxLen-3] + "..."
}
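// A short sketch of feeding the adaptive weights straight into Search; the
// embedder variable is assumed to come from the embed package as in the
// examples above.
//
//	opts := search.GetAdaptiveRRFConfig(query)
//	embedding, _ := embedder.Embed(ctx, query)
//	resp, err := svc.Search(ctx, query, embedding, opts)
//	if err != nil {
//		log.Fatal(err)
//	}
//	for _, r := range resp.Results {
//		fmt.Printf("[%.4f] %s\n", r.RRFScore, r.Title)
//	}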
