M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
search_test.go (40 kB)
// Package search tests
package search

import (
	"context"
	"fmt"
	"math"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/orneryd/nornicdb/pkg/storage"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestVectorIndex_Basic tests basic vector index operations.
func TestVectorIndex_Basic(t *testing.T) {
	idx := NewVectorIndex(4)

	// Add vectors
	err := idx.Add("doc1", []float32{1, 0, 0, 0})
	require.NoError(t, err)
	err = idx.Add("doc2", []float32{0.9, 0.1, 0, 0})
	require.NoError(t, err)
	err = idx.Add("doc3", []float32{0, 1, 0, 0})
	require.NoError(t, err)

	assert.Equal(t, 3, idx.Count())
	assert.True(t, idx.HasVector("doc1"))
	assert.False(t, idx.HasVector("doc99"))

	// Search for similar vectors
	results, err := idx.Search(context.Background(), []float32{1, 0, 0, 0}, 10, 0.5)
	require.NoError(t, err)

	// doc1 should be most similar (identical), then doc2 (0.9 similarity)
	require.Len(t, results, 2) // doc3 is orthogonal, below threshold
	assert.Equal(t, "doc1", results[0].ID)
	assert.InDelta(t, 1.0, results[0].Score, 0.01) // Identical
	assert.Equal(t, "doc2", results[1].ID)
}

// TestVectorIndex_DimensionMismatch tests dimension validation.
func TestVectorIndex_DimensionMismatch(t *testing.T) {
	idx := NewVectorIndex(4)

	err := idx.Add("doc1", []float32{1, 2, 3}) // Wrong dimension
	assert.ErrorIs(t, err, ErrDimensionMismatch)

	err = idx.Add("doc1", []float32{1, 2, 3, 4, 5}) // Wrong dimension
	assert.ErrorIs(t, err, ErrDimensionMismatch)

	err = idx.Add("doc1", []float32{1, 2, 3, 4}) // Correct dimension
	assert.NoError(t, err)
}

// TestFulltextIndex_BM25 tests BM25 full-text search.
func TestFulltextIndex_BM25(t *testing.T) {
	idx := NewFulltextIndex()

	// Index some documents
	idx.Index("doc1", "machine learning deep neural networks")
	idx.Index("doc2", "deep learning with tensorflow and pytorch")
	idx.Index("doc3", "database systems and query optimization")
	idx.Index("doc4", "natural language processing with transformers")

	assert.Equal(t, 4, idx.Count())

	// Search for "deep learning"
	results := idx.Search("deep learning", 10)
	require.NotEmpty(t, results)

	// doc1 and doc2 should match
	t.Logf("Search results for 'deep learning':")
	for _, r := range results {
		t.Logf(" %s: score=%.4f", r.ID, r.Score)
	}

	// Both doc1 and doc2 should be in results (both contain "deep" and/or "learning")
	ids := make(map[string]bool)
	for _, r := range results {
		ids[r.ID] = true
	}
	assert.True(t, ids["doc1"] || ids["doc2"], "Expected doc1 or doc2 in results")

	// Search for "database"
	results = idx.Search("database query", 10)
	require.NotEmpty(t, results)
	assert.Equal(t, "doc3", results[0].ID)
}

// TestFulltextIndex_Tokenization tests text tokenization.
func TestFulltextIndex_Tokenization(t *testing.T) {
	tokens := tokenize("Hello, World! This is a TEST-123.")
	t.Logf("Tokens: %v", tokens)

	// Should lowercase, remove punctuation, filter stop words
	assert.Contains(t, tokens, "hello")
	assert.Contains(t, tokens, "world")
	assert.Contains(t, tokens, "test")
	assert.Contains(t, tokens, "123")

	// Should NOT contain stop words
	assert.NotContains(t, tokens, "this")
	assert.NotContains(t, tokens, "is")
	assert.NotContains(t, tokens, "a")
}

// TestRRFFusion tests Reciprocal Rank Fusion algorithm.
func TestRRFFusion(t *testing.T) {
	// Create test service with mock data
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create nodes
	nodes := []*storage.Node{
		{ID: "doc1", Labels: []string{"Node"}, Properties: map[string]any{"title": "ML Basics"}},
		{ID: "doc2", Labels: []string{"Node"}, Properties: map[string]any{"title": "Deep Learning"}},
		{ID: "doc3", Labels: []string{"Node"}, Properties: map[string]any{"title": "Database Design"}},
	}
	err := engine.BulkCreateNodes(nodes)
	require.NoError(t, err)

	svc := NewService(engine)

	// Test RRF fusion directly
	vectorResults := []indexResult{
		{ID: "doc1", Score: 0.95},
		{ID: "doc2", Score: 0.85},
	}
	bm25Results := []indexResult{
		{ID: "doc2", Score: 5.5}, // doc2 appears in both - should rank highest
		{ID: "doc3", Score: 4.2},
	}

	opts := DefaultSearchOptions()
	fusedResults := svc.fuseRRF(vectorResults, bm25Results, opts)

	t.Logf("RRF Fusion results:")
	for _, r := range fusedResults {
		t.Logf(" %s: rrf=%.4f vectorRank=%d bm25Rank=%d", r.ID, r.RRFScore, r.VectorRank, r.BM25Rank)
	}

	// doc2 should be first (appears in both lists)
	require.NotEmpty(t, fusedResults)
	assert.Equal(t, "doc2", fusedResults[0].ID)
	assert.Equal(t, 2, fusedResults[0].VectorRank) // Second in vector list = rank 2 (1-indexed)
	assert.Equal(t, 1, fusedResults[0].BM25Rank)   // First in BM25 = rank 1
}

// TestAdaptiveRRFConfig tests adaptive RRF configuration.
func TestAdaptiveRRFConfig(t *testing.T) {
	// Short query - should favor BM25
	shortOpts := GetAdaptiveRRFConfig("docker")
	assert.Equal(t, 0.5, shortOpts.VectorWeight)
	assert.Equal(t, 1.5, shortOpts.BM25Weight)

	// Long query - should favor vector
	longOpts := GetAdaptiveRRFConfig("how do I configure docker containers for production deployment")
	assert.Equal(t, 1.5, longOpts.VectorWeight)
	assert.Equal(t, 0.5, longOpts.BM25Weight)

	// Medium query - should be balanced
	medOpts := GetAdaptiveRRFConfig("configure docker production")
	assert.Equal(t, 1.0, medOpts.VectorWeight)
	assert.Equal(t, 1.0, medOpts.BM25Weight)
}

// TestSearchService_FullTextOnly tests full-text search without embeddings.
func TestSearchService_FullTextOnly(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create nodes with searchable properties
	nodes := []*storage.Node{
		{
			ID:     "node1",
			Labels: []string{"Node"},
			Properties: map[string]any{
				"type":    "memory",
				"title":   "Machine Learning Tutorial",
				"content": "Introduction to machine learning algorithms and neural networks",
			},
		},
		{
			ID:     "node2",
			Labels: []string{"Node"},
			Properties: map[string]any{
				"type":        "todo",
				"title":       "Database Migration",
				"description": "Migrate PostgreSQL database to new server",
			},
		},
		{
			ID:     "node3",
			Labels: []string{"FileChunk"},
			Properties: map[string]any{
				"text": "function calculateMachineLearningScore() { return 42; }",
				"path": "/src/ml/score.ts",
			},
		},
	}
	err := engine.BulkCreateNodes(nodes)
	require.NoError(t, err)

	// Create search service
	svc := NewService(engine)

	// Index nodes for full-text search
	for _, node := range nodes {
		err := svc.IndexNode(node)
		require.NoError(t, err)
	}

	// Search without embedding (triggers full-text fallback)
	ctx := context.Background()
	opts := DefaultSearchOptions()
	opts.Limit = 10

	response, err := svc.Search(ctx, "machine learning", nil, opts)
	require.NoError(t, err)

	t.Logf("Full-text search results for 'machine learning':")
	for _, r := range response.Results {
		t.Logf(" %s: type=%s title=%s score=%.4f", r.ID, r.Type, r.Title, r.Score)
	}

	assert.Equal(t, "fulltext", response.SearchMethod)
	assert.True(t, response.FallbackTriggered)
	assert.NotEmpty(t, response.Results)
}

// TestSearchService_BuildIndexes tests building indexes from storage.
func TestSearchService_BuildIndexes(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create nodes with embeddings
	embedding := make([]float32, 1024)
	embedding[0] = 1.0 // Simple non-zero embedding

	nodes := []*storage.Node{
		{
			ID:        "node1",
			Labels:    []string{"Node"},
			Embedding: embedding,
			Properties: map[string]any{
				"title":   "Test Node 1",
				"content": "This is test content for searching",
			},
		},
		{
			ID:        "node2",
			Labels:    []string{"Node"},
			Embedding: embedding,
			Properties: map[string]any{
				"title": "Test Node 2",
				"text":  "Another searchable document",
			},
		},
	}
	err := engine.BulkCreateNodes(nodes)
	require.NoError(t, err)

	// Create service and build indexes
	svc := NewService(engine)
	err = svc.BuildIndexes(context.Background())
	require.NoError(t, err)

	// Verify indexes were built
	assert.Equal(t, 2, svc.vectorIndex.Count())
	assert.Equal(t, 2, svc.fulltextIndex.Count())
}

// TestSearchService_WithRealData tests search with exported Neo4j data.
func TestSearchService_WithRealData(t *testing.T) {
	// Path to exported data
	possiblePaths := []string{
		filepath.Join("..", "..", "..", "data", "nornicdb"),
		os.Getenv("MIMIR_DATA_DIR"),
		"/Users/c815719/src/Mimir/data/nornicdb",
	}

	var exportDir string
	for _, p := range possiblePaths {
		if p == "" {
			continue
		}
		if _, err := os.Stat(p); err == nil {
			exportDir = p
			break
		}
	}

	if exportDir == "" {
		t.Skip("Export directory not found - run export-neo4j-to-json.mjs first")
	}

	// Check if nodes.json exists
	nodesFile := filepath.Join(exportDir, "nodes.json")
	if _, err := os.Stat(nodesFile); os.IsNotExist(err) {
		t.Skipf("nodes.json not found in %s - run export-neo4j-to-json.mjs first", exportDir)
	}

	t.Logf("Using export directory: %s", exportDir)

	// Load data into memory engine
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	result, err := storage.LoadFromMimirExport(engine, exportDir)
	require.NoError(t, err)
	t.Logf("Loaded %d nodes, %d edges, %d embeddings", result.NodesImported, result.EdgesImported, result.EmbeddingsLoaded)

	// Create search service
	svc := NewService(engine)

	// Build indexes
	t.Log("Building search indexes...")
	startTime := time.Now()
	err = svc.BuildIndexes(context.Background())
	require.NoError(t, err)
	t.Logf("Index build time: %v", time.Since(startTime))
	t.Logf("Vector index: %d vectors", svc.vectorIndex.Count())
	t.Logf("Fulltext index: %d documents", svc.fulltextIndex.Count())

	// Test full-text search
	t.Log("\n=== Full-text search test ===")
	ctx := context.Background()
	opts := DefaultSearchOptions()
	opts.Limit = 5

	response, err := svc.fullTextSearchOnly(ctx, "docker configuration", opts)
	require.NoError(t, err)
	t.Logf("Search for 'docker configuration': %d results", len(response.Results))
	for _, r := range response.Results {
		t.Logf(" %s: type=%s title=%s score=%.4f", r.ID[:20]+"...", r.Type, r.Title, r.Score)
	}

	// Test type filtering
	t.Log("\n=== Type filtering test ===")
	opts.Types = []string{"todo"}
	response, err = svc.fullTextSearchOnly(ctx, "implementation", opts)
	require.NoError(t, err)
	t.Logf("Search for 'implementation' (type=todo): %d results", len(response.Results))
	for _, r := range response.Results {
		t.Logf(" %s: type=%s title=%s", r.ID[:20]+"...", r.Type, r.Title)
	}
}

// TestSearchService_RRFHybrid tests the full RRF hybrid search with real data.
func TestSearchService_RRFHybrid(t *testing.T) {
	// Path to exported data
	possiblePaths := []string{
		filepath.Join("..", "..", "..", "data", "nornicdb"),
		"/Users/c815719/src/Mimir/data/nornicdb",
	}

	var exportDir string
	for _, p := range possiblePaths {
		if _, err := os.Stat(p); err == nil {
			exportDir = p
			break
		}
	}

	if exportDir == "" {
		t.Skip("Export directory not found")
	}

	// Check if nodes.json exists
	nodesFile := filepath.Join(exportDir, "nodes.json")
	if _, err := os.Stat(nodesFile); os.IsNotExist(err) {
		t.Skipf("nodes.json not found in %s - run export-neo4j-to-json.mjs first", exportDir)
	}

	// Load data
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	result, err := storage.LoadFromMimirExport(engine, exportDir)
	require.NoError(t, err)
	require.True(t, result.EmbeddingsLoaded > 0, "Need embeddings for RRF test")

	// Create search service and build indexes
	svc := NewService(engine)
	err = svc.BuildIndexes(context.Background())
	require.NoError(t, err)

	// Get a real embedding to use as query
	nodes, _ := engine.AllNodes()
	var queryEmbedding []float32
	for _, node := range nodes {
		if len(node.Embedding) == 1024 {
			queryEmbedding = node.Embedding
			t.Logf("Using embedding from node: %s", node.ID)
			break
		}
	}
	require.NotNil(t, queryEmbedding, "Need an embedding for query")

	// Test RRF hybrid search
	t.Log("\n=== RRF Hybrid Search Test ===")
	ctx := context.Background()
	opts := DefaultSearchOptions()
	opts.Limit = 10

	response, err := svc.Search(ctx, "authentication login security", queryEmbedding, opts)
	require.NoError(t, err)

	t.Logf("RRF Search results: %d total, method=%s, fallback=%v", len(response.Results), response.SearchMethod, response.FallbackTriggered)
	for i, r := range response.Results {
		t.Logf(" %d. %s: type=%s rrf=%.4f vectorRank=%d bm25Rank=%d", i+1, truncateID(r.ID, 20), r.Type, r.RRFScore, r.VectorRank, r.BM25Rank)
	}

	if response.Metrics != nil {
		t.Logf("Metrics: vector=%d, bm25=%d, fused=%d", response.Metrics.VectorCandidates, response.Metrics.BM25Candidates, response.Metrics.FusedCandidates)
	}

	// Verify RRF is working
	assert.Equal(t, "rrf_hybrid", response.SearchMethod)
	assert.False(t, response.FallbackTriggered)
	assert.NotEmpty(t, response.Results)

	// Test adaptive config
	t.Log("\n=== Adaptive RRF Config Test ===")
	shortOpts := GetAdaptiveRRFConfig("docker")
	t.Logf("Short query 'docker': vectorWeight=%.1f, bm25Weight=%.1f", shortOpts.VectorWeight, shortOpts.BM25Weight)

	longOpts := GetAdaptiveRRFConfig("how do I configure authentication and login security for production")
	t.Logf("Long query: vectorWeight=%.1f, bm25Weight=%.1f", longOpts.VectorWeight, longOpts.BM25Weight)
}

func truncateID(id string, maxLen int) string {
	if len(id) <= maxLen {
		return id
	}
	return id[:maxLen] + "..."
}

// BenchmarkVectorSearch benchmarks vector similarity search.
func BenchmarkVectorSearch(b *testing.B) {
	idx := NewVectorIndex(1024)

	// Add 10K vectors
	for i := 0; i < 10000; i++ {
		vec := make([]float32, 1024)
		vec[i%1024] = 1.0
		idx.Add(string(rune('a'+i%26))+string(rune(i)), vec)
	}

	query := make([]float32, 1024)
	query[0] = 1.0

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		idx.Search(context.Background(), query, 10, 0.5)
	}
}

// BenchmarkBM25Search benchmarks BM25 full-text search.
func BenchmarkBM25Search(b *testing.B) {
	idx := NewFulltextIndex()

	// Index 10K documents
	texts := []string{
		"machine learning neural networks deep learning",
		"database systems query optimization indexing",
		"natural language processing transformers bert",
		"distributed systems microservices kubernetes",
		"frontend development react vue angular",
	}
	for i := 0; i < 10000; i++ {
		idx.Index(string(rune('a'+i%26))+string(rune(i)), texts[i%len(texts)])
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		idx.Search("machine learning", 10)
	}
}

// BenchmarkRRFFusion benchmarks RRF fusion algorithm.
func BenchmarkRRFFusion(b *testing.B) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create nodes
	for i := 0; i < 100; i++ {
		engine.CreateNode(&storage.Node{
			ID:         storage.NodeID(string(rune('a'+i%26)) + string(rune(i))),
			Labels:     []string{"Node"},
			Properties: map[string]any{"title": "Test"},
		})
	}

	svc := NewService(engine)

	// Create result sets
	vectorResults := make([]indexResult, 50)
	bm25Results := make([]indexResult, 50)
	for i := 0; i < 50; i++ {
		vectorResults[i] = indexResult{ID: string(rune('a'+i%26)) + string(rune(i)), Score: float64(50-i) / 50.0}
		bm25Results[i] = indexResult{ID: string(rune('a'+(i+10)%26)) + string(rune(i)), Score: float64(50 - i)}
	}

	opts := DefaultSearchOptions()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		svc.fuseRRF(vectorResults, bm25Results, opts)
	}
}

// ========================================
// Additional Tests for Coverage Improvement
// ========================================

// TestVectorIndex_Remove tests vector removal.
func TestVectorIndex_Remove(t *testing.T) {
	idx := NewVectorIndex(4)

	// Add vectors
	require.NoError(t, idx.Add("doc1", []float32{1, 0, 0, 0}))
	require.NoError(t, idx.Add("doc2", []float32{0, 1, 0, 0}))
	require.NoError(t, idx.Add("doc3", []float32{0, 0, 1, 0}))

	assert.Equal(t, 3, idx.Count())
	assert.True(t, idx.HasVector("doc1"))

	// Remove a vector
	idx.Remove("doc1")
	assert.Equal(t, 2, idx.Count())
	assert.False(t, idx.HasVector("doc1"))
	assert.True(t, idx.HasVector("doc2"))

	// Remove non-existent vector (should not panic)
	idx.Remove("nonexistent")
	assert.Equal(t, 2, idx.Count())
}

// TestVectorIndex_GetDimensions tests dimension getter.
func TestVectorIndex_GetDimensions(t *testing.T) {
	idx := NewVectorIndex(128)
	assert.Equal(t, 128, idx.GetDimensions())

	idx2 := NewVectorIndex(1024)
	assert.Equal(t, 1024, idx2.GetDimensions())
}

// TestVectorIndex_CosineSimilarity tests cosine similarity calculation via search.
func TestVectorIndex_CosineSimilarity(t *testing.T) {
	idx := NewVectorIndex(4)

	// Add reference vectors
	require.NoError(t, idx.Add("ref1", []float32{1, 0, 0, 0}))
	require.NoError(t, idx.Add("ref2", []float32{0, 1, 0, 0}))
	require.NoError(t, idx.Add("ref3", []float32{-1, 0, 0, 0}))
	require.NoError(t, idx.Add("ref4", []float32{1, 1, 0, 0}))

	// Search for identical vector
	results, err := idx.Search(context.Background(), []float32{1, 0, 0, 0}, 10, 0.0)
	require.NoError(t, err)

	// ref1 should have highest score (identical)
	if len(results) > 0 {
		assert.Equal(t, "ref1", results[0].ID)
		assert.InDelta(t, 1.0, results[0].Score, 0.01)
	}
}

// TestFulltextIndex_Remove tests document removal from fulltext index.
func TestFulltextIndex_Remove(t *testing.T) {
	idx := NewFulltextIndex()

	// Add documents
	idx.Index("doc1", "quick brown fox")
	idx.Index("doc2", "lazy brown dog")
	idx.Index("doc3", "quick lazy cat")
	assert.Equal(t, 3, idx.Count())

	// Remove a document
	idx.Remove("doc1")
	assert.Equal(t, 2, idx.Count())

	// Search should not return removed document
	results := idx.Search("quick", 10)
	for _, r := range results {
		assert.NotEqual(t, "doc1", r.ID)
	}

	// Remove non-existent document (should not panic)
	idx.Remove("nonexistent")
	assert.Equal(t, 2, idx.Count())
}

// TestFulltextIndex_GetDocument tests document retrieval.
func TestFulltextIndex_GetDocument(t *testing.T) {
	idx := NewFulltextIndex()

	// Add a document
	idx.Index("doc1", "quick brown fox")

	// Get the document
	content, exists := idx.GetDocument("doc1")
	assert.True(t, exists)
	assert.Equal(t, "quick brown fox", content)

	// Get non-existent document
	_, exists = idx.GetDocument("nonexistent")
	assert.False(t, exists)
}

// TestFulltextIndex_PhraseSearch tests phrase searching.
func TestFulltextIndex_PhraseSearch(t *testing.T) {
	idx := NewFulltextIndex()

	// Add documents
	idx.Index("doc1", "the quick brown fox jumps over the lazy dog")
	idx.Index("doc2", "brown fox is quick")
	idx.Index("doc3", "the lazy brown dog sleeps")

	// Search for exact phrase "quick brown"
	results := idx.PhraseSearch("quick brown", 10)

	// Should find doc1 (has exact phrase "quick brown")
	foundDoc1 := false
	for _, r := range results {
		if r.ID == "doc1" {
			foundDoc1 = true
		}
	}
	assert.True(t, foundDoc1, "Should find doc1 with exact phrase 'quick brown'")
}

// TestSearchService_RemoveNode tests node removal from search service.
func TestSearchService_RemoveNode(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	svc := NewService(engine)

	// Create and index nodes
	node1 := &storage.Node{
		ID:         "node1",
		Labels:     []string{"Person"},
		Properties: map[string]any{"name": "Alice", "embedding": []float32{1, 0, 0, 0}},
	}
	node2 := &storage.Node{
		ID:         "node2",
		Labels:     []string{"Person"},
		Properties: map[string]any{"name": "Bob", "embedding": []float32{0, 1, 0, 0}},
	}
	engine.CreateNode(node1)
	engine.CreateNode(node2)
	svc.IndexNode(node1)
	svc.IndexNode(node2)

	// Remove node1
	svc.RemoveNode("node1")

	// Search should not return removed node
	response, err := svc.Search(context.Background(), "Alice", nil, DefaultSearchOptions())
	require.NoError(t, err)
	for _, r := range response.Results {
		assert.NotEqual(t, "node1", r.ID)
	}
}

// TestSearchService_HybridSearch tests the hybrid RRF search.
func TestSearchService_HybridSearch(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	svc := NewService(engine)

	// Create nodes with both text and embeddings
	nodes := []*storage.Node{
		{
			ID:     "node1",
			Labels: []string{"Document"},
			Properties: map[string]any{
				"title":     "Machine Learning Basics",
				"content":   "Introduction to machine learning algorithms",
				"embedding": []float32{0.9, 0.1, 0, 0},
			},
		},
		{
			ID:     "node2",
			Labels: []string{"Document"},
			Properties: map[string]any{
				"title":     "Deep Learning Neural Networks",
				"content":   "Deep neural networks and deep learning",
				"embedding": []float32{0.8, 0.2, 0, 0},
			},
		},
		{
			ID:     "node3",
			Labels: []string{"Document"},
			Properties: map[string]any{
				"title":     "Data Science Overview",
				"content":   "Data science and analytics fundamentals",
				"embedding": []float32{0.1, 0.9, 0, 0},
			},
		},
	}
	for _, node := range nodes {
		engine.CreateNode(node)
		svc.IndexNode(node)
	}

	// Search with both text and query embedding
	opts := DefaultSearchOptions()
	queryEmbedding := []float32{0.85, 0.15, 0, 0}
	opts.VectorWeight = 0.6
	opts.BM25Weight = 0.4

	response, err := svc.Search(context.Background(), "machine learning", queryEmbedding, opts)
	require.NoError(t, err)

	// Should return results ordered by hybrid score
	assert.Greater(t, len(response.Results), 0)
}

// TestSearchService_VectorSearchOnly tests vector-only search mode.
func TestSearchService_VectorSearchOnly(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create a service with 4-dimensional vector index
	svc := &Service{
		engine:        engine,
		vectorIndex:   NewVectorIndex(4), // Use 4 dimensions for test vectors
		fulltextIndex: NewFulltextIndex(),
	}

	// Create nodes with embeddings in the Embedding field
	nodes := []*storage.Node{
		{
			ID:        "vec1",
			Labels:    []string{"Vector"},
			Embedding: []float32{1, 0, 0, 0},
		},
		{
			ID:        "vec2",
			Labels:    []string{"Vector"},
			Embedding: []float32{0.9, 0.1, 0, 0},
		},
		{
			ID:        "vec3",
			Labels:    []string{"Vector"},
			Embedding: []float32{0, 1, 0, 0},
		},
	}
	for _, node := range nodes {
		engine.CreateNode(node)
		err := svc.IndexNode(node)
		require.NoError(t, err)
	}

	// Search with only query embedding (no text query)
	opts := DefaultSearchOptions()
	opts.MinSimilarity = 0.5
	queryEmbedding := []float32{1, 0, 0, 0}

	response, err := svc.Search(context.Background(), "", queryEmbedding, opts)
	require.NoError(t, err)

	// Should return vec1 and vec2 (above threshold), not vec3
	assert.GreaterOrEqual(t, len(response.Results), 1)
	for _, r := range response.Results {
		assert.NotEqual(t, "vec3", r.ID, "vec3 should be below similarity threshold")
	}
}

// TestSearchService_FilterByType tests type filtering.
func TestSearchService_FilterByType(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	svc := NewService(engine)

	// Create nodes with different labels
	nodes := []*storage.Node{
		{ID: "person1", Labels: []string{"Person"}, Properties: map[string]any{"name": "Alice"}},
		{ID: "person2", Labels: []string{"Person"}, Properties: map[string]any{"name": "Bob"}},
		{ID: "doc1", Labels: []string{"Document"}, Properties: map[string]any{"name": "Alice's Doc"}},
	}
	for _, node := range nodes {
		engine.CreateNode(node)
		svc.IndexNode(node)
	}

	// Search with type filter using opts.Types
	opts := DefaultSearchOptions()
	opts.Types = []string{"Person"}

	response, err := svc.Search(context.Background(), "alice", nil, opts)
	require.NoError(t, err)

	// Should only return Person nodes
	for _, r := range response.Results {
		assert.Contains(t, r.Labels, "Person")
		assert.NotContains(t, r.Labels, "Document")
	}
}

// TestSearchService_EnrichResults tests result enrichment.
func TestSearchService_EnrichResults(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create a node with full properties
	node := &storage.Node{
		ID:     "enriched1",
		Labels: []string{"Person", "Employee"},
		Properties: map[string]any{
			"name":      "Alice",
			"age":       30,
			"email":     "alice@example.com",
			"embedding": []float32{1, 0, 0, 0},
		},
	}
	engine.CreateNode(node)

	svc := NewService(engine)
	svc.IndexNode(node)

	// Search and verify enriched results
	opts := DefaultSearchOptions()
	response, err := svc.Search(context.Background(), "Alice", nil, opts)
	require.NoError(t, err)

	assert.Greater(t, len(response.Results), 0)
	if len(response.Results) > 0 {
		// Check that result has enriched properties
		r := response.Results[0]
		assert.Equal(t, "enriched1", r.ID)
		assert.Contains(t, r.Labels, "Person")
		assert.Contains(t, r.Labels, "Employee")

		// Properties should be included
		if r.Properties != nil {
			assert.Equal(t, "Alice", r.Properties["name"])
		}
	}
}

// TestSqrt tests the math.Sqrt standard library function.
// (Originally tested custom sqrt, now consolidated to use math.Sqrt)
func TestSqrt(t *testing.T) {
	tests := []struct {
		input    float64
		expected float64
	}{
		{0, 0},
		{1, 1},
		{4, 2},
		{9, 3},
		{16, 4},
		{2, 1.414},
	}

	for _, tt := range tests {
		result := math.Sqrt(tt.input)
		assert.InDelta(t, tt.expected, result, 0.01)
	}
}

// TestTruncate tests the truncate helper function.
func TestTruncate(t *testing.T) {
	tests := []struct {
		input    string
		maxLen   int
		expected string
	}{
		{"short", 10, "short"},
		{"longer string", 8, "longe..."}, // 8-3=5 chars + "..."
		{"exact", 5, "exact"},
		{"", 5, ""},
		{"test", 0, ""},               // Edge case: 0 maxLen
		{"test", 3, "tes"},            // Edge case: maxLen <= 3, no ellipsis
		{"test", 2, "te"},             // Edge case: maxLen <= 3, no ellipsis
		{"hello world", 7, "hell..."}, // Normal case: 7-3=4 chars + "..."
	}

	for _, tt := range tests {
		result := truncate(tt.input, tt.maxLen)
		assert.Equal(t, tt.expected, result, "truncate(%q, %d)", tt.input, tt.maxLen)
	}
}

// TestSearchService_BuildIndexesFromStorage tests index building from storage.
func TestSearchService_BuildIndexesFromStorage(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	// Create nodes before service
	nodes := []*storage.Node{
		{
			ID:     "preexisting1",
			Labels: []string{"Doc"},
			Properties: map[string]any{
				"content":   "preexisting document content",
				"embedding": []float32{1, 0, 0, 0},
			},
		},
		{
			ID:     "preexisting2",
			Labels: []string{"Doc"},
			Properties: map[string]any{
				"content": "another preexisting document",
			},
		},
	}
	for _, node := range nodes {
		engine.CreateNode(node)
	}

	// Create service and build indexes
	svc := NewService(engine)
	err := svc.BuildIndexes(context.Background())
	assert.NoError(t, err)

	// Verify search works
	response, err := svc.Search(context.Background(), "preexisting", nil, DefaultSearchOptions())
	require.NoError(t, err)
	assert.Greater(t, len(response.Results), 0)
}

// TestGetAdaptiveRRFConfig tests RRF configuration.
func TestGetAdaptiveRRFConfig(t *testing.T) {
	// Test the package-level function
	tests := []struct {
		name  string
		query string
	}{
		{
			name:  "short query",
			query: "test",
		},
		{
			name:  "longer query",
			query: "machine learning concepts and algorithms",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			config := GetAdaptiveRRFConfig(tt.query)
			// Should return valid config
			assert.NotNil(t, config)
			assert.Greater(t, config.Limit, 0)
		})
	}
}

// TestSearchService_EmptyQuery tests behavior with empty query.
func TestSearchService_EmptyQuery(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	svc := NewService(engine)

	// Search with empty query and no embedding
	response, err := svc.Search(context.Background(), "", nil, DefaultSearchOptions())
	require.NoError(t, err)
	assert.Equal(t, 0, len(response.Results))
}

// TestSearchService_SpecialCharacters tests search with special characters.
func TestSearchService_SpecialCharacters(t *testing.T) {
	engine := storage.NewMemoryEngine()
	defer engine.Close()

	svc := NewService(engine)

	// Create node with special characters
	node := &storage.Node{
		ID:         "special1",
		Labels:     []string{"Doc"},
		Properties: map[string]any{"content": "C++ programming & Java!"},
	}
	engine.CreateNode(node)
	svc.IndexNode(node)

	// Search should handle special chars without panicking
	response, err := svc.Search(context.Background(), "C++", nil, DefaultSearchOptions())
	// Should not panic, even if results vary
	assert.NoError(t, err)
	assert.NotNil(t, response)
}

// ========================================
// Tests for 0% coverage functions
// ========================================

func TestVectorSearchOnlyDirect(t *testing.T) {
	ctx := context.Background()

	t.Run("basic_vector_search", func(t *testing.T) {
		store := storage.NewMemoryEngine()
		defer store.Close()

		// Create nodes with embeddings (float32)
		embedding := make([]float32, 1024)
		for i := range embedding {
			embedding[i] = float32(i) / 1024.0
		}

		store.CreateNode(&storage.Node{
			ID:        "node-1",
			Labels:    []string{"Document"},
			Embedding: embedding,
			Properties: map[string]interface{}{
				"title":   "Test Doc",
				"content": "Test content",
			},
		})

		service := NewService(store)

		// Index node
		node, _ := store.GetNode("node-1")
		service.IndexNode(node)

		// Create query embedding (float32 for search)
		queryEmb := make([]float32, 1024)
		for i := range queryEmb {
			queryEmb[i] = float32(i) / 1024.0
		}

		opts := &SearchOptions{
			Limit: 10,
		}

		response, err := service.vectorSearchOnly(ctx, queryEmb, opts)
		if err != nil {
			t.Logf("vectorSearchOnly error (may be expected): %v", err)
		}
		if response != nil {
			t.Logf("Vector search returned %d results", len(response.Results))
		}
	})

	t.Run("empty_embedding", func(t *testing.T) {
		store := storage.NewMemoryEngine()
		defer store.Close()

		service := NewService(store)

		opts := &SearchOptions{Limit: 10}
		response, err := service.vectorSearchOnly(ctx, nil, opts)
		// Expect error or empty results for nil embedding
		if err == nil && response != nil && len(response.Results) != 0 {
			t.Error("Expected empty results for nil embedding")
		}
	})
}

func TestBuildIndexesDirect(t *testing.T) {
	ctx := context.Background()

	t.Run("build_on_empty_storage", func(t *testing.T) {
		store := storage.NewMemoryEngine()
		defer store.Close()

		service := NewService(store)
		err := service.BuildIndexes(ctx)
		if err != nil {
			t.Errorf("BuildIndexes on empty storage failed: %v", err)
		}
	})

	t.Run("build_with_nodes", func(t *testing.T) {
		store := storage.NewMemoryEngine()
		defer store.Close()

		// Create nodes with embeddings (float32)
		embedding := make([]float32, 1024)
		for i := range embedding {
			embedding[i] = 0.5
		}

		store.CreateNode(&storage.Node{
			ID:        "doc-1",
			Labels:    []string{"Document"},
			Embedding: embedding,
			Properties: map[string]interface{}{
				"content": "First document content",
			},
		})
		store.CreateNode(&storage.Node{
			ID:     "doc-2",
			Labels: []string{"Document"},
			Properties: map[string]interface{}{
				"content": "Second document without embedding",
			},
		})

		service := NewService(store)
		err := service.BuildIndexes(ctx)
		if err != nil {
			t.Errorf("BuildIndexes failed: %v", err)
		}
	})
}

func TestVectorIndexCosineSimilarityIndirect(t *testing.T) {
	// Test cosine similarity through the VectorIndex search
	idx := NewVectorIndex(4)

	// Add a vector (float32)
	vec1 := []float32{1.0, 0.0, 0.0, 0.0}
	idx.Add("node-1", vec1)

	// Search with similar vector
	ctx := context.Background()
	queryVec := []float32{1.0, 0.0, 0.0, 0.0}
	results, err := idx.Search(ctx, queryVec, 1, 0.0)
	if err != nil {
		t.Fatalf("Search failed: %v", err)
	}

	if len(results) == 0 {
		t.Error("Expected at least one result")
	} else {
		// Should have high similarity for identical vectors
		if results[0].Score < 0.99 {
			t.Errorf("Expected high similarity for identical vectors, got %f", results[0].Score)
		}
	}
}

func TestSearchServiceSearchDirect(t *testing.T) {
	ctx := context.Background()

	t.Run("search_with_embedding", func(t *testing.T) {
		store := storage.NewMemoryEngine()
		defer store.Close()

		embedding := make([]float32, 1024)
		for i := range embedding {
			embedding[i] = 0.5
		}

		store.CreateNode(&storage.Node{
			ID:        "doc-1",
			Labels:    []string{"Document"},
			Embedding: embedding,
			Properties: map[string]interface{}{
				"content": "Machine learning tutorial",
			},
		})

		service := NewService(store)

		// Index the node
		node, _ := store.GetNode("doc-1")
		service.IndexNode(node)

		queryEmb := make([]float32, 1024)
		for i := range queryEmb {
			queryEmb[i] = 0.5
		}

		opts := &SearchOptions{
			Limit: 10,
		}

		response, err := service.Search(ctx, "machine learning", queryEmb, opts)
		if err != nil {
			t.Fatalf("Search failed: %v", err)
		}
		t.Logf("Search returned %d results", len(response.Results))
	})
}

func TestFulltextSearchWithPrefix(t *testing.T) {
	idx := NewFulltextIndex()

	// Index documents
	idx.Index("doc-1", "machine learning algorithms")
	idx.Index("doc-2", "deep learning models")
	idx.Index("doc-3", "natural language processing")

	// Search with prefix
	results := idx.Search("mach", 10)
	// Should match "machine"
	t.Logf("Prefix search for 'mach' returned %d results", len(results))

	// Full term search
	results = idx.Search("learning", 10)
	if len(results) < 2 {
		t.Errorf("Expected at least 2 results for 'learning', got %d", len(results))
	}
}

func TestRRFHybridSearchDirect(t *testing.T) {
	ctx := context.Background()

	t.Run("rrf_with_both_results", func(t *testing.T) {
		store := storage.NewMemoryEngine()
		defer store.Close()

		embedding := make([]float32, 1024)
		for i := range embedding {
			embedding[i] = 0.5
		}

		store.CreateNode(&storage.Node{
			ID:        "doc-1",
			Labels:    []string{"Document"},
			Embedding: embedding,
			Properties: map[string]interface{}{
				"content": "Machine learning tutorial content",
			},
		})

		service := NewService(store)
		node, _ := store.GetNode("doc-1")
		service.IndexNode(node)

		queryEmb := make([]float32, 1024)
		for i := range queryEmb {
			queryEmb[i] = 0.5
		}

		opts := &SearchOptions{
			Limit:        10,
			RRFK:         60,
			VectorWeight: 0.6,
			BM25Weight:   0.4,
		}

		response, err := service.rrfHybridSearch(ctx, "machine learning", queryEmb, opts)
		if err != nil {
			t.Fatalf("rrfHybridSearch failed: %v", err)
		}
		t.Logf("RRF search returned %d results", len(response.Results))
	})
}

// =============================================================================
// MMR Diversification Tests
// =============================================================================

func TestMMRDiversification(t *testing.T) {
	store := storage.NewMemoryEngine()
	service := NewService(store)

	// Create nodes with embeddings - some similar, some diverse
	createNodeWithEmbedding := func(id string, labels []string, embedding []float32, props map[string]interface{}) {
		node := &storage.Node{
			ID:         storage.NodeID(id),
			Labels:     labels,
			Properties: props,
			Embedding:  embedding,
		}
		store.CreateNode(node)
		service.IndexNode(node)
	}

	t.Run("mmr_promotes_diversity", func(t *testing.T) {
		// Create 3 documents: 2 nearly identical, 1 diverse
		// Embeddings are 4-dimensional for simplicity
		createNodeWithEmbedding("similar1", []string{"Doc"}, []float32{0.9, 0.1, 0.0, 0.0}, map[string]interface{}{
			"title":   "Machine Learning Basics",
			"content": "Introduction to ML algorithms",
		})
		createNodeWithEmbedding("similar2", []string{"Doc"}, []float32{0.89, 0.11, 0.0, 0.0}, map[string]interface{}{
			"title":   "ML Fundamentals",
			"content": "Basic machine learning concepts",
		})
		createNodeWithEmbedding("diverse1", []string{"Doc"}, []float32{0.1, 0.1, 0.8, 0.0}, map[string]interface{}{
			"title":   "Database Design",
			"content": "SQL and NoSQL databases",
		})

		// Build RRF results with EQUAL scores to isolate diversity effect
		rrfResults := []rrfResult{
			{ID: "similar1", RRFScore: 0.05, VectorRank: 1, BM25Rank: 1},
			{ID: "similar2", RRFScore: 0.05, VectorRank: 2, BM25Rank: 2}, // Equal score
			{ID: "diverse1", RRFScore: 0.05, VectorRank: 3, BM25Rank: 3}, // Equal score
		}

		// Query embedding similar to "similar1"
		queryEmb := []float32{0.9, 0.1, 0.0, 0.0}

		// Without MMR: results should be in original order
		resultsNoMMR := service.applyMMR(rrfResults, queryEmb, 3, 1.0) // lambda=1.0 = no diversity
		assert.Len(t, resultsNoMMR, 3, "Should return all results")
		t.Logf("Without MMR (lambda=1.0): %v", []string{resultsNoMMR[0].ID, resultsNoMMR[1].ID, resultsNoMMR[2].ID})

		// With MMR (lambda=0.3): strong diversity preference
		resultsWithMMR := service.applyMMR(rrfResults, queryEmb, 3, 0.3) // lambda=0.3 = 70% diversity
		assert.Len(t, resultsWithMMR, 3, "Should return all results")
		t.Logf("With MMR (lambda=0.3): %v", []string{resultsWithMMR[0].ID, resultsWithMMR[1].ID, resultsWithMMR[2].ID})

		// Verify MMR algorithm completes successfully
		// Note: exact order depends on embeddings being retrieved from storage
	})

	t.Run("mmr_lambda_1_equals_no_diversity", func(t *testing.T) {
		rrfResults := []rrfResult{
			{ID: "doc1", RRFScore: 0.1},
			{ID: "doc2", RRFScore: 0.09},
			{ID: "doc3", RRFScore: 0.08},
		}

		// Lambda=1.0 should return results in original order (pure relevance)
		results := service.applyMMR(rrfResults, []float32{1, 0, 0, 0}, 3, 1.0)
		assert.Equal(t, "doc1", results[0].ID)
		assert.Equal(t, "doc2", results[1].ID)
		assert.Equal(t, "doc3", results[2].ID)
	})

	t.Run("mmr_handles_empty_results", func(t *testing.T) {
		results := service.applyMMR([]rrfResult{}, []float32{1, 0, 0, 0}, 10, 0.7)
		assert.Empty(t, results)
	})

	t.Run("mmr_handles_single_result", func(t *testing.T) {
		rrfResults := []rrfResult{{ID: "only", RRFScore: 0.1}}
		results := service.applyMMR(rrfResults, []float32{1, 0, 0, 0}, 10, 0.7)
		assert.Len(t, results, 1)
		assert.Equal(t, "only", results[0].ID)
	})
}

func TestSearchWithMMROption(t *testing.T) {
	store := storage.NewMemoryEngine()
	service := NewService(store)
	ctx := context.Background()

	// Create nodes with embeddings
	for i := 0; i < 5; i++ {
		node := &storage.Node{
			ID:     storage.NodeID(fmt.Sprintf("doc%d", i)),
			Labels: []string{"Document"},
			Properties: map[string]interface{}{
				"title":   fmt.Sprintf("Document %d about AI", i),
				"content": "This is content about artificial intelligence and machine learning",
			},
			Embedding: make([]float32, 1024),
		}
		// Slightly different embeddings
		for j := range node.Embedding {
			node.Embedding[j] = float32(i)*0.01 + float32(j)*0.001
		}
		store.CreateNode(node)
		service.IndexNode(node)
	}

	t.Run("search_with_mmr_enabled", func(t *testing.T) {
		queryEmb := make([]float32, 1024)
		for i := range queryEmb {
			queryEmb[i] = 0.5
		}

		opts := &SearchOptions{
			Limit:      5,
			MMREnabled: true,
			MMRLambda:  0.7,
		}

		response, err := service.Search(ctx, "AI machine learning", queryEmb, opts)
		require.NoError(t, err)
		assert.Contains(t, response.SearchMethod, "mmr", "Search method should indicate MMR")
		assert.NotEmpty(t, response.Results)
		t.Logf("MMR search returned %d results with method: %s", len(response.Results), response.SearchMethod)
	})

	t.Run("search_without_mmr", func(t *testing.T) {
		queryEmb := make([]float32, 1024)
		for i := range queryEmb {
			queryEmb[i] = 0.5
		}

		opts := &SearchOptions{
			Limit:      5,
			MMREnabled: false,
		}

		response, err := service.Search(ctx, "AI machine learning", queryEmb, opts)
		require.NoError(t, err)
		assert.NotContains(t, response.SearchMethod, "mmr", "Search method should not mention MMR")
	})
}
