M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

main.go•13 KiB

// Command eval runs the search quality evaluation harness against NornicDB. // // Usage: // // go run ./cmd/eval [flags] // // Flags: // // -url NornicDB server URL (default: http://localhost:7474) // -suite Path to test suite JSON file // -output Output format: summary, detailed, json, compact (default: summary) // -save Save results to JSON file // -threshold Override pass/fail thresholds (format: p10=0.5,mrr=0.5,hit=0.8) // // This command evaluates search quality by: // 1. Connecting to a running NornicDB server // 2. Running test queries against the search API // 3. Computing IR metrics (Precision, Recall, MRR, NDCG) // 4. Reporting pass/fail based on thresholds package main import ( "bytes" "context" "encoding/json" "flag" "fmt" "net/http" "os" "strings" "time" "github.com/orneryd/nornicdb/pkg/eval" ) func main() { // Parse flags url := flag.String("url", "http://localhost:7474", "NornicDB server URL") suitePath := flag.String("suite", "", "Path to test suite JSON file") output := flag.String("output", "summary", "Output format: summary, detailed, json, compact") savePath := flag.String("save", "", "Save results to JSON file") thresholds := flag.String("threshold", "", "Override thresholds (p10=0.5,mrr=0.5,hit=0.8)") createSample := flag.Bool("create-sample", false, "Create sample test data in the database") flag.Parse() // Check server health fmt.Printf("🔍 Connecting to NornicDB at %s...\n", *url) if err := checkHealth(*url); err != nil { fmt.Fprintf(os.Stderr, "❌ Server not reachable: %v\n", err) os.Exit(1) } fmt.Println("✅ Server is healthy") // Create sample data if requested if *createSample { fmt.Println("📝 Creating sample test data...") if err := createSampleData(*url); err != nil { fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to create sample data: %v\n", err) } } // Create HTTP-based search adapter searcher := &HTTPSearcher{url: *url} // Create harness with HTTP searcher harness := NewHTTPHarness(searcher) // Load test suite or use built-in tests if 
*suitePath != "" { fmt.Printf("📂 Loading test suite from %s...\n", *suitePath) if err := harness.LoadSuite(*suitePath); err != nil { fmt.Fprintf(os.Stderr, "❌ Failed to load suite: %v\n", err) os.Exit(1) } } else { // Add built-in demo tests fmt.Println("📝 Using built-in demo test cases...") addDemoTestCases(harness) } // Override thresholds if specified if *thresholds != "" { t := parseThresholds(*thresholds) harness.SetThresholds(t) } // Run evaluation fmt.Println("\n🚀 Running evaluation...") ctx := context.Background() result, err := harness.Run(ctx) if err != nil { fmt.Fprintf(os.Stderr, "❌ Evaluation failed: %v\n", err) os.Exit(1) } // Output results reporter := eval.NewReporter(os.Stdout) switch *output { case "summary": reporter.PrintSummary(result) case "detailed": reporter.PrintSummary(result) reporter.PrintDetails(result) case "json": reporter.PrintJSON(result) case "compact": reporter.PrintCompact(result) default: reporter.PrintSummary(result) } // Save results if requested if *savePath != "" { if err := reporter.SaveJSON(result, *savePath); err != nil { fmt.Fprintf(os.Stderr, "⚠️ Failed to save results: %v\n", err) } else { fmt.Printf("💾 Results saved to %s\n", *savePath) } } // Exit with appropriate code if result.FailedTests > 0 { os.Exit(1) } } // HTTPSearcher performs searches via HTTP API type HTTPSearcher struct { url string client *http.Client } // Search performs a search query via HTTP func (s *HTTPSearcher) Search(ctx context.Context, query string) ([]string, error) { if s.client == nil { s.client = &http.Client{Timeout: 30 * time.Second} } // Call NornicDB search API reqBody := map[string]interface{}{ "query": query, "limit": 50, } body, _ := json.Marshal(reqBody) req, err := http.NewRequestWithContext(ctx, "POST", s.url+"/nornicdb/search", bytes.NewReader(body)) if err != nil { return nil, err } req.Header.Set("Content-Type", "application/json") resp, err := s.client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() // 
Response is an array: [{"node":{"id":"..."},"score":...}, ...] var results []struct { Node struct { ID string `json:"id"` } `json:"node"` Score float64 `json:"score"` } if err := json.NewDecoder(resp.Body).Decode(&results); err != nil { return nil, err } ids := make([]string, len(results)) for i, r := range results { ids[i] = r.Node.ID } return ids, nil } // HTTPHarness wraps eval.Harness with HTTP-based search type HTTPHarness struct { searcher *HTTPSearcher testCases []eval.TestCase thresholds eval.Thresholds } func NewHTTPHarness(searcher *HTTPSearcher) *HTTPHarness { return &HTTPHarness{ searcher: searcher, testCases: make([]eval.TestCase, 0), thresholds: eval.DefaultThresholds(), } } func (h *HTTPHarness) AddTestCase(tc eval.TestCase) { h.testCases = append(h.testCases, tc) } func (h *HTTPHarness) AddTestCases(cases []eval.TestCase) { h.testCases = append(h.testCases, cases...) } func (h *HTTPHarness) LoadSuite(path string) error { data, err := os.ReadFile(path) if err != nil { return err } var suite eval.TestSuite if err := json.Unmarshal(data, &suite); err != nil { return err } h.testCases = append(h.testCases, suite.TestCases...) 
return nil } func (h *HTTPHarness) SetThresholds(t eval.Thresholds) { h.thresholds = t } func (h *HTTPHarness) Run(ctx context.Context) (*eval.EvalResult, error) { if len(h.testCases) == 0 { return nil, fmt.Errorf("no test cases defined") } startTime := time.Now() results := make([]eval.TestResult, 0, len(h.testCases)) for _, tc := range h.testCases { result := h.runTestCase(ctx, tc) results = append(results, result) } aggregate := computeAggregate(results) passed, failed := countPassFail(results, h.thresholds) return &eval.EvalResult{ SuiteName: "http-eval", Timestamp: startTime, Duration: time.Since(startTime), Aggregate: aggregate, Results: results, TotalTests: len(results), PassedTests: passed, FailedTests: failed, Thresholds: h.thresholds, }, nil } func (h *HTTPHarness) runTestCase(ctx context.Context, tc eval.TestCase) eval.TestResult { start := time.Now() returned, err := h.searcher.Search(ctx, tc.Query) if err != nil { return eval.TestResult{ TestCase: tc, Error: err.Error(), Duration: time.Since(start), } } metrics := computeMetrics(tc, returned) return eval.TestResult{ TestCase: tc, Metrics: metrics, Returned: returned, Duration: time.Since(start), SearchMethod: "http", } } // Metric computation functions func computeMetrics(tc eval.TestCase, returned []string) eval.Metrics { expected := make(map[string]bool) for _, id := range tc.Expected { expected[id] = true } grades := tc.RelevanceGrades if grades == nil { grades = make(map[string]int) for _, id := range tc.Expected { grades[id] = 1 } } return eval.Metrics{ Precision1: precision(returned, expected, 1), Precision5: precision(returned, expected, 5), Precision10: precision(returned, expected, 10), Recall5: recall(returned, expected, 5), Recall10: recall(returned, expected, 10), Recall50: recall(returned, expected, 50), MRR: mrr(returned, expected), NDCG5: ndcg(returned, grades, 5), NDCG10: ndcg(returned, grades, 10), MAP: averagePrecision(returned, expected), HitRate: hitRate(returned, expected), } } 
func precision(returned []string, expected map[string]bool, k int) float64 { if k <= 0 || len(returned) == 0 { return 0.0 } limit := min(k, len(returned)) relevant := 0 for i := 0; i < limit; i++ { if expected[returned[i]] { relevant++ } } return float64(relevant) / float64(k) } func recall(returned []string, expected map[string]bool, k int) float64 { if len(expected) == 0 { return 0.0 } limit := min(k, len(returned)) relevant := 0 for i := 0; i < limit; i++ { if expected[returned[i]] { relevant++ } } return float64(relevant) / float64(len(expected)) } func mrr(returned []string, expected map[string]bool) float64 { for i, id := range returned { if expected[id] { return 1.0 / float64(i+1) } } return 0.0 } func ndcg(returned []string, grades map[string]int, k int) float64 { // Simplified NDCG if len(grades) == 0 { return 0.0 } limit := min(k, len(returned)) dcg := 0.0 for i := 0; i < limit; i++ { grade := float64(grades[returned[i]]) dcg += grade / (1.0 + float64(i)) } // Ideal DCG (perfect ranking) idcg := 0.0 for i := 0; i < min(k, len(grades)); i++ { idcg += 1.0 / (1.0 + float64(i)) } if idcg == 0 { return 0.0 } return dcg / idcg } func averagePrecision(returned []string, expected map[string]bool) float64 { if len(expected) == 0 { return 0.0 } sum := 0.0 relevant := 0 for i, id := range returned { if expected[id] { relevant++ sum += float64(relevant) / float64(i+1) } } return sum / float64(len(expected)) } func hitRate(returned []string, expected map[string]bool) float64 { for _, id := range returned { if expected[id] { return 1.0 } } return 0.0 } func computeAggregate(results []eval.TestResult) eval.Metrics { var agg eval.Metrics n := 0 for _, r := range results { if r.Error != "" { continue } n++ agg.Precision1 += r.Metrics.Precision1 agg.Precision5 += r.Metrics.Precision5 agg.Precision10 += r.Metrics.Precision10 agg.Recall5 += r.Metrics.Recall5 agg.Recall10 += r.Metrics.Recall10 agg.Recall50 += r.Metrics.Recall50 agg.MRR += r.Metrics.MRR agg.NDCG5 += 
r.Metrics.NDCG5 agg.NDCG10 += r.Metrics.NDCG10 agg.MAP += r.Metrics.MAP agg.HitRate += r.Metrics.HitRate } if n > 0 { fn := float64(n) agg.Precision1 /= fn agg.Precision5 /= fn agg.Precision10 /= fn agg.Recall5 /= fn agg.Recall10 /= fn agg.Recall50 /= fn agg.MRR /= fn agg.NDCG5 /= fn agg.NDCG10 /= fn agg.MAP /= fn agg.HitRate /= fn } return agg } func countPassFail(results []eval.TestResult, t eval.Thresholds) (passed, failed int) { for _, r := range results { if r.Error != "" || r.Metrics.HitRate < t.HitRate { failed++ } else { passed++ } } return } func min(a, b int) int { if a < b { return a } return b } func checkHealth(url string) error { client := &http.Client{Timeout: 5 * time.Second} resp, err := client.Get(url + "/health") if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return fmt.Errorf("unhealthy: status %d", resp.StatusCode) } return nil } func parseThresholds(s string) eval.Thresholds { t := eval.DefaultThresholds() for _, pair := range strings.Split(s, ",") { parts := strings.SplitN(pair, "=", 2) if len(parts) != 2 { continue } var val float64 fmt.Sscanf(parts[1], "%f", &val) switch parts[0] { case "p10", "precision10": t.Precision10 = val case "mrr": t.MRR = val case "hit", "hitrate": t.HitRate = val } } return t } func createSampleData(url string) error { client := &http.Client{Timeout: 30 * time.Second} // Sample nodes to create cypher := ` CREATE (n1:Memory:Concept {id: 'ml-intro', title: 'Introduction to Machine Learning', content: 'Machine learning is AI that learns from data'}) CREATE (n2:Memory:Concept {id: 'ml-neural', title: 'Neural Networks', content: 'Neural networks are inspired by biological brains'}) CREATE (n3:Memory:Decision {id: 'db-design', title: 'Database Decision', content: 'We chose PostgreSQL for relational and NornicDB for graphs'}) CREATE (n4:Memory:Code {id: 'code-auth', title: 'Auth Middleware', content: 'JWT authentication middleware for API'}) CREATE (n5:Task {id: 'task-api', 
title: 'REST API', content: 'Implement user management endpoints', status: 'pending'}) ` reqBody := map[string]interface{}{ "statements": []map[string]interface{}{ {"statement": cypher}, }, } body, _ := json.Marshal(reqBody) resp, err := client.Post(url+"/db/neo4j/tx/commit", "application/json", bytes.NewReader(body)) if err != nil { return err } defer resp.Body.Close() return nil } func addDemoTestCases(harness *HTTPHarness) { harness.AddTestCases([]eval.TestCase{ { Name: "ML Concept Search", Query: "machine learning neural networks", Expected: []string{"ml-intro", "ml-neural"}, Tags: []string{"concepts", "ml"}, }, { Name: "Database Decision", Query: "database architecture postgresql", Expected: []string{"db-design"}, Tags: []string{"decisions"}, }, { Name: "Code Search", Query: "authentication JWT middleware", Expected: []string{"code-auth"}, Tags: []string{"code"}, }, { Name: "Task Search", Query: "API implementation pending", Expected: []string{"task-api"}, Tags: []string{"tasks"}, }, { Name: "Semantic - AI Systems", Query: "artificial intelligence systems that learn", Expected: []string{"ml-intro", "ml-neural"}, RelevanceGrades: map[string]int{ "ml-intro": 3, "ml-neural": 2, }, Tags: []string{"semantic"}, }, }) }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

main.go•13 KiB