Skip to main content
Glama
orneryd

M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

by orneryd
reporter.go (7.41 kB)
package eval import ( "encoding/json" "fmt" "io" "os" "strings" "time" ) // Reporter formats and outputs evaluation results. type Reporter struct { writer io.Writer } // NewReporter creates a new reporter that writes to the given writer. func NewReporter(w io.Writer) *Reporter { if w == nil { w = os.Stdout } return &Reporter{writer: w} } // PrintSummary prints a human-readable summary of results. func (r *Reporter) PrintSummary(result *EvalResult) { w := r.writer fmt.Fprintln(w) fmt.Fprintln(w, "╔════════════════════════════════════════════════════════════════╗") fmt.Fprintln(w, "║ NornicDB Search Evaluation Results ║") fmt.Fprintln(w, "╚════════════════════════════════════════════════════════════════╝") fmt.Fprintln(w) // Summary fmt.Fprintf(w, "📊 Suite: %s\n", result.SuiteName) fmt.Fprintf(w, "📅 Time: %s\n", result.Timestamp.Format(time.RFC3339)) fmt.Fprintf(w, "⏱️ Duration: %v\n", result.Duration.Round(time.Millisecond)) fmt.Fprintln(w) // Pass/Fail summary passRate := float64(result.PassedTests) / float64(result.TotalTests) * 100 statusIcon := "✅" if result.FailedTests > 0 { statusIcon = "⚠️" } if passRate < 50 { statusIcon = "❌" } fmt.Fprintf(w, "%s Tests: %d/%d passed (%.1f%%)\n", statusIcon, result.PassedTests, result.TotalTests, passRate) fmt.Fprintln(w) // Aggregate metrics fmt.Fprintln(w, "┌─────────────────────────────────────────────────────────────────┐") fmt.Fprintln(w, "│ Aggregate Metrics │") fmt.Fprintln(w, "├─────────────────────────────────────────────────────────────────┤") r.printMetricRow(w, "Precision@1", result.Aggregate.Precision1, -1) r.printMetricRow(w, "Precision@5", result.Aggregate.Precision5, -1) r.printMetricRow(w, "Precision@10", result.Aggregate.Precision10, result.Thresholds.Precision10) fmt.Fprintln(w, "├─────────────────────────────────────────────────────────────────┤") r.printMetricRow(w, "Recall@5", result.Aggregate.Recall5, -1) r.printMetricRow(w, "Recall@10", result.Aggregate.Recall10, result.Thresholds.Recall10) 
r.printMetricRow(w, "Recall@50", result.Aggregate.Recall50, -1) fmt.Fprintln(w, "├─────────────────────────────────────────────────────────────────┤") r.printMetricRow(w, "MRR", result.Aggregate.MRR, result.Thresholds.MRR) r.printMetricRow(w, "NDCG@5", result.Aggregate.NDCG5, -1) r.printMetricRow(w, "NDCG@10", result.Aggregate.NDCG10, result.Thresholds.NDCG10) r.printMetricRow(w, "MAP", result.Aggregate.MAP, -1) fmt.Fprintln(w, "├─────────────────────────────────────────────────────────────────┤") r.printMetricRow(w, "Hit Rate", result.Aggregate.HitRate, result.Thresholds.HitRate) fmt.Fprintln(w, "└─────────────────────────────────────────────────────────────────┘") fmt.Fprintln(w) } // printMetricRow prints a single metric row with optional threshold comparison. func (r *Reporter) printMetricRow(w io.Writer, name string, value float64, threshold float64) { bar := r.progressBar(value, 20) status := " " if threshold >= 0 { if value >= threshold { status = "✓" } else { status = "✗" } } threshStr := "" if threshold >= 0 { threshStr = fmt.Sprintf(" (target: %.2f)", threshold) } fmt.Fprintf(w, "│ %s %-14s %s %.3f%s\n", status, name, bar, value, threshStr) } // progressBar creates a visual progress bar. func (r *Reporter) progressBar(value float64, width int) string { filled := int(value * float64(width)) if filled > width { filled = width } if filled < 0 { filled = 0 } bar := strings.Repeat("█", filled) + strings.Repeat("░", width-filled) return fmt.Sprintf("[%s]", bar) } // PrintDetails prints detailed per-test results. 
func (r *Reporter) PrintDetails(result *EvalResult) { w := r.writer fmt.Fprintln(w) fmt.Fprintln(w, "┌─────────────────────────────────────────────────────────────────┐") fmt.Fprintln(w, "│ Per-Test Results │") fmt.Fprintln(w, "└─────────────────────────────────────────────────────────────────┘") fmt.Fprintln(w) for i, tr := range result.Results { status := "✅" if tr.Error != "" { status = "❌" } else if tr.Metrics.HitRate == 0 { status = "⚠️" } fmt.Fprintf(w, "%s Test %d: %s\n", status, i+1, tr.TestCase.Name) fmt.Fprintf(w, " Query: %q\n", truncate(tr.TestCase.Query, 50)) fmt.Fprintf(w, " Method: %s | Duration: %v\n", tr.SearchMethod, tr.Duration.Round(time.Microsecond)) if tr.Error != "" { fmt.Fprintf(w, " Error: %s\n", tr.Error) } else { fmt.Fprintf(w, " P@10: %.2f | R@10: %.2f | MRR: %.2f | NDCG@10: %.2f\n", tr.Metrics.Precision10, tr.Metrics.Recall10, tr.Metrics.MRR, tr.Metrics.NDCG10) fmt.Fprintf(w, " Expected: %d | Returned: %d | Hits: %.0f\n", len(tr.TestCase.Expected), len(tr.Returned), tr.Metrics.HitRate) } fmt.Fprintln(w) } } // PrintJSON outputs results as JSON. func (r *Reporter) PrintJSON(result *EvalResult) error { encoder := json.NewEncoder(r.writer) encoder.SetIndent("", " ") return encoder.Encode(result) } // SaveJSON saves results to a JSON file. func (r *Reporter) SaveJSON(result *EvalResult, path string) error { file, err := os.Create(path) if err != nil { return fmt.Errorf("failed to create file: %w", err) } defer file.Close() encoder := json.NewEncoder(file) encoder.SetIndent("", " ") return encoder.Encode(result) } // PrintCompact prints a one-line summary. 
func (r *Reporter) PrintCompact(result *EvalResult) { status := "PASS" if result.FailedTests > 0 { status = "FAIL" } fmt.Fprintf(r.writer, "[%s] %d/%d tests | P@10=%.2f R@10=%.2f MRR=%.2f NDCG=%.2f HitRate=%.2f | %v\n", status, result.PassedTests, result.TotalTests, result.Aggregate.Precision10, result.Aggregate.Recall10, result.Aggregate.MRR, result.Aggregate.NDCG10, result.Aggregate.HitRate, result.Duration.Round(time.Millisecond), ) } func truncate(s string, max int) string { if len(s) <= max { return s } return s[:max-3] + "..." }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.