"""Evaluation module for semantic search quality."""

from .metrics import (
    AggregatedMetrics,
    AggregatedStabilityMetrics,
    FileMatchDetail,
    MetricStability,
    QueryMetrics,
    QueryStabilityMetrics,
    TokenStats,
    aggregate_metrics,
    aggregate_query_runs,
    aggregate_stability_metrics,
    calculate_f1,
    calculate_metric_stability,
    calculate_precision,
    calculate_recall,
)
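
# The calculate_* helpers presumably implement the standard retrieval
# definitions (see metrics.py for the actual argument conventions):
#   precision = TP / (TP + FP)
#   recall    = TP / (TP + FN)
#   f1        = 2 * precision * recall / (precision + recall)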

from .run_eval import (
    Example,
    ExpectedItem,
    evaluate_example,
    evaluate_example_multiple_runs,
    load_dataset,
    main,
    main_with_stability,
)
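
# The stability variants (evaluate_example_multiple_runs and the
# *StabilityMetrics classes) appear to re-run each query several times and
# summarize per-metric variation; run_eval.py is authoritative here.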

__all__ = [
    # Metrics classes
    "AggregatedMetrics",
    "AggregatedStabilityMetrics",
    "FileMatchDetail",
    "MetricStability",
    "QueryMetrics",
    "QueryStabilityMetrics",
    "TokenStats",
    # Metrics functions
    "aggregate_metrics",
    "aggregate_query_runs",
    "aggregate_stability_metrics",
    "calculate_f1",
    "calculate_metric_stability",
    "calculate_precision",
    "calculate_recall",
    # Run eval classes
    "Example",
    "ExpectedItem",
    # Run eval functions
    "evaluate_example",
    "evaluate_example_multiple_runs",
    "load_dataset",
    "main",
    "main_with_stability",
]