from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
class BaseEvaluator(ABC):
    """Interface that every retrieval evaluator must implement.

    The class cannot be instantiated directly: concrete evaluators
    subclass it and override :meth:`evaluate`.
    """

    @abstractmethod
    def evaluate(
        self,
        query: str,
        retrieved_ids: List[str],
        golden_ids: List[str],
        trace: Optional[Any] = None,
        **kwargs: Any,
    ) -> Dict[str, float]:
        """Score one query's retrieval output against its ground truth.

        Args:
            query: The search query that was issued.
            retrieved_ids: Document IDs the system returned for the query.
            golden_ids: Document IDs expected for the query (ground truth).
            trace: Optional trace context; defaults to ``None``.
            **kwargs: Extra inputs for more advanced evaluators
                (e.g. Ragas). Commonly supplied keys:

                - ``retrieved_texts`` (List[str]): content of the
                  retrieved chunks.
                - ``golden_texts`` (List[str]): content of the golden
                  chunks.
                - ``generated_answer`` (str): answer generated by the LLM.

        Returns:
            A mapping from metric name to metric value, for example
            ``{"hit_rate": 1.0}``.
        """
        # Abstract placeholder: the base implementation does nothing and
        # yields None when reached through super().
        ...