import logging
from typing import Any, Dict, List, Optional

try:
    from ragas import evaluate as ragas_evaluate
    from ragas.metrics import (
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    )
    from datasets import Dataset

    RAGAS_AVAILABLE = True
except ImportError:
    RAGAS_AVAILABLE = False
    # Placeholders to prevent NameError in __init__ before the availability check
    ragas_evaluate = None
    context_precision = None
    context_recall = None
    faithfulness = None
    answer_relevancy = None
    Dataset = None

from src.libs.evaluator.base_evaluator import BaseEvaluator

logger = logging.getLogger(__name__)

class RagasEvaluator(BaseEvaluator):
"""
Evaluator implementation using Ragas library.
Supports metrics: context_precision, context_recall, faithfulness, answer_relevancy.
"""
    def __init__(self, metrics: Optional[List[str]] = None):
        super().__init__()
        if not RAGAS_AVAILABLE:
            raise ImportError(
                "ragas package is not installed. Please install it with `pip install ragas`."
            )
        self.metrics_map = {
            "context_precision": context_precision,
            "context_recall": context_recall,
            "faithfulness": faithfulness,
            "answer_relevancy": answer_relevancy,
        }
        self.selected_metrics = []
        if metrics:
            for m in metrics:
                if m in self.metrics_map:
                    self.selected_metrics.append(self.metrics_map[m])
                else:
                    logger.warning(f"Metric '{m}' is not supported by RagasEvaluator. Skipping.")
        if not self.selected_metrics:
            # Default to all supported metrics if none were specified (or none were valid)
            self.selected_metrics = list(self.metrics_map.values())

    def evaluate(
        self,
        query: str,
        retrieved_ids: List[str],
        golden_ids: List[str],
        trace: Optional[Any] = None,
        **kwargs: Any,
    ) -> Dict[str, float]:
        """
        Run Ragas evaluation.

        Expects the following kwargs:
            - retrieved_texts (List[str]): Content of the retrieved chunks.
            - generated_answer (str): Answer generated by the LLM (required for faithfulness and answer_relevancy).
            - golden_answer (str): Ground-truth answer (required for context_recall and context_precision).

        Note: retrieved_ids and golden_ids are ignored by Ragas metrics
        but are kept for interface compatibility.
        """
        retrieved_texts = kwargs.get("retrieved_texts", [])
        generated_answer = kwargs.get("generated_answer", "")
        golden_answer = kwargs.get("golden_answer", "")
        # Prepare the dataset
        data = {
            "question": [query],
            "contexts": [retrieved_texts],
        }
        if generated_answer:
            data["answer"] = [generated_answer]
        if golden_answer:
            data["ground_truth"] = [golden_answer]

        # Filter metrics based on the available data
        active_metrics = []
        for metric in self.selected_metrics:
            # Check the requirements of each metric.
            # This is a simplification; Ragas raises errors if required columns are missing.
            if metric.name in ["faithfulness", "answer_relevancy"] and not generated_answer:
                logger.warning(f"Skipping {metric.name} due to missing generated_answer.")
                continue
            if metric.name in ["context_recall"] and not golden_answer:
                logger.warning(f"Skipping {metric.name} due to missing golden_answer.")
                continue
            if metric.name in ["context_precision"] and not golden_answer:
                # context_precision typically needs ground_truth (or at least relevant chunks)
                logger.warning(f"Skipping {metric.name} due to missing golden_answer.")
                continue
            active_metrics.append(metric)

        if not active_metrics:
            logger.warning("No applicable metrics for the provided data.")
            return {}
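
        # Note (assumption based on typical Ragas usage): ragas_evaluate() consumes a
        # datasets.Dataset with the columns built above (question / contexts / answer /
        # ground_truth) and, in the versions this module targets, returns a dict-like
        # result keyed by metric name, e.g. {"faithfulness": 0.9}.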
        try:
            dataset = Dataset.from_dict(data)
            results = ragas_evaluate(dataset=dataset, metrics=active_metrics)
            return dict(results)
        except Exception as e:
            logger.error(f"Ragas evaluation failed: {e}")
            return {}
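

# Illustrative usage sketch (not part of the module's public API): running this
# requires ragas and datasets to be installed and an LLM backend configured for
# Ragas (e.g. an OpenAI API key), since the Ragas metrics call an LLM under the hood.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    evaluator = RagasEvaluator(metrics=["faithfulness", "answer_relevancy"])
    scores = evaluator.evaluate(
        query="What is the capital of France?",
        retrieved_ids=["doc-1"],
        golden_ids=["doc-1"],
        retrieved_texts=["Paris is the capital of France."],
        generated_answer="Paris is the capital of France.",
        golden_answer="Paris.",
    )
    print(scores)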