
Dingo MCP Server

by MigoXLab
dataset_hallucination_evaluation.py (7.4 kB)
""" Dataset Hallucination Evaluation Example This example demonstrates how to use Dingo's hallucination detection capability for batch evaluation of datasets, particularly useful for: - RAG system evaluation - LLM response validation - SFT data quality assessment """ from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor # Force import hallucination detection modules from dingo.model.llm.llm_hallucination import LLMHallucination from dingo.model.prompt.prompt_hallucination import PromptHallucination from dingo.model.rule.rule_hallucination_hhem import RuleHallucinationHHEM def evaluate_hallucination_jsonl_dataset(): """ Example 1: Evaluate a JSONL dataset for hallucinations Expected JSONL format: {"data_id": "1", "prompt": "question", "content": "response", "context": ["context1", "context2"]} """ print("=== Example 1: JSONL Dataset Evaluation ===") input_data = { "input_path": str(Path("test/data/hallucination_test.jsonl")), # Your JSONL file path "output_path": "output/hallucination_evaluation/", "dataset": { "source": "local", "format": "jsonl", "field": { "prompt": "prompt", "content": "content", "context": "context", } }, "executor": { "prompt_list": ["PromptHallucination"], "result_save": { "bad": True } }, "evaluator": { "llm_config": { "LLMHallucination": { "model": "deepseek-chat", "key": "Your API Key", "api_url": "https://api.deepseek.com/v1" } } } } input_args = InputArgs(**input_data) executor = Executor.exec_map["local"](input_args) result = executor.execute() print(result) def evaluate_hallucination_with_hhem_rule(): """ Example 2: Evaluate hallucinations using RuleHallucinationHHEM (Local HHEM model) RuleHallucinationHHEM uses Vectara's HHEM-2.1-Open model for local inference: - Superior performance compared to GPT-3.5/GPT-4 on benchmarks - Local inference with <600MB RAM usage - Fast processing (~1.5s for 2k tokens on modern CPU) - No API costs or rate limits """ print("=== Example 2: HHEM Rule-Based Evaluation ===") input_data = { "input_path": str(Path("test/data/hallucination_test.jsonl")), "output_path": "output/hhem_evaluation/", "dataset": { "source": "local", "format": "jsonl", "field": { "prompt": "prompt", "content": "content", "context": "context", } }, "executor": { "rule_list": ["RuleHallucinationHHEM"], # Use HHEM rule instead of LLM "result_save": { "bad": True, "good": True # Also save good examples for comparison } }, "evaluator": { "rule_config": { "RuleHallucinationHHEM": { "threshold": 0.8 # Default threshold (0.0-1.0, higher = more strict) } } } } input_args = InputArgs(**input_data) executor = Executor.exec_map["local"](input_args) result = executor.execute() print(result) def evaluate_combined_llm_and_hhem(): """ Example 3: Combined evaluation using both LLM and HHEM for comprehensive analysis """ print("=== Example 3: Combined LLM + HHEM Evaluation ===") input_data = { "input_path": str(Path("test/data/hallucination_test.jsonl")), "output_path": "output/combined_evaluation/", "dataset": { "source": "local", "format": "jsonl", "field": { "prompt": "prompt", "content": "content", "context": "context", } }, "executor": { "rule_list": ["RuleHallucinationHHEM"], # Local HHEM rule "prompt_list": ["PromptHallucination"], # LLM-based evaluation "result_save": { "bad": True, "good": True } }, "evaluator": { "rule_config": { "RuleHallucinationHHEM": { "threshold": 0.5 # HHEM threshold } }, "llm_config": { "LLMHallucination": { "model": "deepseek-chat", "key": "Your API Key", "api_url": "https://api.deepseek.com/v1" } } } } 
input_args = InputArgs(**input_data) executor = Executor.exec_map["local"](input_args) result = executor.execute() print(result) def create_sample_test_data(): """ Helper function to create sample test data for demonstration """ import json import os # Create test directory os.makedirs("test_data", exist_ok=True) # Sample hallucination test data hallucination_samples = [ { "data_id": "1", "prompt": "When did Einstein win the Nobel Prize?", "content": "Einstein won the Nobel Prize in 1969 for his discovery of the photoelectric effect.", "context": ["Einstein won the Nobel Prize in 1921.", "The prize was for his work on the photoelectric effect."] }, { "data_id": "2", "prompt": "What is the capital of Japan?", "content": "The capital of Japan is Tokyo, which is located on the eastern coast of Honshu island.", "context": ["Tokyo is the capital of Japan.", "Tokyo is located on Honshu island."] }, { "data_id": "3", "prompt": "How many continents are there?", "content": "There are 8 continents in the world including Asia, Europe, North America, South America, Africa, Australia, Antarctica, and Atlantis.", "context": ["There are 7 continents.", "The continents are Asia, Europe, North America, South America, Africa, Australia, and Antarctica."] } ] # Write to JSONL file with open("test/data/hallucination_test.jsonl", "w", encoding="utf-8") as f: for sample in hallucination_samples: f.write(json.dumps(sample, ensure_ascii=False) + "\n") print("✅ Sample test data created in test_data/hallucination_test.jsonl") if __name__ == "__main__": # Create sample data first (if needed) # create_sample_test_data() # Commented out - using pre-built test data print() # Run examples (comment out if you don't have actual data) # evaluate_hallucination_jsonl_dataset() evaluate_hallucination_with_hhem_rule() # evaluate_combined_llm_and_hhem() # Uncomment to test combined approach print("💡 Usage Tips:") print("- Use lower thresholds (0.2-0.3) for sensitive hallucination detection") print("- Use higher thresholds (0.6-0.8) for more permissive evaluation") print("- Combine with other quality metrics for comprehensive assessment") print("- Use parallel processing (max_workers) for large datasets") print("- Check output files for detailed per-item analysis") print()
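To run the examples against your own data, the input JSONL only needs records with the keys used in the field mapping above (data_id, prompt, content, context). The sketch below shows one way to convert existing RAG logs into that shape; the source field names (question, answer, retrieved_passages) are hypothetical placeholders for whatever your pipeline produces, not part of Dingo's API.

# Minimal sketch: convert RAG logs into the JSONL records the examples above read.
import json
from pathlib import Path

def rag_logs_to_dingo_jsonl(rag_records, out_path="test/data/hallucination_test.jsonl"):
    out_file = Path(out_path)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    with out_file.open("w", encoding="utf-8") as f:
        for i, rec in enumerate(rag_records, start=1):
            row = {
                "data_id": str(i),
                "prompt": rec["question"],             # the user query (hypothetical source key)
                "content": rec["answer"],              # the model response to be checked
                "context": rec["retrieved_passages"],  # list of grounding passages
            }
            f.write(json.dumps(row, ensure_ascii=False) + "\n")

# Usage:
# rag_logs_to_dingo_jsonl([
#     {"question": "What is the capital of Japan?",
#      "answer": "The capital of Japan is Tokyo.",
#      "retrieved_passages": ["Tokyo is the capital of Japan."]},
# ])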

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/MigoXLab/dingo'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.