
Dingo MCP Server

by MigoXLab
dataset_factcheck_evaluation.py
""" Dataset Factuality Evaluation Example This example demonstrates how to use Dingo's factuality evaluation capability for batch evaluation of datasets, particularly useful for: - LLM response validation - RAG system evaluation - SFT data quality assessment """ import os from pathlib import Path from dingo.config import InputArgs from dingo.exec import Executor from dingo.io import Data # Force import factuality evaluation modules from dingo.model.llm.llm_factcheck_public import LLMFactCheckPublic from dingo.model.prompt.prompt_factcheck import PromptFactCheck OPENAI_MODEL = 'deepseek-chat' OPENAI_URL = 'https://api.deepseek.com/v1' OPENAI_KEY = os.getenv("OPENAI_KEY") def evaluate_factuality_jsonl_dataset(): """ Example: Evaluate a JSONL dataset for factuality Expected JSONL format: {"data_id": "1", "prompt": "question", "content": "response"} """ print("=== Dataset Factuality Evaluation ===") input_data = { "input_path": str(Path("test/data/factcheck_test.jsonl")), # Your JSONL file path "output_path": "output/factcheck_evaluation/", "dataset": { "source": "local", "format": "jsonl", "field": { "prompt": "question", # 注意这里使用 question 作为 prompt 字段 "content": "content" } }, "executor": { "eval_group": "factuality", # 使用 factuality 评估组 "result_save": { "bad": True, # 保存不实信息 "good": True # 保存真实信息 } }, "evaluator": { "llm_config": { "LLMFactCheckPublic": { "model": OPENAI_MODEL, "key": OPENAI_KEY, "api_url": OPENAI_URL, } } } } input_args = InputArgs(**input_data) executor = Executor.exec_map["local"](input_args) result = executor.execute() print("\n=== Evaluation Summary ===") print(f"Total processed: {result.total}") print(f"Factual responses: {result.num_good}") print(f"Non-factual responses: {result.num_bad}") print(f"Overall factuality score: {result.score:.2%}") print(f"\nType distribution: {result.type_ratio}") print(f"Name distribution: {result.name_ratio}") def evaluate_single_data_example(): """ Example: Evaluate a single piece of data for factuality This is useful for testing or real-time evaluation """ print("=== Single Data Factuality Evaluation ===") # 配置评估器 evaluator = LLMFactCheckPublic() evaluator.dynamic_config.model = OPENAI_MODEL evaluator.dynamic_config.key = OPENAI_KEY evaluator.dynamic_config.api_url = OPENAI_URL evaluator.dynamic_config.parameters = { "temperature": 0.1, # 降低随机性以提高一致性 "max_tokens": 2000 } # 创建测试数据 test_data = Data( data_id="test_1", prompt="Tell me about Albert Einstein's Nobel Prize.", content="Albert Einstein won the Nobel Prize in Physics in 1921 for his work on the photoelectric effect. However, many people mistakenly think he won it for his theory of relativity, which actually never received a Nobel Prize due to the controversial nature of relativity at the time." ) # 执行评估 result = evaluator.eval(test_data) print("\n=== Evaluation Result ===") print(f"Error Status: {result.error_status}") print(f"Type: {result.type}") print(f"Name: {result.name}") print(f"Reason: {result.reason}") if __name__ == "__main__": print("📊 Dingo Factuality Evaluation Examples") print("=" * 60) print() # Run examples # print("1. Dataset Evaluation Example") # print("-" * 30) # evaluate_factuality_jsonl_dataset() print("2. Single Data Evaluation Example") print("-" * 30) evaluate_single_data_example()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/MigoXLab/dingo'
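The same request from Python, as a minimal sketch using only the standard library (it assumes the endpoint returns a JSON body, which the curl example does not itself confirm):

import json
import urllib.request

URL = "https://glama.ai/api/mcp/v1/servers/MigoXLab/dingo"

# Fetch the server record and parse it, assuming a JSON response body
with urllib.request.urlopen(URL) as resp:
    server_info = json.load(resp)

print(json.dumps(server_info, indent=2))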

If you have feedback or need assistance with the MCP directory API, please join our Discord server.