
Dingo MCP Server

by MigoXLab
app.py (16.3 kB)
import json
import os
import pprint
import shutil
from functools import partial
from pathlib import Path

import gradio as gr

from dingo.config import InputArgs
from dingo.exec import Executor
from dingo.model import Model


def dingo_demo(
        uploaded_file, dataset_source, data_format, input_path,
        max_workers, batch_size,
        column_id, column_prompt, column_content, column_image,
        rule_list, prompt_list, scene_list, model, key, api_url):
    if not data_format:
        raise gr.Error('ValueError: data_format can not be empty, please input.')
    if not column_content:
        raise gr.Error('ValueError: column_content can not be empty, please input.')
    if not rule_list and not prompt_list:
        raise gr.Error('ValueError: rule_list and prompt_list can not be empty at the same time.')

    # Handle input path based on dataset source
    if dataset_source == "hugging_face":
        if not input_path:
            raise gr.Error('ValueError: input_path can not be empty for hugging_face dataset, please input.')
        final_input_path = input_path
    else:  # local
        if not uploaded_file:
            raise gr.Error('Please upload a file for local dataset.')
        file_base_name = os.path.basename(uploaded_file.name)
        if not str(file_base_name).endswith(('.jsonl', '.json', '.txt')):
            raise gr.Error("File format must be '.jsonl', '.json' or '.txt'")
        final_input_path = uploaded_file.name

    if max_workers <= 0:
        raise gr.Error('Please input value > 0 in max_workers.')
    if batch_size <= 0:
        raise gr.Error('Please input value > 0 in batch_size.')

    try:
        input_data = {
            "input_path": final_input_path,
            "output_path": "" if dataset_source == 'hugging_face' else os.path.dirname(final_input_path),
            "dataset": {
                "source": dataset_source,
                "format": data_format,
                "field": {
                    "content": column_content
                }
            },
            "executor": {
                "rule_list": rule_list,
                "prompt_list": prompt_list,
                "result_save": {
                    "bad": True,
                    "raw": True
                },
                "max_workers": max_workers,
                "batch_size": batch_size,
            },
            "evaluator": {
                "llm_config": {
                    scene_list: {
                        "model": model,
                        "key": key,
                        "api_url": api_url,
                    }
                }
            }
        }
        if column_id:
            input_data['dataset']['field']['id'] = column_id
        if column_prompt:
            input_data['dataset']['field']['prompt'] = column_prompt
        if column_image:
            input_data['dataset']['field']['image'] = column_image

        # print(input_data)
        # exit(0)
        input_args = InputArgs(**input_data)
        executor = Executor.exec_map["local"](input_args)
        summary = executor.execute().to_dict()
        detail = executor.get_bad_info_list()
        new_detail = []
        for item in detail:
            new_detail.append(item)

        if summary['output_path']:
            shutil.rmtree(summary['output_path'])

        # Return two values: the summary and the detail list
        return json.dumps(summary, indent=4), new_detail
    except Exception as e:
        raise gr.Error(str(e))


def update_input_components(dataset_source):
    # Return different input components depending on the dataset source
    if dataset_source == "hugging_face":
        # hugging_face source: show the path textbox, hide the file upload
        return [
            gr.Textbox(visible=True),
            gr.File(visible=False),
        ]
    else:  # local
        # local source: hide the path textbox, show the file upload
        return [
            gr.Textbox(visible=False),
            gr.File(visible=True),
        ]


def update_rule_list(rule_type_mapping, rule_type):
    return gr.CheckboxGroup(
        choices=rule_type_mapping.get(rule_type, []),
        value=[],
        label="rule_list"
    )


def update_prompt_list(scene_prompt_mapping, scene):
    """Update the available prompt list for the selected scenario and clear all selections."""
    return gr.CheckboxGroup(
        choices=scene_prompt_mapping.get(scene, []),
        value=[],  # clear all selections
        label="prompt_list"
    )


# When prompt_list changes, toggle visibility of model, key and api_url
def toggle_llm_fields(prompt_values):
    visible = bool(prompt_values)
    return (
        gr.update(visible=visible),
        gr.update(visible=visible),
        gr.update(visible=visible)
    )


# Control visibility of column_id, column_prompt, column_content and column_image
def update_column_fields(rule_list, prompt_list):
    rule_type_mapping = get_rule_type_mapping()
    scene_prompt_mapping = get_scene_prompt_mapping()
    data_column_mapping = get_data_column_mapping()

    status_mapping = {
        'id': False,
        'prompt': False,
        'content': False,
        'image': False,
    }
    res = (
        gr.update(visible=status_mapping['id']),
        gr.update(visible=status_mapping['prompt']),
        gr.update(visible=status_mapping['content']),
        gr.update(visible=status_mapping['image'])
    )
    if not rule_list and not prompt_list:
        return res

    key_list = []
    key_list += get_key_by_mapping(rule_type_mapping, rule_list)
    key_list += get_key_by_mapping(scene_prompt_mapping, prompt_list)

    data_column = []
    for key in key_list:
        if not data_column:
            data_column = data_column_mapping[key]
        else:
            new_data_column = data_column_mapping[key]
            if data_column != new_data_column:
                raise gr.Error(f'ConflictError: {key} needs a data type different from the others.')

    for c in data_column:
        status_mapping[c] = True

    res = (
        gr.update(visible=status_mapping['id']),
        gr.update(visible=status_mapping['prompt']),
        gr.update(visible=status_mapping['content']),
        gr.update(visible=status_mapping['image'])
    )
    return res


def get_rule_type_mapping():
    origin_map = Model.get_rule_metric_type_map()
    process_map = {'Rule-Based TEXT Quality Metrics': []}  # can adjust the order
    for k, v in origin_map.items():
        if k in ['QUALITY_BAD_COMPLETENESS', 'QUALITY_BAD_EFFECTIVENESS', 'QUALITY_BAD_FLUENCY',
                 'QUALITY_BAD_RELEVANCE', 'QUALITY_BAD_SIMILARITY', 'QUALITY_BAD_UNDERSTANDABILITY']:
            k = 'Rule-Based TEXT Quality Metrics'
        for r in v:
            if k not in process_map:
                process_map[k] = []
            process_map[k].append(r.__name__)
    # print(process_map)
    return process_map


def get_scene_prompt_mapping():
    origin_map = Model.get_scenario_prompt_map()
    process_map = {'LLMTextQualityModelBase': [], 'LLMTextQualityPromptBase': []}  # can adjust the order
    for k, v in origin_map.items():
        for p in v:
            if k not in process_map:
                process_map[k] = []
            process_map[k].append(p.__name__)
    # print(process_map)
    return process_map


def get_key_by_mapping(map_dict: dict, value_list: list):
    key_list = []
    for k, v in map_dict.items():
        if bool(set(v) & set(value_list)):
            key_list.append(k)
    return key_list


def get_data_column_mapping():
    return {
        # llm
        'LLMTextQualityPromptBase': ['content'],
        'LLMTextQualityModelBase': ['content'],
        'LLMSecurityPolitics': ['content'],
        'LLMSecurityProhibition': ['content'],
        'LLMText3HHarmless': ['content'],
        'LLMText3HHelpful': ['content'],
        'LLMText3HHonest': ['content'],
        'LLMClassifyTopic': ['content'],
        'LLMClassifyQR': ['content'],
        'LLMDatamanAssessment': ['content'],
        'VLMImageRelevant': ['prompt', 'content'],
        # rule
        # 'QUALITY_BAD_COMPLETENESS': ['content'],
        # 'QUALITY_BAD_EFFECTIVENESS': ['content'],
        # 'QUALITY_BAD_FLUENCY': ['content'],
        # 'QUALITY_BAD_RELEVANCE': ['content'],
        # 'QUALITY_BAD_SIMILARITY': ['content'],
        # 'QUALITY_BAD_UNDERSTANDABILITY': ['content'],
        'Rule-Based TEXT Quality Metrics': ['content'],
        'QUALITY_BAD_SECURITY': ['content'],
        'QUALITY_BAD_IMG_EFFECTIVENESS': ['image'],
        'QUALITY_BAD_IMG_RELEVANCE': ['content', 'image'],
        'QUALITY_BAD_IMG_SIMILARITY': ['content'],
    }


if __name__ == '__main__':
    rule_type_mapping = get_rule_type_mapping()
    rule_type_options = list(rule_type_mapping.keys())
    scene_prompt_mapping = get_scene_prompt_mapping()
    scene_options = list(scene_prompt_mapping.keys())

    current_dir = Path(__file__).parent
    with open(os.path.join(current_dir, 'header.html'), "r") as file:
        header = file.read()

    with gr.Blocks() as demo:
        gr.HTML(header)
        with gr.Row():
            with gr.Column():
                with gr.Column():
                    dataset_source = gr.Dropdown(
                        choices=["hugging_face", "local"],
                        value="hugging_face",
                        label="dataset [source]"
                    )
                    input_path = gr.Textbox(
                        value='chupei/format-jsonl',
                        placeholder="please input hugging_face dataset path",
                        label="input_path",
                        visible=True
                    )
                    uploaded_file = gr.File(
                        label="upload file",
                        visible=False
                    )
                    data_format = gr.Dropdown(
                        ["jsonl", "json", "plaintext", "listjson"],
                        label="data_format"
                    )
                    with gr.Row():
                        max_workers = gr.Number(
                            value=1,
                            label="max_workers",
                            precision=0
                        )
                        batch_size = gr.Number(
                            value=1,
                            label="batch_size",
                            precision=0
                        )
                    # Add the rule_type dropdown near where scene_list is defined
                    rule_type = gr.Dropdown(
                        choices=rule_type_options,
                        value=rule_type_options[0],
                        label="rule_type",
                        interactive=True
                    )
                    rule_list = gr.CheckboxGroup(
                        choices=rule_type_mapping.get(rule_type_options[0], []),
                        label="rule_list"
                    )
                    # Scenario selection dropdown
                    scene_list = gr.Dropdown(
                        choices=scene_options,
                        value=scene_options[0],
                        label="scenario_list",
                        interactive=True
                    )
                    prompt_list = gr.CheckboxGroup(
                        choices=scene_prompt_mapping.get(scene_options[0], []),
                        label="prompt_list"
                    )
                    # LLM model name
                    model = gr.Textbox(
                        placeholder="If want to use llm, please input model, such as: deepseek-chat",
                        label="model",
                        visible=False
                    )
                    # LLM API key
                    key = gr.Textbox(
                        placeholder="If want to use llm, please input key, such as: 123456789012345678901234567890xx",
                        label="API KEY",
                        visible=False
                    )
                    # LLM API URL
                    api_url = gr.Textbox(
                        placeholder="If want to use llm, please input api_url, such as: https://api.deepseek.com/v1",
                        label="API URL",
                        visible=False
                    )
                    with gr.Row():
                        # Column-mapping hint text with a link to examples
                        with gr.Column():
                            gr.Markdown(
                                "Please input the column name of dataset in the input boxes below "
                                "( [examples](https://github.com/MigoXLab/dingo/tree/main/examples) )")
                            column_id = gr.Textbox(
                                value="",
                                placeholder="Column name of id in the input file. If exists multiple levels, use '.' separate",
                                label="column_id",
                                visible=False
                            )
                            column_prompt = gr.Textbox(
                                value="",
                                placeholder="Column name of prompt in the input file. If exists multiple levels, use '.' separate",
                                label="column_prompt",
                                visible=False
                            )
                            column_content = gr.Textbox(
                                value="content",
                                placeholder="Column name of content in the input file. If exists multiple levels, use '.' separate",
                                label="column_content",
                                visible=False
                            )
                            column_image = gr.Textbox(
                                value="",
                                placeholder="Column name of image in the input file. If exists multiple levels, use '.' separate",
                                label="column_image",
                                visible=False
                            )
                with gr.Row():
                    submit_single = gr.Button(value="Submit", interactive=True, variant="primary")

            with gr.Column():
                # Output section, shown in tabs
                with gr.Tabs():
                    with gr.Tab("Result Summary"):
                        summary_output = gr.Textbox(label="summary", max_lines=50)
                    with gr.Tab("Result Detail"):
                        # JSON component presents structured data more clearly
                        detail_output = gr.JSON(label="detail", max_height=800)

        dataset_source.change(
            fn=update_input_components,
            inputs=dataset_source,
            outputs=[input_path, uploaded_file]
        )
        rule_type.change(
            fn=partial(update_rule_list, rule_type_mapping),
            inputs=rule_type,
            outputs=rule_list
        )
        # Update the prompt list when the scenario changes
        scene_list.change(
            fn=partial(update_prompt_list, scene_prompt_mapping),
            inputs=scene_list,
            outputs=prompt_list
        )
        prompt_list.change(
            fn=toggle_llm_fields,
            inputs=prompt_list,
            outputs=[model, key, api_url]
        )
        # Control visibility of the column fields
        for comp in [rule_list, prompt_list]:
            comp.change(
                fn=update_column_fields,
                inputs=[rule_list, prompt_list],
                outputs=[column_id, column_prompt, column_content, column_image]
            )

        submit_single.click(
            fn=dingo_demo,
            inputs=[
                uploaded_file, dataset_source, data_format, input_path,
                max_workers, batch_size,
                column_id, column_prompt, column_content, column_image,
                rule_list, prompt_list, scene_list, model, key, api_url
            ],
            outputs=[summary_output, detail_output]  # two outputs: summary and detail
        )

    # Launch the interface
    demo.launch()
