EntityIdentification

MIT License

Overview InspectNew Endpoints Schema Related Servers Reviews Score

Entity-Resolution

main.py•3.55 kB

"""Entity-comparison MCP server.

Exposes a single MCP tool, ``compare_entities``, which compares two JSON
objects field by field (exact match, normalized-text match, and a Gemini
judgement) and then asks a second Gemini model for an overall verdict.
"""

import json
import logging
import re
from typing import Dict, Any

from flask import Flask, request, jsonify
from mcp.server import FastMCP
import google.generativeai as genai


class _JsonLogFormatter(logging.Formatter):
    """Minimal stdlib formatter that renders each log record as a JSON object."""

    def format(self, record: logging.LogRecord) -> str:
        payload = {"message": record.getMessage()}
        if record.exc_info:
            payload["exc_info"] = self.formatException(record.exc_info)
        return json.dumps(payload, ensure_ascii=False)


# Initialize Flask app
flask_app = Flask(__name__)

# Configure logging: emit JSON-formatted records on the Flask app logger.
logHandler = logging.StreamHandler()
formatter = _JsonLogFormatter()
logHandler.setFormatter(formatter)
flask_app.logger.addHandler(logHandler)
flask_app.logger.setLevel(logging.INFO)

# Suppress the default Flask (werkzeug) request logger below ERROR level.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

# Initialize FastMCP server
mcp_server = FastMCP('entity-comparison')

# Compiled once at import time; used by normalize_text on every call.
_NON_WORD_RE = re.compile(r'[^\w\s]')
_WHITESPACE_RE = re.compile(r'\s+')


def normalize_text(text: str) -> str:
    """Normalize text for loose comparison.

    The value is coerced to ``str``, lower-cased, stripped of every character
    that is neither a word character nor whitespace, and has runs of
    whitespace collapsed to a single space (leading/trailing removed).
    """
    text = str(text).lower()
    text = _NON_WORD_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()


def compare_values(val1: Any, val2: Any) -> tuple[bool, bool]:
    """Compare two values exactly and after normalization.

    Non-list values are wrapped in a single-element list first.

    Returns:
        ``(exact_equal, semantic_equal)`` where ``exact_equal`` compares the
        (wrapped) raw values with ``==`` and ``semantic_equal`` compares the
        sorted lists of normalized item texts, so item order is ignored.
    """
    if not isinstance(val1, list):
        val1 = [val1]
    if not isinstance(val2, list):
        val2 = [val2]

    exact_equal = (val1 == val2)

    norm1 = sorted(normalize_text(item) for item in val1)
    norm2 = sorted(normalize_text(item) for item in val2)
    semantic_equal = (norm1 == norm2)

    return exact_equal, semantic_equal


@mcp_server.tool()
async def compare_entities(json1: Dict[str, Any], json2: Dict[str, Any], api_key: str) -> Dict[str, Any]:
    """Compare two entities' JSON data, using Gemini for semantic similarity.

    Args:
        json1: First entity; its keys drive the comparison.
        json2: Second entity; keys of ``json1`` missing here are reported
            with an ``"error"`` entry instead of a comparison.
        api_key: Gemini API key used to configure the client for this call.

    Returns:
        A dict with ``"field_comparisons"`` (per-key results: exact match,
        normalized match, the raw LLM answer text, and both values) and
        ``"final_analysis"`` (the final model's text verdict).
    """
    # Configure Gemini API
    genai.configure(api_key=api_key)

    # Safety settings: disable blocking for all listed harm categories.
    safety_settings = [
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    ]

    # Initialize models. The safety settings were previously built but never
    # handed to the models; pass them so they actually take effect.
    comparison_model = genai.GenerativeModel(
        "gemini-2.0-flash-exp", safety_settings=safety_settings
    )
    final_model = genai.GenerativeModel(
        "gemini-2.0-flash-thinking-exp", safety_settings=safety_settings
    )

    # Compare individual fields
    comparison_results = {}
    system_instruction = '请判断这个两个要素在语义层面是否一致，直接返回true或者false'

    for key in json1:
        if key not in json2:
            comparison_results[key] = {"error": "在第二份数据中未找到该 key"}
            continue

        exact_equal, semantic_equal = compare_values(json1[key], json2[key])

        # Get LLM comparison. Use the async API so the event loop running
        # this tool is not blocked while waiting on the network.
        llm_result = await comparison_model.generate_content_async(
            system_instruction + ":" + str(json1[key]) + "\n" + str(json2[key])
        )

        comparison_results[key] = {
            "exact_equal": exact_equal,
            "semantic_equal": semantic_equal,
            "LLM_equal": llm_result.text,
            "value1": json1[key],
            "value2": json2[key],
        }

    # Get final analysis over all per-field results.
    final_result = await final_model.generate_content_async(
        "综合这些信息，你认为可以判断两个数据来自同一主体吗？"
        + json.dumps(comparison_results, ensure_ascii=False, indent=4)
    )

    return {
        "field_comparisons": comparison_results,
        "final_analysis": final_result.text
    }


if __name__ == "__main__":
    # Run the MCP server (the Flask app object is created but not served here).
    mcp_server.run()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/u3588064/Entity-Resolution'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.