Skip to main content
Glama

EntityIdentification

by u3588064
main.py (3.55 kB)
"""MCP server exposing an entity-comparison tool backed by Gemini.

The module registers one MCP tool, ``compare_entities``, which compares two
JSON-like records field by field (exact match, normalized-text match, and an
LLM judgement) and then asks a second model for an overall verdict.
"""
import json
import logging
import re
from typing import Any, Dict, Optional

from flask import Flask, request, jsonify
from mcp.server import FastMCP
import google.generativeai as genai


class _JsonLogFormatter(logging.Formatter):
    """Render each log record as a single JSON object (stdlib only)."""

    def format(self, record: logging.LogRecord) -> str:
        payload = {
            "message": record.getMessage(),
            "levelname": record.levelname,
            "name": record.name,
        }
        if record.exc_info:
            payload["exc_info"] = self.formatException(record.exc_info)
        return json.dumps(payload, ensure_ascii=False)


# Initialize Flask app
flask_app = Flask(__name__)

# Configure logging: JSON-formatted records on the Flask application logger.
logHandler = logging.StreamHandler()
formatter = _JsonLogFormatter()
logHandler.setFormatter(formatter)
flask_app.logger.addHandler(logHandler)
flask_app.logger.setLevel(logging.INFO)

# Suppress the default Flask (werkzeug) request logger below ERROR.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

logger = logging.getLogger(__name__)

# Initialize FastMCP server
mcp_server = FastMCP('entity-comparison')


def normalize_text(text: str) -> str:
    """Normalize text for loose comparison.

    The value is converted to ``str``, lower-cased, stripped of every
    character that is neither a word character nor whitespace, and has runs
    of whitespace collapsed to a single space (leading/trailing removed).
    """
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)
    return re.sub(r'\s+', ' ', text).strip()


def compare_values(val1: Any, val2: Any) -> tuple[bool, bool]:
    """Compare two field values.

    Returns:
        A pair ``(exact_equal, semantic_equal)``:

        - ``exact_equal``: the raw values compare equal with ``==``.
        - ``semantic_equal``: after wrapping scalars in a list and
          normalizing every item with ``normalize_text``, both sides hold
          the same items regardless of order.
    """
    # Compare the raw inputs *before* wrapping, so a scalar and a
    # one-element list are not reported as exactly equal.
    exact_equal = (val1 == val2)

    items1 = val1 if isinstance(val1, list) else [val1]
    items2 = val2 if isinstance(val2, list) else [val2]
    norm1 = sorted(normalize_text(item) for item in items1)
    norm2 = sorted(normalize_text(item) for item in items2)
    semantic_equal = (norm1 == norm2)
    return exact_equal, semantic_equal


def _response_text(response: Any) -> Optional[str]:
    """Return the stripped text of a Gemini response, or None if it has none.

    Accessing ``.text`` raises ``ValueError`` when the response carries no
    usable text part (e.g. it was blocked); that must not discard the
    comparisons already computed for other fields.
    """
    try:
        return response.text.strip()
    except ValueError:
        logger.warning("Gemini response contained no text")
        return None


@mcp_server.tool()
async def compare_entities(json1: Dict[str, Any], json2: Dict[str, Any], api_key: str) -> Dict[str, Any]:
    """Compare two entity records and ask Gemini whether they match.

    Args:
        json1: First record, mapping field name to value.
        json2: Second record, mapping field name to value.
        api_key: Gemini API key used for this call.

    Returns:
        A dict with ``field_comparisons`` (per-key results, or an ``error``
        entry for keys present in only one record) and ``final_analysis``
        (the model's overall verdict text, or None if it returned no text).
    """
    # Configure Gemini API
    genai.configure(api_key=api_key)

    # Safety settings: disable blocking for every harm category.
    safety_settings = [
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    ]

    # Initialize models; the safety settings are applied to both.
    comparison_model = genai.GenerativeModel(
        "gemini-2.0-flash-exp", safety_settings=safety_settings
    )
    final_model = genai.GenerativeModel(
        "gemini-2.0-flash-thinking-exp", safety_settings=safety_settings
    )

    # Compare individual fields
    comparison_results: Dict[str, Any] = {}
    system_instruction = '请判断这个两个要素在语义层面是否一致,直接返回true或者false'

    for key, value1 in json1.items():
        if key not in json2:
            comparison_results[key] = {"error": "在第二份数据中未找到该 key"}
            continue
        value2 = json2[key]

        exact_equal, semantic_equal = compare_values(value1, value2)

        # Use the async API so the event loop is not blocked while waiting.
        llm_result = await comparison_model.generate_content_async(
            f"{system_instruction}:{value1}\n{value2}"
        )

        comparison_results[key] = {
            "exact_equal": exact_equal,
            "semantic_equal": semantic_equal,
            "LLM_equal": _response_text(llm_result),
            "value1": value1,
            "value2": value2,
        }

    # Report keys that exist only in the second record as well.
    for key in json2:
        if key not in json1:
            comparison_results[key] = {"error": "在第一份数据中未找到该 key"}

    # Get final analysis
    final_result = await final_model.generate_content_async(
        "综合这些信息,你认为可以判断两个数据来自同一主体吗?"
        + json.dumps(comparison_results, ensure_ascii=False, indent=4)
    )

    return {
        "field_comparisons": comparison_results,
        "final_analysis": _response_text(final_result),
    }


if __name__ == "__main__":
    # Run the MCP server (the Flask app object is not served here).
    mcp_server.run()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/u3588064/Entity-Resolution'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.