MCP Development Framework

{ "sourceFile": "mcp_simple_tool/tools/image_recognition_tool.py", "activeCommit": 0, "commits": [ { "activePatchIndex": 1, "patches": [ { "date": 1741494708460, "content": "Index: \n===================================================================\n--- \n+++ \n" }, { "date": 1741495204420, "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,191 +1,1 @@\n-\"\"\"\n-大模型图像识别工具,用于与大模型API交互进行图像识别\n-\"\"\"\n-\n-import os\n-import json\n-import http.client\n-import traceback\n-from typing import Dict, List, Any, Optional\n-import mcp.types as types\n-from . import BaseTool, ToolRegistry\n-\n-class ImageRecognizer:\n- \"\"\"图像识别器,负责与大模型API交互进行图像识别\"\"\"\n- \n- def __init__(self, base_url: str, api_key: str, model: str = \"gpt-4o\"):\n- \"\"\"\n- 初始化图像识别器\n- \n- Args:\n- base_url: API基础URL,不包含路径\n- api_key: API密钥\n- model: 使用的模型名称\n- \"\"\"\n- self.base_url = base_url\n- self.api_key = api_key\n- self.model = model\n- \n- async def recognize_image(self, image_path: str, prompt: str = \"请详细描述这张图片的内容\") -> Optional[str]:\n- \"\"\"\n- 识别图片内容\n- \n- Args:\n- image_path: 图片路径\n- prompt: 提示词\n- \n- Returns:\n- 识别结果文本,如果失败则返回None\n- \"\"\"\n- try:\n- # 检查图片是否存在\n- if not os.path.exists(image_path):\n- print(f\"错误: 图片不存在: {image_path}\")\n- return None\n- \n- # 构建图片URL(本地文件路径或HTTP URL)\n- if image_path.startswith(('http://', 'https://')):\n- image_url = image_path\n- else:\n- # 如果是本地文件,需要确保它是可访问的URL\n- # 这里假设图片已经被放置在可通过HTTP访问的位置\n- # 实际应用中可能需要上传图片到临时存储服务\n- image_url = f\"file://{os.path.abspath(image_path)}\"\n- \n- # 创建HTTP连接\n- conn = http.client.HTTPSConnection(self.base_url)\n- \n- # 构建请求负载\n- payload = json.dumps({\n- \"model\": self.model,\n- \"stream\": False,\n- \"messages\": [\n- {\n- \"role\": \"user\",\n- \"content\": [\n- {\n- \"type\": \"text\",\n- \"text\": prompt\n- },\n- {\n- \"type\": \"image_url\",\n- \"image_url\": {\n- \"url\": image_url\n- }\n- }\n- ]\n- }\n- ],\n- \"max_tokens\": 400\n- })\n- \n- # 设置请求头\n- headers = {\n- 'Accept': 'application/json',\n- 'Authorization': f'Bearer {self.api_key}',\n- 'Content-Type': 'application/json'\n- }\n- \n- # 发送请求\n- conn.request(\"POST\", \"/v1/chat/completions\", payload, headers)\n- \n- # 获取响应\n- res = conn.getresponse()\n- data = res.read().decode(\"utf-8\")\n- \n- # 解析响应\n- response = json.loads(data)\n- \n- # 提取回复内容\n- if 'choices' in response and len(response['choices']) > 0:\n- content = response['choices'][0]['message']['content']\n- return content\n- else:\n- print(f\"错误: 无法从响应中提取内容: {response}\")\n- return None\n- \n- except Exception as e:\n- print(f\"图像识别过程中出错: {str(e)}\")\n- traceback.print_exc()\n- return None\n-\n-@ToolRegistry.register\n-class ImageRecognitionTool(BaseTool):\n- \"\"\"图像识别工具,用于识别图片内容\"\"\"\n- \n- name = \"image_recognition\"\n- description = \"使用大模型识别图片内容\"\n- input_schema = {\n- \"type\": \"object\",\n- \"required\": [\"image_path\"],\n- \"properties\": {\n- \"image_path\": {\n- \"type\": \"string\",\n- \"description\": \"图片的本地路径或URL\",\n- },\n- \"prompt\": {\n- \"type\": \"string\",\n- \"description\": \"提示词,指导模型如何描述图片\",\n- },\n- \"base_url\": {\n- \"type\": \"string\",\n- \"description\": \"API基础URL,不包含路径\",\n- },\n- \"api_key\": {\n- \"type\": \"string\",\n- \"description\": \"API密钥\",\n- },\n- \"model\": {\n- \"type\": \"string\",\n- \"description\": \"使用的模型名称\",\n- }\n- },\n- }\n- \n- async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n- \"\"\"\n- 执行图像识别\n- \n- Args:\n- arguments: 参数字典,必须包含'image_path'键\n- \n- Returns:\n- 识别结果\n- \"\"\"\n- # 参数验证\n- if \"image_path\" not in arguments:\n- return [types.TextContent(\n- type=\"text\",\n- text=\"错误: 缺少必要参数 'image_path'\"\n- )]\n- \n- # 获取参数\n- image_path = arguments[\"image_path\"]\n- prompt = arguments.get(\"prompt\", \"请详细描述这张图片的内容\")\n- base_url = arguments.get(\"base_url\", os.environ.get(\"LLM_API_BASE_URL\", \"api.openai.com\"))\n- api_key = arguments.get(\"api_key\", os.environ.get(\"LLM_API_KEY\", \"\"))\n- model = arguments.get(\"model\", os.environ.get(\"LLM_MODEL\", \"gpt-4o\"))\n- \n- # 验证API密钥\n- if not api_key:\n- return [types.TextContent(\n- type=\"text\",\n- text=\"错误: 缺少API密钥,请通过参数提供或设置环境变量 LLM_API_KEY\"\n- )]\n- \n- # 创建图像识别器\n- recognizer = ImageRecognizer(base_url, api_key, model)\n- \n- # 执行图像识别\n- result = await recognizer.recognize_image(image_path, prompt)\n- \n- if result:\n- return [types.TextContent(\n- type=\"text\",\n- text=f\"# 图像识别结果\\n\\n{result}\"\n- )]\n- else:\n- return [types.TextContent(\n- type=\"text\",\n- text=\"图像识别失败,请检查图片路径和API配置。\"\n- )] \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n" } ], "date": 1741494708460, "name": "Commit-0", "content": "\"\"\"\n大模型图像识别工具,用于与大模型API交互进行图像识别\n\"\"\"\n\nimport os\nimport json\nimport http.client\nimport traceback\nfrom typing import Dict, List, Any, Optional\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\nclass ImageRecognizer:\n \"\"\"图像识别器,负责与大模型API交互进行图像识别\"\"\"\n \n def __init__(self, base_url: str, api_key: str, model: str = \"gpt-4o\"):\n \"\"\"\n 初始化图像识别器\n \n Args:\n base_url: API基础URL,不包含路径\n api_key: API密钥\n model: 使用的模型名称\n \"\"\"\n self.base_url = base_url\n self.api_key = api_key\n self.model = model\n \n async def recognize_image(self, image_path: str, prompt: str = \"请详细描述这张图片的内容\") -> Optional[str]:\n \"\"\"\n 识别图片内容\n \n Args:\n image_path: 图片路径\n prompt: 提示词\n \n Returns:\n 识别结果文本,如果失败则返回None\n \"\"\"\n try:\n # 检查图片是否存在\n if not os.path.exists(image_path):\n print(f\"错误: 图片不存在: {image_path}\")\n return None\n \n # 构建图片URL(本地文件路径或HTTP URL)\n if image_path.startswith(('http://', 'https://')):\n image_url = image_path\n else:\n # 如果是本地文件,需要确保它是可访问的URL\n # 这里假设图片已经被放置在可通过HTTP访问的位置\n # 实际应用中可能需要上传图片到临时存储服务\n image_url = f\"file://{os.path.abspath(image_path)}\"\n \n # 创建HTTP连接\n conn = http.client.HTTPSConnection(self.base_url)\n \n # 构建请求负载\n payload = json.dumps({\n \"model\": self.model,\n \"stream\": False,\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": prompt\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": image_url\n }\n }\n ]\n }\n ],\n \"max_tokens\": 400\n })\n \n # 设置请求头\n headers = {\n 'Accept': 'application/json',\n 'Authorization': f'Bearer {self.api_key}',\n 'Content-Type': 'application/json'\n }\n \n # 发送请求\n conn.request(\"POST\", \"/v1/chat/completions\", payload, headers)\n \n # 获取响应\n res = conn.getresponse()\n data = res.read().decode(\"utf-8\")\n \n # 解析响应\n response = json.loads(data)\n \n # 提取回复内容\n if 'choices' in response and len(response['choices']) > 0:\n content = response['choices'][0]['message']['content']\n return content\n else:\n print(f\"错误: 无法从响应中提取内容: {response}\")\n return None\n \n except Exception as e:\n print(f\"图像识别过程中出错: {str(e)}\")\n traceback.print_exc()\n return None\n\n@ToolRegistry.register\nclass ImageRecognitionTool(BaseTool):\n \"\"\"图像识别工具,用于识别图片内容\"\"\"\n \n name = \"image_recognition\"\n description = \"使用大模型识别图片内容\"\n input_schema = {\n \"type\": \"object\",\n \"required\": [\"image_path\"],\n \"properties\": {\n \"image_path\": {\n \"type\": \"string\",\n \"description\": \"图片的本地路径或URL\",\n },\n \"prompt\": {\n \"type\": \"string\",\n \"description\": \"提示词,指导模型如何描述图片\",\n },\n \"base_url\": {\n \"type\": \"string\",\n \"description\": \"API基础URL,不包含路径\",\n },\n \"api_key\": {\n \"type\": \"string\",\n \"description\": \"API密钥\",\n },\n \"model\": {\n \"type\": \"string\",\n \"description\": \"使用的模型名称\",\n }\n },\n }\n \n async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n \"\"\"\n 执行图像识别\n \n Args:\n arguments: 参数字典,必须包含'image_path'键\n \n Returns:\n 识别结果\n \"\"\"\n # 参数验证\n if \"image_path\" not in arguments:\n return [types.TextContent(\n type=\"text\",\n text=\"错误: 缺少必要参数 'image_path'\"\n )]\n \n # 获取参数\n image_path = arguments[\"image_path\"]\n prompt = arguments.get(\"prompt\", \"请详细描述这张图片的内容\")\n base_url = arguments.get(\"base_url\", os.environ.get(\"LLM_API_BASE_URL\", \"api.openai.com\"))\n api_key = arguments.get(\"api_key\", os.environ.get(\"LLM_API_KEY\", \"\"))\n model = arguments.get(\"model\", os.environ.get(\"LLM_MODEL\", \"gpt-4o\"))\n \n # 验证API密钥\n if not api_key:\n return [types.TextContent(\n type=\"text\",\n text=\"错误: 缺少API密钥,请通过参数提供或设置环境变量 LLM_API_KEY\"\n )]\n \n # 创建图像识别器\n recognizer = ImageRecognizer(base_url, api_key, model)\n \n # 执行图像识别\n result = await recognizer.recognize_image(image_path, prompt)\n \n if result:\n return [types.TextContent(\n type=\"text\",\n text=f\"# 图像识别结果\\n\\n{result}\"\n )]\n else:\n return [types.TextContent(\n type=\"text\",\n text=\"图像识别失败,请检查图片路径和API配置。\"\n )] " } ] }