MCP Development Framework

MIT License
OverviewInspectSchema Related Servers Reviews Score
{
    "sourceFile": "mcp_simple_tool/tools/image_recognition_tool.py",
    "activeCommit": 0,
    "commits": [
        {
            "activePatchIndex": 1,
            "patches": [
                {
                    "date": 1741494708460,
                    "content": "Index: \n===================================================================\n--- \n+++ \n"
                },
                {
                    "date": 1741495204420,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,191 +1,1 @@\n-\"\"\"\n-大模型图像识别工具，用于与大模型API交互进行图像识别\n-\"\"\"\n-\n-import os\n-import json\n-import http.client\n-import traceback\n-from typing import Dict, List, Any, Optional\n-import mcp.types as types\n-from . import BaseTool, ToolRegistry\n-\n-class ImageRecognizer:\n-    \"\"\"图像识别器，负责与大模型API交互进行图像识别\"\"\"\n-    \n-    def __init__(self, base_url: str, api_key: str, model: str = \"gpt-4o\"):\n-        \"\"\"\n-        初始化图像识别器\n-        \n-        Args:\n-            base_url: API基础URL，不包含路径\n-            api_key: API密钥\n-            model: 使用的模型名称\n-        \"\"\"\n-        self.base_url = base_url\n-        self.api_key = api_key\n-        self.model = model\n-    \n-    async def recognize_image(self, image_path: str, prompt: str = \"请详细描述这张图片的内容\") -> Optional[str]:\n-        \"\"\"\n-        识别图片内容\n-        \n-        Args:\n-            image_path: 图片路径\n-            prompt: 提示词\n-            \n-        Returns:\n-            识别结果文本，如果失败则返回None\n-        \"\"\"\n-        try:\n-            # 检查图片是否存在\n-            if not os.path.exists(image_path):\n-                print(f\"错误: 图片不存在: {image_path}\")\n-                return None\n-                \n-            # 构建图片URL（本地文件路径或HTTP URL）\n-            if image_path.startswith(('http://', 'https://')):\n-                image_url = image_path\n-            else:\n-                # 如果是本地文件，需要确保它是可访问的URL\n-                # 这里假设图片已经被放置在可通过HTTP访问的位置\n-                # 实际应用中可能需要上传图片到临时存储服务\n-                image_url = f\"file://{os.path.abspath(image_path)}\"\n-            \n-            # 创建HTTP连接\n-            conn = http.client.HTTPSConnection(self.base_url)\n-            \n-            # 构建请求负载\n-            payload = json.dumps({\n-                \"model\": self.model,\n-                \"stream\": False,\n-                \"messages\": [\n-                    {\n-                        \"role\": \"user\",\n-                        \"content\": [\n-                            {\n-                                \"type\": \"text\",\n-                                \"text\": prompt\n-                            },\n-                            {\n-                                \"type\": \"image_url\",\n-                                \"image_url\": {\n-                                    \"url\": image_url\n-                                }\n-                            }\n-                        ]\n-                    }\n-                ],\n-                \"max_tokens\": 400\n-            })\n-            \n-            # 设置请求头\n-            headers = {\n-                'Accept': 'application/json',\n-                'Authorization': f'Bearer {self.api_key}',\n-                'Content-Type': 'application/json'\n-            }\n-            \n-            # 发送请求\n-            conn.request(\"POST\", \"/v1/chat/completions\", payload, headers)\n-            \n-            # 获取响应\n-            res = conn.getresponse()\n-            data = res.read().decode(\"utf-8\")\n-            \n-            # 解析响应\n-            response = json.loads(data)\n-            \n-            # 提取回复内容\n-            if 'choices' in response and len(response['choices']) > 0:\n-                content = response['choices'][0]['message']['content']\n-                return content\n-            else:\n-                print(f\"错误: 无法从响应中提取内容: {response}\")\n-                return None\n-                \n-        except Exception as e:\n-            print(f\"图像识别过程中出错: {str(e)}\")\n-            traceback.print_exc()\n-            return None\n-\n-@ToolRegistry.register\n-class ImageRecognitionTool(BaseTool):\n-    \"\"\"图像识别工具，用于识别图片内容\"\"\"\n-    \n-    name = \"image_recognition\"\n-    description = \"使用大模型识别图片内容\"\n-    input_schema = {\n-        \"type\": \"object\",\n-        \"required\": [\"image_path\"],\n-        \"properties\": {\n-            \"image_path\": {\n-                \"type\": \"string\",\n-                \"description\": \"图片的本地路径或URL\",\n-            },\n-            \"prompt\": {\n-                \"type\": \"string\",\n-                \"description\": \"提示词，指导模型如何描述图片\",\n-            },\n-            \"base_url\": {\n-                \"type\": \"string\",\n-                \"description\": \"API基础URL，不包含路径\",\n-            },\n-            \"api_key\": {\n-                \"type\": \"string\",\n-                \"description\": \"API密钥\",\n-            },\n-            \"model\": {\n-                \"type\": \"string\",\n-                \"description\": \"使用的模型名称\",\n-            }\n-        },\n-    }\n-    \n-    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"\n-        执行图像识别\n-        \n-        Args:\n-            arguments: 参数字典，必须包含'image_path'键\n-            \n-        Returns:\n-            识别结果\n-        \"\"\"\n-        # 参数验证\n-        if \"image_path\" not in arguments:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=\"错误: 缺少必要参数 'image_path'\"\n-            )]\n-        \n-        # 获取参数\n-        image_path = arguments[\"image_path\"]\n-        prompt = arguments.get(\"prompt\", \"请详细描述这张图片的内容\")\n-        base_url = arguments.get(\"base_url\", os.environ.get(\"LLM_API_BASE_URL\", \"api.openai.com\"))\n-        api_key = arguments.get(\"api_key\", os.environ.get(\"LLM_API_KEY\", \"\"))\n-        model = arguments.get(\"model\", os.environ.get(\"LLM_MODEL\", \"gpt-4o\"))\n-        \n-        # 验证API密钥\n-        if not api_key:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=\"错误: 缺少API密钥，请通过参数提供或设置环境变量 LLM_API_KEY\"\n-            )]\n-        \n-        # 创建图像识别器\n-        recognizer = ImageRecognizer(base_url, api_key, model)\n-        \n-        # 执行图像识别\n-        result = await recognizer.recognize_image(image_path, prompt)\n-        \n-        if result:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=f\"# 图像识别结果\\n\\n{result}\"\n-            )]\n-        else:\n-            return [types.TextContent(\n-                type=\"text\",\n-                text=\"图像识别失败，请检查图片路径和API配置。\"\n-            )] \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
                }
            ],
            "date": 1741494708460,
            "name": "Commit-0",
            "content": "\"\"\"\n大模型图像识别工具，用于与大模型API交互进行图像识别\n\"\"\"\n\nimport os\nimport json\nimport http.client\nimport traceback\nfrom typing import Dict, List, Any, Optional\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\nclass ImageRecognizer:\n    \"\"\"图像识别器，负责与大模型API交互进行图像识别\"\"\"\n    \n    def __init__(self, base_url: str, api_key: str, model: str = \"gpt-4o\"):\n        \"\"\"\n        初始化图像识别器\n        \n        Args:\n            base_url: API基础URL，不包含路径\n            api_key: API密钥\n            model: 使用的模型名称\n        \"\"\"\n        self.base_url = base_url\n        self.api_key = api_key\n        self.model = model\n    \n    async def recognize_image(self, image_path: str, prompt: str = \"请详细描述这张图片的内容\") -> Optional[str]:\n        \"\"\"\n        识别图片内容\n        \n        Args:\n            image_path: 图片路径\n            prompt: 提示词\n            \n        Returns:\n            识别结果文本，如果失败则返回None\n        \"\"\"\n        try:\n            # 检查图片是否存在\n            if not os.path.exists(image_path):\n                print(f\"错误: 图片不存在: {image_path}\")\n                return None\n                \n            # 构建图片URL（本地文件路径或HTTP URL）\n            if image_path.startswith(('http://', 'https://')):\n                image_url = image_path\n            else:\n                # 如果是本地文件，需要确保它是可访问的URL\n                # 这里假设图片已经被放置在可通过HTTP访问的位置\n                # 实际应用中可能需要上传图片到临时存储服务\n                image_url = f\"file://{os.path.abspath(image_path)}\"\n            \n            # 创建HTTP连接\n            conn = http.client.HTTPSConnection(self.base_url)\n            \n            # 构建请求负载\n            payload = json.dumps({\n                \"model\": self.model,\n                \"stream\": False,\n                \"messages\": [\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\n                                \"type\": \"text\",\n                                \"text\": prompt\n                            },\n                            {\n                                \"type\": \"image_url\",\n                                \"image_url\": {\n                                    \"url\": image_url\n                                }\n                            }\n                        ]\n                    }\n                ],\n                \"max_tokens\": 400\n            })\n            \n            # 设置请求头\n            headers = {\n                'Accept': 'application/json',\n                'Authorization': f'Bearer {self.api_key}',\n                'Content-Type': 'application/json'\n            }\n            \n            # 发送请求\n            conn.request(\"POST\", \"/v1/chat/completions\", payload, headers)\n            \n            # 获取响应\n            res = conn.getresponse()\n            data = res.read().decode(\"utf-8\")\n            \n            # 解析响应\n            response = json.loads(data)\n            \n            # 提取回复内容\n            if 'choices' in response and len(response['choices']) > 0:\n                content = response['choices'][0]['message']['content']\n                return content\n            else:\n                print(f\"错误: 无法从响应中提取内容: {response}\")\n                return None\n                \n        except Exception as e:\n            print(f\"图像识别过程中出错: {str(e)}\")\n            traceback.print_exc()\n            return None\n\n@ToolRegistry.register\nclass ImageRecognitionTool(BaseTool):\n    \"\"\"图像识别工具，用于识别图片内容\"\"\"\n    \n    name = \"image_recognition\"\n    description = \"使用大模型识别图片内容\"\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"image_path\"],\n        \"properties\": {\n            \"image_path\": {\n                \"type\": \"string\",\n                \"description\": \"图片的本地路径或URL\",\n            },\n            \"prompt\": {\n                \"type\": \"string\",\n                \"description\": \"提示词，指导模型如何描述图片\",\n            },\n            \"base_url\": {\n                \"type\": \"string\",\n                \"description\": \"API基础URL，不包含路径\",\n            },\n            \"api_key\": {\n                \"type\": \"string\",\n                \"description\": \"API密钥\",\n            },\n            \"model\": {\n                \"type\": \"string\",\n                \"description\": \"使用的模型名称\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        执行图像识别\n        \n        Args:\n            arguments: 参数字典，必须包含'image_path'键\n            \n        Returns:\n            识别结果\n        \"\"\"\n        # 参数验证\n        if \"image_path\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少必要参数 'image_path'\"\n            )]\n        \n        # 获取参数\n        image_path = arguments[\"image_path\"]\n        prompt = arguments.get(\"prompt\", \"请详细描述这张图片的内容\")\n        base_url = arguments.get(\"base_url\", os.environ.get(\"LLM_API_BASE_URL\", \"api.openai.com\"))\n        api_key = arguments.get(\"api_key\", os.environ.get(\"LLM_API_KEY\", \"\"))\n        model = arguments.get(\"model\", os.environ.get(\"LLM_MODEL\", \"gpt-4o\"))\n        \n        # 验证API密钥\n        if not api_key:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少API密钥，请通过参数提供或设置环境变量 LLM_API_KEY\"\n            )]\n        \n        # 创建图像识别器\n        recognizer = ImageRecognizer(base_url, api_key, model)\n        \n        # 执行图像识别\n        result = await recognizer.recognize_image(image_path, prompt)\n        \n        if result:\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"# 图像识别结果\\n\\n{result}\"\n            )]\n        else:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"图像识别失败，请检查图片路径和API配置。\"\n            )] "
        }
    ]
}