MCP Development Framework

{ "sourceFile": "mcp_tool/tools/image_recognition.py", "activeCommit": 0, "commits": [ { "activePatchIndex": 4, "patches": [ { "date": 1741521023660, "content": "Index: \n===================================================================\n--- \n+++ \n" }, { "date": 1741521301820, "content": "Index: \n===================================================================\n--- \n+++ \n@@ -13,8 +13,14 @@\n # 配置日志\n logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n logger = logging.getLogger(__name__)\n \n+# 路径映射配置\n+# 本地路径到API服务路径的映射\n+PATH_MAPPING = {\n+ '/host_images': os.environ.get('API_IMAGE_PATH', '/host_images')\n+}\n+\n class ImageRecognitionClient:\n \"\"\"\n 图像识别客户端,用于调用外部大模型API进行图像识别\n \"\"\"\n@@ -59,8 +65,11 @@\n if not self.api_key:\n logger.error(\"API密钥未设置,无法进行图像识别\")\n return None\n \n+ # 映射路径,处理不同容器间的路径差异\n+ mapped_path = self._map_path(image_path)\n+ \n if not os.path.exists(image_path):\n logger.error(f\"图像文件不存在: {image_path}\")\n return None\n \n@@ -80,9 +89,9 @@\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n- \"url\": self._get_image_url(image_path)\n+ \"url\": self._get_image_url(mapped_path)\n }\n }\n ]\n }\n@@ -128,8 +137,32 @@\n except Exception as e:\n logger.error(f\"图像识别过程中发生错误: {str(e)}\")\n return None\n \n+ def _map_path(self, image_path: str) -> str:\n+ \"\"\"\n+ 映射路径,处理不同容器间的路径差异\n+ \n+ Args:\n+ image_path: 原始图像路径\n+ \n+ Returns:\n+ 映射后的路径\n+ \"\"\"\n+ # 如果是URL,不需要映射\n+ if image_path.startswith(('http://', 'https://')):\n+ return image_path\n+ \n+ # 检查是否需要路径映射\n+ for local_path, api_path in PATH_MAPPING.items():\n+ if image_path.startswith(local_path):\n+ mapped_path = image_path.replace(local_path, api_path)\n+ logger.info(f\"路径映射: {image_path} -> {mapped_path}\")\n+ return mapped_path\n+ \n+ # 如果没有匹配的映射,返回原路径\n+ return image_path\n+ \n def _get_image_url(self, image_path: str) -> str:\n \"\"\"\n 获取图像URL,如果是本地文件则转换为base64编码\n \n" }, { "date": 1741522010479, "content": "Index: \n===================================================================\n--- \n+++ \n@@ -6,8 +6,10 @@\n import http.client\n import json\n import base64\n import logging\n+import ssl\n+import time\n from typing import Optional, Dict, Any, Union\n from urllib.parse import urlparse\n \n # 配置日志\n@@ -19,36 +21,53 @@\n PATH_MAPPING = {\n '/host_images': os.environ.get('API_IMAGE_PATH', '/host_images')\n }\n \n+# 超时设置\n+DEFAULT_TIMEOUT = int(os.environ.get('LLM_API_TIMEOUT', '60')) # 默认60秒超时\n+MAX_RETRIES = int(os.environ.get('LLM_API_MAX_RETRIES', '2')) # 默认最多重试2次\n+\n class ImageRecognitionClient:\n \"\"\"\n 图像识别客户端,用于调用外部大模型API进行图像识别\n \"\"\"\n \n def __init__(self, \n base_url: str = None, \n api_key: str = None, \n- model: str = None):\n+ model: str = None,\n+ timeout: int = None,\n+ max_retries: int = None):\n \"\"\"\n 初始化图像识别客户端\n \n Args:\n base_url: API基础URL,如果为None则从环境变量获取\n api_key: API密钥,如果为None则从环境变量获取\n model: 使用的模型名称,如果为None则从环境变量获取\n+ timeout: 请求超时时间(秒),如果为None则从环境变量获取\n+ max_retries: 最大重试次数,如果为None则从环境变量获取\n \"\"\"\n # 从环境变量或参数获取配置\n self.base_url = base_url or os.environ.get('LLM_API_BASE_URL', 'api.openai.com')\n self.api_key = api_key or os.environ.get('LLM_API_KEY', '')\n self.model = model or os.environ.get('LLM_MODEL', 'gpt-4o')\n self.max_tokens = int(os.environ.get('LLM_MAX_TOKENS', '400'))\n+ self.timeout = timeout or DEFAULT_TIMEOUT\n+ self.max_retries = max_retries or MAX_RETRIES\n \n # 解析base_url,分离主机名和路径\n- parsed_url = urlparse(f\"https://{self.base_url}\" if not self.base_url.startswith(('http://', 'https://')) else self.base_url)\n+ parsed_url = urlparse(self.base_url)\n+ # 如果没有协议,默认使用https\n+ if not parsed_url.scheme:\n+ parsed_url = urlparse(f\"https://{self.base_url}\")\n+ \n self.host = parsed_url.netloc\n self.base_path = parsed_url.path.rstrip('/')\n+ self.use_https = parsed_url.scheme == 'https'\n \n+ logger.info(f\"API配置: 主机={self.host}, 路径={self.base_path}, HTTPS={self.use_https}, 超时={self.timeout}秒, 最大重试次数={self.max_retries}\")\n+ \n # 验证配置\n if not self.api_key:\n logger.warning(\"API密钥未设置,图像识别功能将不可用\")\n \n@@ -72,72 +91,114 @@\n if not os.path.exists(image_path):\n logger.error(f\"图像文件不存在: {image_path}\")\n return None\n \n- try:\n- # 检查是否是图像文件\n- if not image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')):\n- logger.warning(f\"文件可能不是图像: {image_path}\")\n- \n- # 准备请求数据\n- messages = [\n- {\n- \"role\": \"user\",\n- \"content\": [\n- {\n- \"type\": \"text\",\n- \"text\": \"请详细描述这张图片中的内容,包括图片中的文字、图形、颜色等元素。\"\n- },\n- {\n- \"type\": \"image_url\",\n- \"image_url\": {\n- \"url\": self._get_image_url(mapped_path)\n+ # 重试机制\n+ retries = 0\n+ last_error = None\n+ \n+ while retries <= self.max_retries:\n+ try:\n+ # 检查是否是图像文件\n+ if not image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')):\n+ logger.warning(f\"文件可能不是图像: {image_path}\")\n+ \n+ # 准备请求数据\n+ messages = [\n+ {\n+ \"role\": \"user\",\n+ \"content\": [\n+ {\n+ \"type\": \"text\",\n+ \"text\": \"请详细描述这张图片中的内容,包括图片中的文字、图形、颜色等元素。\"\n+ },\n+ {\n+ \"type\": \"image_url\",\n+ \"image_url\": {\n+ \"url\": self._get_image_url(mapped_path)\n+ }\n }\n- }\n- ]\n+ ]\n+ }\n+ ]\n+ \n+ payload = json.dumps({\n+ \"model\": self.model,\n+ \"stream\": False,\n+ \"messages\": messages,\n+ \"max_tokens\": self.max_tokens\n+ })\n+ \n+ headers = {\n+ 'Accept': 'application/json',\n+ 'Authorization': f'Bearer {self.api_key}',\n+ 'Content-Type': 'application/json'\n }\n- ]\n- \n- payload = json.dumps({\n- \"model\": self.model,\n- \"stream\": False,\n- \"messages\": messages,\n- \"max_tokens\": self.max_tokens\n- })\n- \n- headers = {\n- 'Accept': 'application/json',\n- 'Authorization': f'Bearer {self.api_key}',\n- 'Content-Type': 'application/json'\n- }\n- \n- # 建立连接并发送请求\n- conn = http.client.HTTPSConnection(self.host)\n- conn.request(\"POST\", f\"{self.base_path}/v1/chat/completions\", payload, headers)\n- \n- # 获取响应\n- response = conn.getresponse()\n- data = response.read().decode(\"utf-8\")\n- conn.close()\n- \n- # 解析响应\n- response_data = json.loads(data)\n- \n- if 'error' in response_data:\n- logger.error(f\"API错误: {response_data['error']['message']}\")\n+ \n+ # 建立连接并发送请求\n+ if self.use_https:\n+ # 创建SSL上下文\n+ context = ssl.create_default_context()\n+ conn = http.client.HTTPSConnection(self.host, context=context, timeout=self.timeout)\n+ else:\n+ conn = http.client.HTTPConnection(self.host, timeout=self.timeout)\n+ \n+ logger.info(f\"发送请求到: {self.host}{self.base_path}/chat/completions (尝试 {retries+1}/{self.max_retries+1})\")\n+ conn.request(\"POST\", f\"{self.base_path}/chat/completions\", payload, headers)\n+ \n+ # 获取响应\n+ response = conn.getresponse()\n+ data = response.read().decode(\"utf-8\")\n+ conn.close()\n+ \n+ logger.info(f\"API响应状态码: {response.status}\")\n+ \n+ # 解析响应\n+ response_data = json.loads(data)\n+ \n+ if 'error' in response_data:\n+ error_msg = response_data['error']['message'] if 'message' in response_data['error'] else response_data['error']\n+ logger.error(f\"API错误: {error_msg}\")\n+ last_error = error_msg\n+ retries += 1\n+ if retries <= self.max_retries:\n+ # 指数退避重试\n+ wait_time = 2 ** retries\n+ logger.info(f\"等待 {wait_time} 秒后重试...\")\n+ time.sleep(wait_time)\n+ continue\n+ return None\n+ \n+ # 提取识别结果\n+ if 'choices' in response_data and len(response_data['choices']) > 0:\n+ result = response_data['choices'][0]['message']['content']\n+ return result\n+ else:\n+ logger.error(f\"API响应中未找到识别结果: {response_data}\")\n+ last_error = \"API响应中未找到识别结果\"\n+ retries += 1\n+ if retries <= self.max_retries:\n+ wait_time = 2 ** retries\n+ logger.info(f\"等待 {wait_time} 秒后重试...\")\n+ time.sleep(wait_time)\n+ continue\n+ return None\n+ \n+ except Exception as e:\n+ logger.error(f\"图像识别过程中发生错误: {str(e)}\")\n+ import traceback\n+ logger.error(traceback.format_exc())\n+ last_error = str(e)\n+ retries += 1\n+ if retries <= self.max_retries:\n+ wait_time = 2 ** retries\n+ logger.info(f\"等待 {wait_time} 秒后重试...\")\n+ time.sleep(wait_time)\n+ continue\n return None\n- \n- # 提取识别结果\n- if 'choices' in response_data and len(response_data['choices']) > 0:\n- result = response_data['choices'][0]['message']['content']\n- return result\n- else:\n- logger.error(\"API响应中未找到识别结果\")\n- return None\n- \n- except Exception as e:\n- logger.error(f\"图像识别过程中发生错误: {str(e)}\")\n- return None\n+ \n+ logger.error(f\"图像识别失败,已达到最大重试次数 ({self.max_retries})。最后错误: {last_error}\")\n+ return None\n \n def _map_path(self, image_path: str) -> str:\n \"\"\"\n 映射路径,处理不同容器间的路径差异\n" }, { "date": 1741522919675, "content": "Index: \n===================================================================\n--- \n+++ \n@@ -25,8 +25,14 @@\n # 超时设置\n DEFAULT_TIMEOUT = int(os.environ.get('LLM_API_TIMEOUT', '60')) # 默认60秒超时\n MAX_RETRIES = int(os.environ.get('LLM_API_MAX_RETRIES', '2')) # 默认最多重试2次\n \n+# 打印当前环境变量值,用于调试\n+logger.info(f\"环境变量中的超时设置: LLM_API_TIMEOUT={os.environ.get('LLM_API_TIMEOUT', '未设置')}\")\n+logger.info(f\"环境变量中的重试设置: LLM_API_MAX_RETRIES={os.environ.get('LLM_API_MAX_RETRIES', '未设置')}\")\n+logger.info(f\"实际使用的超时设置: DEFAULT_TIMEOUT={DEFAULT_TIMEOUT}\")\n+logger.info(f\"实际使用的重试设置: MAX_RETRIES={MAX_RETRIES}\")\n+\n class ImageRecognitionClient:\n \"\"\"\n 图像识别客户端,用于调用外部大模型API进行图像识别\n \"\"\"\n@@ -51,11 +57,39 @@\n self.base_url = base_url or os.environ.get('LLM_API_BASE_URL', 'api.openai.com')\n self.api_key = api_key or os.environ.get('LLM_API_KEY', '')\n self.model = model or os.environ.get('LLM_MODEL', 'gpt-4o')\n self.max_tokens = int(os.environ.get('LLM_MAX_TOKENS', '400'))\n- self.timeout = timeout or DEFAULT_TIMEOUT\n- self.max_retries = max_retries or MAX_RETRIES\n \n+ # 确保正确读取超时和重试设置\n+ timeout_str = os.environ.get('LLM_API_TIMEOUT')\n+ max_retries_str = os.environ.get('LLM_API_MAX_RETRIES')\n+ \n+ # 如果参数提供了值,优先使用参数值\n+ if timeout is not None:\n+ self.timeout = timeout\n+ # 否则尝试从环境变量获取\n+ elif timeout_str is not None:\n+ try:\n+ self.timeout = int(timeout_str)\n+ except ValueError:\n+ logger.warning(f\"环境变量LLM_API_TIMEOUT值无效: {timeout_str},使用默认值: {DEFAULT_TIMEOUT}\")\n+ self.timeout = DEFAULT_TIMEOUT\n+ # 如果都没有,使用默认值\n+ else:\n+ self.timeout = DEFAULT_TIMEOUT\n+ \n+ # 同样处理重试次数\n+ if max_retries is not None:\n+ self.max_retries = max_retries\n+ elif max_retries_str is not None:\n+ try:\n+ self.max_retries = int(max_retries_str)\n+ except ValueError:\n+ logger.warning(f\"环境变量LLM_API_MAX_RETRIES值无效: {max_retries_str},使用默认值: {MAX_RETRIES}\")\n+ self.max_retries = MAX_RETRIES\n+ else:\n+ self.max_retries = MAX_RETRIES\n+ \n # 解析base_url,分离主机名和路径\n parsed_url = urlparse(self.base_url)\n # 如果没有协议,默认使用https\n if not parsed_url.scheme:\n" }, { "date": 1741523194318, "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,321 +1,1 @@\n-\"\"\"\n-图像识别内部接口模块,用于调用外部大模型进行图像识别\n-\"\"\"\n-\n-import os\n-import http.client\n-import json\n-import base64\n-import logging\n-import ssl\n-import time\n-from typing import Optional, Dict, Any, Union\n-from urllib.parse import urlparse\n-\n-# 配置日志\n-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')\n-logger = logging.getLogger(__name__)\n-\n-# 路径映射配置\n-# 本地路径到API服务路径的映射\n-PATH_MAPPING = {\n- '/host_images': os.environ.get('API_IMAGE_PATH', '/host_images')\n-}\n-\n-# 超时设置\n-DEFAULT_TIMEOUT = int(os.environ.get('LLM_API_TIMEOUT', '60')) # 默认60秒超时\n-MAX_RETRIES = int(os.environ.get('LLM_API_MAX_RETRIES', '2')) # 默认最多重试2次\n-\n-# 打印当前环境变量值,用于调试\n-logger.info(f\"环境变量中的超时设置: LLM_API_TIMEOUT={os.environ.get('LLM_API_TIMEOUT', '未设置')}\")\n-logger.info(f\"环境变量中的重试设置: LLM_API_MAX_RETRIES={os.environ.get('LLM_API_MAX_RETRIES', '未设置')}\")\n-logger.info(f\"实际使用的超时设置: DEFAULT_TIMEOUT={DEFAULT_TIMEOUT}\")\n-logger.info(f\"实际使用的重试设置: MAX_RETRIES={MAX_RETRIES}\")\n-\n-class ImageRecognitionClient:\n- \"\"\"\n- 图像识别客户端,用于调用外部大模型API进行图像识别\n- \"\"\"\n- \n- def __init__(self, \n- base_url: str = None, \n- api_key: str = None, \n- model: str = None,\n- timeout: int = None,\n- max_retries: int = None):\n- \"\"\"\n- 初始化图像识别客户端\n- \n- Args:\n- base_url: API基础URL,如果为None则从环境变量获取\n- api_key: API密钥,如果为None则从环境变量获取\n- model: 使用的模型名称,如果为None则从环境变量获取\n- timeout: 请求超时时间(秒),如果为None则从环境变量获取\n- max_retries: 最大重试次数,如果为None则从环境变量获取\n- \"\"\"\n- # 从环境变量或参数获取配置\n- self.base_url = base_url or os.environ.get('LLM_API_BASE_URL', 'api.openai.com')\n- self.api_key = api_key or os.environ.get('LLM_API_KEY', '')\n- self.model = model or os.environ.get('LLM_MODEL', 'gpt-4o')\n- self.max_tokens = int(os.environ.get('LLM_MAX_TOKENS', '400'))\n- \n- # 确保正确读取超时和重试设置\n- timeout_str = os.environ.get('LLM_API_TIMEOUT')\n- max_retries_str = os.environ.get('LLM_API_MAX_RETRIES')\n- \n- # 如果参数提供了值,优先使用参数值\n- if timeout is not None:\n- self.timeout = timeout\n- # 否则尝试从环境变量获取\n- elif timeout_str is not None:\n- try:\n- self.timeout = int(timeout_str)\n- except ValueError:\n- logger.warning(f\"环境变量LLM_API_TIMEOUT值无效: {timeout_str},使用默认值: {DEFAULT_TIMEOUT}\")\n- self.timeout = DEFAULT_TIMEOUT\n- # 如果都没有,使用默认值\n- else:\n- self.timeout = DEFAULT_TIMEOUT\n- \n- # 同样处理重试次数\n- if max_retries is not None:\n- self.max_retries = max_retries\n- elif max_retries_str is not None:\n- try:\n- self.max_retries = int(max_retries_str)\n- except ValueError:\n- logger.warning(f\"环境变量LLM_API_MAX_RETRIES值无效: {max_retries_str},使用默认值: {MAX_RETRIES}\")\n- self.max_retries = MAX_RETRIES\n- else:\n- self.max_retries = MAX_RETRIES\n- \n- # 解析base_url,分离主机名和路径\n- parsed_url = urlparse(self.base_url)\n- # 如果没有协议,默认使用https\n- if not parsed_url.scheme:\n- parsed_url = urlparse(f\"https://{self.base_url}\")\n- \n- self.host = parsed_url.netloc\n- self.base_path = parsed_url.path.rstrip('/')\n- self.use_https = parsed_url.scheme == 'https'\n- \n- logger.info(f\"API配置: 主机={self.host}, 路径={self.base_path}, HTTPS={self.use_https}, 超时={self.timeout}秒, 最大重试次数={self.max_retries}\")\n- \n- # 验证配置\n- if not self.api_key:\n- logger.warning(\"API密钥未设置,图像识别功能将不可用\")\n- \n- def recognize_image(self, image_path: str) -> Optional[str]:\n- \"\"\"\n- 识别图像内容\n- \n- Args:\n- image_path: 图像文件路径\n- \n- Returns:\n- 识别结果文本,如果识别失败则返回None\n- \"\"\"\n- if not self.api_key:\n- logger.error(\"API密钥未设置,无法进行图像识别\")\n- return None\n- \n- # 映射路径,处理不同容器间的路径差异\n- mapped_path = self._map_path(image_path)\n- \n- if not os.path.exists(image_path):\n- logger.error(f\"图像文件不存在: {image_path}\")\n- return None\n- \n- # 重试机制\n- retries = 0\n- last_error = None\n- \n- while retries <= self.max_retries:\n- try:\n- # 检查是否是图像文件\n- if not image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')):\n- logger.warning(f\"文件可能不是图像: {image_path}\")\n- \n- # 准备请求数据\n- messages = [\n- {\n- \"role\": \"user\",\n- \"content\": [\n- {\n- \"type\": \"text\",\n- \"text\": \"请详细描述这张图片中的内容,包括图片中的文字、图形、颜色等元素。\"\n- },\n- {\n- \"type\": \"image_url\",\n- \"image_url\": {\n- \"url\": self._get_image_url(mapped_path)\n- }\n- }\n- ]\n- }\n- ]\n- \n- payload = json.dumps({\n- \"model\": self.model,\n- \"stream\": False,\n- \"messages\": messages,\n- \"max_tokens\": self.max_tokens\n- })\n- \n- headers = {\n- 'Accept': 'application/json',\n- 'Authorization': f'Bearer {self.api_key}',\n- 'Content-Type': 'application/json'\n- }\n- \n- # 建立连接并发送请求\n- if self.use_https:\n- # 创建SSL上下文\n- context = ssl.create_default_context()\n- conn = http.client.HTTPSConnection(self.host, context=context, timeout=self.timeout)\n- else:\n- conn = http.client.HTTPConnection(self.host, timeout=self.timeout)\n- \n- logger.info(f\"发送请求到: {self.host}{self.base_path}/chat/completions (尝试 {retries+1}/{self.max_retries+1})\")\n- conn.request(\"POST\", f\"{self.base_path}/chat/completions\", payload, headers)\n- \n- # 获取响应\n- response = conn.getresponse()\n- data = response.read().decode(\"utf-8\")\n- conn.close()\n- \n- logger.info(f\"API响应状态码: {response.status}\")\n- \n- # 解析响应\n- response_data = json.loads(data)\n- \n- if 'error' in response_data:\n- error_msg = response_data['error']['message'] if 'message' in response_data['error'] else response_data['error']\n- logger.error(f\"API错误: {error_msg}\")\n- last_error = error_msg\n- retries += 1\n- if retries <= self.max_retries:\n- # 指数退避重试\n- wait_time = 2 ** retries\n- logger.info(f\"等待 {wait_time} 秒后重试...\")\n- time.sleep(wait_time)\n- continue\n- return None\n- \n- # 提取识别结果\n- if 'choices' in response_data and len(response_data['choices']) > 0:\n- result = response_data['choices'][0]['message']['content']\n- return result\n- else:\n- logger.error(f\"API响应中未找到识别结果: {response_data}\")\n- last_error = \"API响应中未找到识别结果\"\n- retries += 1\n- if retries <= self.max_retries:\n- wait_time = 2 ** retries\n- logger.info(f\"等待 {wait_time} 秒后重试...\")\n- time.sleep(wait_time)\n- continue\n- return None\n- \n- except Exception as e:\n- logger.error(f\"图像识别过程中发生错误: {str(e)}\")\n- import traceback\n- logger.error(traceback.format_exc())\n- last_error = str(e)\n- retries += 1\n- if retries <= self.max_retries:\n- wait_time = 2 ** retries\n- logger.info(f\"等待 {wait_time} 秒后重试...\")\n- time.sleep(wait_time)\n- continue\n- return None\n- \n- logger.error(f\"图像识别失败,已达到最大重试次数 ({self.max_retries})。最后错误: {last_error}\")\n- return None\n- \n- def _map_path(self, image_path: str) -> str:\n- \"\"\"\n- 映射路径,处理不同容器间的路径差异\n- \n- Args:\n- image_path: 原始图像路径\n- \n- Returns:\n- 映射后的路径\n- \"\"\"\n- # 如果是URL,不需要映射\n- if image_path.startswith(('http://', 'https://')):\n- return image_path\n- \n- # 检查是否需要路径映射\n- for local_path, api_path in PATH_MAPPING.items():\n- if image_path.startswith(local_path):\n- mapped_path = image_path.replace(local_path, api_path)\n- logger.info(f\"路径映射: {image_path} -> {mapped_path}\")\n- return mapped_path\n- \n- # 如果没有匹配的映射,返回原路径\n- return image_path\n- \n- def _get_image_url(self, image_path: str) -> str:\n- \"\"\"\n- 获取图像URL,如果是本地文件则转换为base64编码\n- \n- Args:\n- image_path: 图像文件路径\n- \n- Returns:\n- 图像URL或base64编码的数据URL\n- \"\"\"\n- # 如果是URL则直接返回\n- if image_path.startswith(('http://', 'https://')):\n- return image_path\n- \n- # 否则将本地文件转换为base64编码的数据URL\n- try:\n- with open(image_path, \"rb\") as image_file:\n- encoded_string = base64.b64encode(image_file.read()).decode('utf-8')\n- \n- # 确定MIME类型\n- mime_type = self._get_mime_type(image_path)\n- return f\"data:{mime_type};base64,{encoded_string}\"\n- except Exception as e:\n- logger.error(f\"图像文件读取或编码失败: {str(e)}\")\n- raise\n- \n- def _get_mime_type(self, file_path: str) -> str:\n- \"\"\"\n- 根据文件扩展名获取MIME类型\n- \n- Args:\n- file_path: 文件路径\n- \n- Returns:\n- MIME类型字符串\n- \"\"\"\n- extension = os.path.splitext(file_path)[1].lower()\n- mime_types = {\n- '.jpg': 'image/jpeg',\n- '.jpeg': 'image/jpeg',\n- '.png': 'image/png',\n- '.gif': 'image/gif',\n- '.bmp': 'image/bmp',\n- '.webp': 'image/webp'\n- }\n- return mime_types.get(extension, 'application/octet-stream')\n-\n-\n-# 创建默认客户端实例,方便直接导入使用\n-default_client = ImageRecognitionClient()\n-\n-def recognize_image(image_path: str) -> Optional[str]:\n- \"\"\"\n- 使用默认客户端识别图像内容\n- \n- Args:\n- image_path: 图像文件路径\n- \n- Returns:\n- 识别结果文本,如果识别失败则返回None\n- \"\"\"\n- return default_client.recognize_image(image_path) \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n" } ], "date": 1741521023660, "name": "Commit-0", "content": "\"\"\"\n图像识别内部接口模块,用于调用外部大模型进行图像识别\n\"\"\"\n\nimport os\nimport http.client\nimport json\nimport base64\nimport logging\nfrom typing import Optional, Dict, Any, Union\nfrom urllib.parse import urlparse\n\n# 配置日志\nlogging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')\nlogger = logging.getLogger(__name__)\n\nclass ImageRecognitionClient:\n \"\"\"\n 图像识别客户端,用于调用外部大模型API进行图像识别\n \"\"\"\n \n def __init__(self, \n base_url: str = None, \n api_key: str = None, \n model: str = None):\n \"\"\"\n 初始化图像识别客户端\n \n Args:\n base_url: API基础URL,如果为None则从环境变量获取\n api_key: API密钥,如果为None则从环境变量获取\n model: 使用的模型名称,如果为None则从环境变量获取\n \"\"\"\n # 从环境变量或参数获取配置\n self.base_url = base_url or os.environ.get('LLM_API_BASE_URL', 'api.openai.com')\n self.api_key = api_key or os.environ.get('LLM_API_KEY', '')\n self.model = model or os.environ.get('LLM_MODEL', 'gpt-4o')\n self.max_tokens = int(os.environ.get('LLM_MAX_TOKENS', '400'))\n \n # 解析base_url,分离主机名和路径\n parsed_url = urlparse(f\"https://{self.base_url}\" if not self.base_url.startswith(('http://', 'https://')) else self.base_url)\n self.host = parsed_url.netloc\n self.base_path = parsed_url.path.rstrip('/')\n \n # 验证配置\n if not self.api_key:\n logger.warning(\"API密钥未设置,图像识别功能将不可用\")\n \n def recognize_image(self, image_path: str) -> Optional[str]:\n \"\"\"\n 识别图像内容\n \n Args:\n image_path: 图像文件路径\n \n Returns:\n 识别结果文本,如果识别失败则返回None\n \"\"\"\n if not self.api_key:\n logger.error(\"API密钥未设置,无法进行图像识别\")\n return None\n \n if not os.path.exists(image_path):\n logger.error(f\"图像文件不存在: {image_path}\")\n return None\n \n try:\n # 检查是否是图像文件\n if not image_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')):\n logger.warning(f\"文件可能不是图像: {image_path}\")\n \n # 准备请求数据\n messages = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"请详细描述这张图片中的内容,包括图片中的文字、图形、颜色等元素。\"\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": self._get_image_url(image_path)\n }\n }\n ]\n }\n ]\n \n payload = json.dumps({\n \"model\": self.model,\n \"stream\": False,\n \"messages\": messages,\n \"max_tokens\": self.max_tokens\n })\n \n headers = {\n 'Accept': 'application/json',\n 'Authorization': f'Bearer {self.api_key}',\n 'Content-Type': 'application/json'\n }\n \n # 建立连接并发送请求\n conn = http.client.HTTPSConnection(self.host)\n conn.request(\"POST\", f\"{self.base_path}/v1/chat/completions\", payload, headers)\n \n # 获取响应\n response = conn.getresponse()\n data = response.read().decode(\"utf-8\")\n conn.close()\n \n # 解析响应\n response_data = json.loads(data)\n \n if 'error' in response_data:\n logger.error(f\"API错误: {response_data['error']['message']}\")\n return None\n \n # 提取识别结果\n if 'choices' in response_data and len(response_data['choices']) > 0:\n result = response_data['choices'][0]['message']['content']\n return result\n else:\n logger.error(\"API响应中未找到识别结果\")\n return None\n \n except Exception as e:\n logger.error(f\"图像识别过程中发生错误: {str(e)}\")\n return None\n \n def _get_image_url(self, image_path: str) -> str:\n \"\"\"\n 获取图像URL,如果是本地文件则转换为base64编码\n \n Args:\n image_path: 图像文件路径\n \n Returns:\n 图像URL或base64编码的数据URL\n \"\"\"\n # 如果是URL则直接返回\n if image_path.startswith(('http://', 'https://')):\n return image_path\n \n # 否则将本地文件转换为base64编码的数据URL\n try:\n with open(image_path, \"rb\") as image_file:\n encoded_string = base64.b64encode(image_file.read()).decode('utf-8')\n \n # 确定MIME类型\n mime_type = self._get_mime_type(image_path)\n return f\"data:{mime_type};base64,{encoded_string}\"\n except Exception as e:\n logger.error(f\"图像文件读取或编码失败: {str(e)}\")\n raise\n \n def _get_mime_type(self, file_path: str) -> str:\n \"\"\"\n 根据文件扩展名获取MIME类型\n \n Args:\n file_path: 文件路径\n \n Returns:\n MIME类型字符串\n \"\"\"\n extension = os.path.splitext(file_path)[1].lower()\n mime_types = {\n '.jpg': 'image/jpeg',\n '.jpeg': 'image/jpeg',\n '.png': 'image/png',\n '.gif': 'image/gif',\n '.bmp': 'image/bmp',\n '.webp': 'image/webp'\n }\n return mime_types.get(extension, 'application/octet-stream')\n\n\n# 创建默认客户端实例,方便直接导入使用\ndefault_client = ImageRecognitionClient()\n\ndef recognize_image(image_path: str) -> Optional[str]:\n \"\"\"\n 使用默认客户端识别图像内容\n \n Args:\n image_path: 图像文件路径\n \n Returns:\n 识别结果文本,如果识别失败则返回None\n \"\"\"\n return default_client.recognize_image(image_path) " } ] }