Doubao Image Description MCP Server

server.py•16.1 KiB

#!/usr/bin/env python3
"""
Doubao image description MCP server.

Uses the vision understanding capability of the Doubao large model to
describe images supplied as local files, Base64 payloads, or URLs.
"""

import base64
import hashlib
import io
import json
import logging
import mimetypes
import os
import sys
import time
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse

# Log to stderr so that log output never interferes with the stdio transport.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stderr
)
logger = logging.getLogger(__name__)

try:
    from PIL import Image
    PIL_AVAILABLE = True
    logger.info("PIL/Pillow 已安装，图片优化功能可用")
except ImportError:
    PIL_AVAILABLE = False
    logger.warning("PIL/Pillow 未安装，图片优化功能不可用。安装: pip install Pillow")

# The API key is read from the environment.
# IMPORTANT: replace the placeholder with your own Doubao API key.
# Keys are issued at https://console.volcengine.com/
_PLACEHOLDER_API_KEY = "YOUR_DOUBAO_API_KEY_HERE"
API_KEY = os.environ.get("VOLCENGINE_API_KEY", _PLACEHOLDER_API_KEY)
MODEL_ID = os.environ.get("DOUBAO_MODEL_ID", "doubao-seed-1-6-251015")

# Prompt used when a caller does not supply one.
_DETAILED_PROMPT = "请详细描述这张图片的内容，包括主要物体、场景、颜色、布局等细节。"

# Every error string produced by the helpers/tools in this module starts with
# one of these prefixes; used to decide whether a result may be cached.
_ERROR_PREFIXES = (
    "错误:",
    "调用豆包 API 失败",
    "处理图片失败",
    "处理 URL 图片失败",
    "处理 Base64 图片失败",
)

try:
    from mcp.server.fastmcp import FastMCP
    from volcenginesdkarkruntime import Ark
    import httpx

    # Create the MCP server instance.
    mcp = FastMCP("doubao-image-describer")

    # File extensions accepted as images.
    SUPPORTED_IMAGE_FORMATS = {
        '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.tif',
        '.ico', '.svg', '.heic', '.heif', '.raw', '.cr2', '.nef', '.arw', '.dng'
    }

    # Create the Doubao client; on failure the tools report an error string.
    try:
        client = Ark(api_key=API_KEY)
        logger.info("豆包客户端初始化成功")
    except Exception as e:
        logger.error(f"豆包客户端初始化失败: {e}")
        client = None
except ImportError as e:
    logger.error(f"导入依赖失败: {e}")
    logger.error("请运行: pip install mcp[cli] volcengine-python-sdk[ark] httpx")
    raise


def check_image_format(file_path_or_url: str, is_url: bool = False) -> tuple[bool, str]:
    """Check whether the extension of a path or URL is a supported image format.

    Args:
        file_path_or_url: Local file path or URL.
        is_url: Whether the input is a URL. A URL whose path has no
            extension is accepted.

    Returns:
        (is_valid, error_message): validity flag and, when invalid, the
        error text (empty string when valid).
    """
    if is_url:
        # Only the URL *path* carries the extension. Looking at the whole URL
        # would treat the host's TLD (e.g. ".com") as a file extension, and
        # query strings / fragments (image.jpg?width=800) must be ignored.
        path_part = urlparse(file_path_or_url).path
    else:
        # Local paths are used verbatim: '#' is a legal filename character.
        path_part = file_path_or_url

    ext = Path(path_part).suffix.lower()

    # Some image URLs carry no extension at all; let those through.
    if is_url and not ext:
        return True, ""

    if not ext:
        return False, "错误: 无法识别文件扩展名。请提供有效的图片文件路径"

    if ext not in SUPPORTED_IMAGE_FORMATS:
        supported_list = ', '.join(sorted(SUPPORTED_IMAGE_FORMATS))
        return False, (
            f"错误: 不支持的文件格式 '{ext}'\n"
            f"支持的图片格式: {supported_list}\n"
            f"请确保输入的是图片文件（如 .jpg, .png, .gif 等）"
        )

    return True, ""


class ImageDescriptionCache:
    """On-disk cache of image descriptions, one JSON file per entry."""

    def __init__(self, cache_dir: str | None = None):
        """Create the cache directory (default: ~/.iflow/cache/doubao-image-mcp)."""
        if cache_dir is None:
            cache_dir = os.path.expanduser("~/.iflow/cache/doubao-image-mcp")
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"缓存目录: {self.cache_dir}")

    def _get_file_hash(self, file_path: str) -> str:
        """Return a short content hash of the file (detects file changes).

        Falls back to hashing the path string when the file cannot be read.
        """
        digest = hashlib.md5()
        try:
            with open(file_path, 'rb') as f:
                # Hash in chunks so large images are not held in memory.
                while chunk := f.read(1024 * 1024):
                    digest.update(chunk)
        except (OSError, ValueError):
            return hashlib.md5(str(file_path).encode()).hexdigest()[:8]
        return digest.hexdigest()[:8]

    def get_cache_key(self, image_path: str, model: str, prompt: str) -> str:
        """Build the cache key from model, prompt and file content hash."""
        file_hash = self._get_file_hash(image_path)
        key_str = f"{model}_{prompt}_{file_hash}"
        return hashlib.md5(key_str.encode()).hexdigest()

    def get(self, image_path: str, model: str, prompt: str) -> str | None:
        """Return the cached description, or None on a miss or read error."""
        key = self.get_cache_key(image_path, model, prompt)
        cache_file = self.cache_dir / f"{key}.json"

        if cache_file.exists():
            try:
                with open(cache_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                logger.info(f"缓存命中: {image_path}")
                return data.get('description')
            except Exception as e:
                # Best effort: a corrupt entry is treated as a miss.
                logger.warning(f"读取缓存失败: {e}")
        return None

    def set(self, image_path: str, model: str, prompt: str, description: str):
        """Store a description in the cache; failures are logged, not raised."""
        key = self.get_cache_key(image_path, model, prompt)
        cache_file = self.cache_dir / f"{key}.json"

        try:
            with open(cache_file, 'w', encoding='utf-8') as f:
                json.dump({
                    'description': description,
                    'timestamp': time.time(),
                    'image_path': image_path,
                    'model': model,
                    'prompt': prompt
                }, f, ensure_ascii=False, indent=2)
            logger.info(f"结果已缓存: {image_path}")
        except Exception as e:
            logger.warning(f"保存缓存失败: {e}")


# Module-wide cache instance.
cache = ImageDescriptionCache()

MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB


def check_file_size(file_path: str) -> tuple[bool, str]:
    """Check that the file is non-empty and no larger than MAX_FILE_SIZE.

    Args:
        file_path: Path of the file to check.

    Returns:
        (is_valid, error_message): validity flag and, when invalid, the
        error text (empty string when valid).
    """
    try:
        size = Path(file_path).stat().st_size
        size_mb = size / (1024 * 1024)

        if size > MAX_FILE_SIZE:
            return False, (
                f"错误: 文件过大 ({size_mb:.1f}MB)\n"
                f"最大支持 {MAX_FILE_SIZE / (1024 * 1024):.0f}MB\n"
                f"建议: 压缩图片或使用较小的文件"
            )

        if size == 0:
            return False, "错误: 文件为空"

        return True, ""
    except Exception as e:
        return False, f"错误: 无法检查文件大小: {e}"


# Image optimization settings.
OPTIMIZE_SIZE = (1920, 1080)  # maximum dimensions
OPTIMIZE_QUALITY = 85         # JPEG quality
OPTIMIZE_MAX_SIZE_MB = 2      # files above 2MB are optimized automatically


def optimize_image(image_path: str) -> tuple[bytes, str]:
    """Optimize an image: convert to RGB, downscale, and re-encode as JPEG.

    Args:
        image_path: Path of the image file.

    Returns:
        (image_bytes, status_message): the optimized JPEG bytes, or the
        original file bytes when Pillow is unavailable or optimization
        fails, together with a status message.
    """
    if not PIL_AVAILABLE:
        # Pillow is not installed; hand back the original bytes.
        with open(image_path, 'rb') as f:
            return f.read(), "未优化（PIL未安装）"

    try:
        start_time = time.time()

        # Compare against the on-disk size. The decoded pixel buffer is not
        # what gets uploaded, and materializing it just to measure it wastes
        # memory and yields a misleading compression ratio.
        original_size = Path(image_path).stat().st_size
        original_mb = original_size / (1024 * 1024)

        with Image.open(image_path) as img:
            # JPEG cannot store alpha/palette modes; convert to RGB.
            if img.mode != 'RGB':
                img = img.convert('RGB')

            # Downscale only when a dimension exceeds the limit.
            if img.size[0] > OPTIMIZE_SIZE[0] or img.size[1] > OPTIMIZE_SIZE[1]:
                img.thumbnail(OPTIMIZE_SIZE, Image.LANCZOS)
                logger.info(f"图片已调整: {img.size}")

            # Re-encode as JPEG.
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=OPTIMIZE_QUALITY, optimize=True)

        optimized = buffer.getvalue()
        optimized_size = len(optimized)
        optimized_mb = optimized_size / (1024 * 1024)

        compress_time = time.time() - start_time
        reduction = (1 - optimized_size / original_size) * 100

        status = (
            f"优化完成: {original_mb:.1f}MB → {optimized_mb:.1f}MB "
            f"({reduction:.0f}% 压缩), 耗时 {compress_time:.2f}秒"
        )
        logger.info(status)
        return optimized, status
    except Exception as e:
        # Best effort: fall back to the untouched file.
        logger.warning(f"图片优化失败: {e}，使用原始图片")
        with open(image_path, 'rb') as f:
            return f.read(), f"优化失败: {e}"


def _sniff_mime_type(data: bytes, fallback: str = "image/jpeg") -> str:
    """Return the image MIME type indicated by the leading magic bytes."""
    if data.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if data.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if data[:6] in (b"GIF87a", b"GIF89a"):
        return "image/gif"
    if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
        return "image/webp"
    if data.startswith(b"BM"):
        return "image/bmp"
    if data[:4] in (b"II*\x00", b"MM\x00*"):
        return "image/tiff"
    return fallback


def _guess_mime_from_path(image_path: str) -> str:
    """Return the image MIME type implied by the extension, else image/jpeg."""
    guessed, _ = mimetypes.guess_type(image_path)
    if guessed and guessed.startswith("image/"):
        return guessed
    return "image/jpeg"


def _encode_image_with_mime(image_path: str) -> tuple[str, str]:
    """Read (and optimize if large) a local image; return (base64, mime_type).

    Raises:
        ValueError: if the file does not exist or cannot be read/encoded.
    """
    try:
        file_size = Path(image_path).stat().st_size
        file_size_mb = file_size / (1024 * 1024)

        # Only files above the threshold go through the optimizer.
        if file_size_mb > OPTIMIZE_MAX_SIZE_MB:
            logger.info(f"文件较大 ({file_size_mb:.1f}MB)，启用自动优化...")
            image_data, status = optimize_image(image_path)
            logger.info(status)
        else:
            with open(image_path, 'rb') as image_file:
                image_data = image_file.read()
            logger.info(f"文件大小适中 ({file_size_mb:.1f}MB)，无需优化")

        # The bytes may be the original format or an optimized JPEG, so the
        # MIME type is derived from the data itself, not assumed.
        mime_type = _sniff_mime_type(image_data, _guess_mime_from_path(image_path))

        encoded = base64.b64encode(image_data).decode('utf-8')
        logger.info(f"成功编码图片: {image_path} (Base64: {len(encoded):,} 字符)")
        return encoded, mime_type
    except FileNotFoundError as e:
        raise ValueError(f"文件不存在: {image_path}") from e
    except Exception as e:
        raise ValueError(f"编码图片失败: {str(e)}") from e


def encode_image_to_base64(image_path: str) -> str:
    """Encode a local image file as Base64 (large images are optimized first).

    Raises:
        ValueError: if the file does not exist or cannot be read/encoded.
    """
    encoded, _ = _encode_image_with_mime(image_path)
    return encoded


def _is_error_result(text: str) -> bool:
    """Return True when *text* is one of this module's error strings."""
    return text.startswith(_ERROR_PREFIXES)


def call_doubao_vision_api(image_input: dict, prompt: str = _DETAILED_PROMPT) -> str:
    """Call the Doubao vision API with one image and a text prompt.

    Args:
        image_input: Content part describing the image (an ``image_url`` dict).
        prompt: Text instruction sent alongside the image.

    Returns:
        The model's reply, or an error string when the client is missing,
        the response has no choices, or the call raises.
    """
    if not client:
        return "错误: 豆包客户端未正确初始化，请检查 API Key 配置"

    try:
        content = [
            {"type": "text", "text": prompt},
            image_input
        ]

        logger.info(f"调用豆包 API，模型: {MODEL_ID}")
        response = client.chat.completions.create(
            model=MODEL_ID,
            messages=[{"role": "user", "content": content}],
        )

        if response.choices and len(response.choices) > 0:
            result = response.choices[0].message.content
            logger.info("豆包 API 调用成功")
            return result
        else:
            return "错误: 未收到有效的响应"
    except Exception as e:
        error_msg = f"调用豆包 API 失败: {str(e)}"
        logger.error(error_msg)
        return error_msg


@mcp.tool()
async def describe_image_from_file(file_path: str, prompt: str = _DETAILED_PROMPT) -> str:
    """Describe an image stored at a local file path.

    Args:
        file_path: Path of the image file.
        prompt: Instruction for the model (optional; defaults to a detailed
            description request).

    Returns:
        The model's description, or an error string.
    """
    try:
        path = Path(file_path)

        # The path must exist ...
        if not path.exists():
            return f"错误: 文件不存在: {file_path}"

        # ... and must be a regular file.
        if not path.is_file():
            return f"错误: 路径不是文件: {file_path}"

        # Validate the extension.
        is_valid, error_msg = check_image_format(file_path)
        if not is_valid:
            logger.warning(f"图片格式检查失败: {file_path}")
            return error_msg

        # Encode the image and label it with its real MIME type.
        base64_data, mime_type = _encode_image_with_mime(file_path)
        image_input = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{base64_data}"}
        }

        return call_doubao_vision_api(image_input, prompt)
    except Exception as e:
        error_msg = f"处理图片失败: {str(e)}"
        logger.error(error_msg)
        return error_msg


@mcp.tool()
async def describe_image_from_base64(base64_data: str, prompt: str = "请详细描述这张图片的内容") -> str:
    """Describe an image supplied as Base64 (a data-URL prefix is accepted).

    Args:
        base64_data: Base64 payload, optionally prefixed with ``data:image...,``.
        prompt: Instruction for the model.

    Returns:
        The model's description, or an error string.
    """
    try:
        # Strip whitespace and any data-URL header.
        clean_base64 = base64_data.strip()
        if clean_base64.startswith('data:image'):
            clean_base64 = clean_base64.split(',', 1)[1] if ',' in clean_base64 else clean_base64

        # Validate the payload; the decoded bytes also reveal the format.
        try:
            decoded = base64.b64decode(clean_base64, validate=True)
        except ValueError:
            return "错误: 无效的 Base64 编码"

        mime_type = _sniff_mime_type(decoded)
        image_input = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{clean_base64}"}
        }

        return call_doubao_vision_api(image_input, prompt)
    except Exception as e:
        error_msg = f"处理 Base64 图片失败: {str(e)}"
        logger.error(error_msg)
        return error_msg


@mcp.tool()
async def describe_image_from_url(url: str, prompt: str = "请详细描述这张图片的内容") -> str:
    """Describe an image referenced by an http(s) URL.

    Args:
        url: Image URL; must start with http:// or https://.
        prompt: Instruction for the model.

    Returns:
        The model's description, or an error string.
    """
    try:
        # Only http(s) URLs are accepted.
        if not url.startswith(('http://', 'https://')):
            return "错误: URL 必须以 http:// 或 https:// 开头"

        # Validate the extension found in the URL path, if any.
        is_valid, error_msg = check_image_format(url, is_url=True)
        if not is_valid:
            logger.warning(f"图片格式检查失败: {url}")
            return error_msg

        # The URL is passed to the API as-is.
        image_input = {
            "type": "image_url",
            "image_url": {"url": url}
        }

        return call_doubao_vision_api(image_input, prompt)
    except Exception as e:
        error_msg = f"处理 URL 图片失败: {str(e)}"
        logger.error(error_msg)
        return error_msg


@mcp.tool()
async def describe(image_input: str, prompt: str = "请详细描述这张图片的内容") -> str:
    """Describe an image, auto-detecting whether the input is a URL or a file.

    This is the recommended simplified entry point: it inspects the input
    and dispatches to the URL or local-file tool.

    Args:
        image_input: Either
            - a network URL (starting with http:// or https://), or
            - a local file path (e.g. D:\\download\\photo.jpg).
        prompt: Instruction for the model (optional).

    Returns:
        The description returned by the Doubao vision model (prefixed with
        "[缓存] " when served from the local cache), or an error string.

    Examples:
        Describe a network image:
            describe("https://example.com/image.jpg")

        Describe a local file:
            describe("D:\\\\download\\\\photo.jpg")
            describe("D:/download/photo.jpg")

        Use a custom prompt:
            describe("image.jpg", "请描述图片中的颜色和构图")
    """
    try:
        # Step 1: detect the input type.
        is_url = image_input.startswith(('http://', 'https://'))

        # Step 2: validate the format (URLs and paths follow different rules).
        is_valid, error_msg = check_image_format(image_input, is_url=is_url)
        if not is_valid:
            logger.warning(f"图片格式检查失败: {image_input}")
            return error_msg

        # Step 3: local files only - consult the cache, then check the size.
        if not is_url:
            cached_result = cache.get(image_input, MODEL_ID, prompt)
            if cached_result:
                logger.info(f"使用缓存结果: {image_input}")
                return f"[缓存] {cached_result}"

            size_valid, size_error = check_file_size(image_input)
            if not size_valid:
                logger.warning(f"文件大小检查失败: {image_input}")
                return size_error

        # Step 4: dispatch by input type.
        if is_url:
            logger.info(f"检测到网络 URL，使用 describe_image_from_url")
            result = await describe_image_from_url(image_input, prompt)
        else:
            logger.info(f"检测到本地文件路径，使用 describe_image_from_file")
            # Forward the prompt so the answer matches the cache key it is
            # stored under.
            result = await describe_image_from_file(image_input, prompt)

            # Cache successful results only. Error strings are recognized by
            # prefix so that a valid description which merely mentions
            # "错误"/"失败" is still cached.
            if result and not _is_error_result(result):
                cache.set(image_input, MODEL_ID, prompt, result)

        return result
    except Exception as e:
        error_msg = f"处理图片失败: {str(e)}"
        logger.error(error_msg)
        return error_msg


def main():
    """Start the MCP server on the stdio transport."""
    logger.info("启动豆包图片描述 MCP 服务器")
    # Never write key material to the log; report only whether one is set.
    if API_KEY == _PLACEHOLDER_API_KEY:
        logger.warning("API Key 未配置，请设置环境变量 VOLCENGINE_API_KEY")
    else:
        logger.info(f"API Key: 已配置 (长度 {len(API_KEY)})")
    logger.info(f"Model ID: {MODEL_ID}")
    mcp.run(transport="stdio")


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/9527wow/doubao-image-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

server.py•16.1 KiB