MCP Development Framework

MIT License
Overview | Inspect | Schema | Related Servers | Reviews | Score
{
    "sourceFile": "mcp_simple_tool/tools/word_tool.py",
    "activeCommit": 0,
    "commits": [
        {
            "activePatchIndex": 0,
            "patches": [
                {
                    "date": 1741337157526,
                    "content": "Index: \n===================================================================\n--- \n+++ \n"
                }
            ],
            "date": 1741337157526,
            "name": "Commit-0",
            "content": "\"\"\"\nWord文档解析工具，用于解析Word文档内容\n\"\"\"\n\nimport os\nimport traceback\nfrom typing import Dict, List, Any\nimport docx\nimport mcp.types as types\nfrom . import BaseTool, ToolRegistry\n\n@ToolRegistry.register\nclass WordTool(BaseTool):\n    \"\"\"\n    用于解析Word文档的工具，提取文本内容、表格和图片信息\n    \"\"\"\n    \n    name = \"word\"\n    description = \"解析Word文档内容，提取文本、表格和图片信息\"\n    input_schema = {\n        \"type\": \"object\",\n        \"required\": [\"file_path\"],\n        \"properties\": {\n            \"file_path\": {\n                \"type\": \"string\",\n                \"description\": \"Word文档的本地路径，例如'/path/to/document.docx'\",\n            }\n        },\n    }\n    \n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        解析Word文档\n        \n        Args:\n            arguments: 参数字典，必须包含'file_path'键\n            \n        Returns:\n            Word文档内容列表\n        \"\"\"\n        if \"file_path\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少必要参数 'file_path'\"\n            )]\n        \n        return await self._parse_word_document(arguments[\"file_path\"])\n    \n    async def _parse_word_document(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        解析Word文档内容\n        \n        Args:\n            file_path: Word文档路径\n            \n        Returns:\n            Word文档内容列表\n        \"\"\"\n        results = []\n        \n        # 检查文件是否存在\n        if not os.path.exists(file_path):\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 文件不存在: {file_path}\\n请检查路径是否正确，并确保文件可访问。\"\n            )]\n        \n        # 检查文件扩展名\n        if not file_path.lower().endswith(('.docx', '.doc')):\n            return [types.TextContent(\n                type=\"text\",\n   
             text=f\"错误: 不支持的文件格式: {file_path}\\n仅支持.docx和.doc格式的Word文档。\"\n            )]\n        \n        try:\n            # 添加文件信息\n            file_size_mb = os.path.getsize(file_path) / (1024 * 1024)\n            results.append(types.TextContent(\n                type=\"text\",\n                text=f\"# Word文档解析\\n\\n文件大小: {file_size_mb:.2f} MB\"\n            ))\n            \n            # 打开Word文档\n            doc = docx.Document(file_path)\n            \n            # 提取文档属性\n            properties = {}\n            if hasattr(doc.core_properties, 'title') and doc.core_properties.title:\n                properties['标题'] = doc.core_properties.title\n            if hasattr(doc.core_properties, 'author') and doc.core_properties.author:\n                properties['作者'] = doc.core_properties.author\n            if hasattr(doc.core_properties, 'created') and doc.core_properties.created:\n                properties['创建时间'] = str(doc.core_properties.created)\n            if hasattr(doc.core_properties, 'modified') and doc.core_properties.modified:\n                properties['修改时间'] = str(doc.core_properties.modified)\n            if hasattr(doc.core_properties, 'comments') and doc.core_properties.comments:\n                properties['备注'] = doc.core_properties.comments\n            \n            # 添加文档属性信息\n            if properties:\n                properties_text = \"## 文档属性\\n\\n\"\n                for key, value in properties.items():\n                    properties_text += f\"- {key}: {value}\\n\"\n                results.append(types.TextContent(\n                    type=\"text\",\n                    text=properties_text\n                ))\n            \n            # 提取文档内容\n            content_text = \"## 文档内容\\n\\n\"\n            \n            # 处理段落\n            paragraphs_count = len(doc.paragraphs)\n            content_text += f\"### 段落 (共{paragraphs_count}个)\\n\\n\"\n            \n            for i, para in enumerate(doc.paragraphs):\n       
         if para.text.strip():  # 只处理非空段落\n                    content_text += f\"{para.text}\\n\\n\"\n            \n            # 处理表格\n            tables_count = len(doc.tables)\n            if tables_count > 0:\n                content_text += f\"### 表格 (共{tables_count}个)\\n\\n\"\n                \n                for i, table in enumerate(doc.tables):\n                    content_text += f\"#### 表格 {i+1}\\n\\n\"\n                    \n                    # 创建Markdown表格\n                    rows = []\n                    for row in table.rows:\n                        cells = [cell.text.replace('\\n', ' ').strip() for cell in row.cells]\n                        rows.append(cells)\n                    \n                    if rows:\n                        # 表头\n                        content_text += \"| \" + \" | \".join(rows[0]) + \" |\\n\"\n                        # 分隔线\n                        content_text += \"| \" + \" | \".join([\"---\"] * len(rows[0])) + \" |\\n\"\n                        # 表格内容\n                        for row in rows[1:]:\n                            content_text += \"| \" + \" | \".join(row) + \" |\\n\"\n                        \n                        content_text += \"\\n\"\n            \n            # 添加文档内容\n            results.append(types.TextContent(\n                type=\"text\",\n                text=content_text\n            ))\n            \n            # 提取图片信息\n            try:\n                # 计算文档中的图片数量\n                image_count = 0\n                for rel in doc.part.rels.values():\n                    if \"image\" in rel.target_ref:\n                        image_count += 1\n                \n                if image_count > 0:\n                    image_info = f\"## 图片信息\\n\\n文档中包含 {image_count} 张图片。\\n\\n\"\n                    image_info += \"注意：当前仅提供图片数量信息，不提取图片内容。如需查看图片，请直接打开原始文档。\\n\"\n                    \n                    results.append(types.TextContent(\n                        type=\"text\",\n    
                    text=image_info\n                    ))\n            except Exception as img_error:\n                results.append(types.TextContent(\n                    type=\"text\",\n                    text=f\"警告: 提取图片信息时出错: {str(img_error)}\"\n                ))\n            \n            # 添加处理完成的提示\n            results.append(types.TextContent(\n                type=\"text\",\n                text=\"Word文档处理完成！\"\n            ))\n            \n            return results\n        except Exception as e:\n            error_details = traceback.format_exc()\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 解析Word文档失败: {str(e)}\\n\"\n                     f\"可能的原因:\\n\"\n                     f\"1. 文件格式不兼容或已损坏\\n\"\n                     f\"2. 文件受密码保护\\n\"\n                     f\"3. 文件包含不支持的内容\\n\\n\"\n                     f\"详细错误信息: {error_details}\"\n            )] "
        }
    ]
}