MCP Development Framework

MIT License
OverviewInspectSchema Related Servers Reviews Score
{
    "sourceFile": "mcp_simple_tool/tools/quick_pdf_tool.py",
    "activeCommit": 0,
    "commits": [
        {
            "activePatchIndex": 2,
            "patches": [
                {
                    "date": 1741332308874,
                    "content": "Index: \n===================================================================\n--- \n+++ \n"
                },
                {
                    "date": 1741333338674,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,72 +1,35 @@\n-\"\"\"\n-PDF快速预览工具，仅提取文本内容，适用于大型PDF文件\n-\"\"\"\n-\n import os\n-import fitz  # PyMuPDF\n import PyPDF2\n import pymupdf4llm\n-import traceback\n-from typing import Dict, List, Any\n import mcp.types as types\n-from .base import BaseTool\n+from . import BaseTool, ToolRegistry\n \n-\n+@ToolRegistry.register\n class QuickPdfTool(BaseTool):\n-    \"\"\"\n-    用于快速预览PDF文件的工具，仅提取文本内容，不处理图片\n-    \"\"\"\n+    \"\"\"快速PDF预览工具，不包含图片处理，适用于大文件\"\"\"\n+    name = \"quick_pdf\"\n+    description = \"快速预览PDF文件内容（仅文本，无图片）\"\n+    input_schema = {\n+        \"type\": \"object\",\n+        \"required\": [\"file_path\"],\n+        \"properties\": {\n+            \"file_path\": {\n+                \"type\": \"string\",\n+                \"description\": \"PDF文件的本地路径，例如'/path/to/document.pdf'\",\n+            }\n+        },\n+    }\n     \n-    @property\n-    def name(self) -> str:\n-        return \"quick_pdf\"\n-    \n-    @property\n-    def description(self) -> str:\n-        return \"快速预览PDF文档内容（仅提取文本，不包含图片）\"\n-    \n-    @property\n-    def input_schema(self) -> Dict[str, Any]:\n-        return {\n-            \"type\": \"object\",\n-            \"required\": [\"file_path\"],\n-            \"properties\": {\n-                \"file_path\": {\n-                    \"type\": \"string\",\n-                    \"description\": \"PDF文件的本地路径，例如'/path/to/document.pdf'\",\n-                }\n-            },\n-        }\n-    \n-    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"\n-        快速预览PDF文档\n-        \n-        Args:\n-            arguments: 参数字典，必须包含'file_path'键\n-            \n-        Returns:\n-            PDF文本内容列表\n-        \"\"\"\n+    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+        \"\"\"快速预览PDF文件内容\"\"\"\n         if \"file_path\" not in arguments:\n             return [types.TextContent(\n                 type=\"text\",\n-                text=\"错误: 缺少必要参数 'file_path'\"\n+                text=\"Error: Missing required argument 'file_path'\"\n             )]\n-        \n-        return await self._quick_preview_pdf(arguments[\"file_path\"])\n-    \n-    async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"\n-        快速预览PDF文件内容，不包含图片处理\n-        \n-        Args:\n-            file_path: PDF文件路径\n             \n-        Returns:\n-            PDF文本内容列表\n-        \"\"\"\n+        file_path = arguments[\"file_path\"]\n         results = []\n         \n         # 检查文件是否存在\n         if not os.path.exists(file_path):\n@@ -92,113 +55,65 @@\n             max_pages = min(num_pages, 50)  # 快速模式可以处理更多页\n             pages_to_process = list(range(max_pages))\n             \n             try:\n-                # 尝试使用PyMuPDF提取文本（通常比PyPDF2更快更准确）\n-                pdf_document = fitz.open(file_path)\n+                # 使用PymuPDF4llm提取内容，但不提取图像\n+                md_content = pymupdf4llm.to_markdown(\n+                    doc=file_path,\n+                    pages=pages_to_process,\n+                    page_chunks=True,\n+                    write_images=False  # 不提取图像\n+                )\n                 \n-                # 提取文本内容\n-                text_content = \"\"\n-                \n-                # 添加PDF元数据\n-                text_content += f\"## PDF文档信息\\n\\n\"\n-                text_content += f\"- 页数: {num_pages}\\n\"\n-                \n-                # 从PyMuPDF获取元数据\n-                metadata = pdf_document.metadata\n-                if metadata:\n-                    for key, value in metadata.items():\n-                        if value and str(value).strip():\n-                            text_content += f\"- {key}: {value}\\n\"\n-                \n                 # 如果处理的页数少于总页数，添加提示\n                 if max_pages < num_pages:\n-                    text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n+                    md_content = f\"# PDF文档内容（前{max_pages}页）\\n\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n+                else:\n+                    md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n                 \n-                text_content += \"\\n## 内容摘要\\n\\n\"\n-                \n-                # 逐页提取文本\n-                for page_num in range(max_pages):\n-                    page = pdf_document[page_num]\n-                    page_text = page.get_text()\n-                    \n-                    if page_text.strip():\n-                        text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n-                        text_content += page_text.strip() + \"\\n\"\n-                \n-                pdf_document.close()\n-                \n                 # 添加提取的内容到结果\n                 results.append(types.TextContent(\n                     type=\"text\",\n-                    text=text_content\n+                    text=md_content\n                 ))\n-                \n-            except Exception as pymupdf_error:\n-                # 如果PyMuPDF提取失败，回退到PymuPDF4llm\n+            except Exception as extract_error:\n+                # 如果PymuPDF4llm提取失败，回退到原来的方法\n                 results.append(types.TextContent(\n                     type=\"text\",\n-                    text=f\"警告: 使用PyMuPDF提取内容失败: {str(pymupdf_error)}\\n正在尝试使用备用方法...\"\n+                    text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用备用方法...\"\n                 ))\n                 \n-                try:\n-                    # 使用PymuPDF4llm提取内容，但不提取图像\n-                    md_content = pymupdf4llm.to_markdown(\n-                        doc=file_path,\n-                        pages=pages_to_process,\n-                        page_chunks=True,\n-                        write_images=False  # 不提取图像\n-                    )\n+                # 使用PyPDF2提取文本\n+                text_content = \"\"\n+                with open(file_path, 'rb') as file:\n+                    reader = PyPDF2.PdfReader(file)\n                     \n-                    # 如果处理的页数少于总页数，添加提示\n+                    # 添加PDF元数据\n+                    text_content += f\"## PDF文档信息\\n\\n\"\n+                    text_content += f\"- 页数: {num_pages}\\n\"\n+                    if reader.metadata:\n+                        for key, value in reader.metadata.items():\n+                            if key.startswith('/'):\n+                                key = key[1:]\n+                            if value and str(value).strip():\n+                                text_content += f\"- {key}: {value}\\n\"\n+                    \n+                    # 限制处理的页数\n                     if max_pages < num_pages:\n-                        md_content = f\"# PDF文档内容（前{max_pages}页）\\n\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n-                    else:\n-                        md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n+                        text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n                     \n-                    # 添加提取的内容到结果\n-                    results.append(types.TextContent(\n-                        type=\"text\",\n-                        text=md_content\n-                    ))\n-                except Exception as extract_error:\n-                    # 如果PymuPDF4llm提取失败，回退到原来的方法\n-                    results.append(types.TextContent(\n-                        type=\"text\",\n-                        text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用最后的备用方法...\"\n-                    ))\n+                    text_content += \"\\n## 内容摘要\\n\\n\"\n                     \n-                    # 使用PyPDF2提取文本\n-                    text_content = \"\"\n-                    with open(file_path, 'rb') as file:\n-                        reader = PyPDF2.PdfReader(file)\n-                        \n-                        # 添加PDF元数据\n-                        text_content += f\"## PDF文档信息\\n\\n\"\n-                        text_content += f\"- 页数: {num_pages}\\n\"\n-                        if reader.metadata:\n-                            for key, value in reader.metadata.items():\n-                                if key.startswith('/'):\n-                                    key = key[1:]\n-                                if value and str(value).strip():\n-                                    text_content += f\"- {key}: {value}\\n\"\n-                        \n-                        # 限制处理的页数\n-                        if max_pages < num_pages:\n-                            text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n-                        \n-                        text_content += \"\\n## 内容摘要\\n\\n\"\n-                        \n-                        # 逐页提取文本\n-                        for page_num in range(max_pages):\n-                            page = reader.pages[page_num]\n-                            page_text = page.extract_text()\n-                            if page_text:\n-                                text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n-                                text_content += page_text + \"\\n\"\n-                    \n-                    # 添加文本内容到结果\n-                    results.append(types.TextContent(type=\"text\", text=text_content))\n+                    # 逐页提取文本\n+                    for page_num in range(max_pages):\n+                        page = reader.pages[page_num]\n+                        page_text = page.extract_text()\n+                        if page_text:\n+                            text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n+                            text_content += page_text + \"\\n\"\n+                \n+                # 添加文本内容到结果\n+                results.append(types.TextContent(type=\"text\", text=text_content))\n             \n             # 添加提示信息\n             results.append(types.TextContent(\n                 type=\"text\",\n@@ -206,8 +121,9 @@\n             ))\n             \n             return results\n         except Exception as e:\n+            import traceback\n             error_details = traceback.format_exc()\n             return [types.TextContent(\n                 type=\"text\",\n                 text=f\"错误: 快速预览PDF失败: {str(e)}\\n详细错误信息: {error_details}\"\n"
                },
                {
                    "date": 1741335072961,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,15 +1,25 @@\n+\"\"\"\n+PDF快速预览工具，仅提取文本内容，适用于大型PDF文件\n+\"\"\"\n+\n import os\n+import fitz  # PyMuPDF\n import PyPDF2\n import pymupdf4llm\n+import traceback\n+from typing import Dict, List, Any\n import mcp.types as types\n from . import BaseTool, ToolRegistry\n \n @ToolRegistry.register\n class QuickPdfTool(BaseTool):\n-    \"\"\"快速PDF预览工具，不包含图片处理，适用于大文件\"\"\"\n+    \"\"\"\n+    用于快速预览PDF文件的工具，仅提取文本内容，不处理图片\n+    \"\"\"\n+    \n     name = \"quick_pdf\"\n-    description = \"快速预览PDF文件内容（仅文本，无图片）\"\n+    description = \"快速预览PDF文档内容（仅提取文本，不包含图片）\"\n     input_schema = {\n         \"type\": \"object\",\n         \"required\": [\"file_path\"],\n         \"properties\": {\n@@ -19,17 +29,36 @@\n             }\n         },\n     }\n     \n-    async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n-        \"\"\"快速预览PDF文件内容\"\"\"\n+    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+        \"\"\"\n+        快速预览PDF文档\n+        \n+        Args:\n+            arguments: 参数字典，必须包含'file_path'键\n+            \n+        Returns:\n+            PDF文本内容列表\n+        \"\"\"\n         if \"file_path\" not in arguments:\n             return [types.TextContent(\n                 type=\"text\",\n-                text=\"Error: Missing required argument 'file_path'\"\n+                text=\"错误: 缺少必要参数 'file_path'\"\n             )]\n+        \n+        return await self._quick_preview_pdf(arguments[\"file_path\"])\n+    \n+    async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+        \"\"\"\n+        快速预览PDF文件内容，不包含图片处理\n+        \n+        Args:\n+            file_path: PDF文件路径\n             \n-        file_path = arguments[\"file_path\"]\n+        Returns:\n+            PDF文本内容列表\n+        \"\"\"\n         results = []\n         \n         # 检查文件是否存在\n         if not os.path.exists(file_path):\n@@ -55,65 +84,113 @@\n             max_pages = min(num_pages, 50)  # 快速模式可以处理更多页\n             pages_to_process = list(range(max_pages))\n             \n             try:\n-                # 使用PymuPDF4llm提取内容，但不提取图像\n-                md_content = pymupdf4llm.to_markdown(\n-                    doc=file_path,\n-                    pages=pages_to_process,\n-                    page_chunks=True,\n-                    write_images=False  # 不提取图像\n-                )\n+                # 尝试使用PyMuPDF提取文本（通常比PyPDF2更快更准确）\n+                pdf_document = fitz.open(file_path)\n                 \n+                # 提取文本内容\n+                text_content = \"\"\n+                \n+                # 添加PDF元数据\n+                text_content += f\"## PDF文档信息\\n\\n\"\n+                text_content += f\"- 页数: {num_pages}\\n\"\n+                \n+                # 从PyMuPDF获取元数据\n+                metadata = pdf_document.metadata\n+                if metadata:\n+                    for key, value in metadata.items():\n+                        if value and str(value).strip():\n+                            text_content += f\"- {key}: {value}\\n\"\n+                \n                 # 如果处理的页数少于总页数，添加提示\n                 if max_pages < num_pages:\n-                    md_content = f\"# PDF文档内容（前{max_pages}页）\\n\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n-                else:\n-                    md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n+                    text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n                 \n+                text_content += \"\\n## 内容摘要\\n\\n\"\n+                \n+                # 逐页提取文本\n+                for page_num in range(max_pages):\n+                    page = pdf_document[page_num]\n+                    page_text = page.get_text()\n+                    \n+                    if page_text.strip():\n+                        text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n+                        text_content += page_text.strip() + \"\\n\"\n+                \n+                pdf_document.close()\n+                \n                 # 添加提取的内容到结果\n                 results.append(types.TextContent(\n                     type=\"text\",\n-                    text=md_content\n+                    text=text_content\n                 ))\n-            except Exception as extract_error:\n-                # 如果PymuPDF4llm提取失败，回退到原来的方法\n+                \n+            except Exception as pymupdf_error:\n+                # 如果PyMuPDF提取失败，回退到PymuPDF4llm\n                 results.append(types.TextContent(\n                     type=\"text\",\n-                    text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用备用方法...\"\n+                    text=f\"警告: 使用PyMuPDF提取内容失败: {str(pymupdf_error)}\\n正在尝试使用备用方法...\"\n                 ))\n                 \n-                # 使用PyPDF2提取文本\n-                text_content = \"\"\n-                with open(file_path, 'rb') as file:\n-                    reader = PyPDF2.PdfReader(file)\n+                try:\n+                    # 使用PymuPDF4llm提取内容，但不提取图像\n+                    md_content = pymupdf4llm.to_markdown(\n+                        doc=file_path,\n+                        pages=pages_to_process,\n+                        page_chunks=True,\n+                        write_images=False  # 不提取图像\n+                    )\n                     \n-                    # 添加PDF元数据\n-                    text_content += f\"## PDF文档信息\\n\\n\"\n-                    text_content += f\"- 页数: {num_pages}\\n\"\n-                    if reader.metadata:\n-                        for key, value in reader.metadata.items():\n-                            if key.startswith('/'):\n-                                key = key[1:]\n-                            if value and str(value).strip():\n-                                text_content += f\"- {key}: {value}\\n\"\n-                    \n-                    # 限制处理的页数\n+                    # 如果处理的页数少于总页数，添加提示\n                     if max_pages < num_pages:\n-                        text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n+                        md_content = f\"# PDF文档内容（前{max_pages}页）\\n\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n+                    else:\n+                        md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n                     \n-                    text_content += \"\\n## 内容摘要\\n\\n\"\n+                    # 添加提取的内容到结果\n+                    results.append(types.TextContent(\n+                        type=\"text\",\n+                        text=md_content\n+                    ))\n+                except Exception as extract_error:\n+                    # 如果PymuPDF4llm提取失败，回退到原来的方法\n+                    results.append(types.TextContent(\n+                        type=\"text\",\n+                        text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用最后的备用方法...\"\n+                    ))\n                     \n-                    # 逐页提取文本\n-                    for page_num in range(max_pages):\n-                        page = reader.pages[page_num]\n-                        page_text = page.extract_text()\n-                        if page_text:\n-                            text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n-                            text_content += page_text + \"\\n\"\n-                \n-                # 添加文本内容到结果\n-                results.append(types.TextContent(type=\"text\", text=text_content))\n+                    # 使用PyPDF2提取文本\n+                    text_content = \"\"\n+                    with open(file_path, 'rb') as file:\n+                        reader = PyPDF2.PdfReader(file)\n+                        \n+                        # 添加PDF元数据\n+                        text_content += f\"## PDF文档信息\\n\\n\"\n+                        text_content += f\"- 页数: {num_pages}\\n\"\n+                        if reader.metadata:\n+                            for key, value in reader.metadata.items():\n+                                if key.startswith('/'):\n+                                    key = key[1:]\n+                                if value and str(value).strip():\n+                                    text_content += f\"- {key}: {value}\\n\"\n+                        \n+                        # 限制处理的页数\n+                        if max_pages < num_pages:\n+                            text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n+                        \n+                        text_content += \"\\n## 内容摘要\\n\\n\"\n+                        \n+                        # 逐页提取文本\n+                        for page_num in range(max_pages):\n+                            page = reader.pages[page_num]\n+                            page_text = page.extract_text()\n+                            if page_text:\n+                                text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n+                                text_content += page_text + \"\\n\"\n+                    \n+                    # 添加文本内容到结果\n+                    results.append(types.TextContent(type=\"text\", text=text_content))\n             \n             # 添加提示信息\n             results.append(types.TextContent(\n                 type=\"text\",\n@@ -121,9 +198,8 @@\n             ))\n             \n             return results\n         except Exception as e:\n-            import traceback\n             error_details = traceback.format_exc()\n             return [types.TextContent(\n                 type=\"text\",\n                 text=f\"错误: 快速预览PDF失败: {str(e)}\\n详细错误信息: {error_details}\"\n"
                }
            ],
            "date": 1741332308874,
            "name": "Commit-0",
            "content": "\"\"\"\nPDF快速预览工具，仅提取文本内容，适用于大型PDF文件\n\"\"\"\n\nimport os\nimport fitz  # PyMuPDF\nimport PyPDF2\nimport pymupdf4llm\nimport traceback\nfrom typing import Dict, List, Any\nimport mcp.types as types\nfrom .base import BaseTool\n\n\nclass QuickPdfTool(BaseTool):\n    \"\"\"\n    用于快速预览PDF文件的工具，仅提取文本内容，不处理图片\n    \"\"\"\n    \n    @property\n    def name(self) -> str:\n        return \"quick_pdf\"\n    \n    @property\n    def description(self) -> str:\n        return \"快速预览PDF文档内容（仅提取文本，不包含图片）\"\n    \n    @property\n    def input_schema(self) -> Dict[str, Any]:\n        return {\n            \"type\": \"object\",\n            \"required\": [\"file_path\"],\n            \"properties\": {\n                \"file_path\": {\n                    \"type\": \"string\",\n                    \"description\": \"PDF文件的本地路径，例如'/path/to/document.pdf'\",\n                }\n            },\n        }\n    \n    async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        快速预览PDF文档\n        \n        Args:\n            arguments: 参数字典，必须包含'file_path'键\n            \n        Returns:\n            PDF文本内容列表\n        \"\"\"\n        if \"file_path\" not in arguments:\n            return [types.TextContent(\n                type=\"text\",\n                text=\"错误: 缺少必要参数 'file_path'\"\n            )]\n        \n        return await self._quick_preview_pdf(arguments[\"file_path\"])\n    \n    async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n        \"\"\"\n        快速预览PDF文件内容，不包含图片处理\n        \n        Args:\n            file_path: PDF文件路径\n            \n        Returns:\n            PDF文本内容列表\n        \"\"\"\n        results = []\n        \n        # 检查文件是否存在\n        if not os.path.exists(file_path):\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 文件不存在: {file_path}\\n请检查路径是否正确，并确保文件可访问。\"\n            )]\n        \n        try:\n            # 添加文件信息\n            file_size_mb = os.path.getsize(file_path) / (1024 * 1024)\n            results.append(types.TextContent(\n                type=\"text\",\n                text=f\"# 快速预览模式 - 仅提取文本内容\\n\\n文件大小: {file_size_mb:.2f} MB\"\n            ))\n            \n            # 获取PDF页数\n            with open(file_path, 'rb') as file:\n                reader = PyPDF2.PdfReader(file)\n                num_pages = len(reader.pages)\n            \n            # 限制处理的页数\n            max_pages = min(num_pages, 50)  # 快速模式可以处理更多页\n            pages_to_process = list(range(max_pages))\n            \n            try:\n                # 尝试使用PyMuPDF提取文本（通常比PyPDF2更快更准确）\n                pdf_document = fitz.open(file_path)\n                \n                # 提取文本内容\n                text_content = \"\"\n                \n                # 添加PDF元数据\n                text_content += f\"## PDF文档信息\\n\\n\"\n                text_content += f\"- 页数: {num_pages}\\n\"\n                \n                # 从PyMuPDF获取元数据\n                metadata = pdf_document.metadata\n                if metadata:\n                    for key, value in metadata.items():\n                        if value and str(value).strip():\n                            text_content += f\"- {key}: {value}\\n\"\n                \n                # 如果处理的页数少于总页数，添加提示\n                if max_pages < num_pages:\n                    text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n                \n                text_content += \"\\n## 内容摘要\\n\\n\"\n                \n                # 逐页提取文本\n                for page_num in range(max_pages):\n                    page = pdf_document[page_num]\n                    page_text = page.get_text()\n                    \n                    if page_text.strip():\n                        text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n                        text_content += page_text.strip() + \"\\n\"\n                \n                pdf_document.close()\n                \n                # 添加提取的内容到结果\n                results.append(types.TextContent(\n                    type=\"text\",\n                    text=text_content\n                ))\n                \n            except Exception as pymupdf_error:\n                # 如果PyMuPDF提取失败，回退到PymuPDF4llm\n                results.append(types.TextContent(\n                    type=\"text\",\n                    text=f\"警告: 使用PyMuPDF提取内容失败: {str(pymupdf_error)}\\n正在尝试使用备用方法...\"\n                ))\n                \n                try:\n                    # 使用PymuPDF4llm提取内容，但不提取图像\n                    md_content = pymupdf4llm.to_markdown(\n                        doc=file_path,\n                        pages=pages_to_process,\n                        page_chunks=True,\n                        write_images=False  # 不提取图像\n                    )\n                    \n                    # 如果处理的页数少于总页数，添加提示\n                    if max_pages < num_pages:\n                        md_content = f\"# PDF文档内容（前{max_pages}页）\\n\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n                    else:\n                        md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n                    \n                    # 添加提取的内容到结果\n                    results.append(types.TextContent(\n                        type=\"text\",\n                        text=md_content\n                    ))\n                except Exception as extract_error:\n                    # 如果PymuPDF4llm提取失败，回退到原来的方法\n                    results.append(types.TextContent(\n                        type=\"text\",\n                        text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用最后的备用方法...\"\n                    ))\n                    \n                    # 使用PyPDF2提取文本\n                    text_content = \"\"\n                    with open(file_path, 'rb') as file:\n                        reader = PyPDF2.PdfReader(file)\n                        \n                        # 添加PDF元数据\n                        text_content += f\"## PDF文档信息\\n\\n\"\n                        text_content += f\"- 页数: {num_pages}\\n\"\n                        if reader.metadata:\n                            for key, value in reader.metadata.items():\n                                if key.startswith('/'):\n                                    key = key[1:]\n                                if value and str(value).strip():\n                                    text_content += f\"- {key}: {value}\\n\"\n                        \n                        # 限制处理的页数\n                        if max_pages < num_pages:\n                            text_content += f\"\\n> 注意: 由于文件较大，仅处理前 {max_pages} 页内容。\\n\"\n                        \n                        text_content += \"\\n## 内容摘要\\n\\n\"\n                        \n                        # 逐页提取文本\n                        for page_num in range(max_pages):\n                            page = reader.pages[page_num]\n                            page_text = page.extract_text()\n                            if page_text:\n                                text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n                                text_content += page_text + \"\\n\"\n                    \n                    # 添加文本内容到结果\n                    results.append(types.TextContent(type=\"text\", text=text_content))\n            \n            # 添加提示信息\n            results.append(types.TextContent(\n                type=\"text\",\n                text=\"\\n## 注意\\n\\n快速预览完成！如需查看图片内容，请使用完整的PDF解析工具。\"\n            ))\n            \n            return results\n        except Exception as e:\n            error_details = traceback.format_exc()\n            return [types.TextContent(\n                type=\"text\",\n                text=f\"错误: 快速预览PDF失败: {str(e)}\\n详细错误信息: {error_details}\"\n            )] "
        }
    ]
}