MCP Development Framework
by aigo666
- .lh
- mcp_simple_tool
- tools
{
"sourceFile": "mcp_simple_tool/tools/quick_pdf_tool.py",
"activeCommit": 0,
"commits": [
{
"activePatchIndex": 2,
"patches": [
{
"date": 1741332308874,
"content": "Index: \n===================================================================\n--- \n+++ \n"
},
{
"date": 1741333338674,
"content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,72 +1,35 @@\n-\"\"\"\n-PDF快速预览工具,仅提取文本内容,适用于大型PDF文件\n-\"\"\"\n-\n import os\n-import fitz # PyMuPDF\n import PyPDF2\n import pymupdf4llm\n-import traceback\n-from typing import Dict, List, Any\n import mcp.types as types\n-from .base import BaseTool\n+from . import BaseTool, ToolRegistry\n \n-\n+@ToolRegistry.register\n class QuickPdfTool(BaseTool):\n- \"\"\"\n- 用于快速预览PDF文件的工具,仅提取文本内容,不处理图片\n- \"\"\"\n+ \"\"\"快速PDF预览工具,不包含图片处理,适用于大文件\"\"\"\n+ name = \"quick_pdf\"\n+ description = \"快速预览PDF文件内容(仅文本,无图片)\"\n+ input_schema = {\n+ \"type\": \"object\",\n+ \"required\": [\"file_path\"],\n+ \"properties\": {\n+ \"file_path\": {\n+ \"type\": \"string\",\n+ \"description\": \"PDF文件的本地路径,例如'/path/to/document.pdf'\",\n+ }\n+ },\n+ }\n \n- @property\n- def name(self) -> str:\n- return \"quick_pdf\"\n- \n- @property\n- def description(self) -> str:\n- return \"快速预览PDF文档内容(仅提取文本,不包含图片)\"\n- \n- @property\n- def input_schema(self) -> Dict[str, Any]:\n- return {\n- \"type\": \"object\",\n- \"required\": [\"file_path\"],\n- \"properties\": {\n- \"file_path\": {\n- \"type\": \"string\",\n- \"description\": \"PDF文件的本地路径,例如'/path/to/document.pdf'\",\n- }\n- },\n- }\n- \n- async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n- \"\"\"\n- 快速预览PDF文档\n- \n- Args:\n- arguments: 参数字典,必须包含'file_path'键\n- \n- Returns:\n- PDF文本内容列表\n- \"\"\"\n+ async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+ \"\"\"快速预览PDF文件内容\"\"\"\n if \"file_path\" not in arguments:\n return [types.TextContent(\n type=\"text\",\n- text=\"错误: 缺少必要参数 'file_path'\"\n+ text=\"Error: Missing required argument 'file_path'\"\n )]\n- \n- return await self._quick_preview_pdf(arguments[\"file_path\"])\n- \n- async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n- \"\"\"\n- 快速预览PDF文件内容,不包含图片处理\n- \n- Args:\n- file_path: PDF文件路径\n \n- Returns:\n- PDF文本内容列表\n- \"\"\"\n+ file_path = arguments[\"file_path\"]\n results = []\n \n # 检查文件是否存在\n if not os.path.exists(file_path):\n@@ -92,113 +55,65 @@\n max_pages = min(num_pages, 50) # 快速模式可以处理更多页\n pages_to_process = list(range(max_pages))\n \n try:\n- # 尝试使用PyMuPDF提取文本(通常比PyPDF2更快更准确)\n- pdf_document = fitz.open(file_path)\n+ # 使用PymuPDF4llm提取内容,但不提取图像\n+ md_content = pymupdf4llm.to_markdown(\n+ doc=file_path,\n+ pages=pages_to_process,\n+ page_chunks=True,\n+ write_images=False # 不提取图像\n+ )\n \n- # 提取文本内容\n- text_content = \"\"\n- \n- # 添加PDF元数据\n- text_content += f\"## PDF文档信息\\n\\n\"\n- text_content += f\"- 页数: {num_pages}\\n\"\n- \n- # 从PyMuPDF获取元数据\n- metadata = pdf_document.metadata\n- if metadata:\n- for key, value in metadata.items():\n- if value and str(value).strip():\n- text_content += f\"- {key}: {value}\\n\"\n- \n # 如果处理的页数少于总页数,添加提示\n if max_pages < num_pages:\n- text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n+ md_content = f\"# PDF文档内容(前{max_pages}页)\\n\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n+ else:\n+ md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n \n- text_content += \"\\n## 内容摘要\\n\\n\"\n- \n- # 逐页提取文本\n- for page_num in range(max_pages):\n- page = pdf_document[page_num]\n- page_text = page.get_text()\n- \n- if page_text.strip():\n- text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n- text_content += page_text.strip() + \"\\n\"\n- \n- pdf_document.close()\n- \n # 添加提取的内容到结果\n results.append(types.TextContent(\n type=\"text\",\n- text=text_content\n+ text=md_content\n ))\n- \n- except Exception as pymupdf_error:\n- # 如果PyMuPDF提取失败,回退到PymuPDF4llm\n+ except Exception as extract_error:\n+ # 如果PymuPDF4llm提取失败,回退到原来的方法\n results.append(types.TextContent(\n type=\"text\",\n- text=f\"警告: 使用PyMuPDF提取内容失败: {str(pymupdf_error)}\\n正在尝试使用备用方法...\"\n+ text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用备用方法...\"\n ))\n \n- try:\n- # 使用PymuPDF4llm提取内容,但不提取图像\n- md_content = pymupdf4llm.to_markdown(\n- doc=file_path,\n- pages=pages_to_process,\n- page_chunks=True,\n- write_images=False # 不提取图像\n- )\n+ # 使用PyPDF2提取文本\n+ text_content = \"\"\n+ with open(file_path, 'rb') as file:\n+ reader = PyPDF2.PdfReader(file)\n \n- # 如果处理的页数少于总页数,添加提示\n+ # 添加PDF元数据\n+ text_content += f\"## PDF文档信息\\n\\n\"\n+ text_content += f\"- 页数: {num_pages}\\n\"\n+ if reader.metadata:\n+ for key, value in reader.metadata.items():\n+ if key.startswith('/'):\n+ key = key[1:]\n+ if value and str(value).strip():\n+ text_content += f\"- {key}: {value}\\n\"\n+ \n+ # 限制处理的页数\n if max_pages < num_pages:\n- md_content = f\"# PDF文档内容(前{max_pages}页)\\n\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n- else:\n- md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n+ text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n \n- # 添加提取的内容到结果\n- results.append(types.TextContent(\n- type=\"text\",\n- text=md_content\n- ))\n- except Exception as extract_error:\n- # 如果PymuPDF4llm提取失败,回退到原来的方法\n- results.append(types.TextContent(\n- type=\"text\",\n- text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用最后的备用方法...\"\n- ))\n+ text_content += \"\\n## 内容摘要\\n\\n\"\n \n- # 使用PyPDF2提取文本\n- text_content = \"\"\n- with open(file_path, 'rb') as file:\n- reader = PyPDF2.PdfReader(file)\n- \n- # 添加PDF元数据\n- text_content += f\"## PDF文档信息\\n\\n\"\n- text_content += f\"- 页数: {num_pages}\\n\"\n- if reader.metadata:\n- for key, value in reader.metadata.items():\n- if key.startswith('/'):\n- key = key[1:]\n- if value and str(value).strip():\n- text_content += f\"- {key}: {value}\\n\"\n- \n- # 限制处理的页数\n- if max_pages < num_pages:\n- text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n- \n- text_content += \"\\n## 内容摘要\\n\\n\"\n- \n- # 逐页提取文本\n- for page_num in range(max_pages):\n- page = reader.pages[page_num]\n- page_text = page.extract_text()\n- if page_text:\n- text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n- text_content += page_text + \"\\n\"\n- \n- # 添加文本内容到结果\n- results.append(types.TextContent(type=\"text\", text=text_content))\n+ # 逐页提取文本\n+ for page_num in range(max_pages):\n+ page = reader.pages[page_num]\n+ page_text = page.extract_text()\n+ if page_text:\n+ text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n+ text_content += page_text + \"\\n\"\n+ \n+ # 添加文本内容到结果\n+ results.append(types.TextContent(type=\"text\", text=text_content))\n \n # 添加提示信息\n results.append(types.TextContent(\n type=\"text\",\n@@ -206,8 +121,9 @@\n ))\n \n return results\n except Exception as e:\n+ import traceback\n error_details = traceback.format_exc()\n return [types.TextContent(\n type=\"text\",\n text=f\"错误: 快速预览PDF失败: {str(e)}\\n详细错误信息: {error_details}\"\n"
},
{
"date": 1741335072961,
"content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,15 +1,25 @@\n+\"\"\"\n+PDF快速预览工具,仅提取文本内容,适用于大型PDF文件\n+\"\"\"\n+\n import os\n+import fitz # PyMuPDF\n import PyPDF2\n import pymupdf4llm\n+import traceback\n+from typing import Dict, List, Any\n import mcp.types as types\n from . import BaseTool, ToolRegistry\n \n @ToolRegistry.register\n class QuickPdfTool(BaseTool):\n- \"\"\"快速PDF预览工具,不包含图片处理,适用于大文件\"\"\"\n+ \"\"\"\n+ 用于快速预览PDF文件的工具,仅提取文本内容,不处理图片\n+ \"\"\"\n+ \n name = \"quick_pdf\"\n- description = \"快速预览PDF文件内容(仅文本,无图片)\"\n+ description = \"快速预览PDF文档内容(仅提取文本,不包含图片)\"\n input_schema = {\n \"type\": \"object\",\n \"required\": [\"file_path\"],\n \"properties\": {\n@@ -19,17 +29,36 @@\n }\n },\n }\n \n- async def execute(self, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n- \"\"\"快速预览PDF文件内容\"\"\"\n+ async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+ \"\"\"\n+ 快速预览PDF文档\n+ \n+ Args:\n+ arguments: 参数字典,必须包含'file_path'键\n+ \n+ Returns:\n+ PDF文本内容列表\n+ \"\"\"\n if \"file_path\" not in arguments:\n return [types.TextContent(\n type=\"text\",\n- text=\"Error: Missing required argument 'file_path'\"\n+ text=\"错误: 缺少必要参数 'file_path'\"\n )]\n+ \n+ return await self._quick_preview_pdf(arguments[\"file_path\"])\n+ \n+ async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n+ \"\"\"\n+ 快速预览PDF文件内容,不包含图片处理\n+ \n+ Args:\n+ file_path: PDF文件路径\n \n- file_path = arguments[\"file_path\"]\n+ Returns:\n+ PDF文本内容列表\n+ \"\"\"\n results = []\n \n # 检查文件是否存在\n if not os.path.exists(file_path):\n@@ -55,65 +84,113 @@\n max_pages = min(num_pages, 50) # 快速模式可以处理更多页\n pages_to_process = list(range(max_pages))\n \n try:\n- # 使用PymuPDF4llm提取内容,但不提取图像\n- md_content = pymupdf4llm.to_markdown(\n- doc=file_path,\n- pages=pages_to_process,\n- page_chunks=True,\n- write_images=False # 不提取图像\n- )\n+ # 尝试使用PyMuPDF提取文本(通常比PyPDF2更快更准确)\n+ pdf_document = fitz.open(file_path)\n \n+ # 提取文本内容\n+ text_content = \"\"\n+ \n+ # 添加PDF元数据\n+ text_content += f\"## PDF文档信息\\n\\n\"\n+ text_content += f\"- 页数: {num_pages}\\n\"\n+ \n+ # 从PyMuPDF获取元数据\n+ metadata = pdf_document.metadata\n+ if metadata:\n+ for key, value in metadata.items():\n+ if value and str(value).strip():\n+ text_content += f\"- {key}: {value}\\n\"\n+ \n # 如果处理的页数少于总页数,添加提示\n if max_pages < num_pages:\n- md_content = f\"# PDF文档内容(前{max_pages}页)\\n\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n- else:\n- md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n+ text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n \n+ text_content += \"\\n## 内容摘要\\n\\n\"\n+ \n+ # 逐页提取文本\n+ for page_num in range(max_pages):\n+ page = pdf_document[page_num]\n+ page_text = page.get_text()\n+ \n+ if page_text.strip():\n+ text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n+ text_content += page_text.strip() + \"\\n\"\n+ \n+ pdf_document.close()\n+ \n # 添加提取的内容到结果\n results.append(types.TextContent(\n type=\"text\",\n- text=md_content\n+ text=text_content\n ))\n- except Exception as extract_error:\n- # 如果PymuPDF4llm提取失败,回退到原来的方法\n+ \n+ except Exception as pymupdf_error:\n+ # 如果PyMuPDF提取失败,回退到PymuPDF4llm\n results.append(types.TextContent(\n type=\"text\",\n- text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用备用方法...\"\n+ text=f\"警告: 使用PyMuPDF提取内容失败: {str(pymupdf_error)}\\n正在尝试使用备用方法...\"\n ))\n \n- # 使用PyPDF2提取文本\n- text_content = \"\"\n- with open(file_path, 'rb') as file:\n- reader = PyPDF2.PdfReader(file)\n+ try:\n+ # 使用PymuPDF4llm提取内容,但不提取图像\n+ md_content = pymupdf4llm.to_markdown(\n+ doc=file_path,\n+ pages=pages_to_process,\n+ page_chunks=True,\n+ write_images=False # 不提取图像\n+ )\n \n- # 添加PDF元数据\n- text_content += f\"## PDF文档信息\\n\\n\"\n- text_content += f\"- 页数: {num_pages}\\n\"\n- if reader.metadata:\n- for key, value in reader.metadata.items():\n- if key.startswith('/'):\n- key = key[1:]\n- if value and str(value).strip():\n- text_content += f\"- {key}: {value}\\n\"\n- \n- # 限制处理的页数\n+ # 如果处理的页数少于总页数,添加提示\n if max_pages < num_pages:\n- text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n+ md_content = f\"# PDF文档内容(前{max_pages}页)\\n\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n+ else:\n+ md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n \n- text_content += \"\\n## 内容摘要\\n\\n\"\n+ # 添加提取的内容到结果\n+ results.append(types.TextContent(\n+ type=\"text\",\n+ text=md_content\n+ ))\n+ except Exception as extract_error:\n+ # 如果PymuPDF4llm提取失败,回退到原来的方法\n+ results.append(types.TextContent(\n+ type=\"text\",\n+ text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用最后的备用方法...\"\n+ ))\n \n- # 逐页提取文本\n- for page_num in range(max_pages):\n- page = reader.pages[page_num]\n- page_text = page.extract_text()\n- if page_text:\n- text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n- text_content += page_text + \"\\n\"\n- \n- # 添加文本内容到结果\n- results.append(types.TextContent(type=\"text\", text=text_content))\n+ # 使用PyPDF2提取文本\n+ text_content = \"\"\n+ with open(file_path, 'rb') as file:\n+ reader = PyPDF2.PdfReader(file)\n+ \n+ # 添加PDF元数据\n+ text_content += f\"## PDF文档信息\\n\\n\"\n+ text_content += f\"- 页数: {num_pages}\\n\"\n+ if reader.metadata:\n+ for key, value in reader.metadata.items():\n+ if key.startswith('/'):\n+ key = key[1:]\n+ if value and str(value).strip():\n+ text_content += f\"- {key}: {value}\\n\"\n+ \n+ # 限制处理的页数\n+ if max_pages < num_pages:\n+ text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n+ \n+ text_content += \"\\n## 内容摘要\\n\\n\"\n+ \n+ # 逐页提取文本\n+ for page_num in range(max_pages):\n+ page = reader.pages[page_num]\n+ page_text = page.extract_text()\n+ if page_text:\n+ text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n+ text_content += page_text + \"\\n\"\n+ \n+ # 添加文本内容到结果\n+ results.append(types.TextContent(type=\"text\", text=text_content))\n \n # 添加提示信息\n results.append(types.TextContent(\n type=\"text\",\n@@ -121,9 +198,8 @@\n ))\n \n return results\n except Exception as e:\n- import traceback\n error_details = traceback.format_exc()\n return [types.TextContent(\n type=\"text\",\n text=f\"错误: 快速预览PDF失败: {str(e)}\\n详细错误信息: {error_details}\"\n"
}
],
"date": 1741332308874,
"name": "Commit-0",
"content": "\"\"\"\nPDF快速预览工具,仅提取文本内容,适用于大型PDF文件\n\"\"\"\n\nimport os\nimport fitz # PyMuPDF\nimport PyPDF2\nimport pymupdf4llm\nimport traceback\nfrom typing import Dict, List, Any\nimport mcp.types as types\nfrom .base import BaseTool\n\n\nclass QuickPdfTool(BaseTool):\n \"\"\"\n 用于快速预览PDF文件的工具,仅提取文本内容,不处理图片\n \"\"\"\n \n @property\n def name(self) -> str:\n return \"quick_pdf\"\n \n @property\n def description(self) -> str:\n return \"快速预览PDF文档内容(仅提取文本,不包含图片)\"\n \n @property\n def input_schema(self) -> Dict[str, Any]:\n return {\n \"type\": \"object\",\n \"required\": [\"file_path\"],\n \"properties\": {\n \"file_path\": {\n \"type\": \"string\",\n \"description\": \"PDF文件的本地路径,例如'/path/to/document.pdf'\",\n }\n },\n }\n \n async def execute(self, arguments: Dict[str, Any]) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n \"\"\"\n 快速预览PDF文档\n \n Args:\n arguments: 参数字典,必须包含'file_path'键\n \n Returns:\n PDF文本内容列表\n \"\"\"\n if \"file_path\" not in arguments:\n return [types.TextContent(\n type=\"text\",\n text=\"错误: 缺少必要参数 'file_path'\"\n )]\n \n return await self._quick_preview_pdf(arguments[\"file_path\"])\n \n async def _quick_preview_pdf(self, file_path: str) -> List[types.TextContent | types.ImageContent | types.EmbeddedResource]:\n \"\"\"\n 快速预览PDF文件内容,不包含图片处理\n \n Args:\n file_path: PDF文件路径\n \n Returns:\n PDF文本内容列表\n \"\"\"\n results = []\n \n # 检查文件是否存在\n if not os.path.exists(file_path):\n return [types.TextContent(\n type=\"text\",\n text=f\"错误: 文件不存在: {file_path}\\n请检查路径是否正确,并确保文件可访问。\"\n )]\n \n try:\n # 添加文件信息\n file_size_mb = os.path.getsize(file_path) / (1024 * 1024)\n results.append(types.TextContent(\n type=\"text\",\n text=f\"# 快速预览模式 - 仅提取文本内容\\n\\n文件大小: {file_size_mb:.2f} MB\"\n ))\n \n # 获取PDF页数\n with open(file_path, 'rb') as file:\n reader = PyPDF2.PdfReader(file)\n num_pages = len(reader.pages)\n \n # 限制处理的页数\n max_pages = min(num_pages, 50) # 快速模式可以处理更多页\n pages_to_process = list(range(max_pages))\n \n try:\n # 尝试使用PyMuPDF提取文本(通常比PyPDF2更快更准确)\n pdf_document = fitz.open(file_path)\n \n # 提取文本内容\n text_content = \"\"\n \n # 添加PDF元数据\n text_content += f\"## PDF文档信息\\n\\n\"\n text_content += f\"- 页数: {num_pages}\\n\"\n \n # 从PyMuPDF获取元数据\n metadata = pdf_document.metadata\n if metadata:\n for key, value in metadata.items():\n if value and str(value).strip():\n text_content += f\"- {key}: {value}\\n\"\n \n # 如果处理的页数少于总页数,添加提示\n if max_pages < num_pages:\n text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n \n text_content += \"\\n## 内容摘要\\n\\n\"\n \n # 逐页提取文本\n for page_num in range(max_pages):\n page = pdf_document[page_num]\n page_text = page.get_text()\n \n if page_text.strip():\n text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n text_content += page_text.strip() + \"\\n\"\n \n pdf_document.close()\n \n # 添加提取的内容到结果\n results.append(types.TextContent(\n type=\"text\",\n text=text_content\n ))\n \n except Exception as pymupdf_error:\n # 如果PyMuPDF提取失败,回退到PymuPDF4llm\n results.append(types.TextContent(\n type=\"text\",\n text=f\"警告: 使用PyMuPDF提取内容失败: {str(pymupdf_error)}\\n正在尝试使用备用方法...\"\n ))\n \n try:\n # 使用PymuPDF4llm提取内容,但不提取图像\n md_content = pymupdf4llm.to_markdown(\n doc=file_path,\n pages=pages_to_process,\n page_chunks=True,\n write_images=False # 不提取图像\n )\n \n # 如果处理的页数少于总页数,添加提示\n if max_pages < num_pages:\n md_content = f\"# PDF文档内容(前{max_pages}页)\\n\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\\n{md_content}\"\n else:\n md_content = f\"# PDF文档内容\\n\\n{md_content}\"\n \n # 添加提取的内容到结果\n results.append(types.TextContent(\n type=\"text\",\n text=md_content\n ))\n except Exception as extract_error:\n # 如果PymuPDF4llm提取失败,回退到原来的方法\n results.append(types.TextContent(\n type=\"text\",\n text=f\"警告: 使用PymuPDF4llm提取内容失败: {str(extract_error)}\\n正在尝试使用最后的备用方法...\"\n ))\n \n # 使用PyPDF2提取文本\n text_content = \"\"\n with open(file_path, 'rb') as file:\n reader = PyPDF2.PdfReader(file)\n \n # 添加PDF元数据\n text_content += f\"## PDF文档信息\\n\\n\"\n text_content += f\"- 页数: {num_pages}\\n\"\n if reader.metadata:\n for key, value in reader.metadata.items():\n if key.startswith('/'):\n key = key[1:]\n if value and str(value).strip():\n text_content += f\"- {key}: {value}\\n\"\n \n # 限制处理的页数\n if max_pages < num_pages:\n text_content += f\"\\n> 注意: 由于文件较大,仅处理前 {max_pages} 页内容。\\n\"\n \n text_content += \"\\n## 内容摘要\\n\\n\"\n \n # 逐页提取文本\n for page_num in range(max_pages):\n page = reader.pages[page_num]\n page_text = page.extract_text()\n if page_text:\n text_content += f\"\\n### 第 {page_num + 1} 页\\n\\n\"\n text_content += page_text + \"\\n\"\n \n # 添加文本内容到结果\n results.append(types.TextContent(type=\"text\", text=text_content))\n \n # 添加提示信息\n results.append(types.TextContent(\n type=\"text\",\n text=\"\\n## 注意\\n\\n快速预览完成!如需查看图片内容,请使用完整的PDF解析工具。\"\n ))\n \n return results\n except Exception as e:\n error_details = traceback.format_exc()\n return [types.TextContent(\n type=\"text\",\n text=f\"错误: 快速预览PDF失败: {str(e)}\\n详细错误信息: {error_details}\"\n )] "
}
]
}