MCP Development Framework

MIT License
OverviewInspectSchema Related Servers Reviews Score
{
    "sourceFile": "mcp_simple_tool/tools/utils/pdf_helpers.py",
    "activeCommit": 0,
    "commits": [
        {
            "activePatchIndex": 1,
            "patches": [
                {
                    "date": 1741332207363,
                    "content": "Index: \n===================================================================\n--- \n+++ \n"
                },
                {
                    "date": 1741332589773,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,101 +1,1 @@\n-\"\"\"\n-PDF处理相关的辅助函数\n-\"\"\"\n-\n-import os\n-import tempfile\n-import fitz  # PyMuPDF\n-from typing import List, Dict, Any\n-from PIL import Image\n-\n-\n-async def extract_images_from_pdf(file_path: str, output_dir: str) -> List[Dict[str, Any]]:\n-    \"\"\"\n-    使用PyMuPDF (fitz) 从PDF中提取图片，这比pdf2image更高效且能提取嵌入图片\n-    \n-    Args:\n-        file_path: PDF文件路径\n-        output_dir: 图片输出目录\n-        \n-    Returns:\n-        提取的图片信息列表\n-    \"\"\"\n-    image_info = []\n-    \n-    try:\n-        # 打开PDF文件\n-        pdf_document = fitz.open(file_path)\n-        \n-        # 遍历每一页\n-        for page_index in range(len(pdf_document)):\n-            page = pdf_document[page_index]\n-            \n-            # 获取页面上的图片\n-            image_list = page.get_images(full=True)\n-            \n-            # 遍历页面上的每个图片\n-            for img_index, img in enumerate(image_list):\n-                xref = img[0]  # 图片的xref号\n-                base_image = pdf_document.extract_image(xref)\n-                image_bytes = base_image[\"image\"]\n-                image_ext = base_image[\"ext\"]  # 图片扩展名\n-                \n-                # 保存图片到文件\n-                image_filename = f\"page_{page_index + 1}_img_{img_index + 1}.{image_ext}\"\n-                image_path = os.path.join(output_dir, image_filename)\n-                \n-                with open(image_path, \"wb\") as img_file:\n-                    img_file.write(image_bytes)\n-                \n-                # 获取图片信息\n-                with Image.open(image_path) as pil_img:\n-                    width, height = pil_img.size\n-                    format_name = pil_img.format\n-                \n-                # 添加图片信息到列表\n-                image_info.append({\n-                    \"filename\": image_filename,\n-                    \"path\": image_path,\n-                    \"page\": page_index + 1,\n-                    \"width\": width,\n-                    \"height\": height,\n-                    \"format\": format_name,\n-                    \"size_bytes\": len(image_bytes)\n-                })\n-        \n-        # 如果没有找到嵌入图片，尝试渲染页面为图片\n-        if not image_info:\n-            for page_index in range(len(pdf_document)):\n-                page = pdf_document[page_index]\n-                \n-                # 将页面渲染为图片\n-                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x缩放以获得更好的质量\n-                image_filename = f\"page_{page_index + 1}_rendered.png\"\n-                image_path = os.path.join(output_dir, image_filename)\n-                \n-                # 保存渲染的图片\n-                pix.save(image_path)\n-                \n-                # 获取图片信息\n-                with Image.open(image_path) as pil_img:\n-                    width, height = pil_img.size\n-                    format_name = pil_img.format\n-                \n-                # 添加图片信息到列表\n-                image_info.append({\n-                    \"filename\": image_filename,\n-                    \"path\": image_path,\n-                    \"page\": page_index + 1,\n-                    \"width\": width,\n-                    \"height\": height,\n-                    \"format\": format_name,\n-                    \"size_bytes\": os.path.getsize(image_path),\n-                    \"type\": \"rendered_page\"\n-                })\n-        \n-        pdf_document.close()\n-        return image_info\n-    \n-    except Exception as e:\n-        print(f\"提取图片时出错: {str(e)}\")\n-        return [] \n\\ No newline at end of file\n+ \n\\ No newline at end of file\n"
                }
            ],
            "date": 1741332207363,
            "name": "Commit-0",
            "content": "\"\"\"\nPDF处理相关的辅助函数\n\"\"\"\n\nimport os\nimport tempfile\nimport fitz  # PyMuPDF\nfrom typing import List, Dict, Any\nfrom PIL import Image\n\n\nasync def extract_images_from_pdf(file_path: str, output_dir: str) -> List[Dict[str, Any]]:\n    \"\"\"\n    使用PyMuPDF (fitz) 从PDF中提取图片，这比pdf2image更高效且能提取嵌入图片\n    \n    Args:\n        file_path: PDF文件路径\n        output_dir: 图片输出目录\n        \n    Returns:\n        提取的图片信息列表\n    \"\"\"\n    image_info = []\n    \n    try:\n        # 打开PDF文件\n        pdf_document = fitz.open(file_path)\n        \n        # 遍历每一页\n        for page_index in range(len(pdf_document)):\n            page = pdf_document[page_index]\n            \n            # 获取页面上的图片\n            image_list = page.get_images(full=True)\n            \n            # 遍历页面上的每个图片\n            for img_index, img in enumerate(image_list):\n                xref = img[0]  # 图片的xref号\n                base_image = pdf_document.extract_image(xref)\n                image_bytes = base_image[\"image\"]\n                image_ext = base_image[\"ext\"]  # 图片扩展名\n                \n                # 保存图片到文件\n                image_filename = f\"page_{page_index + 1}_img_{img_index + 1}.{image_ext}\"\n                image_path = os.path.join(output_dir, image_filename)\n                \n                with open(image_path, \"wb\") as img_file:\n                    img_file.write(image_bytes)\n                \n                # 获取图片信息\n                with Image.open(image_path) as pil_img:\n                    width, height = pil_img.size\n                    format_name = pil_img.format\n                \n                # 添加图片信息到列表\n                image_info.append({\n                    \"filename\": image_filename,\n                    \"path\": image_path,\n                    \"page\": page_index + 1,\n                    \"width\": width,\n                    \"height\": height,\n                    \"format\": format_name,\n                    \"size_bytes\": len(image_bytes)\n                })\n        \n        # 如果没有找到嵌入图片，尝试渲染页面为图片\n        if not image_info:\n            for page_index in range(len(pdf_document)):\n                page = pdf_document[page_index]\n                \n                # 将页面渲染为图片\n                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x缩放以获得更好的质量\n                image_filename = f\"page_{page_index + 1}_rendered.png\"\n                image_path = os.path.join(output_dir, image_filename)\n                \n                # 保存渲染的图片\n                pix.save(image_path)\n                \n                # 获取图片信息\n                with Image.open(image_path) as pil_img:\n                    width, height = pil_img.size\n                    format_name = pil_img.format\n                \n                # 添加图片信息到列表\n                image_info.append({\n                    \"filename\": image_filename,\n                    \"path\": image_path,\n                    \"page\": page_index + 1,\n                    \"width\": width,\n                    \"height\": height,\n                    \"format\": format_name,\n                    \"size_bytes\": os.path.getsize(image_path),\n                    \"type\": \"rendered_page\"\n                })\n        \n        pdf_document.close()\n        return image_info\n    \n    except Exception as e:\n        print(f\"提取图片时出错: {str(e)}\")\n        return [] "
        }
    ]
}