Skip to main content
Glama
image.py (1.84 kB)
import os
import tempfile
from typing import Any, Dict, List, Optional, Tuple

import fitz


class ImageExtractor:
    """Extract the images embedded in a PDF document and save them to disk."""

    def __init__(self, output_dir: Optional[str] = None):
        """
        Args:
            output_dir: Directory to save extracted images. If None (or an
                empty string), a new temporary directory is created with
                ``tempfile.mkdtemp``.
        """
        self.output_dir = (
            output_dir if output_dir else tempfile.mkdtemp(prefix="mcp_pdf_images_")
        )

    def extract_images(
        self, doc: fitz.Document, page_range: Optional[range] = None
    ) -> List[Dict[str, Any]]:
        """
        Extract images from specific pages and write them into ``output_dir``.

        Args:
            doc: The opened document to read images from.
            page_range: Zero-based page indices to scan. If None, every page
                of ``doc`` is scanned. Indices outside ``0 .. len(doc) - 1``
                are silently skipped.

        Returns:
            List of dicts, one per saved image, with 1-based page numbers:
            [{"page": 1, "path": "/tmp/img1.png",
              "markdown": "![Image](/tmp/img1.png)"}, ...]

        Raises:
            OSError: If the output directory cannot be created or an image
                file cannot be written.
        """
        extracted_images: List[Dict[str, Any]] = []
        page_count = len(doc)

        if page_range is None:
            page_range = range(page_count)

        # A caller-supplied output_dir may not exist yet; without this the
        # first open() below would fail with FileNotFoundError.
        os.makedirs(self.output_dir, exist_ok=True)

        for page_num in page_range:
            if page_num < 0 or page_num >= page_count:
                continue

            page = doc.load_page(page_num)
            image_list = page.get_images(full=True)

            for img_index, img in enumerate(image_list):
                # The first item of each entry is the xref used to look the
                # image up in the document.
                xref = img[0]
                base_image = doc.extract_image(xref)
                image_bytes = base_image["image"]
                ext = base_image["ext"]

                # Save image (file names use 1-based page/image numbers)
                image_filename = f"page{page_num+1}_img{img_index+1}.{ext}"
                image_path = os.path.join(self.output_dir, image_filename)

                with open(image_path, "wb") as f:
                    f.write(image_bytes)

                extracted_images.append(
                    {
                        "page": page_num + 1,
                        "path": image_path,
                        "markdown": f"![Image (Page {page_num+1})]({image_path})",
                    }
                )

        return extracted_images

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/rexfelix/readPDF_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.