Skip to main content
Glama
andr3medeiros

PDF Manipulation MCP Server

pdf_auto_crop_page

Automatically crop PDF pages to remove blank margins by detecting content boundaries, optimizing document layout and reducing file size.

Instructions

Automatically crop a PDF page to remove blank margins by detecting content boundaries.

Input Schema

Table | JSON Schema
Name | Required | Description | Default
pdf_path | Yes | |
page_number | No | |
padding | No | |

Implementation Reference

  • Core handler function decorated with @mcp.tool(). Opens PDF, detects content bounds from words, images, drawings; computes union rect; applies conservative padding (min 20pt, asymmetric); crops pages if significant margins; saves to timestamped file.
    def _detect_content_rect(page):
        """Return the bounding box of the content found on ``page``.

        Collects word boxes, displayed-image boxes and vector-drawing boxes,
        keeps those that lie fully inside the page and have a positive area,
        and returns their union as a ``fitz.Rect``. Returns ``None`` when no
        usable box is found.
        """
        page_rect = page.rect

        # Word-level boxes give tighter bounds than whole text blocks; the
        # first four items of each entry are used as the rectangle.
        candidates = [word[:4] for word in page.get_text("words") if len(word) >= 4]

        # Page.get_images() yields (xref, smask, width, height, ...) tuples, so
        # slicing them does not produce coordinates. Page.get_image_info()
        # reports where each image is drawn on the page in its "bbox" entry.
        candidates.extend(
            info["bbox"] for info in page.get_image_info() if "bbox" in info
        )

        # Vector drawings are deliberately best-effort: a page whose graphics
        # cannot be extracted is still cropped using its text and images.
        try:
            candidates.extend(
                drawing["rect"] for drawing in page.get_drawings() if "rect" in drawing
            )
        except Exception:
            pass

        content_rect = None
        for candidate in candidates:
            try:
                rect = fitz.Rect(candidate[:4])
            except Exception:
                # Malformed coordinates: ignore this item.
                continue

            inside_page = (
                rect.x0 >= 0 and rect.y0 >= 0
                and rect.x1 <= page_rect.width and rect.y1 <= page_rect.height
            )
            if rect.is_valid and inside_page and rect.width > 0 and rect.height > 0:
                content_rect = rect if content_rect is None else content_rect | rect

        return content_rect


    @mcp.tool()
    async def pdf_auto_crop_page(
        pdf_path: str,
        page_number: Optional[int] = None,
        padding: float = 10.0
    ) -> str:
        """Automatically crop a PDF page to remove blank margins by detecting content boundaries.

        Args:
            pdf_path: Path of the PDF to crop. The input file is not modified;
                the result is saved to a new timestamped file next to it.
            page_number: Zero-based index of the page to crop, or ``None`` to
                process every page.
            padding: Space (in points) to keep around the detected content.
                Values below 20 are raised to 20. Top and bottom receive the
                full padding, left and right half of it, and no side ever
                receives more than 80% of its existing margin.

        Returns:
            A human-readable status message: either a success message that
            includes the output path, or a message describing why nothing was
            cropped or what went wrong. Errors are reported in the returned
            string rather than raised.
        """
        if not os.path.exists(pdf_path):
            return f"Error: PDF file not found: {pdf_path}"

        if not validate_pdf_file(pdf_path):
            return f"Error: Invalid PDF file: {pdf_path}"

        # A page is cropped only if at least one margin exceeds this (points).
        margin_threshold = 2.0
        # Conservative padding: keep at least 20 points around the content.
        conservative_padding = max(padding, 20.0)

        try:
            # The context manager closes the document on every exit path,
            # including early returns and exceptions.
            with fitz.open(pdf_path) as doc:
                if page_number is not None:
                    if not validate_page_number(doc, page_number):
                        # Built while the document is still open: len() of a
                        # closed document raises instead of returning a count.
                        return f"Error: Invalid page number {page_number}. Document has {len(doc)} pages."
                    pages_to_process = [page_number]
                else:
                    pages_to_process = range(len(doc))

                cropped_pages = 0

                for page_num in pages_to_process:
                    page = doc[page_num]

                    content_rect = _detect_content_rect(page)
                    if content_rect is None:
                        # No content found, skip this page
                        continue

                    page_rect = page.rect
                    left_margin = content_rect.x0
                    top_margin = content_rect.y0
                    right_margin = page_rect.width - content_rect.x1
                    bottom_margin = page_rect.height - content_rect.y1

                    # Skip pages whose content already reaches every edge.
                    if max(left_margin, top_margin, right_margin, bottom_margin) <= margin_threshold:
                        continue

                    # Rect coordinates are (x0, y0, x1, y1) with y0 at the top,
                    # so y0 is padded from the top margin and y1 from the
                    # bottom margin.
                    crop_rect = fitz.Rect(
                        content_rect.x0 - min(conservative_padding * 0.5, left_margin * 0.8),
                        content_rect.y0 - min(conservative_padding, top_margin * 0.8),
                        content_rect.x1 + min(conservative_padding * 0.5, right_margin * 0.8),
                        content_rect.y1 + min(conservative_padding, bottom_margin * 0.8),
                    )

                    # Ensure the crop box is within page bounds
                    crop_rect.intersect(page_rect)

                    # Apply crop only if it actually reduces the page size
                    if (crop_rect.width < page_rect.width or
                            crop_rect.height < page_rect.height):
                        page.set_cropbox(crop_rect)
                        cropped_pages += 1

                if cropped_pages == 0:
                    return "No content found to crop on any pages."

                # Save under a new name so the input file is left untouched.
                output_path = generate_output_filename(pdf_path, "auto_cropped")
                doc.save(output_path)

        except Exception as e:
            return f"Error auto-cropping PDF: {str(e)}"

        # The reported page number is one-based; the parameter is zero-based.
        page_info = f"page {page_number + 1}" if page_number is not None else f"{cropped_pages} pages"
        return f"Successfully auto-cropped {page_info}. Output saved to: {output_path}"
  • Initialization of the FastMCP server instance. All tools including pdf_auto_crop_page are registered via @mcp.tool() decorators on their handler functions.
    # Shared FastMCP server instance, named "pdf-manipulation"; tool handlers
    # attach to it through the @mcp.tool() decorator.
    mcp = FastMCP("pdf-manipulation")
  • Utility to generate timestamped output filename used by pdf_auto_crop_page to save results without overwriting input.
    def generate_output_filename(input_path: str, suffix: str = "modified") -> str:
        """Generate a new filename with timestamp to avoid overwriting originals.

        The result lives in the same directory as ``input_path`` and is named
        ``<stem>_<suffix>_<YYYYMMDD_HHMMSS><ext>``. The timestamp only has
        one-second resolution, so if that name is already taken a numeric
        counter (``_1``, ``_2``, ...) is appended before the extension until an
        unused name is found.

        Args:
            input_path: Path of the source file the new name is derived from.
            suffix: Label inserted between the original stem and the timestamp.

        Returns:
            The generated output path as a string. No file is created.
        """
        path = Path(input_path)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"{path.stem}_{suffix}_{timestamp}"

        candidate = path.parent / f"{base_name}{path.suffix}"
        counter = 1
        # Two calls within the same second would otherwise return the same
        # path, and the later save would silently replace the earlier output.
        while candidate.exists():
            candidate = path.parent / f"{base_name}_{counter}{path.suffix}"
            counter += 1

        return str(candidate)
  • Validates if input file is a valid PDF by attempting to open it with PyMuPDF; used early in handler.
    def validate_pdf_file(pdf_path: str) -> bool:
        """Validate that the file is a valid PDF.

        Args:
            pdf_path: Path of the file to check.

        Returns:
            ``True`` if PyMuPDF can open the file and reports it as a PDF,
            ``False`` if it cannot be opened or is some other document type.
        """
        try:
            # fitz.open() also accepts non-PDF formats, so a successful open
            # alone does not prove the file is a PDF; is_pdf makes that check.
            # The context manager closes the document on every path.
            with fitz.open(pdf_path) as doc:
                return bool(doc.is_pdf)
        except Exception:
            return False
  • Checks if specified page number is valid for the document; used when page_number param provided.
    def validate_page_number(doc: fitz.Document, page_num: int) -> bool:
        """Report whether ``page_num`` is a usable zero-based index into ``doc``.

        Returns ``True`` when ``page_num`` is at least 0 and smaller than
        ``len(doc)``, ``False`` otherwise.
        """
        # Negative indices are rejected before the document length is consulted.
        if page_num < 0:
            return False
        return page_num < len(doc)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/andr3medeiros/pdf-manipulation-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.