Zotero Chunk RAG

Overview Schema Related Servers Score Discussions

render.py•2.87 KiB

"""Rendering utilities -- render table regions from PDFs as PNG images.""" from __future__ import annotations from pathlib import Path import pymupdf from zotero_chunk_rag.models import ExtractedTable def render_table_image( pdf_path: Path | str, page_num: int, bbox: tuple[float, float, float, float], output_path: Path, *, dpi: int = 300, padding: int = 20, ) -> Path: """Render a table region from a PDF page as a PNG image. Parameters ---------- pdf_path: Path to the source PDF. page_num: 1-indexed page number (consistent with the rest of the codebase). bbox: ``(x0, y0, x1, y1)`` bounding box in PDF points. output_path: Destination path for the PNG file. dpi: Resolution for rendering (default 300). padding: PDF-point padding added around the bbox before rendering. The padded region is clipped to page bounds. Returns ------- Path The *output_path* (for convenience in chaining). """ pdf_path = Path(pdf_path) output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) doc = pymupdf.open(str(pdf_path)) try: page = doc[page_num - 1] # convert 1-indexed to 0-indexed page_rect = page.rect x0, y0, x1, y1 = bbox clip = pymupdf.Rect( max(x0 - padding, page_rect.x0), max(y0 - padding, page_rect.y0), min(x1 + padding, page_rect.x1), min(y1 + padding, page_rect.y1), ) pix = page.get_pixmap(clip=clip, dpi=dpi) pix.save(str(output_path)) finally: doc.close() return output_path def render_all_tables( pdf_path: Path | str, tables: list[ExtractedTable], output_dir: Path, *, dpi: int = 300, ) -> dict[str, Path]: """Render all non-artifact tables as PNG images. Parameters ---------- pdf_path: Path to the source PDF. tables: List of ``ExtractedTable`` objects (typically from extraction). output_dir: Directory where PNG files will be written. dpi: Resolution for rendering (default 300). Returns ------- dict[str, Path] Mapping of ``{caption_or_id: output_path}`` for each rendered table. 
""" output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) result: dict[str, Path] = {} for idx, t in enumerate(tables): if t.artifact_type is not None: continue if t.caption: key = t.caption else: key = f"table_p{t.page_num}_i{t.table_index}" safe_name = f"table_{idx:03d}.png" out = output_dir / safe_name render_table_image( pdf_path, t.page_num, t.bbox, out, dpi=dpi, ) result[key] = out return result

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ccam80/zotero-chunk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

render.py•2.87 KiB