Skip to main content
Glama
loader.py1.93 kB
import os
import tempfile
from urllib.parse import urlparse

import httpx
import fitz  # PyMuPDF


class PDFLoader:
    """Open PDF documents from a local file path or from a URL."""

    def __init__(self):
        # The loader keeps no state; the constructor exists only so callers
        # can instantiate it.
        pass

    async def load(self, source: str) -> fitz.Document:
        """
        Load a PDF from a local path or a URL.

        ``source`` is treated as a URL when it parses with both a scheme and
        a network location (see ``_is_url``); otherwise it is treated as a
        local file path.

        Args:
            source: Local file path or URL string.

        Returns:
            fitz.Document: Opened PDF document object.

        Raises:
            FileNotFoundError: If ``source`` is a local path that does not exist.
            ValueError: If the file or the downloaded bytes cannot be opened
                as a PDF.
            httpx.HTTPError: If the URL download fails or the server answers
                with an error status.
        """
        if self._is_url(source):
            return await self._load_from_url(source)
        return self._load_from_local(source)

    def _is_url(self, source: str) -> bool:
        """Return True when ``source`` has both a URL scheme and a netloc."""
        # Only urlparse itself can raise here, so keep the try body minimal.
        try:
            parsed = urlparse(source)
        except ValueError:
            return False
        return bool(parsed.scheme and parsed.netloc)

    def _load_from_local(self, path: str) -> fitz.Document:
        """
        Open a PDF stored on the local filesystem.

        Raises:
            FileNotFoundError: If nothing exists at ``path``.
            ValueError: If PyMuPDF fails to open the file.
        """
        if not os.path.exists(path):
            raise FileNotFoundError(f"PDF file not found at: {path}")

        try:
            return fitz.open(path)
        except Exception as e:
            # Chain the original error so the underlying failure stays
            # visible in the traceback.
            raise ValueError(f"Failed to open local PDF: {e}") from e

    async def _load_from_url(self, url: str) -> fitz.Document:
        """
        Download a PDF over HTTP(S), following redirects, and open it from memory.

        Raises:
            httpx.HTTPError: If the request fails or returns an error status.
            ValueError: If the downloaded bytes cannot be opened as a PDF.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(url, follow_redirects=True)
            response.raise_for_status()
            # The body is already fully read for a non-streaming request, so
            # the client can be closed before parsing starts.
            pdf_bytes = response.content

        try:
            # Open directly from the in-memory bytes. The filetype="pdf" hint
            # is sometimes useful, although fitz usually auto-detects.
            return fitz.open(stream=pdf_bytes, filetype="pdf")
        except Exception as e:
            raise ValueError(f"Failed to open PDF from URL: {e}") from e

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/rexfelix/readPDF_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.