Skip to main content
Glama

pdf-tools-mcp

metadata.py (1.69 kB)
import fitz
import logging
from pathlib import Path

from config import DATA_DIR, uuid4_pdf_re

logger = logging.getLogger(__name__)


async def get_metadata(file_name: str):
    """Return document metadata and per-page geometry for a PDF in DATA_DIR.

    Args:
        file_name: Name of the PDF inside ``DATA_DIR``. It must match
            ``uuid4_pdf_re``; anything else is rejected before touching
            the filesystem.

    Returns:
        * On success, a dict with two keys:
          ``"metadata"`` - the document metadata plus a ``"page_count"``
          entry, and ``"pages"`` - a list of dicts with ``"page_number"``
          (1-based), ``"width"``, ``"height"`` and ``"rotation"``.
        * ``False`` if the file name is rejected or the document exposes
          no metadata.
        * ``{"success": False, "error": <message>}`` if opening or reading
          the document raises.

    Note:
        Only page dimensions and rotation are collected; page text/content
        is not extracted.
    """
    if not uuid4_pdf_re.match(file_name):
        logger.error("Input file must be in the 'data' folder and have a .pdf extension.")
        return False

    file_path = Path(DATA_DIR, file_name)

    try:
        # The context manager closes the document on every exit path,
        # including the early "no metadata" return and any exception.
        with fitz.open(file_path) as doc:
            metadata = doc.metadata
            if metadata is None:
                logger.error("No metadata found in the PDF.")
                return False

            # Work on a copy so adding page_count does not mutate the
            # dict owned by the document object.
            metadata = dict(metadata)
            metadata["page_count"] = len(doc)

            pages = [
                {
                    "page_number": page_number,
                    "width": page.rect.width,
                    "height": page.rect.height,
                    "rotation": page.rotation,
                }
                for page_number, page in enumerate(doc, start=1)
            ]

        return {
            "metadata": metadata,
            "pages": pages,
        }
    except Exception as e:
        # Top-level boundary for the tool: report the failure to the caller
        # as data rather than letting it propagate.
        logger.error("Error processing PDF file: %s", e)
        return {
            "success": False,
            "error": str(e),
        }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/danielkennedy1/pdf-tools-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.