
Code Execution Server

pdf_read_api.py (1.81 kB)
import asyncio
import functools
import io
import traceback

import fitz  # PyMuPDF
import requests


def sync_read_pdf(url: str) -> str:
    """Download a PDF from `url` and return its extracted text (blocking)."""
    try:
        headers = {
            "Accept": "application/pdf",
            "User-Agent": "Mozilla/5.0",
        }
        # Rewrite arXiv abstract links to their direct PDF counterparts.
        if "arxiv.org/abs/" in url:
            paper_id = url.split("/")[-1]
            url = f"https://arxiv.org/pdf/{paper_id}.pdf"

        try:
            response = requests.get(url, headers=headers, stream=True, timeout=15)
            response.raise_for_status()
        except Exception as e:
            print(f"\033[91mRequest failed: {e}, {traceback.format_exc()}\033[0m")
            return "Failed to read the PDF"

        if not response.content:
            print("Content is None or empty")
            return "Failed to get the PDF content"

        try:
            # Parse the downloaded bytes with PyMuPDF and concatenate page text.
            pdf_bytes = io.BytesIO(response.content)
            doc = fitz.open(stream=pdf_bytes, filetype="pdf")
            text = ""
            for page in doc:
                text += page.get_text()
            return text.strip()
        except Exception as e:
            print(f"\033[91mRead failed: {e}, {traceback.format_exc()}\033[0m")
            return "Failed to read the PDF"
    except Exception as e:
        print(f"\033[91mRead failed: {e}, {traceback.format_exc()}\033[0m")
        return "Failed to read the PDF"


async def read_pdf_from_url(url: str) -> str:
    """Run the blocking download/parse in a thread so the event loop stays free."""
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, functools.partial(sync_read_pdf, url))


async def main():
    url = "https://arxiv.org/pdf/2305.14342"
    content = await read_pdf_from_url(url)
    print(content)


if __name__ == "__main__":
    asyncio.run(main())
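Because read_pdf_from_url is already async, it can be registered directly as a tool on an MCP server. The snippet below is a minimal sketch, not part of pdf_read_api.py: it assumes the official mcp Python SDK with its FastMCP class, and the server name, tool name, and import of pdf_read_api as a module are all illustrative assumptions.

# Hypothetical MCP tool registration (not included in pdf_read_api.py).
from mcp.server.fastmcp import FastMCP

from pdf_read_api import read_pdf_from_url  # assumes the file above is importable

mcp = FastMCP("pdf-reader")  # server name is an assumption


@mcp.tool()
async def read_pdf(url: str) -> str:
    """Fetch a PDF from a URL and return its extracted text."""
    return await read_pdf_from_url(url)


if __name__ == "__main__":
    mcp.run()  # defaults to the stdio transport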

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sjtu-sai-agents/mcp_sandbox'
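The same lookup can be made from Python. A minimal sketch follows; it assumes the endpoint returns JSON, since the exact response schema is not documented on this page.

# Query the Glama MCP directory API for this server's metadata.
import requests

resp = requests.get(
    "https://glama.ai/api/mcp/v1/servers/sjtu-sai-agents/mcp_sandbox",
    timeout=15,
)
resp.raise_for_status()
print(resp.json())  # response schema is an assumption; inspect the output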

If you have feedback or need assistance with the MCP directory API, please join our Discord server.