Skip to main content
Glama
paper.py2.79 kB
# apaper/models/paper.py from dataclasses import dataclass from datetime import datetime from ..utils.pdf_reader import read_pdf @dataclass class Paper: """Standardized paper format with core fields for academic sources""" # Core fields (required, but allows empty values or defaults) paper_id: str # Unique identifier (e.g., arXiv ID, PMID, DOI) title: str # Paper title authors: list[str] # List of author names abstract: str # Abstract text doi: str # Digital Object Identifier published_date: datetime # Publication date pdf_url: str # Direct PDF link url: str # URL to paper page source: str # Source platform (e.g., 'arxiv', 'pubmed') # Optional fields updated_date: datetime | None = None # Last updated date categories: list[str] | None = None # Subject categories keywords: list[str] | None = None # Keywords citations: int = 0 # Citation count references: list[str] | None = None # List of reference IDs/DOIs extra: dict | None = None # Source-specific extra metadata def __post_init__(self): """Post-initialization to handle default values""" if self.authors is None: self.authors = [] if self.categories is None: self.categories = [] if self.keywords is None: self.keywords = [] if self.references is None: self.references = [] if self.extra is None: self.extra = {} def to_dict(self) -> dict: """Convert paper to dictionary format for serialization""" return { "paper_id": self.paper_id, "title": self.title, "authors": self.authors, "abstract": self.abstract, "doi": self.doi, "published_date": ( self.published_date.isoformat() if self.published_date else None ), "pdf_url": self.pdf_url, "url": self.url, "source": self.source, "updated_date": ( self.updated_date.isoformat() if self.updated_date else None ), "categories": self.categories, "keywords": self.keywords, "citations": self.citations, "references": self.references, "extra": self.extra, } def read_content(self) -> str: """ Read the full text content of this paper's PDF. Returns: str: Extracted text content from the paper's PDF Raises: ValueError: If no PDF URL is available Exception: If PDF cannot be read or processed """ if not self.pdf_url: raise ValueError("No PDF URL available for this paper") return read_pdf(self.pdf_url)

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jiahaoxiang2000/all-in-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server