Zotero Chunk RAG

Overview Schema Related Servers Score Discussions

zotero-chunk-mcp
src
zotero_chunk_rag

openalex_client.py•5.48 KiB

"""OpenAlex API client for citation data.""" import logging import time from dataclasses import dataclass import httpx logger = logging.getLogger(__name__) OPENALEX_API = "https://api.openalex.org" @dataclass class CitationData: """Citation information for a paper.""" openalex_id: str doi: str | None cited_by_count: int references: list[str] class OpenAlexClient: """Client for OpenAlex API. Rate limits: - Anonymous: 1 request/second - Polite pool (with email): 10 requests/second """ def __init__(self, email: str | None = None): """Initialize client. Args: email: Optional email for polite pool (faster rate limits). Set via config.openalex_email or OPENALEX_EMAIL env var. """ self.headers = {} if email: self.headers["User-Agent"] = f"mailto:{email}" self._rate_limit_delay = 0.1 # 10 req/sec else: self._rate_limit_delay = 1.0 # 1 req/sec self._last_request = 0.0 def _rate_limit(self): """Enforce rate limiting.""" elapsed = time.time() - self._last_request if elapsed < self._rate_limit_delay: time.sleep(self._rate_limit_delay - elapsed) self._last_request = time.time() def get_work_by_doi(self, doi: str) -> CitationData | None: """Get citation data for a DOI. Args: doi: The DOI to look up (with or without https://doi.org/ prefix) Returns: CitationData if found, None otherwise """ self._rate_limit() # Normalize DOI - remove common prefixes if doi.startswith("https://doi.org/"): doi = doi[16:] elif doi.startswith("http://doi.org/"): doi = doi[15:] try: url = f"{OPENALEX_API}/works/doi:{doi}" resp = httpx.get(url, headers=self.headers, timeout=10.0) if resp.status_code == 404: return None resp.raise_for_status() data = resp.json() return CitationData( openalex_id=data["id"], doi=doi, cited_by_count=data.get("cited_by_count", 0), references=[ref for ref in data.get("referenced_works", []) if ref], ) except Exception as e: logger.warning(f"OpenAlex lookup failed for {doi}: {e}") return None def get_citing_works(self, openalex_id: str, limit: int = 100) -> list[dict]: """Get works that cite a given paper. Args: openalex_id: The OpenAlex ID of the paper limit: Maximum number of citing works to return Returns: List of work dictionaries with metadata """ self._rate_limit() try: url = f"{OPENALEX_API}/works" params = {"filter": f"cites:{openalex_id}", "per-page": min(limit, 200)} resp = httpx.get(url, params=params, headers=self.headers, timeout=10.0) resp.raise_for_status() return resp.json().get("results", []) except Exception as e: logger.warning(f"Failed to get citing works: {e}") return [] def get_references(self, openalex_id: str, limit: int = 100) -> list[dict]: """Get works that a paper references (its bibliography). Args: openalex_id: The OpenAlex ID of the paper limit: Maximum number of references to return Returns: List of work dictionaries with metadata """ self._rate_limit() # OpenAlex stores references as a list of OpenAlex IDs # We need to fetch those works try: # First get the work to get its references url = f"{OPENALEX_API}/works/{openalex_id}" resp = httpx.get(url, headers=self.headers, timeout=10.0) resp.raise_for_status() work = resp.json() referenced_works = work.get("referenced_works", []) if not referenced_works: return [] # Fetch details for the referenced works (up to limit) referenced_works = referenced_works[:limit] self._rate_limit() # Use filter to get multiple works at once ids_filter = "|".join(referenced_works) url = f"{OPENALEX_API}/works" params = {"filter": f"openalex_id:{ids_filter}", "per-page": min(limit, 200)} resp = httpx.get(url, params=params, headers=self.headers, timeout=10.0) resp.raise_for_status() return resp.json().get("results", []) except Exception as e: logger.warning(f"Failed to get references: {e}") return [] @staticmethod def format_work(work: dict) -> dict: """Format an OpenAlex work into a simpler structure. Args: work: Raw OpenAlex work dictionary Returns: Simplified work dictionary """ authors = [] for authorship in work.get("authorships", [])[:3]: author = authorship.get("author", {}) name = author.get("display_name", "") if name: authors.append(name) return { "title": work.get("title", ""), "authors": ", ".join(authors), "year": work.get("publication_year"), "doi": work.get("doi"), "cited_by_count": work.get("cited_by_count", 0), "openalex_id": work.get("id"), }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ccam80/zotero-chunk-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

openalex_client.py•5.48 KiB