Unpaywall MCP Server

Overview Schema Related Servers Score Discussions

unpaywall-mcp
src

index.ts

index.ts•13.7 KiB

import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from "@modelcontextprotocol/sdk/types.js";
import { createRequire } from "module";
import type pdfParseType from "pdf-parse";

// pdf-parse is loaded through createRequire instead of an ESM import
// (NOTE(review): presumably because the package ships as CommonJS — confirm).
const require = createRequire(import.meta.url);
const pdfParse: typeof pdfParseType = require("pdf-parse");

// Tool name constants
const TOOL_GET_BY_DOI = "unpaywall_get_by_doi" as const;
const TOOL_SEARCH_TITLES = "unpaywall_search_titles" as const;
const TOOL_GET_FULLTEXT_LINKS = "unpaywall_get_fulltext_links" as const;
const TOOL_FETCH_PDF_TEXT = "unpaywall_fetch_pdf_text" as const;

type GetByDoiArgs = {
  doi: string;
  email?: string; // optional override; otherwise uses UNPAYWALL_EMAIL env var
};

type SearchTitlesArgs = {
  query: string;
  is_oa?: boolean;
  page?: number; // 1-based page index per Unpaywall docs (50 results per page)
  email?: string; // optional override
};

type FetchPdfTextArgs = {
  doi?: string; // if provided, we will resolve best OA PDF via Unpaywall
  pdf_url?: string; // optional direct PDF URL (takes precedence if provided)
  email?: string; // required if using DOI
  truncate_chars?: number; // optional truncation to avoid massive outputs (default 20000)
};

/**
 * Result shape produced by every tool handler in this file.
 * Only `text` content blocks are emitted; structured payloads are serialized
 * to JSON text because `text` is a content type MCP clients are required to
 * understand, whereas a `json` content type is not part of the protocol.
 */
type ToolResult = {
  content: Array<{ type: "text"; text: string }>;
  isError?: boolean;
};

/** Builds an error result carrying a single human-readable message. */
function errorResult(message: string): ToolResult {
  return { content: [{ type: "text", text: message }], isError: true };
}

/** Builds a success result whose single text block is `value` serialized as pretty-printed JSON. */
function jsonResult(value: unknown): ToolResult {
  return { content: [{ type: "text", text: JSON.stringify(value, null, 2) }] };
}

/** Extracts a printable message from anything that may have been thrown. */
function errorMessage(err: unknown): string {
  if (typeof err === "object" && err !== null && "message" in err) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === "string" && message) return message;
  }
  return String(err);
}

/** Coerces an optional tool argument to a trimmed string ("" when absent). */
function readString(value: unknown): string {
  return String(value ?? "").trim();
}

/**
 * Resolves the contact email sent to Unpaywall: the per-call override wins,
 * then the UNPAYWALL_EMAIL environment variable. Returns "" when neither is set.
 */
function resolveEmail(override: unknown): string {
  return String(override || process.env.UNPAYWALL_EMAIL || "").trim();
}

/**
 * Picks a URL from a list of Unpaywall OA locations: the first location that
 * has `url_for_pdf`, otherwise the first that has `url`.
 * Returns undefined when no location carries either field.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- Unpaywall payload is untyped JSON
function pickLocationUrl(locations: any[]): string | undefined {
  const chosen = locations.find((l) => l?.url_for_pdf) || locations.find((l) => l?.url);
  return chosen?.url_for_pdf || chosen?.url;
}

/**
 * Normalizes user-supplied DOI input to a bare DOI.
 * Accepts a plain DOI, an http(s)://doi.org/ or dx.doi.org URL, or a 'doi:' prefixed string.
 */
function normalizeDoi(input: string): string {
  let doi = input.trim();
  // Strip common DOI URL prefixes
  doi = doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, "");
  // Strip leading 'doi:' prefix
  doi = doi.replace(/^doi:/i, "");
  return doi.trim();
}

/**
 * Fetches the Unpaywall record for a DOI.
 * The request is aborted after 20 seconds.
 *
 * @param doi   Bare DOI (already normalized).
 * @param email Contact email passed as the `email` query parameter.
 * @returns The parsed JSON body of the response.
 * @throws Error with the HTTP status and up to 400 characters of the body on a non-2xx response.
 */
async function fetchUnpaywallByDoi(doi: string, email: string) {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 20_000);
  try {
    const url = `https://api.unpaywall.org/v2/${encodeURIComponent(doi)}?email=${encodeURIComponent(email)}`;
    const resp = await fetch(url, {
      signal: controller.signal,
      headers: { "Accept": "application/json" },
    });
    if (!resp.ok) {
      const text = await resp.text().catch(() => "");
      throw new Error(`Unpaywall HTTP ${resp.status}: ${text.slice(0, 400)}`);
    }
    return await resp.json();
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Downloads a PDF into memory, enforcing a size cap.
 * The request is aborted after 30 seconds, and also as soon as the cap is
 * exceeded so the remainder of an oversized body is not kept streaming.
 *
 * @param url      URL to download; redirects are followed.
 * @param maxBytes Maximum number of body bytes accepted (default 30 MiB).
 * @returns A Buffer with the full response body.
 * @throws Error on a non-2xx response or when the body exceeds `maxBytes`.
 */
async function downloadPdfAsBuffer(url: string, maxBytes = 30 * 1024 * 1024) {
  // Limit to 30MB by default to avoid extremely large downloads
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 30_000);
  const sizeLimitError = () => new Error(`PDF exceeds size limit of ${maxBytes} bytes`);
  try {
    const resp = await fetch(url, {
      signal: controller.signal,
      headers: {
        "Accept": "application/pdf, application/octet-stream;q=0.9,*/*;q=0.8",
      },
      redirect: "follow",
    });
    if (!resp.ok) {
      const text = await resp.text().catch(() => "");
      throw new Error(`PDF download HTTP ${resp.status}: ${text.slice(0, 400)}`);
    }

    const reader = resp.body?.getReader();
    if (!reader) {
      // No stream available: the body has to be read in one go, so the cap
      // can only be checked after the fact.
      const whole = Buffer.from(await resp.arrayBuffer());
      if (whole.byteLength > maxBytes) throw sizeLimitError();
      return whole;
    }

    const chunks: Uint8Array[] = [];
    let received = 0;
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (value) {
        received += value.byteLength;
        if (received > maxBytes) {
          // Stop the transfer instead of leaving the response streaming in the background.
          controller.abort();
          throw sizeLimitError();
        }
        chunks.push(value);
      }
    }
    return Buffer.concat(chunks);
  } finally {
    clearTimeout(timeout);
  }
}

/**
 * Runs a title search against the Unpaywall search endpoint.
 * The request is aborted after 20 seconds.
 *
 * @param args.query Title search query.
 * @param args.email Contact email passed as the `email` query parameter.
 * @param args.is_oa Optional OA filter; only sent when it is a boolean.
 * @param args.page  Optional page number; only sent when greater than 1.
 * @returns The parsed JSON body of the response.
 * @throws Error with the HTTP status and up to 400 characters of the body on a non-2xx response.
 */
async function searchUnpaywallTitles(args: { query: string; email: string; is_oa?: boolean; page?: number }) {
  const { query, email, is_oa, page } = args;
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 20_000);
  try {
    const params = new URLSearchParams();
    params.set("query", query);
    if (typeof is_oa === "boolean") params.set("is_oa", String(is_oa));
    if (page && Number.isFinite(page) && page > 1) params.set("page", String(Math.floor(page)));
    params.set("email", email);
    const url = `https://api.unpaywall.org/v2/search?${params.toString()}`;
    const resp = await fetch(url, {
      signal: controller.signal,
      headers: { "Accept": "application/json" },
    });
    if (!resp.ok) {
      const text = await resp.text().catch(() => "");
      throw new Error(`Unpaywall search HTTP ${resp.status}: ${text.slice(0, 400)}`);
    }
    return await resp.json();
  } finally {
    clearTimeout(timeout);
  }
}

/** Handles `unpaywall_get_by_doi`: returns the raw Unpaywall record for a DOI. */
async function handleGetByDoi(rawArgs: unknown): Promise<ToolResult> {
  const args = (rawArgs ?? {}) as Partial<GetByDoiArgs>;
  const rawDoi = readString(args.doi);
  if (!rawDoi) {
    return errorResult("Missing required argument: 'doi'");
  }
  const email = resolveEmail(args.email);
  if (!email) {
    return errorResult(
      "Unpaywall requires an email. Set UNPAYWALL_EMAIL env var for the server or pass 'email' in the tool arguments."
    );
  }
  const data = await fetchUnpaywallByDoi(normalizeDoi(rawDoi), email);
  return jsonResult(data);
}

/** Handles `unpaywall_search_titles`: returns the raw Unpaywall search response. */
async function handleSearchTitles(rawArgs: unknown): Promise<ToolResult> {
  const args = (rawArgs ?? {}) as Partial<SearchTitlesArgs>;
  const query = readString(args.query);
  if (!query) {
    return errorResult("Missing required argument: 'query'");
  }
  const email = resolveEmail(args.email);
  if (!email) {
    return errorResult("Unpaywall requires an email. Set UNPAYWALL_EMAIL or pass 'email'.");
  }
  const page =
    args.page && Number.isFinite(args.page) ? Math.max(1, Math.floor(Number(args.page))) : undefined;
  const is_oa = typeof args.is_oa === "boolean" ? args.is_oa : undefined;
  const data = await searchUnpaywallTitles({ query, email, is_oa, page });
  return jsonResult(data);
}

/**
 * Handles `unpaywall_get_fulltext_links`: summarizes the OA links of a DOI.
 * `best_pdf_url` prefers the best location's `url_for_pdf`, then its `url`,
 * then whatever `pickLocationUrl` finds among all locations.
 */
async function handleGetFulltextLinks(rawArgs: unknown): Promise<ToolResult> {
  const args = (rawArgs ?? {}) as Partial<GetByDoiArgs>;
  const rawDoi = readString(args.doi);
  if (!rawDoi) {
    return errorResult("Missing required argument: 'doi'");
  }
  const email = resolveEmail(args.email);
  if (!email) {
    return errorResult("Unpaywall requires an email. Set UNPAYWALL_EMAIL or pass 'email'.");
  }

  const doi = normalizeDoi(rawDoi);
  const obj = await fetchUnpaywallByDoi(doi, email);
  const best = obj?.best_oa_location ?? null;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Unpaywall payload is untyped JSON
  const locations: any[] = Array.isArray(obj?.oa_locations) ? obj.oa_locations : [];

  const bestPdfUrl = best?.url_for_pdf || best?.url || pickLocationUrl(locations) || null;
  const bestOpenUrl = best?.url || locations.find((l) => l?.url)?.url || null;

  return jsonResult({
    doi: obj?.doi ?? doi,
    title: obj?.title ?? null,
    is_oa: obj?.is_oa ?? null,
    oa_status: obj?.oa_status ?? null,
    best_pdf_url: bestPdfUrl,
    best_open_url: bestOpenUrl,
    best_oa_location: best,
    oa_locations: locations,
  });
}

/**
 * Handles `unpaywall_fetch_pdf_text`: downloads a PDF and returns its text.
 * A non-empty `pdf_url` is used directly; otherwise the PDF URL is resolved
 * from the DOI via Unpaywall. Extracted text is cut to `truncate_chars`
 * characters (minimum 1000, default 20000).
 */
async function handleFetchPdfText(rawArgs: unknown): Promise<ToolResult> {
  const args = (rawArgs ?? {}) as Partial<FetchPdfTextArgs>;
  const truncate =
    args.truncate_chars && Number.isFinite(args.truncate_chars)
      ? Math.max(1000, Math.floor(Number(args.truncate_chars)))
      : 20000;

  let pdfUrl = readString(args.pdf_url);
  if (!pdfUrl) {
    const rawDoi = readString(args.doi);
    if (!rawDoi) {
      return errorResult("Provide either 'pdf_url' or 'doi'");
    }
    const email = resolveEmail(args.email);
    if (!email) {
      return errorResult("Unpaywall requires an email. Set UNPAYWALL_EMAIL or pass 'email'.");
    }
    const obj = await fetchUnpaywallByDoi(normalizeDoi(rawDoi), email);
    const best = obj?.best_oa_location ?? null;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Unpaywall payload is untyped JSON
    const locations: any[] = Array.isArray(obj?.oa_locations) ? obj.oa_locations : [];
    pdfUrl = best?.url_for_pdf || pickLocationUrl(locations) || "";
    if (!pdfUrl) {
      return errorResult("No OA PDF URL found for the provided DOI.");
    }
  }

  // Download and parse PDF
  const pdfBuffer = await downloadPdfAsBuffer(pdfUrl);
  const parsed = await pdfParse(pdfBuffer);
  const text = parsed.text || "";
  const truncated = text.length > truncate;
  return jsonResult({
    pdf_url: pdfUrl,
    length_chars: text.length,
    truncated,
    text: truncated ? text.slice(0, truncate) : text,
    metadata: {
      n_pages: parsed.numpages ?? undefined,
      info: parsed.info ?? undefined,
      metadata: parsed.metadata ?? undefined,
    },
  });
}

/**
 * Creates the MCP server, registers the tool listing and tool-call handlers,
 * and connects it to a stdio transport.
 */
async function main() {
  const server = new Server(
    {
      name: "unpaywall-mcp",
      version: "0.1.1",
    },
    {
      capabilities: {
        tools: {},
      },
    }
  );

  server.setRequestHandler(ListToolsRequestSchema, async () => {
    return {
      tools: [
        {
          name: TOOL_GET_BY_DOI,
          description:
            "Fetch Unpaywall metadata for a DOI (accepts DOI, DOI URL, or 'doi:' prefix). Requires an email address via env UNPAYWALL_EMAIL or the optional 'email' argument.",
          inputSchema: {
            type: "object",
            properties: {
              doi: {
                type: "string",
                description: "DOI string or DOI URL, e.g. 10.1038/nphys1170 or https://doi.org/10.1038/nphys1170",
              },
              email: {
                type: "string",
                description: "Email to identify your requests to Unpaywall (optional override)",
              },
            },
            required: ["doi"],
            additionalProperties: false,
          },
        },
        {
          name: TOOL_SEARCH_TITLES,
          description:
            "Search Unpaywall for article titles matching a query. Supports optional is_oa filter and pagination (50 results per page).",
          inputSchema: {
            type: "object",
            properties: {
              query: {
                type: "string",
                description: "Title search query (supports phrase, boolean operators per Unpaywall docs)",
              },
              is_oa: {
                type: "boolean",
                description: "If true, only return OA results; if false, only closed; omit for all",
              },
              page: { type: "integer", minimum: 1, description: "Page number (50 results per page)" },
              email: {
                type: "string",
                description: "Email to identify your requests to Unpaywall (optional override)",
              },
            },
            required: ["query"],
            additionalProperties: false,
          },
        },
        {
          name: TOOL_GET_FULLTEXT_LINKS,
          description:
            "Given a DOI, return best open-access links (best PDF URL and open URL) plus Unpaywall locations metadata.",
          inputSchema: {
            type: "object",
            properties: {
              doi: { type: "string", description: "DOI string or DOI URL" },
              email: {
                type: "string",
                description: "Email to identify your requests to Unpaywall (optional override)",
              },
            },
            required: ["doi"],
            additionalProperties: false,
          },
        },
        {
          name: TOOL_FETCH_PDF_TEXT,
          description: "Download and extract text from best OA PDF for a DOI, or from a provided PDF URL.",
          inputSchema: {
            type: "object",
            properties: {
              doi: { type: "string", description: "DOI string or DOI URL. Used if pdf_url is not provided." },
              pdf_url: {
                type: "string",
                description: "Direct PDF URL to download and parse (takes precedence over DOI).",
              },
              email: {
                type: "string",
                description: "Email to identify requests to Unpaywall (required when resolving via DOI).",
              },
              truncate_chars: {
                type: "integer",
                minimum: 1000,
                description: "Max characters of extracted text to return (default 20000).",
              },
            },
            required: [],
            additionalProperties: false,
          },
        },
      ],
    };
  });

  server.setRequestHandler(CallToolRequestSchema, async (req) => {
    const tool = req.params.name;
    const rawArgs = req.params.arguments;
    try {
      // `return await` keeps handler rejections inside this try/catch so they
      // are reported as tool errors rather than escaping the request handler.
      switch (tool) {
        case TOOL_GET_BY_DOI:
          return await handleGetByDoi(rawArgs);
        case TOOL_SEARCH_TITLES:
          return await handleSearchTitles(rawArgs);
        case TOOL_GET_FULLTEXT_LINKS:
          return await handleGetFulltextLinks(rawArgs);
        case TOOL_FETCH_PDF_TEXT:
          return await handleFetchPdfText(rawArgs);
        default:
          return errorResult(`Unknown tool: ${tool}`);
      }
    } catch (err: unknown) {
      return errorResult(`Error calling ${tool}: ${errorMessage(err)}`);
    }
  });

  const transport = new StdioServerTransport();
  await server.connect(transport);
}

main().catch((err) => {
  console.error("Fatal error in Unpaywall MCP server:", err);
  process.exit(1);
});

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ElliotPadfield/unpaywall-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

index.ts•13.7 KiB