search_dois
Search over 125 million research DOIs using full-text queries and filters by resource type, funder, year, repository, and more.
Instructions
Search DataCite's index of 125M+ research DOIs. Supports full-text queries and filters by resource type, funder, year, repository, and more.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | No | ||
| resource_type | No | ||
| funder_ror_id | No | ||
| affiliation_ror_id | No | ||
| client_id | No | ||
| provider_id | No | ||
| prefix | No | ||
| published_year | No | ||
| sort | No | ||
| page_size | No | ||
| page_cursor | No | ||
Implementation Reference
- src/tools/search-dois.ts:83-163 (handler) The registerTool function registers the 'search_dois' MCP tool. The handler (lines 88-161) parses Zod-validated input, builds DataCite API query parameters with Elasticsearch-style field filters, calls the DataCite /dois endpoint with caching, extracts cursor-based pagination, formats results via formatDoiSummary, and returns the response as JSON.
/**
 * Wraps a value in double quotes for use as an exact phrase inside the query
 * string, backslash-escaping embedded `"` and `\` so the value cannot end the
 * phrase early and inject extra query syntax.
 */
function quoteQueryPhrase(value: string): string {
  return `"${value.replace(/["\\]/g, "\\$&")}"`;
}

/**
 * Registers the `search_dois` tool on the given MCP server.
 *
 * The handler validates its input with `SearchSchema`, builds the query
 * parameters for the DataCite `/dois` endpoint, fetches the response through
 * `searchCache`, and returns a single text content item containing JSON with
 * `results`, `total_results`, and `next_cursor`.
 *
 * @param server - MCP server instance the tool is registered on.
 * @throws The error produced by `apiError` when the request (or cache lookup) fails.
 */
export function registerTool(server: McpServer): void {
  server.tool(
    "search_dois",
    "Search DataCite's index of 125M+ research DOIs. Supports full-text queries and filters by resource type, funder, year, repository, and more.",
    SearchSchema.shape,
    async (params) => {
      const input = SearchSchema.parse(params);

      const apiParams: Record<string, string | number | boolean> = {
        "page[size]": input.page_size,
        detail: true,
      };

      // Build Elasticsearch query clauses. Field-path filters are appended here
      // rather than as dedicated API params because:
      // - types.resourceTypeGeneral:{value} is verified to work; resource-type-id
      //   requires lowercase-hyphenated values (computationalnotebook → broken).
      // - funderIdentifier / affiliationIdentifier require full https://ror.org/ URIs;
      //   the dedicated funder-id / affiliation-id params have the same requirement
      //   but the query-path form is verified correct against the live index.
      const queryClauses: string[] = [];

      // A whitespace-only query would otherwise produce an empty "( )" clause.
      const freeText = input.query?.trim();
      if (freeText) queryClauses.push(`(${freeText})`);

      if (input.resource_type) {
        queryClauses.push(`types.resourceTypeGeneral:${input.resource_type}`);
      }
      if (input.funder_ror_id) {
        queryClauses.push(
          `fundingReferences.funderIdentifier:${quoteQueryPhrase(normalizeRorId(input.funder_ror_id))}`
        );
      }
      if (input.affiliation_ror_id) {
        queryClauses.push(
          `creators.affiliation.affiliationIdentifier:${quoteQueryPhrase(normalizeRorId(input.affiliation_ror_id))}`
        );
      }
      if (input.published_year) {
        queryClauses.push(`publicationYear:${input.published_year}`);
      }
      if (queryClauses.length) apiParams["query"] = queryClauses.join(" AND ");

      if (input.client_id) apiParams["client-id"] = input.client_id;
      if (input.provider_id) apiParams["provider-id"] = input.provider_id;
      if (input.prefix) apiParams["prefix"] = input.prefix;
      // "relevance" is expressed by sending no sort parameter at all.
      if (input.sort && input.sort !== "relevance") apiParams["sort"] = input.sort;
      if (input.page_cursor) apiParams["page[cursor]"] = input.page_cursor;

      // Sort entries by key so the cache key does not depend on insertion order.
      const cacheKey = JSON.stringify(
        Object.entries(apiParams).sort(([a], [b]) => a.localeCompare(b))
      );

      try {
        const response = await getCached<SearchResponse>(
          searchCache,
          cacheKey,
          () => dataciteClient.get<SearchResponse>("/dois", apiParams)
        );

        // Extract next cursor from links.next URL
        let next_cursor: string | null = null;
        if (response.links?.next) {
          try {
            const nextUrl = new URL(response.links.next);
            next_cursor = nextUrl.searchParams.get("page[cursor]");
          } catch {
            // Best effort: an unparseable link leaves next_cursor as null.
          }
        }

        const results = (response.data ?? []).map(formatDoiSummary);

        return {
          content: [
            {
              type: "text" as const,
              text: JSON.stringify(
                {
                  results,
                  total_results: response.meta?.total ?? results.length,
                  next_cursor,
                },
                null,
                2
              ),
            },
          ],
        };
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        throw apiError(msg);
      }
    }
  );
}
// - src/tools/search-dois.ts:9-68 (schema)SearchSchema defines the input schema using Zod: optional query string, resource_type enum (30 values), funder_ror_id, affiliation_ror_id, client_id, provider_id, prefix, published_year, sort enum, page_size (default 10, max 100), and page_cursor.
/**
 * Input schema for the `search_dois` tool.
 *
 * Every field is optional; `page_size` falls back to 10. Each field carries a
 * description so that clients listing the tool's input schema see what the
 * field does instead of an empty description.
 */
const SearchSchema = z.object({
  query: z
    .string()
    .optional()
    .describe(
      "Full-text query. Placed in the DataCite `query` parameter and combined with the filter fields using AND."
    ),
  resource_type: z
    .enum([
      "Audiovisual",
      "Book",
      "BookChapter",
      "Collection",
      "ComputationalNotebook",
      "ConferencePaper",
      "ConferenceProceeding",
      "DataPaper",
      "Dataset",
      "Dissertation",
      "Event",
      "Image",
      "Instrument",
      "InteractiveResource",
      "Journal",
      "JournalArticle",
      "Model",
      "OutputManagementPlan",
      "PeerReview",
      "PhysicalObject",
      "Preprint",
      "Report",
      "Service",
      "Software",
      "Sound",
      "Standard",
      "StudyRegistration",
      "Text",
      "Workflow",
      "Other",
    ])
    .optional()
    .describe(
      "Restrict results to one general resource type (matched against types.resourceTypeGeneral)."
    ),
  funder_ror_id: z
    .string()
    .optional()
    .describe(
      "ROR identifier of the funder. Accepts a bare ID, `ror.org/<id>`, or an http(s)://ror.org/<id> URL."
    ),
  affiliation_ror_id: z
    .string()
    .optional()
    .describe(
      "ROR identifier of a creator affiliation. Accepts a bare ID, `ror.org/<id>`, or an http(s)://ror.org/<id> URL."
    ),
  client_id: z
    .string()
    .optional()
    .describe("DataCite repository (client) ID, sent as the `client-id` filter."),
  provider_id: z
    .string()
    .optional()
    .describe("DataCite provider ID, sent as the `provider-id` filter."),
  prefix: z
    .string()
    .optional()
    .describe("DOI prefix filter, sent as the `prefix` parameter."),
  published_year: z
    .number()
    .int()
    .optional()
    .describe("Publication year (matched against publicationYear)."),
  sort: z
    .enum([
      "relevance",
      "created",
      "-created",
      "updated",
      "-updated",
      "published",
      "-published",
      "citation-count",
      "-citation-count",
      "view-count",
      "-view-count",
    ])
    .optional()
    .describe(
      'Result ordering. "relevance" sends no sort parameter; any other value is passed through as the API `sort` parameter.'
    ),
  page_size: z
    .number()
    .int()
    .min(1)
    .max(100)
    .default(10)
    .describe("Number of results per page (1-100, default 10)."),
  page_cursor: z
    .string()
    .optional()
    .describe(
      "Pagination cursor: pass the `next_cursor` value from a previous response to fetch the next page."
    ),
});
// - src/tools/index.ts:12-13 (registration)registerSearchDois(server) is called inside registerAllTools() to register the tool on the MCP server instance.
export function registerAllTools(server: McpServer): void { registerSearchDois(server); - src/tools/search-dois.ts:75-81 (helper)normalizeRorId normalizes ROR identifiers (bare ID, ror.org/..., http/https variants) to the canonical https://ror.org/... form, used by the handler for funder and affiliation filters.
/**
 * Normalizes a ROR identifier to the canonical `https://ror.org/<id>` form.
 *
 * Accepted inputs (surrounding whitespace is ignored):
 * - a bare ID (`05dxps055`)
 * - `ror.org/<id>`
 * - `http://ror.org/<id>` or `https://ror.org/<id>`
 *
 * The scheme and host are matched case-insensitively, so `HTTPS://ROR.ORG/<id>`
 * is recognised as already carrying the prefix rather than being prefixed a
 * second time. The ID portion itself is returned unchanged.
 *
 * @param raw - ROR identifier in any of the accepted forms.
 * @returns The identifier as an `https://ror.org/...` URL.
 */
function normalizeRorId(raw: string): string {
  // Strip an optional scheme plus the ror.org host, then re-attach the canonical prefix.
  const id = raw.trim().replace(/^(?:https?:\/\/)?ror\.org\//i, "");
  return `https://ror.org/${id}`;
}
// - src/utils/formatters.ts:43-62 (helper)formatDoiSummary is called by the handler to transform each DataCite DoiRecord into a condensed response object with doi, title, creators, year, resource_type, publisher, abstract_snippet, and counts.
/**
 * Condenses a DataCite DOI record into the summary object returned by search.
 *
 * The summary contains the DOI (falling back to the record id), the first
 * title (or "(no title)"), up to three formatted creators, publication year,
 * resource type, publisher, an abstract snippet, and the view / download /
 * citation counts.
 *
 * The snippet is the first description, cut to at most 300 UTF-16 code units
 * and followed by "…" when it was shortened. If the cut would fall between the
 * two halves of a surrogate pair, the cut moves back one unit so the snippet
 * never ends in a lone surrogate. An empty snippet is reported as `undefined`.
 *
 * @param record - DOI record whose `attributes` are summarised.
 * @returns The condensed summary object.
 */
export function formatDoiSummary(record: DoiRecord): object {
  const a = record.attributes;
  const maxSnippetLength = 300;

  const title = a.titles?.[0]?.title ?? "(no title)";
  const creators = (a.creators ?? []).slice(0, 3).map(formatCreator);

  const firstDesc = a.descriptions?.[0]?.description ?? "";
  let abstract_snippet = firstDesc;
  if (firstDesc.length > maxSnippetLength) {
    let cut = maxSnippetLength;
    // A high surrogate in the last kept position means its partner lies past the cut.
    const lastKept = firstDesc.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
    abstract_snippet = firstDesc.slice(0, cut) + "…";
  }

  return {
    doi: a.doi ?? record.id,
    title,
    creators,
    year: a.publicationYear,
    resource_type: a.types?.resourceTypeGeneral ?? a.resourceTypeGeneral,
    publisher: a.publisher,
    abstract_snippet: abstract_snippet || undefined,
    view_count: a.viewCount,
    download_count: a.downloadCount,
    citation_count: a.citationCount,
  };
}