NCBI Entrez MCP Server

index.ts•27 KiB

import { ensureZodSafeParseAsync } from "./lib/zod-compat.js";
ensureZodSafeParseAsync();
import { McpAgent } from "agents/mcp";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import JSZip from "jszip";
import { JsonToSqlDO } from "./do.js";
import { getParserFor } from "./lib/parsers.js";
import { ToolRegistry } from "./tools/index.js";
import type { ToolContext } from "./tools/index.js";

/**
 * MCP agent for the NCBI Entrez E-utilities.
 *
 * Holds the shared helpers (URL building, response parsing, query validation,
 * staging heuristics) that the registered tools reach through `ToolContext`.
 */
export class EntrezMCP extends McpAgent implements ToolContext {
  server = new McpServer(
    {
      name: "Complete NCBI APIs MCP Server",
      version: "1.0.0",
      title: "NCBI Entrez & External APIs Gateway",
    },
    {
      capabilities: {
        tools: {
          listChanged: true,
        },
      },
    },
  );

  baseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
  defaultEmail = "entrez-mcp-server@example.com";
  defaultTool = "entrez-mcp-server";

  /** Environment bindings captured for this instance (preferred over the static fallback). */
  private workerEnv: Env | undefined;

  /** Environment of the request currently being processed; used only as a fallback. */
  public static currentEnv: Env | undefined;

  /**
   * Database names accepted by `isValidDatabase`.
   * List sourced from the EInfo endpoint (2025-06); the trailing group are
   * databases observed via EInfo that were previously missing.
   */
  private static readonly validDatabases: ReadonlySet<string> = new Set([
    "pubmed",
    "pmc",
    "protein",
    "nuccore",
    "ipg",
    "nucleotide",
    "structure",
    "genome",
    "annotinfo",
    "assembly",
    "bioproject",
    "biosample",
    "blastdbinfo",
    "books",
    "cdd",
    "clinvar",
    "gap",
    "gapplus",
    "grasp",
    "dbvar",
    "gene",
    "gds",
    "geoprofiles",
    "medgen",
    "mesh",
    "nlmcatalog",
    "omim",
    "orgtrack",
    "proteinclusters",
    "pcassay",
    "protfam",
    "pccompound",
    "pcsubstance",
    "seqannot",
    "snp",
    "sra",
    "taxonomy",
    "biocollections",
    "gtr",
    "pubmedhealth",
    "nucgss",
    "nucest",
    "biosystems",
    "unigene",
    "popset",
    "probe",
  ]);

  /**
   * Durable Object constructor; stores the environment bindings when provided
   * and logs which bindings are present.
   */
  constructor(ctx?: any, env?: Env) {
    super(ctx, env);
    if (env) {
      this.workerEnv = env;
      console.log("EntrezMCP constructor: environment bindings available:", {
        hasJsonToSqlDO: !!env.JSON_TO_SQL_DO,
        hasMcpObject: !!env.MCP_OBJECT,
        hasApiKey: !!env.NCBI_API_KEY,
      });
    } else {
      console.log("EntrezMCP constructor: no environment provided");
    }
  }

  /**
   * Returns the optional NCBI API key from the instance environment, falling
   * back to the static environment.
   *
   * @returns the key, or `undefined` when unset or when the value is an
   *   unexpanded `${...}` placeholder.
   */
  getApiKey(): string | undefined {
    const apiKey =
      this.workerEnv?.NCBI_API_KEY || EntrezMCP.currentEnv?.NCBI_API_KEY;
    // A literal "${VAR}" means the variable was never substituted.
    if (apiKey && apiKey.startsWith("${") && apiKey.endsWith("}")) {
      return undefined;
    }
    return apiKey;
  }

  /** Stores the environment bindings on this instance. */
  public setEnvironment(env: Env): void {
    this.workerEnv = env;
  }

  /** Returns the instance environment, or the static fallback when none was set. */
  getEnvironment(): Env | undefined {
    return this.workerEnv || EntrezMCP.currentEnv;
  }

  /** Case-insensitive check of `db` against the known database names. */
  isValidDatabase(db: string): boolean {
    return EntrezMCP.validDatabases.has(db.toLowerCase());
  }

  /**
   * Reads and validates an upstream response.
   *
   * - Non-OK status: throws with status and body text.
   * - "BLAST Get" tools: gzip/deflate and ZIP payloads are unpacked.
   * - BLAST / PMC / PubChem tools: body text is returned untouched.
   * - JSON (requested via `requestedRetmode`, a tool name containing "JSON",
   *   or the content type): parsed object is returned; on parse failure the
   *   text path is used.
   * - Everything else: scanned for NCBI error patterns, then XML-optimized.
   *
   * @param response upstream fetch response
   * @param toolName tool label; selects the handling branch and prefixes errors
   * @param requestedRetmode retmode the caller asked for, if any
   * @returns body text, or a parsed JSON value
   * @throws Error on HTTP failure, unreadable BLAST archive, or detected NCBI error
   */
  async parseResponse(
    response: Response,
    toolName: string,
    requestedRetmode?: string,
  ): Promise<string | any> {
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(
        `${toolName} request failed: ${response.status} ${response.statusText}. Response: ${errorText}`,
      );
    }

    if (toolName.includes("BLAST Get")) {
      const contentType = response.headers.get("content-type") || "";
      // Normalised once so the detection and the handling below agree.
      const contentEncoding = (response.headers.get("content-encoding") || "")
        .trim()
        .toLowerCase();

      const looksCompressed =
        contentType.includes("application/zip") ||
        contentType.includes("application/x-zip") ||
        response.headers.get("content-disposition")?.includes(".zip") ||
        contentEncoding.includes("gzip") ||
        contentEncoding.includes("deflate");

      if (looksCompressed) {
        const arrayBuffer = await response.arrayBuffer();

        if (contentEncoding === "gzip" || contentEncoding === "deflate") {
          const decompressedStream = new Response(
            arrayBuffer,
          ).body!.pipeThrough(new DecompressionStream(contentEncoding));
          return await new Response(decompressedStream).text();
        }

        // "PK" magic bytes mark a ZIP archive.
        const firstBytes = new Uint8Array(arrayBuffer.slice(0, 4));
        if (firstBytes[0] === 0x50 && firstBytes[1] === 0x4b) {
          try {
            const zip = await JSZip.loadAsync(arrayBuffer);
            const fileNames = Object.keys(zip.files);
            if (fileNames.length > 0) {
              // Prefer the primary XML file (no underscore in its name); it
              // often just XIncludes the real result file.
              const primaryXmlFile = fileNames.find(
                (name) => name.endsWith(".xml") && !name.includes("_"),
              );
              const primaryFile = primaryXmlFile
                ? zip.file(primaryXmlFile)
                : zip.file(fileNames[0]);

              if (primaryFile) {
                const primaryContent = await primaryFile.async("string");
                const includeMatch = primaryContent.match(
                  /<xi:include\s+href="([^"]+)"/,
                );
                if (includeMatch && includeMatch[1]) {
                  const includedFileName = includeMatch[1];
                  const includedFile = zip.file(includedFileName);
                  if (includedFile) {
                    return await includedFile.async("string");
                  }
                  throw new Error(
                    `XInclude file '${includedFileName}' not found in the BLAST archive.`,
                  );
                }
                return primaryContent;
              }
            }
            throw new Error("ZIP archive from BLAST was empty.");
          } catch (zipError) {
            throw new Error(
              `Failed to decompress BLAST result archive: ${zipError instanceof Error ? zipError.message : String(zipError)}`,
            );
          }
        }

        // The body was already consumed by arrayBuffer(); calling
        // response.text() here would fail. Decode what we have instead.
        // BLAST tools return their text unprocessed, so this is the final value.
        return new TextDecoder().decode(arrayBuffer);
      }
    }

    const data = await response.text();

    // BLAST, PMC and PubChem use different response formats: no error scan.
    if (
      toolName.includes("BLAST") ||
      toolName.includes("PMC") ||
      toolName.includes("PubChem")
    ) {
      return data;
    }

    const wantsJson =
      requestedRetmode === "json" ||
      toolName.includes("JSON") ||
      response.headers.get("content-type")?.includes("application/json");
    if (wantsJson) {
      try {
        return JSON.parse(data);
      } catch {
        // Not valid JSON after all; continue with text processing.
      }
    }

    // Case-insensitive scan for common NCBI error patterns.
    // NOTE(review): the bare "error" test plus the generic pattern below also
    // match ordinary content that merely contains the word "error" — confirm
    // whether that is acceptable for record-returning tools.
    const lowerData = data.toLowerCase();
    if (
      lowerData.includes("<error>") ||
      lowerData.includes('"error"') ||
      lowerData.includes("error")
    ) {
      const errorMatch =
        // XML error element such as <ERROR> or <Error> (pattern is case-insensitive)
        data.match(/<Error[^>]*>([\s\S]*?)<\/Error>/i) ||
        // JSON style "ERROR":"message"
        data.match(/"ERROR"\s*:\s*"([^"]*)"/i) ||
        // Generic 'error' text in plain responses
        data.match(/error['":]?\s*([^"',}\n]*)/i);
      if (errorMatch) {
        throw new Error(`NCBI ${toolName} error: ${errorMatch[1]}`);
      }
    }

    return this.optimizeXmlResponse(data, toolName);
  }

  /**
   * Shrinks an XML response: drops the DOCTYPE, XML declaration, empty
   * elements and comments, applies tool-specific trimming, then compacts
   * whitespace. Input without an `<?xml` marker is returned unchanged.
   */
  private optimizeXmlResponse(data: string, toolName: string): string {
    if (typeof data !== "string" || !data.includes("<?xml")) {
      return data;
    }

    let optimized = data
      // DTD declarations
      .replace(/<!DOCTYPE[\s\S]*?>/g, "")
      // XML processing instructions
      .replace(/<\?xml[\s\S]*?\?>/g, "")
      // Empty elements
      .replace(/<([^>/]+)>\s*<\/\1>/g, "")
      // XML comments
      .replace(/<!--[\s\S]*?-->/g, "");

    optimized = this.applyToolSpecificOptimizations(optimized, toolName);

    return optimized.replace(/>\s+</g, "><").replace(/\s+/g, " ").trim();
  }

  /** Removes elements that are verbose for a given tool (EInfo, ESummary, ESearch). */
  private applyToolSpecificOptimizations(
    data: string,
    toolName: string,
  ): string {
    switch (toolName) {
      case "EInfo":
        return data
          .replace(/<Description>[\s\S]*?<\/Description>/g, "")
          .replace(/<MenuName>[\s\S]*?<\/MenuName>/g, "")
          .replace(/<TermCount[^>]*\/>/g, "");
      case "ESummary":
        return data
          .replace(/\s*Type="[^"]*"/g, "")
          .replace(/<Item Name="[^"]*"><\/Item>/g, "");
      case "ESearch":
        return data.replace(/<TranslationSet>[\s\S]*?<\/TranslationSet>/g, "");
      default:
        return data;
    }
  }

  /**
   * Produces human-readable hints for improving a search query.
   *
   * @param query raw user query
   * @param database target database; PubMed gets extra field-tag hints
   * @returns zero or more suggestion strings
   */
  suggestQueryImprovements(query: string, database: string): string[] {
    const suggestions: string[] = [];
    const trimmed = query.trim().toLowerCase();
    const hasFieldTag = trimmed.includes("[");

    if (database === "pubmed") {
      if (trimmed.includes("cancer") && !hasFieldTag) {
        suggestions.push(
          "Try 'cancer[Title]' or 'cancer[MeSH]' for more precise results",
        );
      }
      if (trimmed.includes("therapy") && !hasFieldTag) {
        suggestions.push(
          "Consider 'therapy[Title/Abstract]' or 'therapeutic[MeSH]' for treatment-focused searches",
        );
      }
      if (trimmed.match(/\b\d{4}\b/) && !trimmed.includes("[date]")) {
        suggestions.push(
          "Add '[Date]' after years for publication date searches (e.g., '2023[Date]')",
        );
      }
      // Must inspect the original casing: the lowercased copy cannot tell
      // 'and' from 'AND'.
      if (query.includes(" and ")) {
        suggestions.push(
          "Use uppercase 'AND' for Boolean operators instead of 'and'",
        );
      }
    }

    if (
      !trimmed.includes(" and ") &&
      !trimmed.includes(" or ") &&
      trimmed.split(" ").length > 3
    ) {
      suggestions.push(
        "Consider using Boolean operators (AND, OR) to combine multiple terms effectively",
      );
    }

    if (trimmed.includes("recent") || trimmed.includes("latest")) {
      suggestions.push(
        "Use date fields for time-based searches: '2023[Date]:2024[Date]' or 'last 5 years[Date]'",
      );
    }

    if (trimmed.match(/[a-z]+\s+[a-z]\b/i) && !trimmed.includes("[author]")) {
      suggestions.push(
        "For author searches, use format 'LastName FirstInitial[Author]' (e.g., 'Smith J[Author]')",
      );
    }

    return suggestions;
  }

  /**
   * Lightweight syntax check of a query before it is sent upstream.
   *
   * Field tags themselves are not validated here: client-side validation
   * rejected valid PubMed abbreviations such as [tiab] and [dp].
   * See: https://pubmed.ncbi.nlm.nih.gov/help/#search-tags
   *
   * @returns `{ valid: true }`, or `valid: false` with a message and, where
   *   available, a suggestion.
   */
  validateQuery(
    query: string,
    database: string,
  ): { valid: boolean; message?: string; suggestion?: string } {
    if (!query || query.trim() === "") {
      return { valid: false, message: "Query cannot be empty" };
    }

    const trimmed = query.trim();
    const hasOpen = trimmed.includes("[");
    const hasClose = trimmed.includes("]");

    if (hasOpen && !hasClose) {
      return {
        valid: false,
        message: "Unclosed field tag detected",
        suggestion:
          "Add closing bracket ']' to complete field specification (e.g., 'cancer[Title]')",
      };
    }
    if (hasClose && !hasOpen) {
      return {
        valid: false,
        message: "Closing bracket without opening bracket",
        suggestion:
          "Add opening bracket '[' before field name (e.g., 'cancer[Title]')",
      };
    }

    // Only double quotes must pair up. Single quotes are not counted because
    // apostrophes in ordinary terms ("Alzheimer's disease") are legitimate.
    const doubleQuotes = (trimmed.match(/"/g) || []).length;
    if (doubleQuotes % 2 !== 0) {
      return {
        valid: false,
        message: "Unmatched quotes detected",
        suggestion:
          "Ensure all quotes are properly paired or remove them for simple searches",
      };
    }

    const boolCount = (trimmed.match(/\b(AND|OR|NOT)\b/gi) || []).length;
    if (boolCount > 10) {
      return {
        valid: false,
        message: "Query too complex",
        suggestion:
          "Consider breaking into multiple simpler queries for better performance",
      };
    }

    return { valid: true };
  }

  /**
   * Picks a retmode for a tool/database pair given the intended use.
   * Falls back to "json" when no rule applies.
   */
  getOptimalRetmode(
    tool: string,
    database: string,
    intendedUse?: string,
  ): string {
    switch (intendedUse) {
      case "staging":
      case "analysis":
        if (tool === "efetch" && database === "pubmed") return "xml";
        if (tool === "einfo" || tool === "esummary") return "xml";
        return "json";
      case "sequence_analysis":
        if (database === "nuccore" || database === "protein") return "fasta";
        return "gb";
      case "citation":
      case "bibliography":
        return "xml";
      default:
        return "json";
    }
  }

  /**
   * Decides whether a response should be staged rather than returned inline.
   * Tokens are estimated at 4 characters each; more than 5000 estimated
   * tokens, or many `<Field>` / `<DocSum>` / `<Link>` elements, triggers staging.
   */
  shouldStageResponse(
    data: string,
    toolName: string,
  ): { shouldStage: boolean; reason: string; estimatedTokens: number } {
    const estimatedTokens = Math.ceil(data.length / 4);
    const tokenThreshold = 5000;

    if (estimatedTokens > tokenThreshold) {
      return {
        shouldStage: true,
        reason: `Response too large (${estimatedTokens} estimated tokens > ${tokenThreshold} threshold)`,
        estimatedTokens,
      };
    }

    const occurrences = (pattern: RegExp): number =>
      (data.match(pattern) || []).length;
    const structurallyComplex =
      occurrences(/<Field>/g) > 20 ||
      occurrences(/<DocSum>/g) > 10 ||
      occurrences(/<Link>/g) > 50;

    if (structurallyComplex) {
      return {
        shouldStage: true,
        reason:
          "Response has high structural complexity, staging for efficient querying",
        estimatedTokens,
      };
    }

    return {
      shouldStage: false,
      reason: "Response size manageable for direct return",
      estimatedTokens,
    };
  }

  /**
   * Converts response data to display text: strings pass through, ESearch
   * results with translations get a readable summary, other objects are
   * pretty-printed JSON, anything else goes through `String()`.
   */
  formatResponseData(data: any): string {
    if (typeof data === "string") {
      return data;
    }
    if (typeof data === "object" && data !== null) {
      const esearch = data.esearchresult;
      if (esearch && (esearch.translationset || esearch.querytranslation)) {
        return this.formatESearchResponse(data);
      }
      return JSON.stringify(data, null, 2);
    }
    return String(data);
  }

  /** Renders an ESearch JSON result as a plain-text summary with cleaned-up query translations. */
  private formatESearchResponse(data: any): string {
    const result = data.esearchresult;
    const hasTranslationSet =
      result.translationset && result.translationset.length > 0;
    let output = "";

    output += `Search Results Summary:\n`;
    output += `========================\n`;
    output += `Total Results: ${result.count || "0"}\n`;
    output += `Returned: ${result.retmax || "0"}\n`;
    output += `Starting at: ${result.retstart || "0"}\n\n`;

    if (hasTranslationSet) {
      output += `Query Interpretation:\n`;
      output += `====================\n`;
      for (const translation of result.translationset) {
        output += `Your search: "${translation.from}"\n`;
        output += `Expanded to include:\n`;
        for (const term of this.extractMeaningfulTerms(translation.to)) {
          output += ` • ${term}\n`;
        }
        output += "\n";
      }
    }

    // Only a flat query translation is available: show its cleaned terms.
    if (result.querytranslation && !hasTranslationSet) {
      output += `Search Terms Used:\n`;
      output += `==================\n`;
      for (const term of this.extractMeaningfulTerms(result.querytranslation)) {
        output += ` • ${term}\n`;
      }
      output += "\n";
    }

    if (result.idlist && result.idlist.length > 0) {
      output += `Article IDs Found:\n`;
      output += `==================\n`;
      output += result.idlist.join(", ") + "\n\n";
    }

    output += `Technical Details (Full Query):\n`;
    output += `===============================\n`;
    output += `${result.querytranslation || "No query translation available"}\n\n`;

    const handledKeys = [
      "count",
      "retmax",
      "retstart",
      "idlist",
      "translationset",
      "querytranslation",
    ];
    const otherFields = Object.keys(result).filter(
      (key) => !handledKeys.includes(key),
    );
    if (otherFields.length > 0) {
      output += `Additional Information:\n`;
      output += `======================\n`;
      for (const field of otherFields) {
        output += `${field}: ${JSON.stringify(result[field])}\n`;
      }
    }

    return output;
  }

  /**
   * Pulls quoted, field-tagged terms out of NCBI's expanded Boolean query and
   * annotates each with its field. Terms of two characters or fewer are
   * dropped. Falls back to bare quoted terms when nothing tagged is found.
   *
   * @returns sorted, de-duplicated terms
   */
  private extractMeaningfulTerms(queryString: string): string[] {
    const terms = new Set<string>();

    // Order matters: specific fields run before "All Fields" so that the
    // duplicate check below can see them. A null label marks "All Fields".
    const fieldPatterns: Array<{ pattern: RegExp; label: string | null }> = [
      { pattern: /"([^"]+)"\[MeSH Terms\]/g, label: "MeSH term" },
      {
        pattern: /"([^"]+)"\[Supplementary Concept\]/g,
        label: "medical concept",
      },
      { pattern: /"([^"]+)"\[All Fields\]/g, label: null },
      { pattern: /"([^"]+)"\[Title\]/g, label: "in title" },
      { pattern: /"([^"]+)"\[Author\]/g, label: "author name" },
      { pattern: /"([^"]+)"\[Journal\]/g, label: "journal name" },
    ];

    for (const { pattern, label } of fieldPatterns) {
      for (const match of queryString.matchAll(pattern)) {
        const term = match[1].trim();
        if (term.length <= 2) {
          continue;
        }
        if (label !== null) {
          terms.add(`${term} (${label})`);
          continue;
        }
        // All Fields: skip when a more specific entry already covers it.
        const simplePattern = term.toLowerCase().replace(/[^a-z0-9\s]/g, "");
        const alreadyHasSpecific = Array.from(terms).some((existingTerm) =>
          existingTerm.toLowerCase().includes(simplePattern),
        );
        if (!alreadyHasSpecific) {
          terms.add(`${term} (anywhere in article)`);
        }
      }
    }

    if (terms.size === 0) {
      for (const quotedTerm of queryString.match(/"([^"]+)"/g) ?? []) {
        const term = quotedTerm.replace(/"/g, "").trim();
        if (term.length > 2) {
          terms.add(term);
        }
      }
    }

    return Array.from(terms).sort();
  }

  /**
   * Builds an E-utilities URL from `baseUrl`, the endpoint and the non-empty
   * parameters (values trimmed), appending `api_key` when one is configured.
   */
  buildUrl(endpoint: string, params: URLSearchParams): string {
    const cleanParams = new URLSearchParams();
    params.forEach((value, key) => {
      if (value && value.trim() !== "") {
        cleanParams.append(key, value.trim());
      }
    });

    const apiKey = this.getApiKey();
    if (apiKey) {
      cleanParams.append("api_key", apiKey);
    }

    return `${this.baseUrl}${endpoint}?${cleanParams}`;
  }

  /**
   * Decides whether parsed entities are small or simple enough to return
   * directly instead of staging them for SQL querying.
   *
   * @param entities parsed entities; each is read for its `type`
   * @param diagnostics parser diagnostics (`failed_extractions`, `mesh_availability`)
   * @param payloadSize payload size, compared against byte thresholds
   */
  shouldBypassStaging(
    entities: any[],
    diagnostics: any,
    payloadSize: number,
  ): { bypass: boolean; reason: string } {
    const entityCount = entities.length;
    const entityTypeCount = new Set(entities.map((e) => e.type)).size;

    // 1. Very small payload (< 1KB)
    if (payloadSize < 1024) {
      return {
        bypass: true,
        reason:
          "Small dataset (< 1KB) returned directly - no staging overhead needed",
      };
    }

    // 2. Very few entities (< 10 total)
    if (entityCount < 10) {
      return {
        bypass: true,
        reason:
          "Few entities extracted (< 10) - direct return more efficient than SQL staging",
      };
    }

    // 3. Only 1-2 entity types (minimal relational benefit)
    if (entityTypeCount <= 2 && entityCount < 25) {
      return {
        bypass: true,
        reason:
          "Simple structure with few entity types - SQL staging provides minimal benefit",
      };
    }

    // 4. Poor data quality / parsing failure
    const extractionWasPoor =
      diagnostics.failed_extractions?.length > 0 ||
      diagnostics.mesh_availability === "none";
    if (extractionWasPoor && entityCount < 15) {
      return {
        bypass: true,
        reason:
          "Limited data extraction success - returning parsed results directly",
      };
    }

    // 5. Single article with basic info only
    if (entityCount < 20 && entityTypeCount <= 3 && payloadSize < 2048) {
      return {
        bypass: true,
        reason:
          "Single article with basic information - staging unnecessary for simple data",
      };
    }

    return {
      bypass: false,
      reason: "Dataset complexity justifies SQL staging for efficient querying",
    };
  }

  /** Registers all tools through the tool registry. */
  async init() {
    const toolRegistry = new ToolRegistry(this);
    toolRegistry.registerAll();
  }
}

// Temporary alias for migration
export class MyMCP extends EntrezMCP {}

/** Plain-text page served on every path other than /mcp. */
const LANDING_PAGE_TEXT = [
  "NCBI Entrez MCP Server",
  "================================",
  "",
  "A comprehensive Model Context Protocol server for NCBI APIs including:",
  "- E-utilities (PubMed, Gene, Protein, Nucleotide databases)",
  "- BLAST sequence analysis",
  "- PubChem compound/substance search",
  "- PMC full-text articles",
  "- Advanced data staging with SQL querying",
  "",
  "Available Endpoints:",
  "- /mcp (Streamable HTTP transport)",
  "",
  "Protocol Version: 2025-11-25",
  "",
  "For usage instructions, connect with an MCP client.",
].join("\n");

export default {
  /**
   * Worker entry point: answers CORS preflights, routes /mcp traffic to the
   * Streamable HTTP transport, and serves a plain-text landing page otherwise.
   * Unexpected failures are logged and returned as a JSON 500.
   */
  async fetch(
    request: Request,
    env: Env,
    ctx: ExecutionContext,
  ): Promise<Response> {
    // Make the environment reachable from EntrezMCP's static fallback.
    EntrezMCP.currentEnv = env;

    const url = new URL(request.url);
    const startTime = Date.now();

    if (request.method === "OPTIONS") {
      return new Response(null, {
        headers: {
          "Access-Control-Allow-Origin": "*",
          "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
          "Access-Control-Allow-Headers":
            "Content-Type, Accept, MCP-Protocol-Version, Mcp-Session-Id",
          "Access-Control-Max-Age": "86400",
        },
      });
    }

    try {
      // Streamable HTTP transport (MCP 2025-06-18 specification)
      if (url.pathname === "/mcp" || url.pathname.startsWith("/mcp/")) {
        const protocolVersion = request.headers.get("MCP-Protocol-Version");
        const sessionId = request.headers.get("Mcp-Session-Id");

        console.log("MCP Streamable HTTP request:", {
          method: request.method,
          pathname: url.pathname,
          protocolVersion,
          hasSessionId: !!sessionId,
        });

        const response = await EntrezMCP.serve("/mcp").fetch(request, env, ctx);

        if (response instanceof Response) {
          const headers = new Headers(response.headers);

          // CORS headers for browser compatibility
          headers.set("Access-Control-Allow-Origin", "*");
          headers.set(
            "Access-Control-Expose-Headers",
            "MCP-Protocol-Version, mcp-session-id",
          );

          // Echo the client's protocol version, or advertise the current one.
          headers.set("MCP-Protocol-Version", protocolVersion || "2025-11-25");

          // Issue a session ID for an initialize request, but never replace
          // one the transport already put on the response.
          if (
            !sessionId &&
            request.method === "POST" &&
            !headers.has("mcp-session-id")
          ) {
            try {
              // NOTE(review): the transport may already have consumed the
              // body, in which case clone() throws and no ID is issued —
              // confirm whether that is intended.
              const requestBody = await request.clone().text();
              if (/"method"\s*:\s*"initialize"/.test(requestBody)) {
                const newSessionId = crypto.randomUUID();
                headers.set("mcp-session-id", newSessionId);
                console.log(
                  "Generated new session ID for initialization:",
                  newSessionId,
                );
              }
            } catch (e) {
              // Best effort only: an unreadable body just means no ID is added.
            }
          }

          return new Response(response.body, {
            status: response.status,
            statusText: response.statusText,
            headers,
          });
        }

        return response;
      }

      return new Response(LANDING_PAGE_TEXT, {
        status: 200,
        headers: {
          "Content-Type": "text/plain",
          "Access-Control-Allow-Origin": "*",
        },
      });
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);

      console.error("MCP Server Error:", {
        method: request.method,
        url: url.pathname,
        error: errorMessage,
        duration: Date.now() - startTime,
        userAgent: request.headers.get("User-Agent"),
      });

      return new Response(
        JSON.stringify({
          error: "Internal server error",
          message: errorMessage,
          timestamp: new Date().toISOString(),
          endpoint: url.pathname,
        }),
        {
          status: 500,
          headers: {
            "Content-Type": "application/json",
            "Access-Control-Allow-Origin": "*",
          },
        },
      );
    }
  },
};

export { JsonToSqlDO };

// Temporary UserVault class to satisfy migration
export class UserVault {
  constructor(state: any, env: any) {
    // Minimal implementation
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/QuentinCody/entrez-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

index.ts•27 KiB