Skip to main content
Glama
response-formatter.ts (16 kB)
/**
 * Token-efficient response formatter for NCBI API responses
 * Provides context-aware, compact formatting to optimize LLM token usage
 */

/** Caller-supplied hints that control how much detail a formatter emits. */
export interface FormattingOptions {
  /** Approximate token budget for the whole response (defaults to 500). */
  maxTokens?: number;
  /** Selects the PubMed summary layout; "full" falls through to the compact list. */
  intendedUse?: "search" | "analysis" | "citation" | "full";
  /** Accepted for API compatibility; not read by any formatter in this class. */
  includeMetadata?: boolean;
  /** When true, formatters emit shorter, truncated output. */
  compactMode?: boolean;
}

/** Structured (JSON-friendly) view of an ESearch response. */
export interface SearchStructuredResult {
  count: number;
  retmax: number;
  retstart: number;
  idlist: string[];
  queryTranslation?: string;
  webEnv?: string;
  queryKey?: string;
  warnings?: string[];
  suggestions?: string[];
}

export class ResponseFormatter {
  private static readonly DEFAULT_MAX_TOKENS = 500;
  // Not referenced by any formatter in this class; kept so existing budgets do not change.
  private static readonly COMPACT_MAX_TOKENS = 200;

  /**
   * Matches opening and closing forms of wrapper elements that carry no text
   * of their own. The `\b` after the tag name is essential: without it,
   * `<Article...>` would also swallow `<ArticleTitle>` and `<ArticleIdList>`.
   */
  private static readonly XML_WRAPPER_PATTERN =
    /<\/?(?:PubmedArticleSet|PubmedArticle|MedlineCitation|Article|AuthorList|KeywordList|PublicationTypeList|ArticleIdList|History)\b[^>]*>/gi;

  /** Ordered tag-to-text rewrites; the labelled AbstractText rule must precede the generic one. */
  private static readonly XML_TAG_REPLACEMENTS: ReadonlyArray<{
    from: RegExp;
    to: string;
  }> = [
    { from: /<ArticleTitle\b[^>]*>/gi, to: "\n**TITLE**: " },
    { from: /<\/ArticleTitle>/gi, to: "" },
    { from: /<AbstractText\b[^>]*Label="([^"]*)"[^>]*>/gi, to: "\n**$1**: " },
    { from: /<AbstractText\b[^>]*>/gi, to: "\n**ABSTRACT**: " },
    { from: /<\/AbstractText>/gi, to: "" },
    { from: /<Keyword\b[^>]*>/gi, to: "• " },
    { from: /<\/Keyword>/gi, to: "\n" },
    { from: /<PMID\b[^>]*>/gi, to: "\n**PMID**: " },
    { from: /<\/PMID>/gi, to: "" },
    { from: /<LastName>/gi, to: "" },
    { from: /<\/LastName>/gi, to: ", " },
    { from: /<ForeName>/gi, to: "" },
    { from: /<\/ForeName>/gi, to: " " },
    { from: /<Initials>/gi, to: "" },
    { from: /<\/Initials>/gi, to: "" },
  ];

  /**
   * Format ESearch results as structured JSON (Code Mode friendly)
   * Returns structured data suitable for programmatic use
   *
   * @param data Parsed ESearch JSON; expected to carry an `esearchresult` object.
   * @returns Counts, the ID list, optional query/history fields, plus
   *   warnings and next-step suggestions derived from the counts.
   */
  static formatSearchStructured(data: any): SearchStructuredResult {
    if (!data?.esearchresult) {
      return {
        count: 0,
        retmax: 0,
        retstart: 0,
        idlist: [],
        warnings: ["Invalid search result format"],
      };
    }

    const result = data.esearchresult;
    const count = ResponseFormatter.toCount(result.count);
    const retmax = ResponseFormatter.toCount(result.retmax);
    const retstart = ResponseFormatter.toCount(result.retstart);
    // Coerce so the declared string[] contract holds even if IDs arrive as numbers.
    const idlist: string[] = Array.isArray(result.idlist)
      ? result.idlist.map((id: unknown) => String(id))
      : [];

    const structured: SearchStructuredResult = {
      count,
      retmax,
      retstart,
      idlist,
    };

    // Add optional fields if present
    if (result.querytranslation) {
      structured.queryTranslation = result.querytranslation;
    }
    if (result.webenv) {
      structured.webEnv = result.webenv;
    }
    if (result.querykey) {
      structured.queryKey = result.querykey;
    }

    // Add warnings based on result characteristics
    const warnings: string[] = [];
    if (count === 0) {
      warnings.push("No results found; try broader keywords or MeSH terms");
    } else if (count > 10000) {
      warnings.push(
        "Large result set; consider adding filters or narrowing the query",
      );
    }
    if (warnings.length > 0) {
      structured.warnings = warnings;
    }

    // Add suggestions for next steps
    const suggestions: string[] = [];
    if (count > 0) {
      suggestions.push("Use summary operation for article metadata");
      suggestions.push(
        "Use fetch operation with rettype='abstract' for full abstracts",
      );
      if (retmax < count) {
        suggestions.push(
          `Increase retmax (currently ${retmax}) to retrieve more results`,
        );
      }
    }
    if (suggestions.length > 0) {
      structured.suggestions = suggestions;
    }

    return structured;
  }

  /**
   * Format ESummary responses efficiently based on intended use and database
   *
   * @param data Parsed ESummary JSON (or an already-formatted string, returned as is).
   * @param options Formatting hints; see {@link FormattingOptions}.
   * @param database Source database; "gene", "protein", "nucleotide"/"nuccore"
   *   get dedicated layouts, everything else is formatted as PubMed.
   * @returns Human-readable text, or the JSON-serialized input when it has no `result`.
   */
  static formatESummary(
    data: any,
    options: FormattingOptions = {},
    database?: string,
  ): string {
    const {
      intendedUse = "analysis",
      maxTokens = ResponseFormatter.DEFAULT_MAX_TOKENS,
      compactMode = false,
    } = options;

    if (typeof data === "string") return data;
    // `?? null` keeps the return type honest: JSON.stringify(undefined) is undefined.
    if (!data?.result) return JSON.stringify(data ?? null);

    const record = data.result;
    // Object.keys() yields integer-like keys in ascending numeric order, which
    // would discard the ordering given by `uids`. Walk `uids` first,
    // then pick up any records that are not listed there.
    const listedUids: string[] = Array.isArray(record.uids)
      ? record.uids.map((uid: unknown) => String(uid))
      : [];
    const remainingKeys = Object.keys(record).filter((key) => key !== "uids");
    const orderedKeys = [...new Set([...listedUids, ...remainingKeys])];

    const results = orderedKeys
      .map((uid) => record[uid])
      .filter((item) => item && typeof item === "object");

    if (results.length === 0) return "No results found";

    // Route to database-specific formatters
    switch (database) {
      case "gene":
        return ResponseFormatter.formatGeneSummary(
          results,
          intendedUse,
          compactMode,
        );
      case "protein":
        return ResponseFormatter.formatProteinSummary(
          results,
          intendedUse,
          compactMode,
        );
      case "nucleotide":
      case "nuccore":
        return ResponseFormatter.formatNucleotideSummary(
          results,
          intendedUse,
          compactMode,
        );
      case "pubmed":
      default:
        // Default to PubMed formatting
        switch (intendedUse) {
          case "search":
            return ResponseFormatter.formatSearchSummary(results, compactMode);
          case "citation":
            return ResponseFormatter.formatCitationSummary(
              results,
              compactMode,
            );
          case "analysis":
            return ResponseFormatter.formatAnalysisSummary(results, maxTokens);
          default:
            return ResponseFormatter.formatCompactSummary(results, maxTokens);
        }
    }
  }

  /**
   * Format search results for quick scanning
   */
  private static formatSearchSummary(results: any[], compact: boolean): string {
    return results
      .map((item, idx) => {
        const authors = ResponseFormatter.getAuthors(item, compact ? 1 : 3);
        const title = item.title || "No title";
        const journal =
          item.source || item.fulljournalname || "Unknown journal";
        const year = ResponseFormatter.extractYear(
          item.pubdate || item.sortpubdate,
        );

        if (compact) {
          return `${idx + 1}. ${authors} (${year}). ${ResponseFormatter.truncateText(title, 60)}. ${journal}`;
        }
        // The trailing "\n" plus the "\n" join leaves one blank line between entries.
        return `**${item.uid}**: ${authors} (${year})\n📄 ${title}\n📚 ${journal}\n`;
      })
      .join("\n");
  }

  /**
   * Format for citation purposes
   */
  private static formatCitationSummary(
    results: any[],
    compact: boolean,
  ): string {
    return results
      .map((item) => {
        const authors = ResponseFormatter.getAuthors(item, compact ? 2 : 5);
        const title = item.title || "No title";
        const journal =
          item.source || item.fulljournalname || "Unknown journal";
        const year = ResponseFormatter.extractYear(
          item.pubdate || item.sortpubdate,
        );
        const volume = item.volume;
        const pages = item.pages;
        const pmid = item.uid;

        // Terminate the author list and title so they do not run into the next field.
        let citation = `${ResponseFormatter.withTerminalPeriod(authors)} ${ResponseFormatter.withTerminalPeriod(title)} ${journal}. ${year}`;
        if (volume) citation += `;${volume}`;
        if (pages) citation += `:${pages}`;
        citation += `. PMID: ${pmid}`;

        return citation;
      })
      .join("\n\n");
  }

  /**
   * Format for analysis with key metadata
   */
  private static formatAnalysisSummary(
    results: any[],
    maxTokens: number,
  ): string {
    // Callers guarantee results is non-empty, so this never divides by zero.
    const estimatedTokensPerItem = Math.floor(maxTokens / results.length);

    return results
      .map((item) => {
        const authors = ResponseFormatter.getAuthors(item, 3);
        const title = item.title || "No title";
        const journal =
          item.source || item.fulljournalname || "Unknown journal";
        const year = ResponseFormatter.extractYear(
          item.pubdate || item.sortpubdate,
        );
        const pmid = item.uid;

        // Flag whether an abstract exists (the abstract text itself is not in the summary)
        const hasAbstract = item.attributes?.includes("Has Abstract");
        const pubTypes = Array.isArray(item.pubtype)
          ? item.pubtype.join(", ")
          : item.pubtype || "";

        let summary = `**PMID ${pmid}** (${year})\n${ResponseFormatter.truncateText(title, 100)}\n${authors} | ${journal}`;

        // Extra metadata only when the per-item budget leaves room for it.
        if (estimatedTokensPerItem > 50) {
          if (pubTypes) summary += `\n📑 ${pubTypes}`;
          if (hasAbstract) summary += "\n✅ Has Abstract";
        }

        return summary;
      })
      .join("\n\n");
  }

  /**
   * Ultra-compact format for large result sets
   */
  private static formatCompactSummary(
    results: any[],
    maxTokens: number,
  ): string {
    const tokensPerItem = Math.min(50, Math.floor(maxTokens / results.length));

    return results
      .map((item, idx) => {
        const firstAuthor =
          item.authors?.[0]?.name || item.sortfirstauthor || "Unknown";
        const year = ResponseFormatter.extractYear(
          item.pubdate || item.sortpubdate,
        );
        const title = ResponseFormatter.truncateText(
          item.title || "No title",
          tokensPerItem > 30 ? 80 : 40,
        );

        return `${idx + 1}. ${firstAuthor} (${year}): ${title} [PMID: ${item.uid}]`;
      })
      .join("\n");
  }

  /**
   * Format EFetch results based on return type
   *
   * @param data Raw EFetch payload: XML/FASTA/plain text as a string, or parsed JSON.
   * @param rettype The `rettype` that was requested, used as a format hint.
   * @param options Only `compactMode` is consulted.
   * @returns Formatted text; non-string payloads are JSON-serialized.
   */
  static formatEFetch(
    data: any,
    rettype?: string,
    options: FormattingOptions = {},
  ): string {
    const { compactMode = false } = options;

    if (typeof data === "string") {
      // Handle XML/text formats
      if (rettype === "abstract" || data.includes("<Abstract>")) {
        return ResponseFormatter.formatAbstractXML(data, compactMode);
      }

      if (rettype === "fasta" || data.startsWith(">")) {
        return ResponseFormatter.formatFasta(data, compactMode);
      }

      // Clean up XML by removing redundant tags and formatting
      if (data.includes("<") && data.includes(">")) {
        return ResponseFormatter.cleanXmlResponse(data, compactMode);
      }

      // Truncate very long raw responses
      return data.length > 2000 && compactMode
        ? data.substring(0, 2000) + "\n\n[...truncated...]"
        : data;
    }

    return JSON.stringify(data ?? null);
  }

  /**
   * Extract and format abstract from XML
   *
   * Only the first `<ArticleTitle>` / `<Abstract>` pair in the document is
   * formatted. Input without an `<Abstract>` element is returned unchanged.
   */
  private static formatAbstractXML(xml: string, compact: boolean): string {
    try {
      // Extract title
      const titleMatch = xml.match(/<ArticleTitle>(.*?)<\/ArticleTitle>/s);
      const title = titleMatch?.[1]?.replace(/<[^>]*>/g, "") || "No title";

      // Extract abstract sections
      const abstractMatch = xml.match(/<Abstract>(.*?)<\/Abstract>/s);
      if (!abstractMatch) return xml;

      const abstractContent = abstractMatch[1] ?? "";
      const sections =
        abstractContent.match(
          /<AbstractText[^>]*Label="([^"]*)"[^>]*>(.*?)<\/AbstractText>/gs,
        ) || [];

      if (sections.length === 0) {
        // No labeled sections, get plain text
        const plainText = abstractContent.replace(/<[^>]*>/g, "").trim();
        return compact
          ? `${ResponseFormatter.truncateText(title, 80)}\n\n${ResponseFormatter.truncateText(plainText, 300)}`
          : `**${title}**\n\n${plainText}`;
      }

      // Format labeled sections
      const formattedSections = sections.map((section) => {
        const labelMatch = section.match(/Label="([^"]*)"/);
        const label = labelMatch?.[1] || "";
        const content = section.replace(/<[^>]*>/g, "").trim();

        return compact
          ? `**${label}**: ${ResponseFormatter.truncateText(content, 150)}`
          : `**${label.toUpperCase()}**: ${content}`;
      });

      const titleSection = compact
        ? ResponseFormatter.truncateText(title, 60)
        : `**${title}**`;
      return `${titleSection}\n\n${formattedSections.join("\n\n")}`;
    } catch {
      // Best effort: if anything above throws, hand back the untouched payload.
      return xml;
    }
  }

  /**
   * Format FASTA sequences
   *
   * In compact mode with more than one record, each record is reduced to its
   * header line plus residue count; otherwise the input is returned unchanged.
   */
  private static formatFasta(fasta: string, compact: boolean): string {
    const sequences = fasta
      .split("\n>")
      .map((seq) => (seq.startsWith(">") ? seq : ">" + seq));

    if (compact && sequences.length > 1) {
      return sequences
        .map((seq, idx) => {
          const [header = "", ...body] = seq.split("\n");
          // Strip CR/space so CRLF input does not inflate the residue count.
          const seqLength = body.join("").replace(/\s/g, "").length;
          return `${idx + 1}. ${header.trimEnd()} (${seqLength} bp/aa)`;
        })
        .join("\n");
    }

    return fasta;
  }

  /**
   * Helper methods
   */

  /** Coerces a count-like value to a non-negative integer; anything unusable becomes 0. */
  private static toCount(value: unknown): number {
    const parsed = Number(value ?? 0);
    return Number.isFinite(parsed) && parsed > 0 ? Math.trunc(parsed) : 0;
  }

  /** Appends a period unless the text already ends in sentence punctuation. */
  private static withTerminalPeriod(text: string): string {
    return /[.?!]$/.test(text) ? text : `${text}.`;
  }

  /**
   * Builds a comma-separated author list capped at `maxCount`, appending
   * "et al." when names were dropped. Falls back to `sortfirstauthor` or
   * "Unknown authors" when no usable author name is present.
   */
  private static getAuthors(item: any, maxCount: number): string {
    const fallback = item.sortfirstauthor || "Unknown authors";
    if (!Array.isArray(item.authors)) {
      return fallback;
    }

    // Entries may be plain strings or objects with a `name`; anything else is skipped
    // rather than being rendered as "[object Object]".
    const names: string[] = item.authors
      .map((author: any) =>
        typeof author === "string" ? author : author?.name,
      )
      .filter(
        (name: unknown): name is string =>
          typeof name === "string" && name.length > 0,
      );

    if (names.length === 0) {
      return fallback;
    }

    const shown = names.slice(0, maxCount);
    if (names.length > maxCount) {
      shown.push("et al.");
    }

    return shown.join(", ");
  }

  /** Returns the first four-digit run in the date, else its first four characters, else "Unknown". */
  private static extractYear(dateStr: string): string {
    if (!dateStr) return "Unknown";
    // Defensive coercion: the value comes from untyped API data.
    const text = String(dateStr);
    const yearMatch = text.match(/\d{4}/);
    return yearMatch?.[0] || text.substring(0, 4) || "Unknown";
  }

  /** Shortens text to at most `maxLength` characters, ending in "..." when there is room for it. */
  private static truncateText(text: string, maxLength: number): string {
    if (!text) return "";
    if (text.length <= maxLength) return text;
    // With three characters or fewer there is no room for an ellipsis.
    if (maxLength <= 3) return text.substring(0, Math.max(0, maxLength));
    return text.substring(0, maxLength - 3) + "...";
  }

  /**
   * Format gene database summaries
   *
   * Non-compact entries put each detail field on its own line, like the
   * non-compact PubMed search layout.
   */
  private static formatGeneSummary(
    results: any[],
    intendedUse: string,
    compact: boolean,
  ): string {
    return results
      .map((item, idx) => {
        const geneId = item.uid;
        const name = item.name || "Unknown gene";
        const description = item.description || "No description available";
        const organism = item.organism || "Unknown organism";
        const chromosome = item.chromosome || "Unknown";
        const maplocation = item.maplocation || "Unknown location";
        const geneType = item.genetype || "Unknown type";

        if (compact) {
          return `${idx + 1}. **${name}** (ID: ${geneId}) - ${ResponseFormatter.truncateText(description, 80)} [${organism}]`;
        }

        return [
          `**Gene ID ${geneId}** - ${name}`,
          `📋 ${description}`,
          `🧬 **Organism**: ${organism}`,
          `🧭 **Location**: Chromosome ${chromosome}, ${maplocation}`,
          `🔬 **Type**: ${geneType}`,
        ].join("\n");
      })
      .join(compact ? "\n" : "\n\n");
  }

  /**
   * Format protein database summaries
   */
  private static formatProteinSummary(
    results: any[],
    intendedUse: string,
    compact: boolean,
  ): string {
    return results
      .map((item, idx) => {
        const proteinId = item.uid;
        const title = item.title || "Unknown protein";
        const organism = item.organism || "Unknown organism";
        const length = item.slen || "Unknown";
        const accession = item.caption || item.accessionversion || proteinId;

        if (compact) {
          return `${idx + 1}. **${accession}** - ${ResponseFormatter.truncateText(title, 60)} [${organism}]`;
        }

        return [
          `**Protein ID ${proteinId}** - ${accession}`,
          `📋 ${title}`,
          `🧬 **Organism**: ${organism}`,
          `📏 **Length**: ${length} amino acids`,
        ].join("\n");
      })
      .join(compact ? "\n" : "\n\n");
  }

  /**
   * Format nucleotide database summaries
   */
  private static formatNucleotideSummary(
    results: any[],
    intendedUse: string,
    compact: boolean,
  ): string {
    return results
      .map((item, idx) => {
        const seqId = item.uid;
        const title = item.title || "Unknown sequence";
        const organism = item.organism || "Unknown organism";
        const length = item.slen || "Unknown";
        const accession = item.caption || item.accessionversion || seqId;
        const moltype = item.moltype || "Unknown";

        if (compact) {
          return `${idx + 1}. **${accession}** - ${ResponseFormatter.truncateText(title, 60)} [${organism}]`;
        }

        return [
          `**Sequence ID ${seqId}** - ${accession}`,
          `📋 ${title}`,
          `🧬 **Organism**: ${organism}`,
          `🧪 **Molecule Type**: ${moltype}`,
          `📏 **Length**: ${length} nucleotides`,
        ].join("\n");
      })
      .join(compact ? "\n" : "\n\n");
  }

  /**
   * Clean XML responses by removing redundant tags and improving readability
   *
   * Wrapper elements are dropped, a handful of content tags are rewritten as
   * markdown-style labels on their own lines, and (in compact mode only) every
   * remaining tag is replaced by a space.
   */
  private static cleanXmlResponse(xml: string, compact: boolean): string {
    let cleaned = xml;

    // Remove XML declaration and DOCTYPE
    cleaned = cleaned.replace(/<\?xml[^>]*\?>/g, "");
    cleaned = cleaned.replace(/<!DOCTYPE[^>]*>/g, "");

    // Remove common redundant wrapper tags but preserve content
    cleaned = cleaned.replace(ResponseFormatter.XML_WRAPPER_PATTERN, "");

    // Convert important tags to readable format
    for (const { from, to } of ResponseFormatter.XML_TAG_REPLACEMENTS) {
      cleaned = cleaned.replace(from, to);
    }

    // Remove remaining XML tags if in compact mode
    if (compact) {
      cleaned = cleaned.replace(/<[^>]*>/g, " ");
    }

    // Clean up whitespace. Newlines are handled separately from spaces/tabs so
    // the line breaks inserted above survive normalization.
    return cleaned
      .replace(/\r\n?/g, "\n") // Normalize line endings
      .replace(/[^\S\n]+/g, " ") // Collapse runs of horizontal whitespace
      .replace(/ ?\n ?/g, "\n") // Drop spaces hugging a line break
      .replace(/\n{3,}/g, "\n\n") // Remove excessive line breaks
      .trim();
  }

  /**
   * Estimate token count (rough approximation)
   *
   * @param text Text to measure.
   * @returns Character count divided by four, rounded up.
   */
  static estimateTokens(text: string): number {
    // Rough estimate: ~4 characters per token
    return Math.ceil((text ?? "").length / 4);
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/QuentinCody/entrez-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.