Skip to main content
Glama

Open Search MCP

by flyanima
MIT License
2
  • Apple
  • Linux
pubmed-search-client.ts14.4 kB
import axios, { AxiosInstance } from 'axios';
import { Logger } from '../../utils/logger.js';

/**
 * PubMed search client focused on medical-literature lookups.
 *
 * Supports free-text paper search, author search, MeSH-term search and
 * fetching the details of a single paper by PMID, using the NCBI E-utilities
 * `esearch.fcgi` and `efetch.fcgi` endpoints.
 */

/** Paging, sorting and date-range options shared by every search method. */
interface SearchOptions {
  /** Maximum number of papers to return (clamped to 1..100, default 10). */
  retmax?: number;
  /** Zero-based offset of the first result to return. */
  retstart?: number;
  sort?: 'relevance' | 'pub_date' | 'author';
  /** Lower bound of the date filter (sent to esearch as `mindate`). */
  mindate?: string;
  /** Upper bound of the date filter (sent to esearch as `maxdate`). */
  maxdate?: string;
}

interface AuthorOptions extends SearchOptions {
  /** When true the author name is quoted so it is searched as one phrase. */
  exactMatch?: boolean;
}

interface MeSHOptions extends SearchOptions {
  /** When true the `[MeSH Major Topic]` field tag is used instead of `[MeSH Terms]`. */
  majorTopic?: boolean;
}

interface PubmedPaper {
  pmid: string;
  title: string;
  abstract: string;
  authors: string[];
  journal: string;
  publicationDate: string;
  doi?: string;
  meshTerms: string[];
  articleType: string;
  language: string;
}

interface SearchResult {
  query: string;
  papers: PubmedPaper[];
  totalResults: number;
  retstart: number;
  retmax: number;
}

/** Outcome of the query-intent heuristic used by `smartSearch`. */
type SearchIntent =
  | { type: 'pmid'; pmid: string }
  | { type: 'author'; author: string }
  | { type: 'mesh'; meshTerm: string }
  | { type: 'general' };

// Parsed xml2js output has no static shape, so XML nodes are deliberately untyped.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type XmlNode = any;

const DEFAULT_RETMAX = 10;
const MAX_RETMAX = 100;

// E-utilities requires mindate and maxdate to be sent together; these values
// stand in for the bound the caller left open.
const OPEN_MIN_DATE = '1800';
const OPEN_MAX_DATE = '3000';

/** Phrases that mark a query as "find papers by this author". */
const AUTHOR_INTENT_PATTERN = /\bpapers\s+by\b|\bauthor:|\bby\s|\bresearchers?\b/i;

/**
 * Marker words removed from a query to leave only the author name. Every
 * alternative is anchored on a word boundary so that names which merely
 * contain a marker ("Abby", "Kirby") are left intact.
 */
const AUTHOR_MARKER_PATTERN = /\bpapers\s+by\b|\bauthor:|\bby\b|\bresearchers?\b|\bauthor\b/gi;

/** Keywords that make a free-text query look like a MeSH topic. */
const MESH_KEYWORDS: readonly string[] = [
  'disease', 'treatment', 'therapy', 'diagnosis', 'syndrome', 'disorder',
  'cancer', 'tumor', 'infection', 'virus', 'bacteria', 'drug', 'medicine'
];

/** Wraps a single value in an array; `null`/`undefined` become an empty array. */
function toArray<T>(value: T | T[] | null | undefined): T[] {
  if (value == null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}

/**
 * Extracts the text of a parsed XML node.
 *
 * With the parser options used here an element is either a plain string or,
 * when it carries attributes or child markup, an object whose character data
 * is stored under `_`. Arrays are flattened and joined with single spaces.
 * Anything else yields an empty string, so callers never receive an object
 * where a string is expected.
 */
function textOf(node: unknown): string {
  if (node == null) {
    return '';
  }
  if (typeof node === 'string') {
    return node;
  }
  if (typeof node === 'number' || typeof node === 'boolean') {
    return String(node);
  }
  if (Array.isArray(node)) {
    return node.map((item) => textOf(item)).filter(Boolean).join(' ');
  }
  if (typeof node === 'object') {
    const text = (node as Record<string, unknown>)._;
    return typeof text === 'string' ? text : '';
  }
  return '';
}

/**
 * Client for the NCBI E-utilities PubMed endpoints.
 *
 * Requests are spaced at least `REQUEST_DELAY` milliseconds apart.
 */
export class PubmedSearchClient {
  private httpClient: AxiosInstance;
  private logger: Logger;
  private requestCount = 0;
  private lastRequestTime = 0;
  private readonly REQUEST_DELAY = 1000; // 1 second between requests, following NCBI's recommendation

  // Top-level MeSH tree categories, keyed by their tree letter.
  private readonly MESH_CATEGORIES = {
    'A': 'Anatomy',
    'B': 'Organisms',
    'C': 'Diseases',
    'D': 'Chemicals and Drugs',
    'E': 'Analytical, Diagnostic and Therapeutic Techniques',
    'F': 'Psychiatry and Psychology',
    'G': 'Phenomena and Processes',
    'H': 'Disciplines and Occupations',
    'I': 'Anthropology, Education, Sociology, and Social Phenomena',
    'J': 'Technology, Industry, and Agriculture',
    'K': 'Humanities',
    'L': 'Information Science',
    'M': 'Named Groups',
    'N': 'Health Care',
    'V': 'Publication Characteristics',
    'Z': 'Geographicals'
  };

  constructor() {
    this.logger = new Logger('PubmedSearch');

    this.httpClient = axios.create({
      baseURL: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils',
      timeout: 30000,
      headers: {
        'User-Agent': 'Open-Search-MCP/2.0 (https://github.com/open-search-mcp)'
      }
    });
  }

  /**
   * Performs a rate-limited GET request against an E-utilities endpoint.
   *
   * @param endpoint Path relative to the client's base URL, e.g. `/esearch.fcgi`.
   * @param params Query-string parameters.
   * @returns The response body as delivered by axios.
   * @throws Rethrows the underlying request error after logging it.
   */
  private async makeRequest(endpoint: string, params: Record<string, unknown> = {}): Promise<string> {
    // Reserve the next free slot synchronously, before any await, so that
    // overlapping calls queue up behind each other instead of all observing
    // the same stale timestamp and firing at once.
    const now = Date.now();
    const scheduledAt = Math.max(now, this.lastRequestTime + this.REQUEST_DELAY);
    this.lastRequestTime = scheduledAt;

    const waitTime = scheduledAt - now;
    if (waitTime > 0) {
      this.logger.info(`Rate limiting: waiting ${waitTime}ms`);
      await new Promise<void>((resolve) => setTimeout(resolve, waitTime));
    }

    this.requestCount++;

    try {
      const response = await this.httpClient.get<string>(endpoint, { params });
      return response.data;
    } catch (error) {
      this.logger.error('PubMed API request failed:', error);
      throw error;
    }
  }

  /**
   * Parses an E-utilities XML response into a plain object tree.
   *
   * @throws Error with a generic message when the XML cannot be parsed; the
   *   original parser error is logged.
   */
  private async parsePubmedXML(xmlData: string): Promise<XmlNode> {
    try {
      // xml2js is imported dynamically so it loads correctly from an ES module.
      const { parseStringPromise } = await import('xml2js');
      return await parseStringPromise(xmlData, {
        explicitArray: false,
        mergeAttrs: true,
        trim: true,
        ignoreAttrs: false
      });
    } catch (error) {
      this.logger.error('XML parsing failed:', error);
      throw new Error('Failed to parse PubMed response');
    }
  }

  /** Strips surrounding whitespace and an optional leading `PMID:` label. */
  private normalizePmid(pmid: string): string {
    return pmid.trim().replace(/^pmid:?\s*/i, '');
  }

  /**
   * Converts one parsed `PubmedArticle` node into a flat `PubmedPaper`.
   *
   * Every string field is guaranteed to be a string: elements that were
   * parsed into objects (attributes, inline markup) are reduced to their text.
   */
  private formatPaper(article: XmlNode): PubmedPaper {
    const citation = article?.MedlineCitation;
    const articleNode = citation?.Article;
    const journalNode = articleNode?.Journal;

    const pmid = textOf(citation?.PMID);
    const title = textOf(articleNode?.ArticleTitle);

    // Structured abstracts arrive as several labelled sections; join them.
    const abstract = textOf(articleNode?.Abstract?.AbstractText);

    const authors = toArray<XmlNode>(articleNode?.AuthorList?.Author)
      .map((author) => {
        const lastName = textOf(author?.LastName);
        const foreName = textOf(author?.ForeName) || textOf(author?.Initials);
        const personalName = `${foreName} ${lastName}`.trim();
        // Consortium / group authors carry a CollectiveName and no personal name.
        return personalName || textOf(author?.CollectiveName);
      })
      .filter((name) => name);

    const journal = textOf(journalNode?.Title) || textOf(journalNode?.ISOAbbreviation);

    // Join whichever of year / month / day are present with '-'; fall back to
    // the free-form MedlineDate when no structured parts exist.
    const pubDate = journalNode?.JournalIssue?.PubDate;
    let publicationDate = '';
    if (pubDate) {
      publicationDate =
        [pubDate.Year, pubDate.Month, pubDate.Day]
          .map((part) => textOf(part))
          .filter(Boolean)
          .join('-') || textOf(pubDate.MedlineDate);
    }

    const doiNode = toArray<XmlNode>(article?.PubmedData?.ArticleIdList?.ArticleId)
      .find((id) => id?.IdType === 'doi');
    const doi = textOf(doiNode);

    const meshTerms = toArray<XmlNode>(citation?.MeshHeadingList?.MeshHeading)
      .map((heading) => textOf(heading?.DescriptorName))
      .filter((term) => term);

    const publicationTypes = toArray<XmlNode>(articleNode?.PublicationTypeList?.PublicationType);
    const articleType = textOf(publicationTypes[0]) || 'Research Article';

    // An article may list several languages; report the first one.
    const language = textOf(toArray<XmlNode>(articleNode?.Language)[0]) || 'eng';

    return {
      pmid,
      title,
      abstract,
      authors,
      journal,
      publicationDate,
      doi,
      meshTerms,
      articleType,
      language
    };
  }

  /**
   * Searches PubMed and returns the matching papers with full details.
   *
   * Runs `esearch` to obtain the PMID list, then `efetch` to load the records.
   *
   * @param query PubMed query string (field tags such as `[Author]` are allowed).
   * @param options Paging, sorting and date-range options.
   * @throws Error when either response lacks its expected root element.
   */
  async searchPapers(query: string, options: SearchOptions = {}): Promise<SearchResult> {
    // Clamp paging values so out-of-range input never reaches the API.
    const requestedMax = Math.floor(Number(options.retmax)) || DEFAULT_RETMAX;
    const retmax = Math.min(Math.max(requestedMax, 1), MAX_RETMAX);
    const retstart = Math.max(Math.floor(Number(options.retstart)) || 0, 0);

    this.logger.info(`Searching PubMed: ${query}`);

    // Step 1: esearch returns the PMIDs that match the query.
    const searchParams: Record<string, unknown> = {
      db: 'pubmed',
      term: query,
      retmax,
      retstart,
      retmode: 'xml',
      sort: options.sort || 'relevance'
    };

    // mindate and maxdate must be supplied together, so fill in an open bound
    // when the caller gave only one side of the range.
    if (options.mindate || options.maxdate) {
      searchParams.mindate = options.mindate || OPEN_MIN_DATE;
      searchParams.maxdate = options.maxdate || OPEN_MAX_DATE;
    }

    const searchXml = await this.makeRequest('/esearch.fcgi', searchParams);
    const searchResult = await this.parsePubmedXML(searchXml);

    const eSearchResult = searchResult?.eSearchResult;
    if (!eSearchResult) {
      throw new Error('Invalid PubMed search response format');
    }

    const totalResults = Number.parseInt(textOf(eSearchResult.Count), 10) || 0;
    const pmidArray = toArray<XmlNode>(eSearchResult.IdList?.Id)
      .map((id) => textOf(id))
      .filter(Boolean);

    if (pmidArray.length === 0) {
      return { query, papers: [], totalResults, retstart, retmax };
    }

    // Step 2: efetch loads the full records for those PMIDs.
    const fetchParams = {
      db: 'pubmed',
      id: pmidArray.join(','),
      retmode: 'xml'
    };

    const fetchXml = await this.makeRequest('/efetch.fcgi', fetchParams);
    const fetchResult = await this.parsePubmedXML(fetchXml);

    if (!fetchResult?.PubmedArticleSet) {
      throw new Error('Invalid PubMed fetch response format');
    }

    const papers = toArray<XmlNode>(fetchResult.PubmedArticleSet.PubmedArticle)
      .map((article) => this.formatPaper(article));

    return { query, papers, totalResults, retstart, retmax };
  }

  /**
   * Fetches the details of a single paper.
   *
   * @param pmid PubMed identifier, optionally prefixed with `PMID:`.
   * @throws Error `Paper not found: <pmid>` when efetch returns no article.
   */
  async getPaperDetails(pmid: string): Promise<PubmedPaper> {
    this.logger.info(`Getting paper details: ${pmid}`);

    const params = {
      db: 'pubmed',
      id: this.normalizePmid(pmid),
      retmode: 'xml'
    };

    const xmlData = await this.makeRequest('/efetch.fcgi', params);
    const parsed = await this.parsePubmedXML(xmlData);

    const article = toArray<XmlNode>(parsed?.PubmedArticleSet?.PubmedArticle)[0];
    if (!article) {
      throw new Error(`Paper not found: ${pmid}`);
    }

    return this.formatPaper(article);
  }

  /**
   * Searches papers by author name using the `[Author]` field tag.
   *
   * @param author Author name as it should appear in the query.
   * @param options Search options; `exactMatch` quotes the name.
   */
  async searchByAuthor(author: string, options: AuthorOptions = {}): Promise<SearchResult> {
    this.logger.info(`Searching papers by author: ${author}`);

    const authorQuery = options.exactMatch ? `"${author}"[Author]` : `${author}[Author]`;

    return await this.searchPapers(authorQuery, options);
  }

  /**
   * Searches papers by MeSH term.
   *
   * @param meshTerm MeSH heading to search for.
   * @param options Search options; `majorTopic` selects `[MeSH Major Topic]`.
   */
  async searchByMeSH(meshTerm: string, options: MeSHOptions = {}): Promise<SearchResult> {
    this.logger.info(`Searching papers by MeSH term: ${meshTerm}`);

    const meshQuery = options.majorTopic
      ? `${meshTerm}[MeSH Major Topic]`
      : `${meshTerm}[MeSH Terms]`;

    return await this.searchPapers(meshQuery, options);
  }

  /**
   * Smart search: inspects the query and dispatches to the PMID lookup, author
   * search, MeSH search or general search.
   *
   * The parameter and return types stay `any` so existing callers keep
   * compiling; the result is always `{ type, query, result }`.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  async smartSearch(query: string, options: any = {}): Promise<any> {
    this.logger.info(`Smart search: ${query}`);

    const intent = this.analyzeSearchIntent(query);

    switch (intent.type) {
      case 'pmid': {
        const paperDetails = await this.getPaperDetails(intent.pmid);
        return { type: 'paper_details', query, result: paperDetails };
      }
      case 'author': {
        const authorResult = await this.searchByAuthor(intent.author, options);
        return { type: 'author_search', query, result: authorResult };
      }
      case 'mesh': {
        const meshResult = await this.searchByMeSH(intent.meshTerm, options);
        return { type: 'mesh_search', query, result: meshResult };
      }
      case 'general':
      default: {
        const searchResult = await this.searchPapers(query, options);
        return { type: 'general_search', query, result: searchResult };
      }
    }
  }

  /**
   * Classifies a query as a PMID lookup, author search, MeSH search or
   * general search. Checks run in that order and the first match wins.
   *
   * NOTE(review): the author and MeSH checks are keyword heuristics, so an
   * ordinary sentence containing "by " or a word such as "drug" is still
   * routed to the author / MeSH search - confirm this is intended.
   */
  private analyzeSearchIntent(query: string): SearchIntent {
    const normalizedQuery = query.toLowerCase().trim();

    // An explicit "pmid:" label accepts an id of any length; a bare number is
    // only treated as a PMID when it has at least 8 digits.
    const pmid = /\bpmid:?\s*(\d+)/i.exec(query)?.[1] ?? /(\d{8,})/.exec(query)?.[1];
    if (pmid) {
      return { type: 'pmid', pmid };
    }

    if (AUTHOR_INTENT_PATTERN.test(normalizedQuery)) {
      const author = this.extractAuthor(query);
      if (author) {
        return { type: 'author', author };
      }
    }

    if (normalizedQuery.includes('mesh:') || this.isMeSHTerm(normalizedQuery)) {
      const meshTerm = this.extractMeSHTerm(query);
      if (meshTerm) {
        return { type: 'mesh', meshTerm };
      }
    }

    return { type: 'general' };
  }

  /**
   * Removes the author marker words from a query and returns what is left.
   *
   * @returns The remaining text with whitespace collapsed, or `undefined` when
   *   nothing but marker words was present.
   */
  private extractAuthor(query: string): string | undefined {
    const author = query
      .replace(AUTHOR_MARKER_PATTERN, ' ')
      .replace(/\s+/g, ' ')
      .trim();

    return author || undefined;
  }

  /**
   * Reports whether a lower-cased query contains one of the MeSH keywords.
   * This is a plain substring check, not a lookup against the MeSH vocabulary.
   */
  private isMeSHTerm(query: string): boolean {
    return MESH_KEYWORDS.some((keyword) => query.includes(keyword));
  }

  /**
   * Returns the text following a `mesh:` label, or the whole query when the
   * label is absent.
   */
  private extractMeSHTerm(query: string): string | undefined {
    const labelled = /mesh:\s*(.+)/i.exec(query)?.[1];
    return labelled !== undefined ? labelled.trim() : query;
  }

  /** Returns a copy of the MeSH category table (tree letter to category name). */
  getSupportedMeSHCategories(): Record<string, string> {
    return { ...this.MESH_CATEGORIES };
  }

  /**
   * Returns usage statistics for this client instance.
   *
   * `lastRequestTime` is the epoch-millisecond slot assigned to the most recent
   * request (0 before the first request). The return type stays `any` so
   * existing callers keep compiling.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getUsageStats(): any {
    return {
      requestsUsed: this.requestCount,
      rateLimits: '1 request per second (recommended)',
      features: ['search', 'paper_details', 'author_search', 'mesh_search'],
      supportedCategories: Object.keys(this.MESH_CATEGORIES).length,
      lastRequestTime: this.lastRequestTime
    };
  }

  /**
   * Checks that a value is a syntactically valid PMID: one or more digits,
   * optionally preceded by a `PMID:` label. No minimum length is enforced,
   * since early PubMed records have short identifiers.
   */
  validatePMID(pmid: string): boolean {
    return /^\d+$/.test(this.normalizePmid(pmid));
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/flyanima/open-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.