Skip to main content
Glama
BioRxivSearcher.ts7.44 kB
/** * bioRxiv API集成模块 * 支持bioRxiv和medRxiv预印本论文搜索 */ import axios from 'axios'; import * as fs from 'fs'; import * as path from 'path'; import { Paper, PaperFactory } from '../models/Paper.js'; import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js'; import { TIMEOUTS, USER_AGENT } from '../config/constants.js'; import { logDebug } from '../utils/Logger.js'; interface BioRxivSearchOptions extends SearchOptions { /** 搜索天数范围 */ days?: number; /** 服务器类型 */ server?: 'biorxiv' | 'medrxiv'; } interface BioRxivResponse { messages: Array<{ status: string; count: number; }>; collection: BioRxivPaper[]; } interface BioRxivPaper { doi: string; title: string; authors: string; author_corresponding: string; author_corresponding_institution: string; date: string; version: string; type: string; license: string; category: string; jatsxml: string; abstract: string; published?: string; server: string; } export class BioRxivSearcher extends PaperSource { private readonly serverType: 'biorxiv' | 'medrxiv'; constructor(serverType: 'biorxiv' | 'medrxiv' = 'biorxiv') { super(serverType, `https://api.biorxiv.org/details/${serverType}`); this.serverType = serverType; } getCapabilities(): PlatformCapabilities { return { search: true, download: true, fullText: true, citations: false, requiresApiKey: false, supportedOptions: ['maxResults', 'days', 'category'] }; } /** * 搜索bioRxiv/medRxiv论文 */ async search(query: string, options: BioRxivSearchOptions = {}): Promise<Paper[]> { try { // 计算日期范围 const days = options.days || 30; const endDate = new Date().toISOString().split('T')[0]; const startDate = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString().split('T')[0]; // 构建搜索URL const searchUrl = `${this.baseUrl}/${startDate}/${endDate}`; const params: Record<string, any> = { cursor: 0 }; // 添加分类过滤 if (query && query !== '*') { // 将查询转换为分类格式 const category = query.toLowerCase().replace(/\s+/g, '_'); params.category = category; } logDebug(`${this.serverType} API Request: GET ${searchUrl}`); logDebug(`${this.serverType} Request params:`, params); const response = await axios.get(searchUrl, { params, timeout: TIMEOUTS.DEFAULT, headers: { 'User-Agent': USER_AGENT } }); logDebug(`${this.serverType} API Response: ${response.status} ${response.statusText}`); const papers = this.parseSearchResponse(response.data, query, options); logDebug(`${this.serverType} Parsed ${papers.length} papers`); return papers.slice(0, options.maxResults || 10); } catch (error: any) { logDebug(`${this.serverType} Search Error:`, error.message); this.handleHttpError(error, 'search'); } } /** * 下载PDF文件 */ async downloadPdf(paperId: string, options: DownloadOptions = {}): Promise<string> { try { const savePath = options.savePath || './downloads'; // 构建PDF URL const pdfUrl = `https://www.${this.serverType}.org/content/${paperId}v1.full.pdf`; // 确保保存目录存在 if (!fs.existsSync(savePath)) { fs.mkdirSync(savePath, { recursive: true }); } const filename = `${paperId.replace(/\//g, '_')}.pdf`; const filePath = path.join(savePath, filename); // 检查文件是否已存在 if (fs.existsSync(filePath) && !options.overwrite) { return filePath; } const response = await axios.get(pdfUrl, { responseType: 'stream', timeout: TIMEOUTS.DOWNLOAD, headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' } }); const writer = fs.createWriteStream(filePath); response.data.pipe(writer); return new Promise((resolve, reject) => { writer.on('finish', () => resolve(filePath)); writer.on('error', reject); }); } catch (error) { this.handleHttpError(error, 'download PDF'); } } /** * 读取论文全文内容 */ async readPaper(paperId: string, options: DownloadOptions = {}): Promise<string> { try { const savePath = options.savePath || './downloads'; const filePath = path.join(savePath, `${paperId.replace(/\//g, '_')}.pdf`); // 如果PDF不存在,先下载 if (!fs.existsSync(filePath)) { await this.downloadPdf(paperId, options); } return `PDF file downloaded at: ${filePath}. Full text extraction requires additional PDF parsing implementation.`; } catch (error) { this.handleHttpError(error, 'read paper'); } } /** * 解析搜索响应 */ private parseSearchResponse(data: BioRxivResponse, query: string, options: BioRxivSearchOptions): Paper[] { if (!data.collection || !Array.isArray(data.collection)) { return []; } // 如果有查询词,进行文本匹配过滤 let filteredCollection = data.collection; if (query && query !== '*' && query.trim()) { const queryLower = query.toLowerCase(); filteredCollection = data.collection.filter(item => item.title.toLowerCase().includes(queryLower) || item.abstract.toLowerCase().includes(queryLower) || item.authors.toLowerCase().includes(queryLower) || item.category.toLowerCase().includes(queryLower) ); } return filteredCollection.map(item => this.parseBioRxivPaper(item)) .filter(paper => paper !== null) as Paper[]; } /** * 解析单个bioRxiv论文 */ private parseBioRxivPaper(item: BioRxivPaper): Paper | null { try { // 解析作者 const authors = item.authors.split(';').map(author => author.trim()); // 解析日期 const publishedDate = this.parseDate(item.date); const year = publishedDate?.getFullYear(); // 构建URL const paperUrl = `https://www.${this.serverType}.org/content/${item.doi}v${item.version}`; const pdfUrl = `https://www.${this.serverType}.org/content/${item.doi}v${item.version}.full.pdf`; return PaperFactory.create({ paperId: item.doi, title: this.cleanText(item.title), authors: authors, abstract: this.cleanText(item.abstract), doi: item.doi, publishedDate: publishedDate, pdfUrl: pdfUrl, url: paperUrl, source: this.serverType, categories: [item.category], keywords: [], citationCount: 0, year: year, extra: { version: item.version, type: item.type, license: item.license, server: item.server, corresponding_author: item.author_corresponding, corresponding_institution: item.author_corresponding_institution } }); } catch (error) { logDebug(`Error parsing ${this.serverType} paper:`, error); return null; } } } /** * medRxiv搜索器 - 继承自BioRxivSearcher */ export class MedRxivSearcher extends BioRxivSearcher { constructor() { super('medrxiv'); } }

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Dianel555/paper-search-mcp-nodejs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server