# paper_search_mcp/server.py
"""
MCP Server - academic paper search service

2025 best-practices edition:
- Factory functions to reduce code duplication
- Unified error handling
- Logging instead of print()
- Clear, modular structure

Supported academic platforms:
- arXiv: preprint server (physics, mathematics, computer science)
- PubMed: biomedical literature
- bioRxiv/medRxiv: biology/medicine preprints
- Semantic Scholar: AI-driven academic search
- CrossRef: DOI citation database
- IACR: cryptography preprints
- Google Scholar: academic search engine
- RePEc/IDEAS: economics paper repository
"""
from pathlib import Path
from typing import List, Dict, Optional, Any
import logging
import os

from mcp.server.fastmcp import FastMCP
from .academic_platforms.arxiv import ArxivSearcher
from .academic_platforms.pubmed import PubMedSearcher
from .academic_platforms.biorxiv import BioRxivSearcher
from .academic_platforms.medrxiv import MedRxivSearcher
from .academic_platforms.google_scholar import GoogleScholarSearcher
from .academic_platforms.iacr import IACRSearcher
from .academic_platforms.semantic import SemanticSearcher
from .academic_platforms.crossref import CrossRefSearcher
from .academic_platforms.repec import RePECSearcher
from .academic_platforms.sci_hub import SciHubFetcher, check_paper_year
from .paper import Paper

# ============================================================
# Configuration
# ============================================================

# PDF download directory, configurable via the PAPER_DOWNLOAD_PATH
# environment variable. Defaults to paper_downloads under the user's
# home directory (cross-platform):
# - macOS:   ~/paper_downloads
# - Linux:   ~/paper_downloads
# - Windows: C:\Users\<username>\paper_downloads

def get_download_path() -> str:
    """Return the download directory, cross-platform.

    Note: the path is recomputed on every call so that:
    1. Changes to the PAPER_DOWNLOAD_PATH environment variable take effect.
    2. The HOME directory resolves correctly wherever the MCP server runs.
    """
    env_path = os.environ.get("PAPER_DOWNLOAD_PATH")
    if env_path:
        return env_path
    # Path.home() resolves the user's home directory on every platform
    return str(Path.home() / "paper_downloads")

# ============================================================
# Logging
# ============================================================
logger = logging.getLogger(__name__)

# ============================================================
# MCP server initialization
# ============================================================
mcp = FastMCP("paper_search_server")

# ============================================================
# Searcher instances (singletons)
# ============================================================
SEARCHERS = {
    'arxiv': ArxivSearcher(),
    'pubmed': PubMedSearcher(),
    'biorxiv': BioRxivSearcher(),
    'medrxiv': MedRxivSearcher(),
    'google_scholar': GoogleScholarSearcher(),
    'iacr': IACRSearcher(),
    'semantic': SemanticSearcher(),
    'crossref': CrossRefSearcher(),
    'repec': RePECSearcher(),
}

# Sci-Hub instance (managed separately; used for downloads only)
SCIHUB = SciHubFetcher()

# ============================================================
# Generic search and operation helpers
# ============================================================

async def _search(
    searcher_name: str,
    query: str,
    max_results: int = 10,
    **kwargs
) -> List[Dict]:
    """Generic search helper."""
    searcher = SEARCHERS.get(searcher_name)
    if not searcher:
        logger.error(f"Unknown searcher: {searcher_name}")
        return []
    try:
        papers = searcher.search(query, max_results=max_results, **kwargs)
        return [paper.to_dict() for paper in papers]
    except Exception as e:
        logger.error(f"Search failed for {searcher_name}: {e}")
        return []


async def _download(
    searcher_name: str,
    paper_id: str,
    save_path: Optional[str] = None
) -> str:
    """Generic download helper."""
    if save_path is None:
        save_path = get_download_path()
    searcher = SEARCHERS.get(searcher_name)
    if not searcher:
        return f"Error: Unknown searcher {searcher_name}"
    try:
        return searcher.download_pdf(paper_id, save_path)
    except NotImplementedError as e:
        return str(e)
    except Exception as e:
        logger.error(f"Download failed for {searcher_name}: {e}")
        return f"Error downloading: {str(e)}"


async def _read(
    searcher_name: str,
    paper_id: str,
    save_path: Optional[str] = None
) -> str:
    """Generic read helper."""
    if save_path is None:
        save_path = get_download_path()
    searcher = SEARCHERS.get(searcher_name)
    if not searcher:
        return f"Error: Unknown searcher {searcher_name}"
    try:
        return searcher.read_paper(paper_id, save_path)
    except NotImplementedError as e:
        return str(e)
    except Exception as e:
        logger.error(f"Read failed for {searcher_name}: {e}")
        return f"Error reading paper: {str(e)}"
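
# ------------------------------------------------------------
# Illustrative sketch (not in the original module): the three helpers
# above only assume that every SEARCHERS entry exposes search(),
# download_pdf(), and read_paper(). Inferred from those call sites,
# the implied interface could be spelled out as a Protocol; the
# concrete platform classes may declare richer signatures.
# ------------------------------------------------------------
from typing import Protocol


class PaperSource(Protocol):
    """Structural type implied by _search/_download/_read (assumed)."""

    def search(self, query: str, max_results: int = 10, **kwargs) -> List[Paper]: ...

    def download_pdf(self, paper_id: str, save_path: str) -> str: ...

    def read_paper(self, paper_id: str, save_path: str) -> str: ...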
{str(e)}" async def _read( searcher_name: str, paper_id: str, save_path: Optional[str] = None ) -> str: """通用阅读函数""" if save_path is None: save_path = get_download_path() searcher = SEARCHERS.get(searcher_name) if not searcher: return f"Error: Unknown searcher {searcher_name}" try: return searcher.read_paper(paper_id, save_path) except NotImplementedError as e: return str(e) except Exception as e: logger.error(f"Read failed for {searcher_name}: {e}") return f"Error reading paper: {str(e)}" # ============================================================ # arXiv 工具 # ============================================================ @mcp.tool() async def search_arxiv(query: str, max_results: int = 10) -> List[Dict]: """Search preprints on arXiv - major open-access preprint server. USE THIS TOOL WHEN: - Searching for PREPRINTS (not peer-reviewed yet) - You need free, immediate access to full-text PDFs - Searching in: Physics, Mathematics, Computer Science, Statistics, Quantitative Biology, Quantitative Finance, Electrical Engineering NOTE: arXiv is a PREPRINT server - papers may not be peer-reviewed. For peer-reviewed papers, use search_crossref or search_semantic. WORKFLOW: 1. search_arxiv(query) -> get paper_id (e.g., '2106.12345') 2. download_arxiv(paper_id) -> get PDF (always available) 3. read_arxiv_paper(paper_id) -> get full text as Markdown Args: query: Search terms in any supported field. max_results: Number of results (default: 10). Returns: List of paper dicts with: paper_id, title, authors, abstract, published_date, pdf_url, categories. Example: search_arxiv("quantum computing error correction", max_results=5) """ return await _search('arxiv', query, max_results) @mcp.tool() async def download_arxiv(paper_id: str, save_path: Optional[str] = None) -> str: """Download PDF from arXiv (always free and available). Args: paper_id: arXiv ID (e.g., '2106.12345', '2312.00001v2'). save_path: Directory to save PDF (default: ~/paper_downloads). Returns: Path to downloaded PDF file. Example: download_arxiv("2106.12345") """ return await _download('arxiv', paper_id, save_path) @mcp.tool() async def read_arxiv_paper(paper_id: str, save_path: Optional[str] = None) -> str: """Download and extract full text from arXiv paper as Markdown. Args: paper_id: arXiv ID (e.g., '2106.12345'). save_path: Directory to save PDF (default: ~/paper_downloads). Returns: Full paper text in Markdown format. Example: read_arxiv_paper("2106.12345") """ return await _read('arxiv', paper_id, save_path) # ============================================================ # PubMed 工具 # ============================================================ @mcp.tool() async def search_pubmed(query: str, max_results: int = 10) -> List[Dict]: """Search biomedical literature on PubMed (NCBI database). USE THIS TOOL WHEN: - Searching for medical, clinical, or biomedical research - You need peer-reviewed published papers (not preprints) - Searching for drug studies, clinical trials, disease research DOMAIN: Medicine, Biology, Pharmacology, Public Health, Clinical Research, Genetics, Biochemistry. LIMITATION: PubMed provides metadata/abstracts ONLY, not full PDFs. WORKFLOW FOR FULL TEXT: 1. search_pubmed(query) -> get DOI from results 2. download_scihub(doi) -> download PDF (if published before 2023) Args: query: Medical/scientific terms (e.g., 'cancer immunotherapy', 'COVID-19 vaccine'). max_results: Number of results (default: 10). Returns: List of paper dicts with: paper_id (PMID), title, authors, abstract, published_date, doi, url. 

@mcp.tool()
async def download_pubmed(paper_id: str, save_path: Optional[str] = None) -> str:
    """PubMed does NOT support direct PDF downloads.

    PubMed is a metadata database - it does not host PDFs.

    INSTEAD (try in order):
    1. download_scihub(doi) - if published before 2023
    2. download_semantic(id) - last resort

    Args:
        paper_id: PMID (unused).
        save_path: Unused.

    Returns:
        Error message with alternatives.
    """
    return await _download('pubmed', paper_id, save_path)


@mcp.tool()
async def read_pubmed_paper(paper_id: str, save_path: Optional[str] = None) -> str:
    """PubMed does NOT support direct paper reading.

    INSTEAD (try in order):
    1. read_scihub_paper(doi) - if published before 2023
    2. read_semantic_paper(id) - last resort

    Args:
        paper_id: PMID (unused).
        save_path: Unused.

    Returns:
        Error message with alternatives.
    """
    return await _read('pubmed', paper_id, save_path)

# ============================================================
# bioRxiv tools
# ============================================================

@mcp.tool()
async def search_biorxiv(query: str, max_results: int = 10) -> List[Dict]:
    """Search biology preprints on bioRxiv.

    USE THIS TOOL WHEN:
    - Searching for cutting-edge biology research (preprints)
    - You need the latest findings before peer review
    - Searching by CATEGORY, not keyword (see below)

    DOMAIN: Molecular Biology, Cell Biology, Genetics, Neuroscience,
    Bioinformatics, Evolutionary Biology, Microbiology, etc.

    NOTE: bioRxiv search uses CATEGORY names, not keywords.
    Categories: 'neuroscience', 'cell_biology', 'genetics', 'genomics',
    'bioinformatics', 'cancer_biology', 'immunology', etc.

    WORKFLOW:
    1. search_biorxiv(category) -> get DOI
    2. download_biorxiv(doi) or read_biorxiv_paper(doi)

    Args:
        query: Category name (e.g., 'neuroscience', 'cell_biology').
        max_results: Number of results (default: 10).

    Returns:
        List of recent preprints in that category.

    Example:
        search_biorxiv("neuroscience", max_results=5)
    """
    return await _search('biorxiv', query, max_results)


@mcp.tool()
async def download_biorxiv(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download PDF from bioRxiv (free and open access).

    Args:
        paper_id: bioRxiv DOI (e.g., '10.1101/2024.01.01.123456').
        save_path: Directory to save PDF.

    Returns:
        Path to downloaded PDF.
    """
    return await _download('biorxiv', paper_id, save_path)


@mcp.tool()
async def read_biorxiv_paper(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download and extract full text from bioRxiv paper.

    Args:
        paper_id: bioRxiv DOI.
        save_path: Directory to save PDF.

    Returns:
        Full paper text in Markdown format.
    """
    return await _read('biorxiv', paper_id, save_path)
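
# ------------------------------------------------------------
# Illustrative only (not a registered tool): the category-based flow
# the bioRxiv docstrings describe. Assumes each result dict exposes
# the preprint DOI under 'paper_id', matching the input that
# download_biorxiv/read_biorxiv_paper expect.
# ------------------------------------------------------------
async def _example_biorxiv_latest(category: str = "neuroscience") -> str:
    """Sketch: category search -> DOI -> full text for one preprint."""
    preprints = await search_biorxiv(category, max_results=1)
    if not preprints:
        return f"Error: no recent preprints in category '{category}'"
    return await read_biorxiv_paper(preprints[0]["paper_id"])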

# ============================================================
# medRxiv tools
# ============================================================

@mcp.tool()
async def search_medrxiv(query: str, max_results: int = 10) -> List[Dict]:
    """Search medical preprints on medRxiv.

    USE THIS TOOL WHEN:
    - Searching for clinical/medical research preprints
    - You need the latest COVID-19, epidemiology, or clinical studies
    - Searching by CATEGORY, not keyword (see below)

    DOMAIN: Epidemiology, Infectious Diseases, Cardiology, Oncology,
    Public Health, Psychiatry, Health Informatics, etc.

    NOTE: medRxiv search uses CATEGORY names, not keywords.
    Categories: 'infectious_diseases', 'epidemiology', 'cardiology',
    'oncology', 'health_informatics', 'psychiatry', etc.

    WORKFLOW:
    1. search_medrxiv(category) -> get DOI
    2. download_medrxiv(doi) or read_medrxiv_paper(doi)

    Args:
        query: Category name (e.g., 'infectious_diseases', 'epidemiology').
        max_results: Number of results (default: 10).

    Returns:
        List of recent preprints in that category.

    Example:
        search_medrxiv("infectious_diseases", max_results=5)
    """
    return await _search('medrxiv', query, max_results)


@mcp.tool()
async def download_medrxiv(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download PDF from medRxiv (free and open access).

    Args:
        paper_id: medRxiv DOI (e.g., '10.1101/2024.01.01.12345678').
        save_path: Directory to save PDF.

    Returns:
        Path to downloaded PDF.
    """
    return await _download('medrxiv', paper_id, save_path)


@mcp.tool()
async def read_medrxiv_paper(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download and extract full text from medRxiv paper.

    Args:
        paper_id: medRxiv DOI.
        save_path: Directory to save PDF.

    Returns:
        Full paper text in Markdown format.
    """
    return await _read('medrxiv', paper_id, save_path)

# ============================================================
# Google Scholar tools
# ============================================================

@mcp.tool()
async def search_google_scholar(query: str, max_results: int = 10) -> List[Dict]:
    """Search academic papers on Google Scholar (broad coverage).

    USE THIS TOOL WHEN:
    - You need broad academic search across ALL disciplines
    - You want citation counts and "cited by" information
    - Other specialized tools don't cover your topic

    COVERAGE: All academic disciplines, books, theses, patents.

    LIMITATIONS:
    - Uses web scraping (may be rate-limited)
    - Does NOT support PDF download

    FOR FULL TEXT (try in order):
    1. download_arxiv(id) - if arXiv preprint
    2. download_scihub(doi) - if published before 2023
    3. download_semantic(id) - last resort

    Args:
        query: Search terms (any academic topic).
        max_results: Number of results (default: 10, keep small to avoid blocks).

    Returns:
        List of paper dicts with: title, authors, abstract snippet,
        citations count, url, source.

    Example:
        search_google_scholar("climate change economic impact", max_results=5)
    """
    return await _search('google_scholar', query, max_results)

# ============================================================
# IACR ePrint tools
# ============================================================

@mcp.tool()
async def search_iacr(
    query: str,
    max_results: int = 10,
    fetch_details: bool = True
) -> List[Dict]:
    """Search cryptography papers on IACR ePrint Archive.

    USE THIS TOOL WHEN:
    - Searching for cryptography or security research
    - You need papers on encryption, blockchain, zero-knowledge proofs
    - Looking for security protocols, hash functions, signatures

    DOMAIN: Cryptography ONLY - encryption, signatures, protocols,
    blockchain, secure computation, zero-knowledge, hash functions.

    All papers are FREE and open access with PDF download.

    WORKFLOW:
    1. search_iacr(query) -> get paper_id (e.g., '2024/123')
    2. download_iacr(paper_id) or read_iacr_paper(paper_id)

    Args:
        query: Crypto terms (e.g., 'zero knowledge', 'homomorphic encryption').
        max_results: Number of results (default: 10).
        fetch_details: Get full metadata per paper (default: True).

    Returns:
        List of paper dicts with: paper_id, title, authors, abstract,
        published_date, pdf_url.

    Example:
        search_iacr("post-quantum cryptography", max_results=5)
    """
    # IACR takes an extra fetch_details flag, so it calls its searcher
    # directly instead of going through the generic _search() helper.
    searcher = SEARCHERS['iacr']
    try:
        papers = searcher.search(query, max_results, fetch_details)
        return [paper.to_dict() for paper in papers] if papers else []
    except Exception as e:
        logger.error(f"IACR search failed: {e}")
        return []

@mcp.tool()
async def download_iacr(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download PDF from IACR ePrint (always free).

    Args:
        paper_id: IACR ID (e.g., '2024/123', '2009/101').
        save_path: Directory to save PDF.

    Returns:
        Path to downloaded PDF.

    Example:
        download_iacr("2024/123")
    """
    return await _download('iacr', paper_id, save_path)


@mcp.tool()
async def read_iacr_paper(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download and extract full text from IACR paper.

    Args:
        paper_id: IACR ID (e.g., '2024/123').
        save_path: Directory to save PDF.

    Returns:
        Full paper text in Markdown format.

    Example:
        read_iacr_paper("2024/123")
    """
    return await _read('iacr', paper_id, save_path)

# ============================================================
# Semantic Scholar tools
# ============================================================

@mcp.tool()
async def search_semantic(
    query: str,
    year: Optional[str] = None,
    max_results: int = 10
) -> List[Dict]:
    """Search papers on Semantic Scholar - general-purpose academic search engine.

    USE THIS TOOL WHEN:
    - You want to search across ALL academic disciplines
    - You need citation counts and influence metrics
    - You want to filter by publication year
    - You need open-access PDF links when available

    COVERAGE: ALL academic fields - sciences, humanities, medicine, etc.
    Indexes 200M+ papers from journals, conferences, and preprints.

    WORKFLOW:
    1. search_semantic(query) -> get paper_id or DOI
    2. download_semantic(paper_id) -> get PDF (if open-access)
    3. If no PDF: use download_scihub(doi) for older papers

    Args:
        query: Search terms (any topic, any field).
        year: Optional year filter: '2023', '2020-2023', '2020-', '-2019'.
        max_results: Number of results (default: 10).

    Returns:
        List of paper dicts with: paper_id, title, authors, abstract,
        published_date, doi, citations, url, pdf_url (if available).

    Example:
        search_semantic("climate change impact agriculture", year="2020-", max_results=5)
    """
    kwargs = {'year': year} if year else {}
    return await _search('semantic', query, max_results, **kwargs)


@mcp.tool()
async def download_semantic(paper_id: str, save_path: Optional[str] = None) -> str:
    """Download PDF via Semantic Scholar (open-access only, use as LAST RESORT).

    DOWNLOAD PRIORITY (try in order):
    1. If arXiv paper -> use download_arxiv(arxiv_id) (always works)
    2. If published before 2023 -> use download_scihub(doi)
    3. Use this tool as last resort (may not have PDF)

    Args:
        paper_id: Semantic Scholar ID, or prefixed: 'DOI:xxx', 'ARXIV:xxx', 'PMID:xxx'
        save_path: Directory to save PDF.

    Returns:
        Path to downloaded PDF, or error if not available.
    """
    return await _download('semantic', paper_id, save_path)


@mcp.tool()
async def read_semantic_paper(paper_id: str, save_path: Optional[str] = None) -> str:
    """Read paper via Semantic Scholar (open-access only, use as LAST RESORT).

    DOWNLOAD PRIORITY (try in order):
    1. If arXiv paper -> use read_arxiv_paper(arxiv_id)
    2. If published before 2023 -> use read_scihub_paper(doi)
    3. Use this tool as last resort

    Args:
        paper_id: Semantic Scholar ID or prefixed ID (DOI:, ARXIV:, PMID:).
        save_path: Directory to save PDF.

    Returns:
        Full paper text in Markdown format.
    """
    return await _read('semantic', paper_id, save_path)
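
# ------------------------------------------------------------
# Illustrative only (not a registered tool): the DOWNLOAD PRIORITY
# the Semantic Scholar docstrings describe - arXiv first, Sci-Hub for
# older paywalled papers, Semantic Scholar open-access last. Whether
# Sci-Hub actually holds the paper depends on its publication year,
# which this sketch does not check.
# ------------------------------------------------------------
async def _example_fulltext_fallback(doi: str, arxiv_id: Optional[str] = None) -> str:
    """Sketch: try arXiv, then Sci-Hub, then Semantic Scholar."""
    if arxiv_id:
        return await read_arxiv_paper(arxiv_id)
    text = await read_scihub_paper(doi)
    if not text.startswith("Error"):
        return text
    return await read_semantic_paper(f"DOI:{doi}")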
""" return await _read('semantic', paper_id, save_path) # ============================================================ # CrossRef 工具 # ============================================================ @mcp.tool() async def search_crossref( query: str, max_results: int = 10, **kwargs ) -> List[Dict]: """Search academic papers in CrossRef - the largest DOI citation database. USE THIS TOOL WHEN: - You need to find papers by DOI or citation metadata - You want to search across all academic publishers (not just preprints) - You need publication metadata like journal, volume, issue, citations - You want to verify if a DOI exists or get its metadata CrossRef indexes 150M+ scholarly works from thousands of publishers. Results include DOI, authors, title, abstract, citations, and publisher info. Args: query: Search terms (e.g., 'machine learning', 'CRISPR gene editing'). max_results: Number of results (default: 10, max: 1000). **kwargs: Optional filters: - filter: 'has-full-text:true,from-pub-date:2020' - sort: 'relevance' | 'published' | 'cited' - order: 'asc' | 'desc' Returns: List of paper metadata dicts with keys: paper_id (DOI), title, authors, abstract, doi, published_date, citations, url. Example: search_crossref("attention mechanism transformer", max_results=5) """ return await _search('crossref', query, max_results, **kwargs) @mcp.tool() async def get_crossref_paper_by_doi(doi: str) -> Dict: """Get paper metadata from CrossRef using its DOI. USE THIS TOOL WHEN: - You have a DOI and need full metadata (title, authors, journal, etc.) - You want to verify a DOI exists - You need citation count for a specific paper Args: doi: Digital Object Identifier (e.g., '10.1038/nature12373'). Returns: Paper metadata dict, or empty dict {} if DOI not found. Example: get_crossref_paper_by_doi("10.1038/nature12373") """ searcher = SEARCHERS['crossref'] try: paper = searcher.get_paper_by_doi(doi) return paper.to_dict() if paper else {} except Exception as e: logger.error(f"CrossRef DOI lookup failed: {e}") return {} @mcp.tool() async def download_crossref(paper_id: str, save_path: Optional[str] = None) -> str: """CrossRef does NOT support direct PDF downloads. CrossRef is a metadata/citation database only - it does not host PDFs. INSTEAD (try in order): 1. download_arxiv(id) - if arXiv preprint (always works) 2. download_scihub(doi) - if published before 2023 3. download_semantic(id) - last resort (may not have PDF) Args: paper_id: DOI (e.g., '10.1038/nature12373'). save_path: Unused. Returns: Error message explaining alternatives. """ return await _download('crossref', paper_id, save_path) @mcp.tool() async def read_crossref_paper(paper_id: str, save_path: Optional[str] = None) -> str: """CrossRef does NOT support direct paper reading. CrossRef provides metadata only, not full-text content. INSTEAD (try in order): 1. read_arxiv_paper(id) - if arXiv preprint 2. read_scihub_paper(doi) - if published before 2023 3. read_semantic_paper(id) - last resort Args: paper_id: DOI (e.g., '10.1038/nature12373'). save_path: Unused. Returns: Error message explaining alternatives. 
""" return await _read('crossref', paper_id, save_path) # ============================================================ # RePEc/IDEAS 工具 # ============================================================ @mcp.tool() async def search_repec( query: str, max_results: int = 10, year_from: Optional[int] = None, year_to: Optional[int] = None, search_field: str = 'all', sort_by: str = 'relevance', doc_type: str = 'all', series: Optional[str] = None, ) -> List[Dict]: """Search economics papers on RePEc/IDEAS - the largest open economics bibliography. USE THIS TOOL WHEN: - Searching for ECONOMICS research (macro, micro, finance, econometrics) - You need working papers from NBER, Federal Reserve, World Bank, etc. - You want to find papers by JEL classification - Searching for economic policy analysis COVERAGE: 4.5M+ items including: - Working Papers: NBER, Fed banks, ECB, IMF, World Bank - Journal Articles: AER, JPE, QJE, Econometrica, etc. - Books and Book Chapters SEARCH SYNTAX: - Boolean: + for AND, | for OR, ~ for NOT (e.g., 'money ~liquidity') - Phrase: use double quotes (e.g., '"monetary policy"') - Author(Year): e.g., 'Acemoglu (2019)' or 'Kydland Prescott (1977)' - Synonyms: automatic (labor=labour, USA=United States) - Word stemming: automatic (find matches finds, finding, findings) LIMITATION: RePEc provides metadata only, not full PDFs. PDFs are hosted at original institutions (often freely available). Args: query: Search terms with optional boolean operators. max_results: Number of results (default: 10). year_from: Optional start year filter (e.g., 2020). year_to: Optional end year filter (e.g., 2025). search_field: Where to search, one of: - 'all': Whole record (default) - 'abstract': Abstract only - 'keywords': Keywords only - 'title': Title only - 'author': Author only sort_by: How to sort results, one of: - 'relevance': Most relevant (default) - 'newest': Most recent first - 'oldest': Oldest first - 'citations': Most cited first - 'recent_relevant': Recent and relevant - 'relevant_cited': Relevant and cited doc_type: Document type filter, one of: - 'all': All types (default) - 'articles': Journal articles - 'papers': Working papers (NBER, Fed, etc.) - 'chapters': Book chapters - 'books': Books - 'software': Software components series: Institution/journal series to search within, one of: - Institutions: 'nber', 'imf', 'worldbank', 'ecb', 'bis', 'cepr', 'iza' - Federal Reserve: 'fed', 'fed_ny', 'fed_chicago', 'fed_stlouis' - Top 5 Journals: 'aer', 'jpe', 'qje', 'econometrica', 'restud' - Other journals: 'jfe', 'jme', 'aej_macro', 'aej_micro', 'aej_applied' Returns: List of paper dicts with: paper_id (RePEc handle), title, authors, abstract, published_date, url, categories (JEL codes). Example: search_repec('inflation', series='nber') # Search NBER only search_repec('causal inference', series='aer', sort_by='newest') search_repec('machine learning', series='fed', year_from=2020) """ kwargs = { 'year_from': year_from, 'year_to': year_to, 'search_field': search_field, 'sort_by': sort_by, 'doc_type': doc_type, 'series': series, } # Remove None values kwargs = {k: v for k, v in kwargs.items() if v is not None} return await _search('repec', query, max_results, **kwargs) @mcp.tool() async def download_repec(paper_id: str, save_path: Optional[str] = None) -> str: """RePEc/IDEAS does NOT support direct PDF downloads. RePEc is a metadata index - PDFs are hosted at original institutions. INSTEAD (try in order): 1. Visit paper URL - many NBER/Fed papers are freely available 2. 

@mcp.tool()
async def read_repec_paper(paper_id: str, save_path: Optional[str] = None) -> str:
    """RePEc/IDEAS does NOT support direct paper reading.

    INSTEAD (try in order):
    1. Visit the paper URL - many NBER/Fed papers are freely available
    2. read_scihub_paper(doi) - if published before 2023

    Args:
        paper_id: RePEc handle (unused).
        save_path: Unused.

    Returns:
        Error message with alternatives.
    """
    return await _read('repec', paper_id, save_path)


@mcp.tool()
async def get_repec_paper(url_or_handle: str) -> Dict:
    """Get detailed paper information from RePEc/IDEAS.

    Fetches complete metadata from an IDEAS paper detail page, including
    abstract, authors, keywords, and JEL codes that may be missing from
    search results.

    USE THIS WHEN:
    - You have a paper URL/handle from search results and need the abstract
    - You want complete author information for a specific paper
    - You need JEL classification codes or keywords

    Args:
        url_or_handle: Paper URL or RePEc handle, e.g.:
            - URL: "https://ideas.repec.org/p/nbr/nberwo/32000.html"
            - Handle: "RePEc:nbr:nberwo:32000"

    Returns:
        Paper dict with: paper_id, title, authors, abstract, keywords,
        categories (JEL codes), published_date, url, pdf_url (if available),
        doi (if found), and extra info like journal name.

    Example:
        get_repec_paper("https://ideas.repec.org/a/aea/aecrev/v110y2020i1p1-40.html")
    """
    searcher = SEARCHERS['repec']
    paper = searcher.get_paper_details(url_or_handle)
    if paper:
        return paper.to_dict()
    else:
        return {"error": f"Failed to fetch paper details from: {url_or_handle}"}

# ============================================================
# Sci-Hub tools (download only, no search)
# ============================================================

@mcp.tool()
async def download_scihub(doi: str, save_path: Optional[str] = None) -> str:
    """Download paper PDF via Sci-Hub using DOI (for older papers only).

    USE THIS TOOL WHEN:
    - You have a DOI and need the full PDF
    - The paper was published BEFORE 2023
    - The paper is behind a paywall and not on arXiv
    - You first searched CrossRef and got the DOI

    WORKFLOW:
        search_crossref(query) -> get DOI -> download_scihub(doi)

    Args:
        doi: Paper DOI (e.g., '10.1038/nature12373', '10.1126/science.1234567').
        save_path: Directory to save PDF (default: ~/paper_downloads).

    Returns:
        Path to downloaded PDF file (e.g., 'downloads/scihub_10.1038_xxx.pdf'),
        or an error message if the download fails.

    Example:
        download_scihub("10.1038/nature12373")  # 2013 Nature paper
    """
    if save_path is None:
        save_path = get_download_path()
    try:
        return SCIHUB.download_pdf(doi, save_path)
    except Exception as e:
        logger.error(f"Sci-Hub download failed: {e}")
        return f"Error: {e}"


@mcp.tool()
async def read_scihub_paper(doi: str, save_path: Optional[str] = None) -> str:
    """Download and extract full text from a paper via Sci-Hub (older papers only).

    USE THIS TOOL WHEN:
    - You need the complete text content of a paper (not just the abstract)
    - The paper was published BEFORE 2023
    - You want to analyze, summarize, or answer questions about a paper

    This downloads the PDF and extracts the text as clean Markdown,
    suitable for LLM processing. Includes paper metadata at the start.

    WORKFLOW:
        search_crossref(query) -> get DOI -> read_scihub_paper(doi)

    Args:
        doi: Paper DOI (e.g., '10.1038/nature12373').
        save_path: Directory to save PDF (default: ~/paper_downloads).

    Returns:
        Full paper text in Markdown format with a metadata header,
        or an error message if download/extraction fails.

    Example:
        read_scihub_paper("10.1038/nature12373")
    """
    if save_path is None:
        save_path = get_download_path()
    try:
        return SCIHUB.read_paper(doi, save_path)
    except Exception as e:
        logger.error(f"Sci-Hub read failed: {e}")
        return f"Error: {e}"
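
# ------------------------------------------------------------
# Illustrative only (not a registered tool): the WORKFLOW line from
# the Sci-Hub docstrings above, composed end to end. Assumes CrossRef
# results carry the DOI under 'doi', as search_crossref documents.
# ------------------------------------------------------------
async def _example_crossref_to_scihub(query: str) -> str:
    """Sketch: search_crossref(query) -> get DOI -> read_scihub_paper(doi)."""
    hits = await search_crossref(query, max_results=1)
    if not hits or not hits[0].get("doi"):
        return "Error: no DOI found via CrossRef"
    return await read_scihub_paper(hits[0]["doi"])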

# ============================================================
# Server entry point
# ============================================================

if __name__ == "__main__":
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    # Run the MCP server over stdio
    mcp.run(transport="stdio")
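# ------------------------------------------------------------
# Usage note (an addition, not from the original file): because this
# module uses relative imports (from .academic_platforms ...), run it
# as a package module rather than as a standalone script, e.g.:
#
#   python -m paper_search_mcp.server
#
# An MCP client can then talk to it over the stdio transport
# configured above.
# ------------------------------------------------------------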
