search_scihub

Search for academic papers on Sci-Hub by DOI or paper URL. Automatically detects available mirrors and, when requested, downloads the PDF file.

Instructions

Search and download papers from Sci-Hub using DOI or paper URL. Automatically detects and uses the fastest available mirror.

Input Schema

Table | JSON Schema

Name	Required	Description
`doiOrUrl`	Yes	DOI (e.g., "10.1038/nature12373") or full paper URL
`downloadPdf`	No	Whether to download the PDF file (default: false)
`savePath`	No	Directory to save the PDF file (if downloadPdf is true); the handler falls back to `./downloads` when omitted

Implementation Reference

src/mcp/handleToolCall.ts:291-313 (handler)

Executes the 'search_scihub' tool by calling the SciHub searcher's search method and optionally downloading the PDF.

// Handler for the 'search_scihub' tool.
// Looks the paper up via the Sci-Hub searcher, returns its metadata as pretty-printed
// JSON, and — only when `downloadPdf` is set and the result carries a `pdfUrl` —
// attempts to download the PDF. A download failure is reported inside the response
// text instead of being thrown, so the metadata is still returned to the caller.
case 'search_scihub': {
  const { doiOrUrl, downloadPdf, savePath } = args;
  // `||` rather than `??`: an empty-string savePath also falls back to the default directory.
  const resolvedSavePath = savePath || './downloads';

  const results = await searchers.scihub.search(doiOrUrl);
  if (results.length === 0) {
    return jsonTextResponse(`No paper found on Sci-Hub for: ${doiOrUrl}`);
  }

  // Only the first search result is reported.
  const paper = results[0];
  let responseText = `Found paper on Sci-Hub:\n\n${JSON.stringify(PaperFactory.toDict(paper), null, 2)}`;

  if (downloadPdf && paper.pdfUrl) {
    try {
      // The download is keyed by the original query, not by paper.pdfUrl.
      const filePath = await searchers.scihub.downloadPdf(doiOrUrl, { savePath: resolvedSavePath });
      responseText += `\n\nPDF downloaded successfully to: ${filePath}`;
    } catch (downloadError: unknown) {
      // Anything can be thrown; only Error instances are guaranteed to have `.message`.
      const reason = downloadError instanceof Error ? downloadError.message : String(downloadError);
      responseText += `\n\nFailed to download PDF: ${reason}`;
    }
  }

  return jsonTextResponse(responseText);
}

src/mcp/schemas.ts:134-140 (schema)

Zod input schema for validating arguments to the 'search_scihub' tool.

/**
 * Zod schema validating the arguments of the 'search_scihub' tool.
 *
 * - `doiOrUrl`: required, non-empty string.
 * - `downloadPdf`: optional boolean; parsed value is `false` when omitted.
 * - `savePath`: optional string; no default is applied at the schema level.
 *
 * `.strip()` is Zod's "drop unrecognised keys" mode: extra properties are
 * removed from the parsed output rather than causing a validation error.
 */
export const SearchSciHubSchema = z
  .object({
    doiOrUrl: z.string().min(1),
    downloadPdf: z.boolean().optional().default(false),
    savePath: z.string().optional()
  })
  .strip();

src/mcp/tools.ts:314-337 (registration)

Registers the 'search_scihub' tool in the TOOLS array with metadata and JSON input schema.

// Tool descriptor for 'search_scihub': the name, human-readable description and
// JSON Schema of the accepted arguments. Only `doiOrUrl` is required.
{
  name: 'search_scihub',
  description:
    'Search and download papers from Sci-Hub using DOI or paper URL. Automatically detects and uses the fastest available mirror.',
  inputSchema: {
    type: 'object',
    properties: {
      // The paper to look up.
      doiOrUrl: {
        type: 'string',
        description: 'DOI (e.g., "10.1038/nature12373") or full paper URL'
      },
      // Opt-in PDF download; off unless explicitly set to true.
      downloadPdf: {
        type: 'boolean',
        description: 'Whether to download the PDF file',
        default: false
      },
      // Target directory for the download; no default is declared in this schema.
      savePath: {
        type: 'string',
        description: 'Directory to save the PDF file (if downloadPdf is true)'
      }
    },
    required: ['doiOrUrl']
  }
},

src/platforms/SciHubSearcher.ts:241-345 (helper)

Core helper function in SciHubSearcher that fetches paper information from Sci-Hub mirrors using web scraping to extract PDF URLs and metadata.

/**
 * Looks a paper up on Sci-Hub and scrapes the returned page for its PDF link.
 *
 * Starting from the mirror returned by `getCurrentMirror()`, requests
 * `<mirror>/<query>`. Every unsuccessful attempt (request error, non-200
 * status, or a page without a PDF link) calls `markMirrorFailed()` and
 * continues with the mirror it returns, for at most `maxRetries` attempts.
 *
 * @param doiOrUrl - DOI or paper URL; a leading `doi:` prefix is stripped.
 * @returns A `Paper` built from the scraped page, or `null` when no attempt
 *          produced a PDF link.
 */
private async fetchPaperInfo(doiOrUrl: string): Promise<Paper | null> {
  let currentMirror = await this.getCurrentMirror();
  let retries = 0;

  // Normalise the query: drop an optional "doi:" prefix.
  const cleanedQuery = doiOrUrl.replace(/^doi:\s*/i, '');

  // Elements that may carry the PDF location in their `src`, in priority order.
  const pdfSourceSelectors = ['#pdf', 'embed[type="application/pdf"]', 'iframe[src*=".pdf"]'];

  while (retries < this.maxRetries) {
    try {
      const searchUrl = `${currentMirror}/${cleanedQuery}`;
      logDebug(`Searching on ${currentMirror} for: ${cleanedQuery}`);

      const response = await this.axiosInstance.get(searchUrl);

      if (response.status === 200) {
        const $ = cheerio.load(response.data);

        let pdfUrl = '';

        // Take the first candidate that actually has a non-empty `src`. A matching
        // element without `src` must not stop the remaining candidates from being tried.
        for (const selector of pdfSourceSelectors) {
          const src = $(selector).attr('src');
          if (src) {
            pdfUrl = src;
            break;
          }
        }

        // Fallback: a download button whose onclick navigates to the PDF.
        if (!pdfUrl) {
          const downloadButton = $('button[onclick*="download"]');
          if (downloadButton.length > 0) {
            const onclickAttr = downloadButton.attr('onclick') || '';
            const match = onclickAttr.match(/location\.href='([^']+)'/);
            if (match) {
              pdfUrl = match[1];
            }
          }
        }

        // Turn scheme-relative, root-relative and path-relative links into absolute URLs.
        if (pdfUrl.startsWith('//')) {
          pdfUrl = 'https:' + pdfUrl;
        } else if (pdfUrl && !/^https?:\/\//i.test(pdfUrl)) {
          // Resolve against the page that was requested, as a browser would.
          pdfUrl = new URL(pdfUrl, searchUrl).toString();
        }

        if (pdfUrl) {
          // Title: start from <title>, but prefer the first sentence of #citation when present.
          let title = $('title').text();
          const citation = $('#citation').text();
          if (citation) {
            const titleMatch = citation.match(/([^.]+)\./);
            if (titleMatch) {
              title = titleMatch[1].trim();
            }
          }

          // Strip Sci-Hub branding from the title.
          title = title.replace(/\s*\|\s*Sci-Hub.*$/, '')
                      .replace(/Sci-Hub\s*:\s*/, '')
                      .trim();

          return PaperFactory.create({
            paperId: cleanedQuery,
            title: title || `Paper: ${cleanedQuery}`,
            source: 'scihub',
            authors: [],
            abstract: '',
            // The query is stored as the DOI only when it validates and contains "10.".
            doi: this.isValidDOIOrURL(cleanedQuery) && cleanedQuery.includes('10.')
                 ? cleanedQuery
                 : '',
            publishedDate: null,
            pdfUrl: pdfUrl,
            url: searchUrl,
            extra: {
              mirror: currentMirror,
              fetchedAt: new Date().toISOString()
            }
          });
        } else {
          logDebug(`Paper not found on ${currentMirror}`);
          currentMirror = await this.markMirrorFailed(currentMirror);
          retries++;
        }
      } else {
        logDebug(`Unexpected status ${response.status} from ${currentMirror}`);
        currentMirror = await this.markMirrorFailed(currentMirror);
        retries++;
      }
    } catch (error: unknown) {
      // Anything can be thrown; only Error instances are guaranteed to have `.message`.
      const message = error instanceof Error ? error.message : String(error);
      logDebug(`Error fetching from ${currentMirror}:`, message);
      currentMirror = await this.markMirrorFailed(currentMirror);
      retries++;
    }
  }

  return null;
}

Paper Search MCP

search_scihub

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API