Academic MCP Server

scihub_adapter.py•9.71 KiB

"""
Sci-Hub Adapter
Provides access to Sci-Hub for academic paper retrieval
"""

import logging
from typing import List, Dict, Any, Optional
from urllib.parse import quote

import requests

from .base_adapter import BaseAdapter

# Diagnostics go through logging (stderr by default) instead of print(), so
# they never end up mixed into whatever the host process writes to stdout.
logger = logging.getLogger(__name__)


class SciHubAdapter(BaseAdapter):
    """Adapter for Sci-Hub paper search and retrieval.

    Bibliographic metadata is read from the CrossRef REST API; the optional
    ``scihub`` package is used to resolve a DOI to a PDF location. When that
    package cannot be imported, ``available`` is ``False`` and the search,
    metadata and download methods return an error payload instead of raising.
    """

    _CROSSREF_WORKS_URL = "https://api.crossref.org/works"
    _CROSSREF_TIMEOUT = 10  # seconds, applied to every CrossRef request

    def __init__(self):
        """Initialize Sci-Hub adapter."""
        self.source_name = "scihub"
        self.sh = None
        try:
            from scihub import SciHub
        except ImportError:
            logger.warning(
                "scihub library not available. Install with: pip install scihub"
            )
            self.available = False
        else:
            self.sh = SciHub()
            self.sh.timeout = 30
            self.available = True

    def search_by_keywords(self, keywords: str, num_results: int = 10) -> List[Dict[str, Any]]:
        """
        Search papers by keywords using CrossRef API + Sci-Hub

        Args:
            keywords: Search query
            num_results: Number of results to return

        Returns:
            List of paper dictionaries. Only CrossRef hits that have a DOI and
            for which the Sci-Hub lookup returned something are included. A
            single ``{"error": ...}`` entry is returned when the scihub
            library is unavailable.
        """
        if not self.available:
            return [{"error": "Sci-Hub library not available"}]

        return self._search_crossref({'query': keywords}, num_results)

    def search_advanced(
        self,
        title: Optional[str] = None,
        author: Optional[str] = None,
        journal: Optional[str] = None,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        term: Optional[str] = None,
        num_results: int = 10
    ) -> List[Dict[str, Any]]:
        """
        Advanced search using CrossRef API

        Each argument is sent as its own CrossRef query parameter so that it
        is matched against the corresponding field rather than as free text.

        Args:
            title: Paper title
            author: Author name
            journal: Journal name
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            term: General search term
            num_results: Number of results

        Returns:
            List of paper dictionaries
        """
        if not self.available:
            return [{"error": "Sci-Hub library not available"}]

        params: Dict[str, Any] = {}
        if title:
            params['query.bibliographic'] = title
        if author:
            params['query.author'] = author
        if journal:
            params['query.container-title'] = journal
        if term:
            params['query'] = term
        if not params:
            # Same fallback as before: with no textual criteria, search a
            # generic term so the request still carries a query.
            params['query'] = "research"

        # Date bounds are expressed as CrossRef filters, comma-separated.
        filters = []
        if start_date:
            filters.append(f"from-pub-date:{start_date}")
        if end_date:
            filters.append(f"until-pub-date:{end_date}")
        if filters:
            params['filter'] = ",".join(filters)

        return self._search_crossref(params, num_results)

    def get_metadata(self, identifier: str) -> Dict[str, Any]:
        """
        Get metadata for a paper by DOI

        Args:
            identifier: DOI of the paper

        Returns:
            Dictionary with paper metadata, or ``{"error": ...}`` when the
            library is unavailable, the paper is not found, or a request fails.
        """
        if not self.available:
            return {"error": "Sci-Hub library not available"}

        try:
            # Get from Sci-Hub
            result = self._fetch_from_scihub(identifier)

            if result:
                # Also get CrossRef metadata for more details. The DOI is
                # percent-encoded so characters such as '#', '?' or spaces
                # cannot change the meaning of the URL; '/' stays literal.
                crossref_url = f"{self._CROSSREF_WORKS_URL}/{quote(identifier, safe='/')}"
                response = requests.get(crossref_url, timeout=self._CROSSREF_TIMEOUT)

                if response.status_code == 200:
                    data = response.json()['message']
                    return self._build_paper(
                        identifier,
                        data,
                        result,
                        fallback_title=result.get('title', ''),
                    )

            return {"error": f"Paper with DOI {identifier} not found"}

        except Exception as e:
            return {"error": f"Error retrieving metadata: {str(e)}"}

    def download_pdf(self, identifier: str, output_path: Optional[str] = None) -> str:
        """
        Download PDF from Sci-Hub

        Args:
            identifier: DOI of the paper
            output_path: Path to save the PDF. Defaults to
                ``paper_<DOI with '/' replaced by '_'>.pdf``.

        Returns:
            Status message
        """
        if not self.available:
            return "Error: Sci-Hub library not available"

        try:
            result = self._fetch_from_scihub(identifier)

            if not (result and result.get('url')):
                return f"Error: Could not find PDF for DOI {identifier}"

            if output_path is None:
                output_path = f"paper_{identifier.replace('/', '_')}.pdf"

            self.sh.download(result['url'], path=output_path)
            return f"PDF successfully downloaded to {output_path}"

        except Exception as e:
            return f"Error downloading PDF: {str(e)}"

    def _search_crossref(self, params: Dict[str, Any], num_results: int) -> List[Dict[str, Any]]:
        """Run a CrossRef ``/works`` query and return papers Sci-Hub can resolve.

        Args:
            params: CrossRef query parameters (without ``rows``). They are
                passed via ``params=`` so requests URL-encodes every value.
            num_results: Maximum number of CrossRef records to request.

        Returns:
            List of paper dictionaries; empty on a non-200 response or when
            the request or response parsing fails.
        """
        papers: List[Dict[str, Any]] = []

        try:
            response = requests.get(
                self._CROSSREF_WORKS_URL,
                params={**params, 'rows': num_results},
                timeout=self._CROSSREF_TIMEOUT,
            )
            if response.status_code != 200:
                return papers
            items = response.json()['message']['items'][:num_results]
        except Exception as e:
            # Best-effort search: report the failure and return no results.
            logger.error("Error searching Sci-Hub by keywords: %s", e)
            return papers

        for item in items:
            doi = item.get('DOI')
            if not doi:
                continue

            # Get paper info from Sci-Hub
            paper_info = self._fetch_from_scihub(doi)
            if not paper_info:
                continue

            # A malformed record must not discard the remaining results.
            try:
                papers.append(self._build_paper(doi, item, paper_info))
            except Exception as e:
                logger.error("Error processing CrossRef record %s: %s", doi, e)

        return papers

    def _build_paper(
        self,
        doi: str,
        record: Dict[str, Any],
        scihub_result: Dict[str, Any],
        fallback_title: str = ''
    ) -> Dict[str, Any]:
        """Combine a CrossRef record with the Sci-Hub lookup result.

        Args:
            doi: DOI of the paper.
            record: CrossRef work record.
            scihub_result: Value returned by ``_fetch_from_scihub``; its
                ``url`` entry becomes ``pdf_url``.
            fallback_title: Title used when the record has no title.

        Returns:
            Paper dictionary in the adapter's output format.
        """
        titles = record.get('title')
        journals = record.get('container-title')
        return {
            'id': doi,
            'title': titles[0] if titles else fallback_title,
            'authors': self._format_authors(record.get('author', [])),
            'abstract': record.get('abstract', 'N/A'),
            'publication_date': self._publication_date(record),
            'journal': journals[0] if journals else 'N/A',
            'url': f"https://doi.org/{doi}",
            'pdf_url': scihub_result.get('url', ''),
            'source': 'scihub'
        }

    def _publication_date(self, record: Dict[str, Any]) -> str:
        """Return the record's publication date, or "N/A".

        Prefers ``issued``, then ``published``; ``created`` (when the DOI
        record was created) is only a last resort.
        """
        for key in ('issued', 'published', 'created'):
            formatted = self._format_date(record.get(key) or {})
            if formatted != "N/A":
                return formatted
        return "N/A"

    def _fetch_from_scihub(self, doi: str) -> Optional[Dict[str, Any]]:
        """
        Fetch paper from Sci-Hub

        Args:
            doi: DOI of the paper

        Returns:
            Whatever ``self.sh.fetch`` returns, or None if the call raised
        """
        try:
            return self.sh.fetch(doi)
        except Exception as e:
            logger.error("Error fetching from Sci-Hub: %s", e)
            return None

    def _format_authors(self, authors: List[Dict]) -> str:
        """Format author list from CrossRef data.

        At most the first five authors are listed, followed by " et al." when
        there are more. Entries without given/family names fall back to their
        ``name`` field. Returns "N/A" when no usable name is found.
        """
        if not authors:
            return "N/A"

        author_names = []
        for author in authors[:5]:  # Limit to first 5 authors
            # "or ''" also covers keys that are present but null.
            given = author.get('given') or ''
            family = author.get('family') or ''
            full_name = f"{given} {family}".strip()
            if not full_name:
                full_name = (author.get('name') or '').strip()
            if full_name:
                author_names.append(full_name)

        if not author_names:
            return "N/A"

        result = ", ".join(author_names)
        if len(authors) > 5:
            result += " et al."
        return result

    def _format_date(self, date_dict: Dict) -> str:
        """Format date from CrossRef data.

        Reads the first entry of ``date-parts`` ([year, month, day]); a
        missing or null month/day defaults to 1. Returns "N/A" when there is
        no year or a part is not numeric.
        """
        if not date_dict:
            return "N/A"

        date_parts = date_dict.get('date-parts') or [[]]
        parts = date_parts[0] if date_parts else None
        # The year can be null in the payload (date-parts == [[None]]).
        if not parts or parts[0] is None:
            return "N/A"

        try:
            year = int(parts[0])
            month = int(parts[1]) if len(parts) >= 2 and parts[1] is not None else 1
            day = int(parts[2]) if len(parts) >= 3 and parts[2] is not None else 1
        except (TypeError, ValueError):
            return "N/A"

        return f"{year}-{month:02d}-{day:02d}"

    def get_source_name(self) -> str:
        """
        Get the name of the data source

        Returns:
            String name of the database
        """
        return self.source_name

    def search_by_title(self, title: str) -> Dict[str, Any]:
        """
        Search for a paper by title using CrossRef + Sci-Hub

        Args:
            title: Paper title

        Returns:
            Dictionary with paper info for the top CrossRef match, or
            ``{"error": ...}`` when nothing is found or a request fails.
        """
        if not self.available:
            return {"error": "Sci-Hub library not available"}

        try:
            # Search CrossRef for the title; params= URL-encodes the title.
            response = requests.get(
                self._CROSSREF_WORKS_URL,
                params={'query.bibliographic': title, 'rows': 1},
                timeout=self._CROSSREF_TIMEOUT,
            )

            if response.status_code == 200:
                items = response.json()['message']['items']
                if items:
                    doi = items[0].get('DOI')
                    if doi:
                        # Get full metadata
                        return self.get_metadata(doi)

            return {"error": f"Paper with title '{title}' not found"}

        except Exception as e:
            return {"error": f"Error searching by title: {str(e)}"}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nanyang12138/Academic-MCP-Server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

scihub_adapter.py•9.71 KiB