search-google-scholar
Search for academic research articles using Google Scholar to find relevant medical and scientific publications for research purposes.
Instructions
Search for academic research articles using Google Scholar
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Academic topic or research query to search for | |
Input Schema (JSON Schema)
{
"properties": {
"query": {
"description": "Academic topic or research query to search for",
"type": "string"
}
},
"required": [
"query"
],
"type": "object"
}
Implementation Reference
// src/utils.ts:730-1008 (handler): Core handler function that uses Puppeteer to scrape Google Scholar search results, extracts article data with anti-detection measures, and returns structured GoogleScholarArticle[]
/**
 * Scrapes one Google Scholar results page for `query` with a headless
 * Puppeteer browser and returns the articles found on it.
 *
 * The request URL carries `hl=en`, `as_sdt=0%2C5` and `as_ylo=2020`
 * (presumably restricting results to 2020 onward — confirm against the
 * Scholar query-parameter semantics).
 *
 * This function never rejects: any failure (launch, navigation, timeout,
 * extraction) is logged with `console.error` and yields an empty array. The
 * browser, once launched, is always closed in `finally`.
 *
 * @param query - Academic topic or research query; URL-encoded before use.
 * @returns The extracted articles, de-duplicated by title + URL; `[]` when
 *   nothing matched or an error occurred.
 */
export async function searchGoogleScholar(
  query: string,
): Promise<GoogleScholarArticle[]> {
  let browser;
  try {
    console.log(`🔍 Scraping Google Scholar for: ${query}`);

    // Add random delay to avoid rate limiting
    await randomDelay(2000, 5000);

    // Enhanced browser configuration for better anti-detection
    browser = await puppeteer.launch({
      headless: true,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--disable-gpu",
        "--disable-web-security",
        "--disable-features=VizDisplayCompositor",
        "--disable-blink-features=AutomationControlled",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-images",
        "--disable-javascript",
        "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
      ],
    });

    const page = await browser.newPage();

    // Enhanced stealth configuration: hide the `navigator.webdriver` flag
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, "webdriver", {
        get: () => undefined,
      });
    });

    // Random viewport size
    const viewports = [
      { width: 1920, height: 1080 },
      { width: 1366, height: 768 },
      { width: 1440, height: 900 },
      { width: 1536, height: 864 },
    ];
    const randomViewport =
      viewports[Math.floor(Math.random() * viewports.length)];
    await page.setViewport(randomViewport);

    // Rotate user agents (overrides the one passed on the command line above)
    const userAgents = [
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
    ];
    const randomUA = userAgents[Math.floor(Math.random() * userAgents.length)];
    await page.setUserAgent(randomUA);

    // Enhanced headers
    await page.setExtraHTTPHeaders({
      "Accept-Language": "en-US,en;q=0.9,es;q=0.8",
      "Accept-Encoding": "gzip, deflate, br",
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
      "Cache-Control": "no-cache",
      Pragma: "no-cache",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "none",
      "Sec-Fetch-User": "?1",
      "Upgrade-Insecure-Requests": "1",
    });

    // Navigate to Google Scholar with enhanced query
    const searchUrl = `${GOOGLE_SCHOLAR_API_BASE}?q=${encodeURIComponent(query)}&hl=en&as_sdt=0%2C5&as_ylo=2020`;
    await page.goto(searchUrl, {
      waitUntil: "networkidle2",
      timeout: 45000,
    });

    // Wait for results with multiple fallback selectors
    try {
      await page.waitForSelector(".gs_r, .gs_ri, .gs_or, [data-rp]", {
        timeout: 20000,
      });
    } catch {
      // Try alternative selectors
      try {
        await page.waitForSelector(".g, .rc, .r", { timeout: 10000 });
      } catch {
        console.error("No search results found or page structure changed");
        return [];
      }
    }

    // Enhanced data extraction with better selectors.
    // The callback below is serialized and executed inside the page, so it can
    // only use browser globals and values declared within it; all logic is
    // therefore kept inline rather than delegated to module-level helpers.
    return await page.evaluate(() => {
      const results: GoogleScholarArticle[] = [];
      // Keys of articles already emitted. The container selectors below can
      // match both a result element and an element nested inside it, which
      // would otherwise produce the same article more than once.
      const seen = new Set<string>();

      // Multiple selector strategies for different Google Scholar layouts
      const selectors = [
        ".gs_r, .gs_ri, .gs_or",
        ".g, .rc, .r",
        "[data-rp]",
        ".gs_rt, .gs_ri",
      ];
      const titleSelectors = [
        ".gs_rt a, .gs_rt",
        "h3 a, h3",
        "a[data-clk]",
        ".gs_rt a",
        ".rc h3 a",
        ".r h3 a",
      ];
      const authorSelectors = [
        ".gs_a, .gs_authors, .gs_venue",
        '[class*="author"]',
        '[class*="venue"]',
        ".gs_a",
        ".rc .s",
        ".r .s",
      ];
      const abstractSelectors = [
        ".gs_rs, .gs_rs_a, .gs_snippet",
        '[class*="snippet"]',
        '[class*="abstract"]',
        ".gs_rs",
        ".rc .st",
        ".r .st",
      ];
      // The specific "cites" link is tried first: the broad `.gs_fl` container
      // selectors match whenever it would, so listing them first made it
      // unreachable.
      const citationSelectors = [
        'a[href*="cites"]',
        ".gs_fl a, .gs_fl",
        '[class*="citation"]',
        ".gs_fl",
        ".rc .f",
        ".r .f",
      ];
      const yearPatterns = [
        /(\d{4})/g,
        /\((\d{4})\)/g,
        /(\d{4})\s*[–-]/g,
        /(\d{4})\s*$/g,
      ];
      // NOTE(review): the first pattern captures the text after the last
      // hyphen of the author line; for a line shaped like
      // "authors - venue, year - publisher" that is the final segment —
      // confirm this is the intended "journal".
      const journalPatterns = [
        /- ([^-]+)$/,
        /, ([^,]+)$/,
        /in ([^,]+)/,
        /([A-Z][^,]+(?:Journal|Review|Medicine|Health|Science|Research))/i,
        /([A-Z][^,]+(?:Lancet|Nature|Science|NEJM|JAMA|BMJ))/i,
      ];
      // Years up to next calendar year are accepted.
      const latestAllowedYear = new Date().getFullYear() + 1;

      let articleElements: NodeListOf<Element> | null = null;
      for (const selector of selectors) {
        articleElements = document.querySelectorAll(selector);
        if (articleElements.length > 0) break;
      }

      if (!articleElements || articleElements.length === 0) {
        return results;
      }

      articleElements.forEach((element) => {
        try {
          // Enhanced title extraction
          let title = "";
          let url = "";
          for (const selector of titleSelectors) {
            const titleElement = element.querySelector(selector);
            if (titleElement) {
              title = titleElement.textContent?.trim() || "";
              // A selector list such as ".gs_rt a, .gs_rt" returns the first
              // match in document order, i.e. the heading rather than the
              // anchor inside it, so resolve the anchor before reading href.
              const anchor = titleElement.matches("a")
                ? titleElement
                : titleElement.querySelector("a");
              url = (anchor as HTMLAnchorElement | null)?.href || "";
              if (title) break;
            }
          }

          // Enhanced authors/venue extraction
          let authors = "";
          for (const selector of authorSelectors) {
            const authorElement = element.querySelector(selector);
            if (authorElement) {
              authors = authorElement.textContent?.trim() || "";
              if (authors) break;
            }
          }

          // Enhanced abstract extraction
          let abstract = "";
          for (const selector of abstractSelectors) {
            const abstractElement = element.querySelector(selector);
            if (abstractElement) {
              abstract = abstractElement.textContent?.trim() || "";
              if (abstract) break;
            }
          }

          // Enhanced citation extraction
          let citations = "";
          for (const selector of citationSelectors) {
            const citationElement = element.querySelector(selector);
            if (citationElement) {
              citations = citationElement.textContent?.trim() || "";
              if (citations) break;
            }
          }

          // Enhanced year extraction: first text source / pattern that yields
          // a plausible year wins, and within it the most recent year is used.
          let year = "";
          const textSources = [authors, title, abstract, citations];
          for (const text of textSources) {
            for (const pattern of yearPatterns) {
              const matches = text.match(pattern);
              if (matches) {
                const validYears = matches
                  .map((m) => m.replace(/\D/g, ""))
                  .filter((y) => y.length === 4)
                  .map((y) => parseInt(y, 10))
                  .filter((y) => y >= 1900 && y <= latestAllowedYear);
                if (validYears.length > 0) {
                  year = String(Math.max(...validYears)); // Most recent year
                  break;
                }
              }
            }
            if (year) break;
          }

          // Enhanced journal extraction
          let journal = "";
          for (const pattern of journalPatterns) {
            const match = authors.match(pattern);
            if (match) {
              journal = match[1].trim();
              break;
            }
          }

          // Quality filter - only include substantial results
          if (title && title.length > 10 && title.length < 500) {
            const key = `${title}\u0000${url}`;
            if (seen.has(key)) return; // Same article matched via a nested element
            seen.add(key);

            results.push({
              title: title.substring(0, 500), // Limit title length
              authors: authors.substring(0, 300), // Limit authors length
              abstract: abstract.substring(0, 1000), // Limit abstract length
              journal: journal.substring(0, 200), // Limit journal length
              year,
              citations: citations.substring(0, 100), // Limit citations length
              url: url.substring(0, 500), // Limit URL length
            });
          }
        } catch (error) {
          console.error("Error processing article element:", error);
          // Skip this iteration
        }
      });

      return results;
    });
  } catch (error) {
    console.error("Error scraping Google Scholar:", error);
    return [];
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
// src/index.ts:173-189 (registration): MCP server.tool registration for 'search-google-scholar', including input schema (zod) and wrapper handler calling the core implementation
// Registers the "search-google-scholar" tool: a single required string
// argument `query`, handled by running the scraper and formatting its output.
server.tool(
  "search-google-scholar",
  "Search for academic research articles using Google Scholar",
  {
    query: z
      .string()
      .describe("Academic topic or research query to search for"),
  },
  async (params) => {
    const searchTerm = params.query;
    try {
      const hits = await searchGoogleScholar(searchTerm);
      return formatGoogleScholarArticles(hits, searchTerm);
    } catch (failure: any) {
      // Anything thrown while searching or formatting becomes an error response.
      return createErrorResponse("searching Google Scholar", failure);
    }
  },
);
// src/types.ts:59-69 (schema): TypeScript type definition for GoogleScholarArticle, defining the output structure from the searchGoogleScholar handler
/**
 * One article entry in a Google Scholar search result.
 * Only `title` is required; every other field is optional.
 */
export type GoogleScholarArticle = {
  /** Title text of the result. */
  title: string;
  /** Author / venue line of the result. */
  authors?: string;
  /** Snippet or abstract text of the result. */
  abstract?: string;
  /** Journal or venue name. */
  journal?: string;
  /** Publication year, kept as a string. */
  year?: string;
  /** Citation text of the result. */
  citations?: string;
  /** Link to the article. */
  url?: string;
  /** Link to a PDF of the article. */
  pdf_url?: string;
  /** Related articles for this result. */
  related_articles?: string[];
};
// src/utils.ts:523-538 (helper): Helper function to format searchGoogleScholar results into a formatted MCP text response
/**
 * Builds the MCP text response for a Google Scholar search.
 *
 * @param articles - Articles to render; each is passed to `formatArticleItem`
 *   together with its index.
 * @param query - The search query, echoed in the heading or in the
 *   "no results" message.
 * @returns The value of `createMCPResponse` for either the "no results"
 *   message or the heading, count line and concatenated article items.
 */
export function formatGoogleScholarArticles(articles: any[], query: string) {
  const total = articles.length;

  // Guard: nothing to list.
  if (total === 0) {
    return createMCPResponse(
      `No academic articles found for "${query}". This could be due to no results matching your query, rate limiting, or network issues.`,
    );
  }

  const heading =
    `**Academic Research Search: "${query}"**\n\n` +
    `Found ${total} article(s)\n\n`;

  // Append each formatted item to the heading, in order.
  const body = articles.reduce(
    (text, entry, position) => text + formatArticleItem(entry, position),
    heading,
  );

  return createMCPResponse(body);
}
// src/constants.ts:6-6 (helper): Constant defining the Google Scholar base URL used in searchGoogleScholar
/** Base URL of the Google Scholar search endpoint; the query string is appended by the caller. */
export const GOOGLE_SCHOLAR_API_BASE = "https://scholar.google.com/scholar";