search-google-scholar
Search for academic research articles using Google Scholar to find relevant medical and scientific publications for research purposes.
Instructions
Search for academic research articles using Google Scholar
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Academic topic or research query to search for | |
Implementation Reference
// src/utils.ts:730-1008 (handler) — Core handler function that uses Puppeteer to scrape Google Scholar search results, extracts article data with anti-detection measures, and returns structured GoogleScholarArticle[]

/**
 * Scrapes one Google Scholar results page for `query` with a headless
 * Puppeteer browser and returns the parsed articles.
 *
 * The request URL always carries `hl=en`, `as_sdt=0%2C5` and `as_ylo=2020`
 * in addition to the URL-encoded query.
 *
 * Failure handling is best-effort: if no known result selector appears, or
 * if anything throws (launch, navigation, evaluation), the error is logged
 * and an empty array is returned. The browser is always closed.
 *
 * @param query - Academic topic or research query to search for.
 * @returns Parsed articles; an empty array when nothing could be scraped.
 */
export async function searchGoogleScholar(
  query: string,
): Promise<GoogleScholarArticle[]> {
  // Picks a random entry; the `?? items[0]` keeps the result defined for the
  // type checker even with `noUncheckedIndexedAccess` enabled.
  const pickRandom = <T>(items: readonly [T, ...T[]]): T =>
    items[Math.floor(Math.random() * items.length)] ?? items[0];

  let browser;
  try {
    // Logged via console.error like every other message in this function.
    // NOTE(review): if this server speaks MCP over stdio, stdout must stay
    // free of log output — confirm the transport.
    console.error(`Scraping Google Scholar for: ${query}`);

    // Add random delay to avoid rate limiting
    await randomDelay(2000, 5000);

    // Enhanced browser configuration for better anti-detection
    browser = await puppeteer.launch({
      headless: true,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--disable-gpu",
        "--disable-web-security",
        "--disable-features=VizDisplayCompositor",
        "--disable-blink-features=AutomationControlled",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-images",
        "--disable-javascript",
        "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
      ],
    });

    const page = await browser.newPage();

    // Hide the `navigator.webdriver` automation marker before any page script runs.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, "webdriver", {
        get: () => undefined,
      });
    });

    // Random viewport size
    await page.setViewport(
      pickRandom([
        { width: 1920, height: 1080 },
        { width: 1366, height: 768 },
        { width: 1440, height: 900 },
        { width: 1536, height: 864 },
      ]),
    );

    // Rotate user agents (overrides the launch-time --user-agent for this page)
    await page.setUserAgent(
      pickRandom([
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
      ]),
    );

    // Enhanced headers
    await page.setExtraHTTPHeaders({
      "Accept-Language": "en-US,en;q=0.9,es;q=0.8",
      "Accept-Encoding": "gzip, deflate, br",
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
      "Cache-Control": "no-cache",
      Pragma: "no-cache",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "none",
      "Sec-Fetch-User": "?1",
      "Upgrade-Insecure-Requests": "1",
    });

    // Navigate to Google Scholar with enhanced query
    const searchUrl = `${GOOGLE_SCHOLAR_API_BASE}?q=${encodeURIComponent(query)}&hl=en&as_sdt=0%2C5&as_ylo=2020`;
    await page.goto(searchUrl, {
      waitUntil: "networkidle2",
      timeout: 45000,
    });

    // Wait for results with multiple fallback selectors
    try {
      await page.waitForSelector(".gs_r, .gs_ri, .gs_or, [data-rp]", {
        timeout: 20000,
      });
    } catch {
      // Try alternative selectors
      try {
        await page.waitForSelector(".g, .rc, .r", { timeout: 10000 });
      } catch {
        console.error("No search results found or page structure changed");
        return [];
      }
    }

    // Everything inside this callback is serialized and executed in the
    // browser page, so it cannot reference anything from the Node scope;
    // all helpers it needs are declared inside it.
    return await page.evaluate(() => {
      const results: GoogleScholarArticle[] = [];

      // Returns the trimmed text of the first selector whose match has
      // non-empty text, or "" when none does.
      const firstText = (
        root: Element,
        selectors: readonly string[],
      ): string => {
        for (const selector of selectors) {
          const text = root.querySelector(selector)?.textContent?.trim();
          if (text) return text;
        }
        return "";
      };

      // Multiple selector strategies for different Google Scholar layouts
      const resultSelectors = [
        ".gs_r, .gs_ri, .gs_or",
        ".g, .rc, .r",
        "[data-rp]",
        ".gs_rt, .gs_ri",
      ];
      const titleSelectors = [
        ".gs_rt a, .gs_rt",
        "h3 a, h3",
        "a[data-clk]",
        ".gs_rt a",
        ".rc h3 a",
        ".r h3 a",
      ];
      const authorSelectors = [
        ".gs_a, .gs_authors, .gs_venue",
        '[class*="author"]',
        '[class*="venue"]',
        ".gs_a",
        ".rc .s",
        ".r .s",
      ];
      const abstractSelectors = [
        ".gs_rs, .gs_rs_a, .gs_snippet",
        '[class*="snippet"]',
        '[class*="abstract"]',
        ".gs_rs",
        ".rc .st",
        ".r .st",
      ];
      // The "cites" link is tried first: `.gs_fl` also matches the PDF
      // side panel, which precedes the footer inside a result container.
      const citationSelectors = [
        'a[href*="cites"]',
        ".gs_fl a, .gs_fl",
        '[class*="citation"]',
        ".gs_fl",
        ".rc .f",
        ".r .f",
      ];
      const journalPatterns = [
        /- ([^-]+)$/,
        /, ([^,]+)$/,
        /in ([^,]+)/,
        /([A-Z][^,]+(?:Journal|Review|Medicine|Health|Science|Research))/i,
        /([A-Z][^,]+(?:Lancet|Nature|Science|NEJM|JAMA|BMJ))/i,
      ];
      // A standalone run of exactly four digits (not a slice of a longer number).
      const yearPattern = /(?<!\d)\d{4}(?!\d)/g;
      const latestAcceptedYear = new Date().getFullYear() + 1;

      let articleElements: Element[] = [];
      for (const selector of resultSelectors) {
        const matched = Array.from(document.querySelectorAll(selector));
        if (matched.length > 0) {
          // A selector list matches a result container AND the content
          // nodes nested inside it; keep only the outermost matches so each
          // article is reported once.
          articleElements = matched.filter(
            (candidate) =>
              !matched.some(
                (other) => other !== candidate && other.contains(candidate),
              ),
          );
          break;
        }
      }

      for (const element of articleElements) {
        try {
          // Title and link
          let title = "";
          let url = "";
          for (const selector of titleSelectors) {
            const titleElement = element.querySelector(selector);
            if (!titleElement) continue;
            title = titleElement.textContent?.trim() || "";
            // querySelector returns the first match in document order, which
            // for "h3 a, h3"-style lists is the heading rather than the
            // link, so resolve the anchor explicitly.
            const anchor = titleElement.matches("a")
              ? (titleElement as HTMLAnchorElement)
              : titleElement.querySelector<HTMLAnchorElement>("a");
            url = anchor?.href || "";
            if (title) break;
          }

          const authors = firstText(element, authorSelectors);
          const abstract = firstText(element, abstractSelectors);
          const citations = firstText(element, citationSelectors);

          // Year: the last in-range year found in the first text source
          // that contains one (authors line first).
          let year = "";
          for (const text of [authors, title, abstract, citations]) {
            const inRange = (text.match(yearPattern) ?? []).filter((y) => {
              const value = Number(y);
              return value >= 1900 && value <= latestAcceptedYear;
            });
            const lastYear = inRange[inRange.length - 1];
            if (lastYear) {
              year = lastYear;
              break;
            }
          }

          // Journal: first heuristic pattern that matches the authors line
          let journal = "";
          for (const pattern of journalPatterns) {
            const match = authors.match(pattern);
            if (match?.[1] !== undefined) {
              journal = match[1].trim();
              break;
            }
          }

          // Quality filter - only include substantial results
          if (title && title.length > 10 && title.length < 500) {
            results.push({
              title: title.substring(0, 500), // Limit title length
              authors: authors.substring(0, 300), // Limit authors length
              abstract: abstract.substring(0, 1000), // Limit abstract length
              journal: journal.substring(0, 200), // Limit journal length
              year,
              citations: citations.substring(0, 100), // Limit citations length
              url: url.substring(0, 500), // Limit URL length
            });
          }
        } catch (error) {
          // Logged to the page's console (browser context); skip this element.
          console.error("Error processing article element:", error);
        }
      }

      return results;
    });
  } catch (error) {
    console.error("Error scraping Google Scholar:", error);
    return [];
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
// src/index.ts:173-189 (registration) — MCP server.tool registration for 'search-google-scholar', including input schema (zod) and wrapper handler calling the core implementation

// Registers the tool with a single required string input, `query`. The
// handler runs the scraper and formats its result; anything thrown along the
// way is converted into an error response rather than propagated.
server.tool(
  "search-google-scholar",
  "Search for academic research articles using Google Scholar",
  {
    query: z
      .string()
      .describe("Academic topic or research query to search for"),
  },
  async (input) => {
    const { query } = input;
    try {
      return formatGoogleScholarArticles(
        await searchGoogleScholar(query),
        query,
      );
    } catch (error: any) {
      return createErrorResponse("searching Google Scholar", error);
    }
  },
);
// src/types.ts:59-69 (schema) — TypeScript type definition for GoogleScholarArticle, defining the output structure from the searchGoogleScholar handler

/**
 * One search result as produced by `searchGoogleScholar`.
 *
 * Only `title` is required. The scraper fills the other text fields with an
 * empty string when nothing could be extracted.
 */
export type GoogleScholarArticle = {
  /** Text of the result's title element. */
  title: string;
  /** Raw text of the result's author/venue line. */
  authors?: string;
  /** Text of the result's snippet element. */
  abstract?: string;
  /** Venue guessed from the `authors` text with regex heuristics. */
  journal?: string;
  /** Four-digit year kept as a string. */
  year?: string;
  /** Text of the result's citation/footer element (free text, not a number). */
  citations?: string;
  /** Link taken from the result's title element. */
  url?: string;
  /** NOTE(review): not set by the `searchGoogleScholar` handler shown on this page — confirm which producer fills it. */
  pdf_url?: string;
  /** NOTE(review): not set by the `searchGoogleScholar` handler shown on this page — confirm which producer fills it. */
  related_articles?: string[];
};
// src/utils.ts:523-538 (helper) — Helper function to format searchGoogleScholar results into a formatted MCP text response

/**
 * Renders Google Scholar results as a single MCP text response.
 *
 * An empty list yields an explanatory "no articles found" message. Otherwise
 * the text is a heading with the query, a result count, and one
 * `formatArticleItem` entry per article, in order.
 *
 * @param articles - Articles to render.
 * @param query - The search query, echoed in the heading/message.
 * @returns Whatever `createMCPResponse` returns for the rendered text.
 */
export function formatGoogleScholarArticles(articles: any[], query: string) {
  const hasNoArticles = articles.length === 0;
  if (hasNoArticles) {
    return createMCPResponse(
      `No academic articles found for "${query}". This could be due to no results matching your query, rate limiting, or network issues.`,
    );
  }

  const heading =
    `**Academic Research Search: "${query}"**\n\n` +
    `Found ${articles.length} article(s)\n\n`;

  // Append each formatted item to the heading, preserving input order.
  const rendered = articles.reduce<string>(
    (text, article, position) => text + formatArticleItem(article, position),
    heading,
  );

  return createMCPResponse(rendered);
}
// src/constants.ts:6-6 (helper) — Constant defining the Google Scholar base URL used in searchGoogleScholar

/**
 * Base URL of the Google Scholar search page. `searchGoogleScholar` appends
 * the query string (`?q=…&hl=en&…`) to this value.
 */
export const GOOGLE_SCHOLAR_API_BASE = "https://scholar.google.com/scholar";