Skip to main content
Glama

search-google-scholar

Search for academic research articles using Google Scholar to find relevant medical and scientific publications for research purposes.

Instructions

Search for academic research articles using Google Scholar

Input Schema

Name | Required | Description | Default
query | Yes | Academic topic or research query to search for |

Input Schema (JSON Schema)

{ "properties": { "query": { "description": "Academic topic or research query to search for", "type": "string" } }, "required": [ "query" ], "type": "object" }

Implementation Reference

  • Core handler function that uses Puppeteer to scrape Google Scholar search results, extracts article data with anti-detection measures, and returns structured GoogleScholarArticle[]
    /**
     * Scrapes one page of Google Scholar search results for `query` using a
     * headless Puppeteer browser and returns the parsed articles.
     *
     * The search URL is built from `GOOGLE_SCHOLAR_API_BASE` with
     * `hl=en&as_sdt=0%2C5&as_ylo=2020` appended. Before launching the browser the
     * function waits a random 2-5 s (`randomDelay`) and then randomises viewport
     * and user agent per call.
     *
     * Failure is never surfaced to the caller: if no result selector appears, or
     * any step throws, the error is logged and an empty array is returned. The
     * browser is always closed in `finally`.
     *
     * @param query Free-text search query; URL-encoded before use.
     * @returns Parsed results (title, byline, snippet, venue, year, citation text,
     *   link), or `[]` when nothing could be scraped.
     */
    export async function searchGoogleScholar(
      query: string,
    ): Promise<GoogleScholarArticle[]> {
      let browser;
      try {
        console.log(`🔍 Scraping Google Scholar for: ${query}`);

        // Add random delay to avoid rate limiting
        await randomDelay(2000, 5000);

        // Browser configuration aimed at reducing automation fingerprints.
        // NOTE(review): the --user-agent switch below is presumably superseded by
        // page.setUserAgent() further down — confirm before relying on either.
        browser = await puppeteer.launch({
          headless: true,
          args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-accelerated-2d-canvas",
            "--no-first-run",
            "--no-zygote",
            "--disable-gpu",
            "--disable-web-security",
            "--disable-features=VizDisplayCompositor",
            "--disable-blink-features=AutomationControlled",
            "--disable-extensions",
            "--disable-plugins",
            "--disable-images",
            "--disable-javascript",
            "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
          ],
        });

        const page = await browser.newPage();

        // Hide navigator.webdriver from scripts on every new document.
        await page.evaluateOnNewDocument(() => {
          Object.defineProperty(navigator, "webdriver", {
            get: () => undefined,
          });
        });

        // Random viewport size
        const viewports = [
          { width: 1920, height: 1080 },
          { width: 1366, height: 768 },
          { width: 1440, height: 900 },
          { width: 1536, height: 864 },
        ];
        const randomViewport =
          viewports[Math.floor(Math.random() * viewports.length)];
        await page.setViewport(randomViewport);

        // Rotate user agents
        const userAgents = [
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
          "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        ];
        const randomUA =
          userAgents[Math.floor(Math.random() * userAgents.length)];
        await page.setUserAgent(randomUA);

        // Browser-like request headers
        await page.setExtraHTTPHeaders({
          "Accept-Language": "en-US,en;q=0.9,es;q=0.8",
          "Accept-Encoding": "gzip, deflate, br",
          Accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
          "Cache-Control": "no-cache",
          Pragma: "no-cache",
          "Sec-Fetch-Dest": "document",
          "Sec-Fetch-Mode": "navigate",
          "Sec-Fetch-Site": "none",
          "Sec-Fetch-User": "?1",
          "Upgrade-Insecure-Requests": "1",
        });

        // Navigate to the results page for the encoded query.
        const searchUrl = `${GOOGLE_SCHOLAR_API_BASE}?q=${encodeURIComponent(query)}&hl=en&as_sdt=0%2C5&as_ylo=2020`;
        await page.goto(searchUrl, {
          waitUntil: "networkidle2",
          timeout: 45000,
        });

        // Wait for results, trying the Scholar selectors first and a generic
        // fallback set second; give up with [] if neither appears.
        try {
          await page.waitForSelector(".gs_r, .gs_ri, .gs_or, [data-rp]", {
            timeout: 20000,
          });
        } catch {
          try {
            await page.waitForSelector(".g, .rc, .r", { timeout: 10000 });
          } catch {
            console.error("No search results found or page structure changed");
            return [];
          }
        }

        // Everything inside this callback runs in the page context, so it must be
        // self-contained: it cannot call helpers defined in this module.
        return await page.evaluate(() => {
          const results: GoogleScholarArticle[] = [];

          // Selector strategies, tried in order until one yields elements.
          const selectors = [
            ".gs_r, .gs_ri, .gs_or",
            ".g, .rc, .r",
            "[data-rp]",
            ".gs_rt, .gs_ri",
          ];

          let articleElements: NodeListOf<Element> | null = null;
          for (const selector of selectors) {
            articleElements = document.querySelectorAll(selector);
            if (articleElements.length > 0) break;
          }

          if (!articleElements || articleElements.length === 0) {
            return results;
          }

          // Loop-invariant values for year extraction. The lookarounds keep the
          // pattern from slicing four digits out of a longer number (IDs, DOIs).
          const latestAcceptedYear = new Date().getFullYear() + 1;
          const yearPattern = /(?<!\d)\d{4}(?!\d)/g;

          articleElements.forEach((element) => {
            try {
              // Title and link
              const titleSelectors = [
                ".gs_rt a, .gs_rt",
                "h3 a, h3",
                "a[data-clk]",
                ".gs_rt a",
                ".rc h3 a",
                ".r h3 a",
              ];

              let title = "";
              let url = "";
              for (const selector of titleSelectors) {
                const titleElement = element.querySelector(selector);
                if (titleElement) {
                  title = titleElement.textContent?.trim() || "";
                  // querySelector returns the first match in document order, so
                  // a list such as ".gs_rt a, .gs_rt" yields the heading wrapper
                  // rather than the link inside it. Resolve the anchor
                  // explicitly; reading .href off the heading is always empty.
                  const anchor = titleElement.matches("a")
                    ? titleElement
                    : titleElement.querySelector("a");
                  url = (anchor as HTMLAnchorElement | null)?.href || "";
                  if (title) break;
                }
              }

              // Byline (authors / venue / publisher text)
              const authorSelectors = [
                ".gs_a, .gs_authors, .gs_venue",
                '[class*="author"]',
                '[class*="venue"]',
                ".gs_a",
                ".rc .s",
                ".r .s",
              ];

              let authors = "";
              for (const selector of authorSelectors) {
                const authorElement = element.querySelector(selector);
                if (authorElement) {
                  authors = authorElement.textContent?.trim() || "";
                  if (authors) break;
                }
              }

              // Snippet / abstract
              const abstractSelectors = [
                ".gs_rs, .gs_rs_a, .gs_snippet",
                '[class*="snippet"]',
                '[class*="abstract"]',
                ".gs_rs",
                ".rc .st",
                ".r .st",
              ];

              let abstract = "";
              for (const selector of abstractSelectors) {
                const abstractElement = element.querySelector(selector);
                if (abstractElement) {
                  abstract = abstractElement.textContent?.trim() || "";
                  if (abstract) break;
                }
              }

              // Citation / footer link text
              const citationSelectors = [
                ".gs_fl a, .gs_fl",
                '[class*="citation"]',
                'a[href*="cites"]',
                ".gs_fl",
                ".rc .f",
                ".r .f",
              ];

              let citations = "";
              for (const selector of citationSelectors) {
                const citationElement = element.querySelector(selector);
                if (citationElement) {
                  citations = citationElement.textContent?.trim() || "";
                  if (citations) break;
                }
              }

              // Year: scan the byline first, then title, snippet and citation
              // text; within the first source that has a plausible year
              // (1900..next year) take the LAST one that appears in it.
              let year = "";
              const textSources = [authors, title, abstract, citations];
              for (const text of textSources) {
                const validYears = (text.match(yearPattern) ?? []).filter((y) => {
                  const value = parseInt(y, 10);
                  return value >= 1900 && value <= latestAcceptedYear;
                });
                if (validYears.length > 0) {
                  year = validYears[validYears.length - 1];
                  break;
                }
              }

              // Journal / venue. A standard Scholar byline reads
              // "authors - venue, year - publisher", so the venue is the middle
              // " - " segment; matching "- (...)$" would return the publisher
              // host instead. Non-breaking spaces are normalised before
              // splitting. Bylines without that three-part shape fall back to
              // the looser patterns.
              let journal = "";
              const bylineParts = authors.replace(/\u00a0/g, " ").split(" - ");
              if (bylineParts.length >= 3) {
                journal = bylineParts[1].replace(/,?\s*\d{4}\s*$/, "").trim();
              } else {
                const journalPatterns = [
                  /- ([^-]+)$/,
                  /, ([^,]+)$/,
                  /in ([^,]+)/,
                  /([A-Z][^,]+(?:Journal|Review|Medicine|Health|Science|Research))/i,
                  /([A-Z][^,]+(?:Lancet|Nature|Science|NEJM|JAMA|BMJ))/i,
                ];

                for (const pattern of journalPatterns) {
                  const match = authors.match(pattern);
                  if (match) {
                    journal = match[1].trim();
                    break;
                  }
                }
              }

              // Quality filter - only include results with a plausible title
              if (title && title.length > 10 && title.length < 500) {
                results.push({
                  title: title.substring(0, 500), // Limit title length
                  authors: authors.substring(0, 300), // Limit authors length
                  abstract: abstract.substring(0, 1000), // Limit abstract length
                  journal: journal.substring(0, 200), // Limit journal length
                  year,
                  citations: citations.substring(0, 100), // Limit citations length
                  url: url.substring(0, 500), // Limit URL length
                });
              }
            } catch (error) {
              // One malformed result must not discard the rest of the page.
              console.error("Error processing article element:", error);
            }
          });

          return results;
        });
      } catch (error) {
        console.error("Error scraping Google Scholar:", error);
        return [];
      } finally {
        if (browser) {
          await browser.close();
        }
      }
    }
  • src/index.ts:173-189 (registration)
    MCP server.tool registration for 'search-google-scholar', including input schema (zod) and wrapper handler calling the core implementation
    // Registers the "search-google-scholar" MCP tool. It takes a single string
    // `query`, runs the scraper, and returns the formatted text response; any
    // error thrown while searching or formatting becomes an MCP error response.
    server.tool(
      "search-google-scholar",
      "Search for academic research articles using Google Scholar",
      {
        query: z
          .string()
          .describe("Academic topic or research query to search for"),
      },
      async (args) => {
        const { query } = args;
        try {
          return formatGoogleScholarArticles(
            await searchGoogleScholar(query),
            query,
          );
        } catch (error: any) {
          return createErrorResponse("searching Google Scholar", error);
        }
      },
    );
  • TypeScript type definition for GoogleScholarArticle, defining the output structure from the searchGoogleScholar handler
    /**
     * One Google Scholar search result as produced by `searchGoogleScholar`.
     * Only `title` is required; every other field may be absent.
     */
    export type GoogleScholarArticle = {
      /** Title text of the result. */
      title: string;
      /** Byline text scraped from the result (authors, and often venue/publisher). */
      authors?: string;
      /** Snippet or abstract text shown under the result. */
      abstract?: string;
      /** Journal or venue name derived from the byline. */
      journal?: string;
      /** Four-digit publication year, kept as a string. */
      year?: string;
      /** Citation-related text scraped from the result footer. */
      citations?: string;
      /** Link to the result. */
      url?: string;
      /** Direct PDF link. NOTE(review): not populated by the visible scraper — confirm producer. */
      pdf_url?: string;
      /** Related articles. NOTE(review): not populated by the visible scraper — confirm producer. */
      related_articles?: string[];
    };
  • Helper function to format searchGoogleScholar results into a formatted MCP text response
    /**
     * Builds the MCP text response for a Google Scholar search.
     *
     * With no articles, the response explains that nothing was found and lists
     * possible causes. Otherwise it is a bold header naming the query, a result
     * count, and the output of `formatArticleItem` for each article in order.
     *
     * @param articles Search results to render.
     * @param query The original query, echoed in the response text.
     * @returns Whatever `createMCPResponse` returns for the assembled text.
     */
    export function formatGoogleScholarArticles(articles: any[], query: string) {
      if (articles.length === 0) {
        return createMCPResponse(
          `No academic articles found for "${query}". This could be due to no results matching your query, rate limiting, or network issues.`,
        );
      }

      const header =
        `**Academic Research Search: "${query}"**\n\n` +
        `Found ${articles.length} article(s)\n\n`;

      // Fold with `+` so each item is appended exactly as `+=` would append it.
      const text = articles.reduce(
        (soFar: string, article, index) =>
          soFar + formatArticleItem(article, index),
        header,
      );

      return createMCPResponse(text);
    }
  • Constant defining the Google Scholar base URL used in searchGoogleScholar
    /** Google Scholar search page URL; `searchGoogleScholar` appends the query string (`?q=...`) to it. */
    export const GOOGLE_SCHOLAR_API_BASE = "https://scholar.google.com/scholar";

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/JamesANZ/medical-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.