search
Perform a single Google search and retrieve titles, URLs, and snippets for up to 20 results. Automatically handles CAPTCHA by opening a visible Chrome window for manual solving, then retries the search.
Instructions
Single Google search. Returns title/url/snippet per result. ~2s/query (first call ~4s, includes setup). On CAPTCHA, a visible Chrome window opens for the human to solve, then the call retries.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Search query | |
| limit | No | Max results, 1–20 (default 10) | 10 |
Implementation Reference
- src/search.ts:17-72 (handler) Core search handler: navigates to Google if the page is not already there, types the query, presses Enter, waits for results, then extracts them via page.evaluate(parseResults). Returns a SearchResult[] array; throws CaptchaError when a block page is detected and a plain Error when no results could be extracted for a known reason.
/**
 * Runs a single Google search on an already-open page and returns the parsed results.
 *
 * The page is reused as-is when its URL already looks like the Google home page
 * or a results page; otherwise the home page is loaded first. The query is typed
 * one character at a time with small random pauses and submitted with Enter.
 *
 * @param page - Browser page to drive.
 * @param query - Text typed into the search box.
 * @param limit - Maximum result count forwarded to `parseResults` (default 10).
 * @returns The results extracted from the page; `[]` when the page is judged to
 *   be a truly empty SERP.
 * @throws CaptchaError when `isBlocked` flags the page URL, either before typing
 *   (`'home'`) or after submitting (`'after-search'`).
 * @throws Error when nothing was extracted and either a wait step failed or the
 *   page still shows at least 5 `h3` elements.
 */
export async function search(page: Page, query: string, limit = 10): Promise<SearchResult[]> {
  const url = page.url();
  const onResultsPage = url.includes('/search?');
  // skip redundant goto: launch already navigated home, second nav races subresources → ERR_ABORTED
  const onHome =
    url.startsWith('https://www.google.com/') &&
    !url.includes('/search?') &&
    !url.includes('/sorry/');

  if (!onResultsPage && !onHome) {
    await page.goto('https://www.google.com/', { waitUntil: 'domcontentloaded', timeout: 10_000 });
    await sleep(rand(80, 160));
  }
  if (isBlocked(page.url())) throw new CaptchaError('home');

  const sb = page.locator('textarea[name="q"], input[name="q"]').first();
  await sb.click({ timeout: 6_000 });
  await sleep(rand(30, 70));
  if (onResultsPage) {
    // Select everything in the focused box and delete it before typing
    // (presumably the box still holds the previous query — confirm).
    await page.keyboard.press(SELECT_ALL);
    await page.keyboard.press('Delete');
  }

  // Type character by character, each with its own randomised delay.
  for (const ch of query) {
    await page.keyboard.type(ch, { delay: rand(8, 20) });
  }
  await sleep(rand(50, 110));
  await page.keyboard.press('Enter');

  // inner 5+4+4=13s, within 30s outer
  let waitErr: Error | null = null;
  try {
    await page.waitForURL(/\/search\?/, { timeout: 5_000 });
    await page.waitForLoadState('domcontentloaded', { timeout: 4_000 });
    await page.waitForSelector('h3, #search, [id="rso"]', { timeout: 4_000 });
  } catch (e) {
    // A wait failure is not fatal on its own: it is only reported below when the
    // page also yields no results. Normalise to an Error so `.message` is always
    // a string, even if something other than an Error was thrown.
    waitErr = e instanceof Error ? e : new Error(String(e));
  }
  if (isBlocked(page.url())) throw new CaptchaError('after-search');

  const out = (await page.evaluate(parseResults, limit)) as ParseOutput;

  // empty results: throw if we have a reason, otherwise return []
  if (out.results.length === 0) {
    if (waitErr) {
      throw new Error(`search wait failed and no results: ${waitErr.message.slice(0, 120)}`);
    }
    // h3Count >= 5 is an arbitrary threshold; tune from prod data
    if (out.h3Count >= 5) {
      throw new Error(`parser stale: ${out.h3Count} h3 elements but 0 results extracted`);
    }
    // truly empty SERP, return []
  }
  return out.results;
}
// - src/index.ts:185-237 (registration) Tool registration in ListToolsRequestSchema: defines the 'search' tool name, description, and inputSchema (query string required, limit number optional with min 1 max 20).
// Tool catalogue: answers ListTools requests with the same four tool
// definitions (name, description, JSON-schema input) on every call.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return {
    tools: [
      // One query, one results list.
      {
        name: 'search',
        description:
          'Single Google search. Returns title/url/snippet per result. ~2s/query (first call ~4s, includes setup). On CAPTCHA, a visible Chrome window opens for the human to solve, then the call retries.',
        inputSchema: {
          type: 'object',
          properties: {
            query: { type: 'string', description: 'Search query' },
            limit: { type: 'number', minimum: 1, maximum: 20, description: 'Max results (default 10)' },
          },
          required: ['query'],
        },
      },
      // Several queries in one call.
      {
        name: 'search_parallel',
        description:
          'Run multiple Google searches in parallel (pool of 4). Returns title/url/snippet per result. First call adds 5–10s setup.',
        inputSchema: {
          type: 'object',
          properties: {
            queries: {
              type: 'array',
              items: { type: 'string' },
              minItems: 1,
              maxItems: 10,
              description: 'Queries',
            },
            limit: { type: 'number', minimum: 1, maximum: 20, description: 'Max results per query' },
          },
          required: ['queries'],
        },
      },
      // Single URL to article markdown.
      {
        name: 'extract',
        description:
          'Fetch a URL and return clean article markdown. Uses Mozilla Readability with a text fallback. Best-effort: failures return { error } instead of throwing. Private/loopback addresses blocked unless SURF_ALLOW_PRIVATE=true.',
        inputSchema: {
          type: 'object',
          properties: {
            url: { type: 'string', description: 'URL to fetch' },
            max_chars: {
              type: 'number',
              minimum: 200,
              maximum: 50000,
              description: 'Truncate content to this many chars (default 8000)',
            },
          },
          required: ['url'],
        },
      },
      // Search followed by content extraction of the hits.
      {
        name: 'search_extract',
        description:
          'Google search + parallel content extraction. Returns SERP results enriched with article markdown. Slower than search (extra ~2–5s) but gives you actual page content, not just snippets. Per-page failures are isolated (returned as { error } in that result).',
        inputSchema: {
          type: 'object',
          properties: {
            query: { type: 'string', description: 'Search query' },
            limit: {
              type: 'number',
              minimum: 1,
              maximum: 10,
              description: 'Number of results to extract (default 5)',
            },
            max_chars: {
              type: 'number',
              minimum: 200,
              maximum: 20000,
              description: 'Truncate each result content (default 8000)',
            },
          },
          required: ['query'],
        },
      },
    ],
  };
});
// - src/types.ts:1-5 (schema) Type definition for SearchResult: the output type returned by the search tool, containing title, url, and description fields.
/** One search hit, as returned to callers of the search tool. */
export interface SearchResult {
  /** Heading text of the result. */
  title: string;
  /** Link target of the result. */
  url: string;
  /** Snippet text for the result; may be an empty string. */
  description: string;
}
// - src/parse.ts:13-53 (helper) DOM parsing helper that runs inside page.evaluate(). Extracts organic search results (title, url, description) from Google SERP by querying h3 elements and link anchors, skipping ads and known Google domains.
/**
 * Extracts organic results from the Google results page loaded in `document`.
 *
 * The function is handed to `page.evaluate`, so it must stay self-contained:
 * everything it needs (including `SKIP_HOSTS`) is declared inside the body and
 * it only touches the global `document`.
 *
 * @param max - Maximum number of results to collect. Values `<= 0` yield none.
 * @returns `results` (title, url, description, deduplicated by URL) together
 *   with `h3Count`, the total number of `h3` elements on the page, which lets
 *   the caller tell an empty page from a page the selectors failed on.
 */
export function parseResults(max: number): ParseOutput {
  const SKIP_HOSTS = new Set([
    'www.google.com',
    'accounts.google.com',
    'webcache.googleusercontent.com',
    'translate.google.com',
  ]);
  const seen = new Set<string>();
  const results: ParsedResult[] = [];
  const h3Count = document.querySelectorAll('h3').length;
  const blocks = document.querySelectorAll(
    'div.g, div[data-snc], div[data-hveid], div[jscontroller], div.MjjYud, div.tF2Cxc',
  );

  for (const el of Array.from(blocks)) {
    // Checked before any work so that max <= 0 collects nothing (checking only
    // after a push would always let the first result through).
    if (results.length >= max) break;

    // skip sponsored / ads (top, bottom, inline)
    if (
      el.matches('[data-text-ad], [data-pcu]') ||
      el.closest('#tads, #tadsb, #bottomads, [aria-label*="Sponsored" i]')
    ) {
      continue;
    }

    const t = el.querySelector('h3');
    if (!t) continue;

    // The candidate selectors overlap and can match containers holding several
    // links, so prefer the anchor that wraps the heading itself: that keeps the
    // title and URL from the same result. Fall back to the first absolute link
    // in the block when the heading is not inside one.
    const a = (t.closest('a[href^="http"]') ??
      el.querySelector('a[href^="http"]')) as HTMLAnchorElement | null;
    if (!a) continue;

    const url = a.href;
    if (seen.has(url)) continue;

    let host = '';
    try {
      host = new URL(url).hostname;
    } catch {
      // Unparseable href: ignore this block.
      continue;
    }
    if (SKIP_HOSTS.has(host)) continue;
    seen.add(url);

    // Snippet containers, tried in order; a missing snippet becomes ''.
    const sn =
      el.querySelector('[data-sncf="1"]') ||
      el.querySelector('.VwiC3b') ||
      el.querySelector('div[style*="-webkit-line-clamp"]');

    results.push({
      title: (t.textContent || '').trim(),
      url,
      description: (sn?.textContent || '').trim().slice(0, 600),
    });
  }

  return { results, h3Count };
}
// - src/index.ts:250-278 (handler) CallToolRequestSchema handler for 'search': validates params, acquires browser context via getSequentialCtx(), calls the search() function with captcha fallback, and returns JSON results with elapsed timing.
// 'search' tool call: validate the arguments, run one search with CAPTCHA
// fallback, and reply with the results as pretty-printed JSON. Failures are
// reported as an `isError` text reply instead of being thrown; only the
// missing-query check throws (McpError, InvalidParams).
if (name === 'search') {
  const query = String(args?.query || '').trim();
  if (!query) throw new McpError(ErrorCode.InvalidParams, 'query required');

  // Missing, zero or non-numeric limits fall back to 10. The value is clamped
  // to 1..20 and then floored so that an integer is passed on to the search.
  const limit = Math.floor(Math.min(Math.max(Number(args?.limit) || 10, 1), 20));
  const t0 = Date.now();

  try {
    const results = await trackSeq(() =>
      withCaptchaFallback(
        async () => {
          const ctx = await getSequentialCtx();
          const page = await getPage(ctx);
          return await withTimeout(search(page, query, limit), REQUEST_TIMEOUT_MS, 'search');
        },
        // NOTE(review): presumably invoked to reset the sequential browser
        // before the retry — confirm against withCaptchaFallback.
        closeSequential,
      ),
    );
    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify({ query, results, elapsed_ms: Date.now() - t0 }, null, 2),
        },
      ],
    };
  } catch (e) {
    console.error('[google-surf-mcp] search error:', e);
    // Narrow the unknown catch value instead of asserting it is an Error, so a
    // non-Error throw is reported by its string form rather than "undefined".
    let msg: string;
    if (e instanceof CaptchaError) {
      msg = `CAPTCHA recovery failed. Solve in opened browser or run: npm run bootstrap`;
    } else if (e instanceof Error) {
      msg = e.message;
    } else {
      msg = String(e);
    }
    return { content: [{ type: 'text', text: `Error: ${msg}` }], isError: true };
  }
}