Skip to main content
Glama
google.ts4.91 kB
/** * Copyright (c) 2025 Bytedance, Inc. and its affiliates. * SPDX-License-Identifier: Apache-2.0 */ import type { Page } from '../../browser/types.js'; import type { SearchEngineAdapter, SearchResult } from '../types.js'; /** * Google search engine adapter implementation. * Provides functionality to generate Google search URLs and extract search results from Google search pages. */ export class GoogleSearchEngine implements SearchEngineAdapter { /** * Generates a Google search URL based on the provided query and options. * * @param query - The search query string * @param options - Search configuration options * @param options.count - Number of search results to request (default: 10) * @param options.excludeDomains - Array of domain names to exclude from search results * @returns Formatted Google search URL as a string */ getSearchUrl( query: string, options: { count?: number; excludeDomains?: string[]; }, ): string { const searchParams = new URLSearchParams({ q: `${ options.excludeDomains && options.excludeDomains.length > 0 ? `${options.excludeDomains.map((domain) => `-site:${domain}`).join(' ')} ` : '' }${query}`, num: `${options.count || 10}`, }); searchParams.set('udm', '14'); return `https://www.google.com/search?${searchParams.toString()}`; } /** * Extracts search results from a Google search page. * * @param window - The browser window object containing the loaded Google search page * @returns Array of search results extracted from the page */ extractSearchResults(window: Window): SearchResult[] { const links: SearchResult[] = []; const document = window.document; /** * Validates if a string is a properly formatted URL. * * @param url - The URL string to validate * @returns Boolean indicating if the URL is valid */ const isValidUrl = (url: string) => { try { new URL(url); return true; } catch (error) { return false; } }; /** * Extracts the snippet text from an element by cloning it and removing title elements * * @param element - The search result element * @returns The extracted snippet text */ const extractSnippet = (element: Element): string => { // Clone the element to avoid modifying the original DOM const clone = element.cloneNode(true) as Element; // Remove title elements (typically h3 tags in Google) const titleElements = clone.querySelectorAll('h3'); titleElements.forEach((el) => el.remove()); // Remove any cite elements (showing the URL) const citeElements = clone.querySelectorAll('cite'); citeElements.forEach((el) => el.remove()); // Remove script and style elements const scriptElements = clone.querySelectorAll('script, style'); scriptElements.forEach((el) => el.remove()); // Get text content and remove duplicates const text = Array.from(clone.querySelectorAll('*')) .filter((node) => node.textContent?.trim()) .map((node) => node.textContent?.trim()) .filter(Boolean) .reduce((acc: string[], curr) => { // Only add text if it's not already included in accumulated text if ( !acc.some( (text) => text.includes(curr as string) || (curr as string).includes(text), ) ) { acc.push(curr as string); } return acc; }, []) .join(' ') .trim() .replace(/\s+/g, ' '); return text; }; try { // Google search results are contained in elements with class 'tF2Cxc' // It may change at any time const elements = document.querySelectorAll('.tF2Cxc'); elements.forEach((element) => { const titleEl = element.querySelector('h3'); const urlEl = element.querySelector('a'); const url = urlEl?.getAttribute('href'); // Extract snippet using the generic method const snippet = extractSnippet(element.parentElement || element); if (!url || !isValidUrl(url)) return; const item: SearchResult = { title: titleEl?.textContent || '', url, snippet, content: '', }; if (!item.title || !item.url) return; links.push(item); }); } catch (error) { console.error(error); } return links; } /** * Waits for Google search results to load completely. * * @param page - The Puppeteer page object * @returns Promise that resolves when search results are loaded */ async waitForSearchResults(page: Page, timeout?: number): Promise<void> { await page.waitForSelector('#search', { timeout: timeout ?? 10000, }); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yokingma/one-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server