Skip to main content
Glama
search.ts (22.7 kB)
import {
  chromium,
  devices,
  BrowserContextOptions,
  Browser,
  BrowserContext,
  Page,
  Response,
} from "playwright";
import { SearchResponse, SearchResult, CommandOptions } from "./types.js";
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
import logger from "./logger.js";

/** Browser fingerprint settings applied to the Playwright context. */
interface FingerprintConfig {
  deviceName: string;
  locale: string;
  timezoneId: string;
  colorScheme: "dark" | "light";
  reducedMotion: "reduce" | "no-preference";
  forcedColors: "active" | "none";
}

/** Shape of the JSON document persisted in the `*-fingerprint.json` file. */
interface SavedState {
  fingerprint?: FingerprintConfig;
  googleDomain?: string;
}

/**
 * Result of one search attempt: either a finished response, or a request to
 * run the whole attempt again with a visible (headed) browser.
 */
type SearchOutcome =
  | { kind: "done"; response: SearchResponse }
  | { kind: "retryHeaded" };

/** Converts an unknown thrown value into a loggable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Runtime check that a parsed JSON value really is a `FingerprintConfig`. */
function isFingerprintConfig(value: unknown): value is FingerprintConfig {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const candidate = value as Record<string, unknown>;
  return (
    typeof candidate.deviceName === "string" &&
    typeof candidate.locale === "string" &&
    typeof candidate.timezoneId === "string" &&
    (candidate.colorScheme === "dark" || candidate.colorScheme === "light") &&
    (candidate.reducedMotion === "reduce" ||
      candidate.reducedMotion === "no-preference") &&
    (candidate.forcedColors === "active" || candidate.forcedColors === "none")
  );
}

/**
 * Validates parsed fingerprint-file content.
 *
 * Two on-disk layouts are accepted: the `SavedState` wrapper
 * (`{ fingerprint, googleDomain }`) and a bare `FingerprintConfig`, which
 * earlier revisions of this module wrote on first run.
 */
function parseSavedState(raw: unknown): SavedState {
  if (isFingerprintConfig(raw)) {
    return { fingerprint: raw };
  }
  if (typeof raw !== "object" || raw === null) {
    return {};
  }
  const record = raw as Record<string, unknown>;
  const state: SavedState = {};
  if (isFingerprintConfig(record.fingerprint)) {
    state.fingerprint = record.fingerprint;
  }
  if (typeof record.googleDomain === "string") {
    state.googleDomain = record.googleDomain;
  }
  return state;
}

/**
 * Reads the persisted fingerprint state, returning an empty state when the
 * file is missing, unreadable or malformed.
 */
function loadSavedState(fingerprintFilePath: string): SavedState {
  try {
    if (!fs.existsSync(fingerprintFilePath)) {
      return {};
    }
    const parsed: unknown = JSON.parse(
      fs.readFileSync(fingerprintFilePath, "utf-8")
    );
    const state = parseSavedState(parsed);
    if (state.fingerprint) {
      logger.info("已加载浏览器指纹配置");
    }
    return state;
  } catch (error) {
    logger.warn("加载或保存浏览器指纹配置时出错,使用默认配置");
    return {};
  }
}

/**
 * Turns a POSIX-style locale such as `en_US.UTF-8` into a BCP 47-style tag
 * (`en-US`). Returns `undefined` for empty values and for the `C` / `POSIX`
 * pseudo-locales, which are not usable as a browser locale.
 */
function normalizeLocale(rawLocale: string | undefined): string | undefined {
  if (!rawLocale) {
    return undefined;
  }
  // Drop the encoding / modifier suffix: "en_US.UTF-8@euro" -> "en_US".
  const base = rawLocale.split(/[.@]/)[0];
  if (!base || base === "C" || base === "POSIX") {
    return undefined;
  }
  return base.replace(/_/g, "-");
}

/**
 * Rough timezone guess from a `Date#getTimezoneOffset()` value.
 *
 * The offset is in minutes and is negative east of UTC. The ranges are
 * mutually exclusive so each documented zone is actually reachable.
 */
function inferTimezoneFromOffset(offsetMinutes: number): string {
  if (offsetMinutes <= -540 && offsetMinutes > -600) {
    return "Asia/Tokyo"; // UTC+9 (Japan, Korea)
  }
  if (offsetMinutes <= -480 && offsetMinutes > -540) {
    return "Asia/Shanghai"; // UTC+8 (China, Singapore, Hong Kong)
  }
  if (offsetMinutes <= -420 && offsetMinutes > -480) {
    return "Asia/Bangkok"; // UTC+7 (Thailand, Vietnam)
  }
  if (offsetMinutes <= -60 && offsetMinutes > -120) {
    return "Europe/Berlin"; // UTC+1 (central Europe)
  }
  if (offsetMinutes <= 0 && offsetMinutes > -60) {
    return "Europe/London"; // UTC+0
  }
  if (offsetMinutes <= 300 && offsetMinutes > 240) {
    return "America/New_York"; // UTC-5 (US east coast)
  }
  return "Asia/Shanghai"; // historical default of this module
}

/**
 * Returns the host's IANA timezone, preferring the value reported by `Intl`
 * and falling back to the offset heuristic when that is unavailable.
 */
function detectHostTimezone(): string {
  try {
    const resolved = Intl.DateTimeFormat().resolvedOptions().timeZone;
    if (resolved && resolved !== "Etc/Unknown") {
      return resolved;
    }
  } catch {
    // Fall through to the offset-based guess below.
  }
  return inferTimezoneFromOffset(new Date().getTimezoneOffset());
}

/**
 * Builds a fingerprint configuration from the host machine's settings.
 *
 * @param userLocale Locale requested by the caller; used verbatim when given.
 * @returns Fingerprint derived from the host (locale, timezone, colour scheme).
 */
function getHostMachineConfig(userLocale?: string): FingerprintConfig {
  const systemLocale =
    userLocale || normalizeLocale(process.env.LANG) || "zh-CN";

  // The colour scheme is approximated from the local hour: dark at night.
  const hour = new Date().getHours();
  const colorScheme = hour >= 19 || hour < 7 ? "dark" : "light";

  return {
    deviceName: "Desktop Chrome",
    locale: systemLocale,
    timezoneId: detectHostTimezone(),
    colorScheme,
    reducedMotion: "no-preference",
    forcedColors: "none",
  };
}

/**
 * Runs a Google search in a Playwright-driven Chromium and returns the results.
 *
 * The browser always starts headless; if a CAPTCHA page is detected and the
 * browser was launched here, the search is retried once in headed mode.
 * Failures during the search itself are reported as a single "搜索失败"
 * result rather than thrown; failures to launch the browser or to create the
 * context/page reject the returned promise.
 *
 * @param query Search keywords.
 * @param options Search options (limit, timeout, state file, locale, region...).
 * @param existingBrowser Optional browser to reuse; it is never closed here.
 * @returns The search response.
 */
export async function googleSearch(
  query: string,
  options: CommandOptions = {},
  existingBrowser?: Browser
): Promise<SearchResponse> {
  const {
    limit = 10,
    timeout = 60000,
    stateFile = path.join(os.homedir(), ".google-search-browser-state.json"),
    noSaveState = false,
    locale = "zh-CN", // Chinese by default
    region = "cn", // China region by default
  } = options;

  const stateFilePath = path.resolve(stateFile);
  // Derive the sibling file from the parsed path so that only the file's own
  // extension is replaced and the result can never equal the state file.
  const parsedStatePath = path.parse(stateFilePath);
  const fingerprintFilePath = path.join(
    parsedStatePath.dir,
    `${parsedStatePath.name}-fingerprint.json`
  );

  // Fingerprint and domain persisted by a previous run, if any.
  const savedState = loadSavedState(fingerprintFilePath);

  // Any caller-supplied headless flag is ignored: always start headless.
  const useHeadless = true;

  logger.info({ options }, "正在初始化浏览器...");

  // Reuse saved cookies/storage when a state file exists.
  let storageState: string | undefined = undefined;
  if (fs.existsSync(stateFilePath)) {
    logger.info(
      { stateFile },
      "发现浏览器状态文件,将使用保存的浏览器状态以避免反机器人检测"
    );
    storageState = stateFilePath;
  } else {
    logger.info(
      { stateFile },
      "未找到浏览器状态文件,将创建新的浏览器会话和指纹"
    );
  }

  /** Uniform random integer in `[min, max]`. */
  const getRandomDelay = (min: number, max: number) => {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  };

  /** Launches Chromium with flags intended to reduce automation detection. */
  async function launchBrowser(headless: boolean): Promise<Browser> {
    logger.info(
      { headless },
      `准备以${headless ? "无头" : "有头"}模式启动浏览器...`
    );

    const launched = await chromium.launch({
      headless,
      timeout: timeout * 2, // allow extra time for browser start-up
      args: [
        "--disable-blink-features=AutomationControlled",
        "--disable-features=IsolateOrigins,site-per-process",
        "--disable-site-isolation-trials",
        "--disable-web-security",
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--disable-gpu",
        "--hide-scrollbars",
        "--mute-audio",
        "--disable-background-networking",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-breakpad",
        "--disable-component-extensions-with-background-pages",
        "--disable-extensions",
        "--disable-features=TranslateUI",
        "--disable-ipc-flooding-protection",
        "--disable-renderer-backgrounding",
        "--enable-features=NetworkService,NetworkServiceInProcess",
        "--force-color-profile=srgb",
        "--metrics-recording-only",
      ],
      ignoreDefaultArgs: ["--enable-automation"],
    });

    logger.info("浏览器已成功启动!");
    return launched;
  }

  /**
   * Builds the context options: desktop Chrome device profile, the saved (or
   * freshly generated) fingerprint, desktop-only overrides and, when
   * available, the saved storage state.
   */
  function buildContextOptions(): BrowserContextOptions {
    let fingerprint = savedState.fingerprint;
    if (fingerprint) {
      logger.info("使用保存的浏览器指纹配置");
    } else {
      fingerprint = getHostMachineConfig(locale);
      // Remember it so it is persisted together with the browser state.
      savedState.fingerprint = fingerprint;
      logger.info(
        {
          locale: fingerprint.locale,
          timezone: fingerprint.timezoneId,
          colorScheme: fingerprint.colorScheme,
          deviceType: fingerprint.deviceName,
        },
        "已根据宿主机器生成新的浏览器指纹配置"
      );
    }

    const contextOptions: BrowserContextOptions = {
      ...devices["Desktop Chrome"],
      locale: fingerprint.locale,
      timezoneId: fingerprint.timezoneId,
      colorScheme: fingerprint.colorScheme,
      reducedMotion: fingerprint.reducedMotion,
      forcedColors: fingerprint.forcedColors,
      permissions: ["geolocation", "notifications"],
      acceptDownloads: true,
      isMobile: false, // force desktop mode
      hasTouch: false, // disable touch support
      javaScriptEnabled: true,
    };

    if (storageState) {
      logger.info("正在加载保存的浏览器状态...");
      return { ...contextOptions, storageState };
    }
    return contextOptions;
  }

  /** Registers the init script that masks common automation signals. */
  async function installStealthScripts(context: BrowserContext): Promise<void> {
    await context.addInitScript(() => {
      // Override navigator properties.
      Object.defineProperty(navigator, "webdriver", { get: () => false });
      Object.defineProperty(navigator, "plugins", {
        get: () => [1, 2, 3, 4, 5],
      });
      Object.defineProperty(navigator, "languages", {
        get: () => ["en-US", "en", "zh-CN"],
      });

      // Provide a window.chrome object like a regular Chrome build.
      // @ts-ignore - the chrome property is not declared on window
      window.chrome = {
        runtime: {},
        loadTimes: function () {},
        csi: function () {},
        app: {},
      };

      // Report fixed WebGL vendor/renderer strings.
      if (typeof WebGLRenderingContext !== "undefined") {
        const getParameter = WebGLRenderingContext.prototype.getParameter;
        WebGLRenderingContext.prototype.getParameter = function (
          parameter: number
        ) {
          // 37445 = UNMASKED_VENDOR_WEBGL, 37446 = UNMASKED_RENDERER_WEBGL
          if (parameter === 37445) {
            return "Intel Inc.";
          }
          if (parameter === 37446) {
            return "Intel Iris OpenGL Engine";
          }
          return getParameter.call(this, parameter);
        };
      }
    });
  }

  /** Closes a resource, logging (not throwing) any failure. */
  async function closeQuietly(
    close: () => Promise<void>,
    failureMessage: string
  ): Promise<void> {
    try {
      await close();
    } catch (closeError) {
      logger.error({ error: describeError(closeError) }, failureMessage);
    }
  }

  // Candidate selectors for the Google search box, most specific first.
  const searchInputSelectors = [
    "textarea[name='q']",
    "input[name='q']",
    "textarea[title='Search']",
    "input[title='Search']",
    "textarea[aria-label='Search']",
    "input[aria-label='Search']",
    "textarea[aria-label='搜索']",
    "input[aria-label='搜索']",
    "#search-box",
    "#searchform input",
    "#searchbox",
    ".gLFyf",
    "textarea",
    "input[type='text']",
  ];

  /** Returns the first element matching one of the search-box selectors. */
  async function findSearchInput(page: Page) {
    for (const selector of searchInputSelectors) {
      logger.debug({ selector }, "尝试查找搜索框");
      const handle = await page.$(selector);
      if (handle) {
        logger.info({ selector }, "找到搜索框");
        return handle;
      }
      logger.debug({ selector }, "未找到搜索框");
    }
    return null;
  }

  /**
   * Logs what the page looks like and stores a screenshot plus the HTML in
   * the temp directory, to help diagnose a missing search box.
   */
  async function dumpPageDiagnostics(page: Page): Promise<void> {
    logger.info("分析页面内容以查找问题...");

    const title = await page.title();
    logger.info({ title }, "页面标题");

    const pageContent = await page.content();
    const containsRecaptcha =
      pageContent.includes("recaptcha") || pageContent.includes("captcha");
    const containsRobot =
      pageContent.includes("robot") || pageContent.includes("automated");
    const containsError =
      pageContent.includes("error") || pageContent.includes("sorry");

    logger.info(
      { containsRecaptcha, containsRobot, containsError, url: page.url() },
      "页面内容分析"
    );

    const inputElements = await page.$$eval("input, textarea", (elements) => {
      return elements.map((el) => ({
        type: el.tagName,
        id: el.id,
        name: (el as HTMLInputElement | HTMLTextAreaElement).name || "",
        class: el.className,
        placeholder:
          (el as HTMLInputElement | HTMLTextAreaElement).placeholder || "",
        visible:
          (el as HTMLElement).offsetWidth > 0 &&
          (el as HTMLElement).offsetHeight > 0,
      }));
    });
    logger.info({ inputElements }, "页面上的输入元素");

    const screenshotPath = path.join(
      os.tmpdir(),
      `google-search-error-${Date.now()}.png`
    );
    try {
      await page.screenshot({ path: screenshotPath, fullPage: true });
      logger.error({ screenshotPath }, "已保存页面截图");
    } catch (screenshotError) {
      logger.error({ error: screenshotError }, "保存截图失败");
    }

    const htmlPath = path.join(
      os.tmpdir(),
      `google-search-error-${Date.now()}.html`
    );
    try {
      const html = await page.content();
      fs.writeFileSync(htmlPath, html);
      logger.error({ htmlPath }, "已保存页面HTML");
    } catch (htmlError) {
      logger.error({ error: htmlError }, "保存HTML失败");
    }
  }

  /** Persists cookies/storage and the fingerprint unless saving is disabled. */
  async function persistState(context: BrowserContext): Promise<void> {
    if (noSaveState) {
      return;
    }
    try {
      logger.info({ stateFile }, "正在保存浏览器状态...");

      const stateDir = path.dirname(stateFilePath);
      if (!fs.existsSync(stateDir)) {
        fs.mkdirSync(stateDir, { recursive: true });
      }

      await context.storageState({ path: stateFilePath });

      fs.writeFileSync(
        fingerprintFilePath,
        JSON.stringify(savedState, null, 2),
        "utf8"
      );

      logger.info("浏览器状态和指纹配置已保存");
    } catch (error) {
      logger.error({ error: describeError(error) }, "保存状态时发生错误");
    }
  }

  /**
   * Drives one search on an already prepared page.
   *
   * Any error is converted into a "搜索失败" response. A CAPTCHA seen while
   * headless with a self-launched browser yields `retryHeaded` instead.
   */
  async function searchOnPage(
    page: Page,
    context: BrowserContext,
    headless: boolean,
    browserWasProvided: boolean
  ): Promise<SearchOutcome> {
    try {
      logger.info("正在访问Google搜索页面...");

      // Always use www.google.com, and remember the choice.
      const selectedDomain = "www.google.com";
      savedState.googleDomain = selectedDomain;

      const searchUrl = `https://${selectedDomain}/search?q=${encodeURIComponent(
        query
      )}&hl=${encodeURIComponent(locale)}`;
      logger.info({ url: searchUrl, query, locale }, "正在访问Google搜索页面");

      // Load the results page, retrying a few times on failure.
      let response: Response | null = null;
      const maxRetries = 3;
      for (let attempt = 1; attempt <= maxRetries; attempt++) {
        try {
          response = await page.goto(searchUrl, {
            timeout: timeout * 2,
            // domcontentloaded is enough here; networkidle is not awaited.
            waitUntil: "domcontentloaded",
          });

          if (response && response.ok()) {
            logger.info("页面加载成功");
            break;
          }

          logger.warn(
            {
              status: response?.status(),
              url: response?.url(),
              retry: attempt,
            },
            "页面加载不成功,准备重试"
          );
        } catch (error) {
          logger.error(
            { error: describeError(error), retry: attempt },
            "页面加载出错"
          );
        }

        // Pause before the next attempt; pointless after the last one.
        if (attempt < maxRetries) {
          await page.waitForTimeout(2000);
        }
      }

      if (!response || !response.ok()) {
        throw new Error(`无法加载Google搜索页面,已重试${maxRetries}次`);
      }
      const responseUrl = response.url();

      // Detect a redirect to the CAPTCHA / "sorry" page.
      const currentUrl = page.url();
      logger.info({ currentUrl }, "当前页面URL");

      const sorryPatterns = [
        "google.com/sorry/index",
        "google.com/sorry",
        "recaptcha",
        "captcha",
        "unusual traffic",
      ];
      const isBlockedPage = sorryPatterns.some(
        (pattern) =>
          currentUrl.includes(pattern) || responseUrl.includes(pattern)
      );

      if (isBlockedPage) {
        logger.warn("检测到人机验证页面");

        if (headless) {
          if (!browserWasProvided) {
            // The caller closes context and browser, then retries headed.
            return { kind: "retryHeaded" };
          }
          throw new Error("检测到人机验证页面,请尝试有头模式或手动验证");
        }

        logger.warn("请在浏览器中完成验证...");
        throw new Error("检测到人机验证页面,需要手动完成验证");
      }

      const isSearchResultPage =
        currentUrl.includes("/search") && currentUrl.includes("q=");

      if (isSearchResultPage) {
        // Already on the results page: no need to type the query.
        logger.info(
          { currentUrl },
          "已经在搜索结果页面,跳过输入搜索关键词的步骤"
        );
      } else {
        logger.info({ query }, "正在输入搜索关键词");

        // Wait briefly for any candidate search box to appear.
        try {
          const selector = searchInputSelectors.join(",");
          logger.debug({ selector }, "等待搜索框选择器");
          await page.waitForSelector(selector, { timeout: 10000 });
          logger.info({ selector }, "搜索框已出现");
        } catch (error) {
          logger.warn(
            { error: describeError(error) },
            "等待搜索框出现超时,将尝试直接查找"
          );
        }

        const searchInput = await findSearchInput(page);
        if (!searchInput) {
          await dumpPageDiagnostics(page);
          logger.error("无法找到搜索框");
          throw new Error("无法找到搜索框");
        }

        await searchInput.click();
        // Type the whole query with a small per-key delay.
        await page.keyboard.type(query, { delay: getRandomDelay(10, 30) });
        await page.waitForTimeout(getRandomDelay(100, 300));
        await page.keyboard.press("Enter");

        logger.info("正在等待页面加载完成...");
        await page.waitForLoadState("domcontentloaded", { timeout });
      }

      logger.info({ url: page.url() }, "正在等待搜索结果加载...");

      try {
        await page.waitForSelector(
          "#search, #rso, .g, [data-sokoban-container], div[role='main']",
          { timeout: timeout / 2 }
        );
        logger.info("搜索结果已加载");
      } catch (error) {
        logger.error("无法找到搜索结果元素");
        throw new Error("无法找到搜索结果元素");
      }

      // Short settle time before scraping.
      await page.waitForTimeout(500);

      logger.info("正在提取搜索结果...");

      const results = await page.$$eval(
        ".g, [data-sokoban-container] > div",
        (elements, maxResults) => {
          return elements
            .slice(0, maxResults)
            .map((el) => {
              const titleElement = el.querySelector("h3");
              const linkElement = el.querySelector("a");
              const snippetElement = el.querySelector(
                ".VwiC3b, [data-sncf='1']"
              );

              return {
                title: titleElement ? titleElement.textContent || "" : "",
                link:
                  linkElement && linkElement instanceof HTMLAnchorElement
                    ? linkElement.href
                    : "",
                snippet: snippetElement
                  ? snippetElement.textContent || ""
                  : "",
              };
            })
            .filter((item) => item.title && item.link); // drop empty entries
        },
        limit
      );

      logger.info({ count: results.length }, "成功获取到搜索结果");

      await persistState(context);

      return {
        kind: "done",
        response: { query, results, language: locale, region },
      };
    } catch (error) {
      logger.error({ error: describeError(error) }, "搜索过程中发生错误");

      return {
        kind: "done",
        response: {
          query,
          results: [
            {
              title: "搜索失败",
              link: "",
              snippet: `无法完成搜索,错误信息: ${describeError(error)}`,
            },
          ],
          language: locale,
          region,
        },
      };
    }
  }

  /**
   * Runs one attempt in the given mode and always releases what it created:
   * the context in every case, the browser only when it was launched here.
   */
  async function performSearch(headless: boolean): Promise<SearchResponse> {
    const browserWasProvided = existingBrowser !== undefined;
    let browser: Browser;
    if (existingBrowser) {
      browser = existingBrowser;
      logger.info("使用已存在的浏览器实例");
    } else {
      browser = await launchBrowser(headless);
    }

    let outcome: SearchOutcome;
    try {
      const context = await browser.newContext(buildContextOptions());
      try {
        await installStealthScripts(context);

        const page = await context.newPage();
        await page.addInitScript(() => {
          // Report a typical desktop screen size and colour depth.
          Object.defineProperty(window.screen, "width", { get: () => 1920 });
          Object.defineProperty(window.screen, "height", { get: () => 1080 });
          Object.defineProperty(window.screen, "colorDepth", {
            get: () => 24,
          });
          Object.defineProperty(window.screen, "pixelDepth", {
            get: () => 24,
          });
        });

        outcome = await searchOnPage(
          page,
          context,
          headless,
          browserWasProvided
        );
      } finally {
        // Closing the context also closes its pages.
        await closeQuietly(
          () => context.close(),
          "关闭浏览器上下文时发生错误"
        );
      }
    } finally {
      if (!browserWasProvided) {
        await closeQuietly(() => browser.close(), "关闭浏览器时发生错误");
      }
    }

    if (outcome.kind === "retryHeaded") {
      // Everything from the headless attempt is closed; retry visibly.
      return performSearch(false);
    }
    return outcome.response;
  }

  return performSearch(useHeadless);
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/modelcontextprotocol-servers/google-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.