Skip to main content
Glama
bing.ts (4.74 kB)
/**
 * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 * SPDX-License-Identifier: Apache-2.0
 */
import type { Page } from 'puppeteer-core';
import type { SearchEngineAdapter, SearchResult } from '../types.js';

/** Number of results requested when the caller supplies no usable `count`. */
const DEFAULT_RESULT_COUNT = 10;

/** Milliseconds to wait for the results container when no timeout is supplied. */
const DEFAULT_WAIT_TIMEOUT_MS = 10000;

/**
 * Bing search engine adapter implementation.
 * Provides functionality to generate Bing search URLs and extract search results from Bing search pages.
 */
export class BingSearchEngine implements SearchEngineAdapter {
  /**
   * Generates a Bing search URL based on the provided query and options.
   *
   * @param query - The search query string
   * @param options - Search configuration options (may be omitted entirely)
   * @param options.count - Number of search results to request. Values that are
   *   missing, non-finite or below 1 fall back to 10; fractional values are floored.
   * @param options.excludeDomains - Domain names to exclude from search results.
   *   Each becomes a `-site:<domain>` operator placed before the query; blank
   *   entries are ignored.
   * @returns Formatted Bing search URL as a string
   */
  getSearchUrl(
    query: string,
    options: {
      count?: number;
      excludeDomains?: string[];
    } = {},
  ): string {
    // Drop blank entries so the query never contains a dangling "-site:" operator.
    const exclusions = (options.excludeDomains ?? [])
      .map((domain) => domain.trim())
      .filter((domain) => domain.length > 0)
      .map((domain) => `-site:${domain}`);

    const requested = options.count;
    const count =
      typeof requested === 'number' &&
      Number.isFinite(requested) &&
      requested >= 1
        ? Math.floor(requested)
        : DEFAULT_RESULT_COUNT;

    const searchParams = new URLSearchParams({
      q: [...exclusions, query].join(' '),
      count: String(count),
    });

    return `https://www.bing.com/search?${searchParams.toString()}`;
  }

  /**
   * Extracts search results from a Bing search page.
   *
   * A result is kept only when it has a non-empty title and an absolute
   * http(s) URL; everything else is skipped silently.
   *
   * NOTE(review): the helpers below are deliberately declared inside this
   * method so that it references nothing outside its own body — presumably it
   * is serialized and evaluated in the page context; confirm against the caller
   * before hoisting anything out.
   *
   * @param window - The browser window object containing the loaded Bing search page
   * @returns Array of search results extracted from the page
   * @throws Re-throws any error raised while reading the DOM, after logging it
   */
  extractSearchResults(window: Window): SearchResult[] {
    const links: SearchResult[] = [];
    const document = window.document;

    /**
     * Validates that a string is an absolute http(s) URL.
     *
     * @param url - The URL string to validate
     * @returns Boolean indicating if the URL is valid
     */
    const isValidUrl = (url: string): boolean => {
      try {
        const { protocol } = new URL(url);
        return protocol === 'http:' || protocol === 'https:';
      } catch {
        // `new URL` throws on relative or malformed input.
        return false;
      }
    };

    /**
     * Extracts the snippet text from a search result element.
     *
     * @param element - The search result element
     * @returns The extracted snippet text with whitespace collapsed
     */
    const extractSnippet = (element: Element): string => {
      // Work on a clone so the live DOM is never modified.
      const clone = element.cloneNode(true) as Element;

      // Strip everything that is not snippet text: the title (h2), the
      // cite/URL line (.b_attribution) and any script/style content.
      clone
        .querySelectorAll('h2, .b_attribution, script, style')
        .forEach((el) => el.remove());

      // Descendants are visited in document order, so a container's text is
      // seen before the text of its children; fragments that overlap with
      // something already collected are skipped to avoid repetition.
      const fragments: string[] = [];
      for (const node of Array.from(clone.querySelectorAll('*'))) {
        const text = node.textContent?.trim();
        if (!text) continue;

        const overlaps = fragments.some(
          (seen) => seen.includes(text) || text.includes(seen),
        );
        if (!overlaps) fragments.push(text);
      }

      // If the cleaned element has no descendant elements with text, fall
      // back to whatever text the element itself carries.
      if (fragments.length === 0) {
        const ownText = clone.textContent?.trim();
        if (ownText) fragments.push(ownText);
      }

      return fragments.join(' ').trim().replace(/\s+/g, ' ');
    };

    try {
      // Bing search results are in elements with class 'b_algo'
      document.querySelectorAll('.b_algo').forEach((element) => {
        const title = element.querySelector('h2')?.textContent?.trim();
        const url = element.querySelector('h2 a')?.getAttribute('href');

        // Validate first: building the snippet clones the subtree, which is
        // wasted work for results that are going to be discarded.
        if (!title || !url || !isValidUrl(url)) return;

        links.push({
          title,
          snippet: extractSnippet(element),
          url,
          content: '',
        });
      });
    } catch (error) {
      console.error('Error extracting search results from Bing:', error);
      throw error;
    }

    return links;
  }

  /**
   * Waits for Bing search results to load completely.
   *
   * @param page - The Puppeteer page object
   * @param timeout - Maximum time to wait in milliseconds (default: 10000)
   * @returns Promise that resolves once the `#b_results` element is present
   */
  async waitForSearchResults(page: Page, timeout?: number): Promise<void> {
    await page.waitForSelector('#b_results', {
      timeout: timeout ?? DEFAULT_WAIT_TIMEOUT_MS,
    });
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yokingma/one-search-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.