Skip to main content
Glama
search.js (12 kB)
import { search as duckDuckGoSearch, SafeSearchType } from "duck-duck-scrape";
import { ensureSearxngRunning } from "./docker-searxng.js";

/*
 * Web search with a provider fallback chain. `searchWeb` tries, in order:
 * duck-duck-scrape, a local SearXNG on localhost:8080, ScraperAPI (only when
 * SCRAPER_API_KEY is set) and a list of public SearXNG instances. Every
 * provider returns an array of `{ url, title, description }` objects.
 */

// The Docker bootstrap for the local SearXNG is attempted at most once per process.
let dockerInitialized = false;
let dockerAvailable = false;

// Browser User-Agent strings rotated across outgoing HTTP requests.
const USER_AGENTS = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
];

/** Picks one entry of USER_AGENTS uniformly at random. */
function getRandomUserAgent() {
  return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
}

const DEFAULT_TIMEOUT = 10000;
const DEFAULT_RETRIES = 2;

// NOTE(review): the provider functions below pass their own literal timeouts
// and attempt counts; this table is not read by any of them in this file.
const PROVIDERS = [
  { name: "duck-duck-scrape", timeout: DEFAULT_TIMEOUT, retries: 2 },
  { name: "searxng-local", timeout: 8000, retries: 1 },
  { name: "scraper-api", timeout: 15000, retries: 1 },
  { name: "searxng-public", timeout: 12000, retries: 1 }
];

const PUBLIC_SEARXNG_INSTANCES = [
  "https://searx.work",
  "https://search.sapti.me",
  "https://paulgo.io",
  "https://searx.info",
  "https://searx.be",
  "https://search.bus-hit.me",
  "https://searx.tiekoetter.com"
];

// Headers sent with every SearXNG request (local and public).
const SEARXNG_HEADERS = {
  "X-Forwarded-For": "127.0.0.1",
  "X-Real-IP": "127.0.0.1"
};

// Lower-case substrings that mark an error message as a rate-limit / throttling failure.
const RATE_LIMIT_MARKERS = [
  "anomaly",
  "too quickly",
  "rate limit",
  "429",
  "service unavailable",
  "503",
  "cloudflare"
];

// Named HTML entities decoded when parsing scraped result pages.
const NAMED_HTML_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"]
]);

// Anchor carrying the result title; group 1 = attribute text, group 2 = inner HTML.
const RESULT_LINK_REGEX = /<a\b([^>]*\bclass="[^"]*\bresult__a\b[^"]*"[^>]*)>([\s\S]*?)<\/a>/g;
// Anchor carrying the result snippet; group 1 = inner HTML.
const RESULT_SNIPPET_REGEX = /<a\b[^>]*\bclass="[^"]*\bresult__snippet\b[^"]*"[^>]*>([\s\S]*?)<\/a>/;
const HREF_REGEX = /\bhref="([^"]+)"/;

/** Resolves after `ms` milliseconds. */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Writes an informational line tagged with the provider name. */
function log(provider, message) {
  console.log(`[Search] [${provider}] ${message}`);
}

/** Returns a printable message for any thrown value (Error or not). */
function getErrorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}

/** Writes a warning line for a provider failure; `error` may be an Error or any other value. */
function logError(provider, error) {
  console.warn(`[Search] [${provider}] Failed: ${getErrorMessage(error)}`);
}

/**
 * Tells whether an error's message contains one of RATE_LIMIT_MARKERS.
 * Values without a `message` (including null/undefined) are never rate-limit errors.
 */
function isRateLimitError(error) {
  const message = String(error?.message ?? "").toLowerCase();
  return RATE_LIMIT_MARKERS.some((marker) => message.includes(marker));
}

/**
 * Creates an AbortController that is aborted after `timeoutMs`.
 * The caller must `clearTimeout(timeoutId)` once the guarded operation settles.
 */
function createAbortController(timeoutMs) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  return { controller, timeoutId };
}

/**
 * Fetches `url`, retrying with exponential backoff (1s, 2s, 4s, ...) only
 * when the failure looks like rate limiting.
 *
 * @param {string} url - Absolute URL to request.
 * @param {string} provider - Provider label used in log lines.
 * @param {number} [retries=3] - Total number of attempts (not extra retries).
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT] - Abort the request after this many ms.
 * @param {Object} [customHeaders] - Extra headers; they override the default User-Agent.
 * @returns {Promise<Response>} The first response whose `ok` flag is true.
 * @throws {Error} On a non-rate-limit failure, on the last failed attempt,
 *   or when `retries` allows no attempt at all.
 */
async function fetchWithRetry(url, provider, retries = 3, timeoutMs = DEFAULT_TIMEOUT, customHeaders) {
  for (let attempt = 0; attempt < retries; attempt++) {
    const { controller, timeoutId } = createAbortController(timeoutMs);
    try {
      let response;
      try {
        response = await fetch(url, {
          signal: controller.signal,
          headers: {
            "User-Agent": getRandomUserAgent(),
            ...customHeaders
          }
        });
      } finally {
        // Always release the timer, including when fetch rejects.
        clearTimeout(timeoutId);
      }
      if (!response.ok) {
        const statusError = new Error(`HTTP ${response.status}: ${response.statusText}`);
        if (isRateLimitError(statusError)) {
          throw statusError;
        }
        throw new Error(`HTTP Error: ${response.status}`);
      }
      return response;
    } catch (error) {
      const rateLimited = isRateLimitError(error);
      const isLastAttempt = attempt === retries - 1;
      if (error?.name === "AbortError") {
        logError(provider, `Timeout (${timeoutMs}ms)`);
      }
      if (!rateLimited || isLastAttempt) {
        throw error;
      }
      const backoffMs = 2 ** attempt * 1000;
      log(provider, `Rate limited, retrying in ${backoffMs}ms (attempt ${attempt + 1}/${retries})`);
      await delay(backoffMs);
    }
  }
  throw new Error("Fetch attempts exhausted");
}

/**
 * Searches through the duck-duck-scrape package, making up to three attempts
 * and waiting 2s, then 4s, before the second and third one.
 *
 * @param {string} query - Search terms.
 * @param {number} maxResults - Maximum number of results to return.
 * @returns {Promise<Array<{url: string, title: string, description: string}>>}
 * @throws {Error} The error of the last attempt when all attempts fail.
 */
async function searchDuckDuckGo(query, maxResults) {
  const MAX_RETRIES = 3;
  const BASE_DELAY = 2000;
  for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
    log("duck-duck-scrape", `Attempt ${attempt}/${MAX_RETRIES} - Searching for: "${query}"`);
    try {
      if (attempt > 1) {
        const delayMs = BASE_DELAY * 2 ** (attempt - 2);
        log("duck-duck-scrape", `Waiting ${delayMs}ms before retry...`);
        await delay(delayMs);
      }
      const searchResults = await duckDuckGoSearch(query, {
        safeSearch: SafeSearchType.OFF
      });
      if (!searchResults.results || searchResults.results.length === 0) {
        throw new Error("No results returned");
      }
      const formatted = searchResults.results.slice(0, maxResults).map((r) => ({
        url: r.link || r.url || "",
        title: r.title || "",
        description: r.snippet || r.description || ""
      }));
      log("duck-duck-scrape", `Success - found ${formatted.length} results`);
      return formatted;
    } catch (error) {
      if (attempt === MAX_RETRIES) {
        logError("duck-duck-scrape", error);
        throw error;
      }
      // Every failure before the last attempt is retried; say why instead of
      // dropping the error silently.
      if (isRateLimitError(error)) {
        log("duck-duck-scrape", `Rate limited, will retry...`);
      } else {
        log("duck-duck-scrape", `Failed (${getErrorMessage(error)}), will retry...`);
      }
    }
  }
  throw new Error("All DuckDuckGo attempts failed");
}

/** Builds the JSON search URL of a SearXNG instance rooted at `baseUrl`. */
function buildSearxngUrl(baseUrl, query, maxResults) {
  return `${baseUrl}/search?q=${encodeURIComponent(query)}&format=json&number_of_results=${maxResults}`;
}

/**
 * Converts a parsed SearXNG JSON payload into the common result shape.
 *
 * @throws {Error} "No results returned" when the payload has no non-empty `results` array.
 */
function formatSearxngResults(data, maxResults) {
  const results = data?.results;
  if (!Array.isArray(results) || results.length === 0) {
    throw new Error("No results returned");
  }
  return results.slice(0, maxResults).map((r) => ({
    url: r.url,
    title: r.title,
    description: r.content || ""
  }));
}

/**
 * Searches the SearXNG instance at http://localhost:8080. On the first call
 * it asks `ensureSearxngRunning` to start the Docker container; the search is
 * attempted even when that step reports failure or throws.
 *
 * @param {string} query - Search terms.
 * @param {number} maxResults - Maximum number of results to return.
 * @returns {Promise<Array<{url: string, title: string, description: string}>>}
 * @throws {Error} When the request fails or yields no results.
 */
async function searchSearxngLocal(query, maxResults) {
  if (!dockerInitialized) {
    dockerInitialized = true;
    log("searxng-local", "Attempting to auto-start Docker SearXNG...");
    try {
      dockerAvailable = await ensureSearxngRunning();
    } catch (error) {
      // A failing bootstrap is not fatal: an instance may already be listening.
      dockerAvailable = false;
      logError("searxng-local", error);
    }
    if (dockerAvailable) {
      log("searxng-local", "Docker SearXNG is ready");
    } else {
      log("searxng-local", "Docker SearXNG not available, will try anyway...");
    }
  }
  log("searxng-local", `Searching for: "${query}"`);
  const url = buildSearxngUrl("http://localhost:8080", query, maxResults);
  try {
    const response = await fetchWithRetry(url, "searxng-local", 1, 8000, SEARXNG_HEADERS);
    const formatted = formatSearxngResults(await response.json(), maxResults);
    log("searxng-local", `Success - found ${formatted.length} results`);
    return formatted;
  } catch (error) {
    logError("searxng-local", error);
    throw error;
  }
}

/** Decodes numeric entities and the names listed in NAMED_HTML_ENTITIES; anything else is left as is. */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body[0] === "#") {
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws above U+10FFFF, so keep such entities verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
    return NAMED_HTML_ENTITIES.get(body.toLowerCase()) ?? entity;
  });
}

/** Reduces an HTML fragment to trimmed plain text (tags removed, entities decoded). */
function htmlToText(fragment) {
  return decodeHtmlEntities(fragment.replace(/<[^>]*>/g, "")).trim();
}

/**
 * Turns the `href` of a result anchor into the target URL: entities are
 * decoded, protocol-relative links get `https:`, and DuckDuckGo redirect
 * links (`/l/?uddg=<target>`) are unwrapped to their `uddg` target.
 */
function resolveDuckDuckGoHref(href) {
  const decoded = decodeHtmlEntities(href.trim());
  const absolute = decoded.startsWith("//") ? `https:${decoded}` : decoded;
  try {
    const parsed = new URL(absolute, "https://duckduckgo.com");
    const target = parsed.searchParams.get("uddg");
    const isRedirect = parsed.hostname.endsWith("duckduckgo.com") && parsed.pathname.startsWith("/l");
    if (isRedirect && target) {
      return target;
    }
  } catch {
    // Unparseable href: fall through and return it as found.
  }
  return absolute;
}

/**
 * Extracts up to `maxResults` results from a DuckDuckGo HTML result page.
 * Each `result__a` anchor is paired with the first `result__snippet` anchor
 * found before the next `result__a` anchor, so a result without a snippet
 * cannot borrow the snippet of a later result.
 *
 * @param {string} html - Page markup.
 * @param {number} maxResults - Maximum number of results to return.
 * @returns {Array<{url: string, title: string, description: string}>}
 */
function parseDuckDuckGoHtml(html, maxResults) {
  const links = [...html.matchAll(RESULT_LINK_REGEX)];
  const results = [];
  for (let i = 0; i < links.length && results.length < maxResults; i++) {
    const [whole, attributes, titleHtml] = links[i];
    const href = HREF_REGEX.exec(attributes)?.[1];
    const title = htmlToText(titleHtml);
    if (!href || !title) {
      continue;
    }
    const searchStart = links[i].index + whole.length;
    const searchEnd = links[i + 1]?.index ?? html.length;
    const snippetHtml = RESULT_SNIPPET_REGEX.exec(html.slice(searchStart, searchEnd))?.[1] ?? "";
    results.push({
      url: resolveDuckDuckGoHref(href),
      title,
      description: htmlToText(snippetHtml)
    });
  }
  return results;
}

/**
 * Fetches the DuckDuckGo HTML result page through ScraperAPI and parses it.
 *
 * @param {string} query - Search terms.
 * @param {number} maxResults - Maximum number of results to return.
 * @returns {Promise<Array<{url: string, title: string, description: string}>>}
 * @throws {Error} When SCRAPER_API_KEY is unset, the request fails, or no
 *   result can be parsed from the page.
 */
async function searchScraperAPI(query, maxResults) {
  const apiKey = process.env.SCRAPER_API_KEY;
  if (!apiKey) {
    throw new Error("SCRAPER_API_KEY not configured");
  }
  log("scraper-api", `Searching for: "${query}"`);
  const duckUrl = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
  // HTTPS keeps the API key out of plaintext traffic.
  const scraperUrl = `https://api.scraperapi.com/?api_key=${encodeURIComponent(apiKey)}&url=${encodeURIComponent(duckUrl)}`;
  try {
    const response = await fetchWithRetry(scraperUrl, "scraper-api", 1, 15000);
    const results = parseDuckDuckGoHtml(await response.text(), maxResults);
    if (results.length === 0) {
      throw new Error("No results parsed from HTML");
    }
    log("scraper-api", `Success - found ${results.length} results`);
    return results;
  } catch (error) {
    logError("scraper-api", error);
    throw error;
  }
}

/**
 * Searches one public SearXNG instance.
 *
 * @param {string} instance - Base URL of the instance, without trailing slash.
 * @param {string} query - Search terms.
 * @param {number} maxResults - Maximum number of results to return.
 * @returns {Promise<Array<{url: string, title: string, description: string}>>}
 * @throws {Error} When the request fails or yields no results.
 */
async function searchSearxngPublic(instance, query, maxResults) {
  log("searxng-public", `Trying ${instance}...`);
  const url = buildSearxngUrl(instance, query, maxResults);
  try {
    const response = await fetchWithRetry(url, `searxng-public(${instance})`, 1, 12000, SEARXNG_HEADERS);
    const formatted = formatSearxngResults(await response.json(), maxResults);
    log("searxng-public", `Success with ${instance} - found ${formatted.length} results`);
    return formatted;
  } catch (error) {
    logError(`searxng-public(${instance})`, error);
    throw error;
  }
}

/**
 * Runs the provider fallback chain and returns the results of the first
 * provider that succeeds.
 *
 * @param {string} query - Search terms.
 * @param {number} [maxResults=5] - Maximum number of results to return.
 * @returns {Promise<Array<{url: string, title: string, description: string}>>}
 * @throws {Error} "All search providers failed. Chain: ..." listing each
 *   provider's error message when every provider fails.
 */
export async function searchWeb(query, maxResults = 5) {
  log("orchestrator", "Starting fallback chain...");
  const errors = [];
  const recordFailure = (provider, error) => {
    errors.push({ provider, error: getErrorMessage(error) });
  };

  try {
    log("orchestrator", "Step 1/4: Trying duck-duck-scrape (primary)...");
    return await searchDuckDuckGo(query, maxResults);
  } catch (e) {
    recordFailure("duck-duck-scrape", e);
  }

  try {
    log("orchestrator", "Step 2/4: Trying SearXNG local (localhost:8080)...");
    return await searchSearxngLocal(query, maxResults);
  } catch (e) {
    recordFailure("searxng-local", e);
    log("orchestrator", "Note: Docker SearXNG may not be running");
  }

  if (process.env.SCRAPER_API_KEY) {
    try {
      log("orchestrator", "Step 3/4: Trying ScraperAPI...");
      return await searchScraperAPI(query, maxResults);
    } catch (e) {
      recordFailure("scraper-api", e);
    }
  } else {
    log("orchestrator", "Step 3/4: Skipping ScraperAPI (SCRAPER_API_KEY not configured)");
  }

  log("orchestrator", `Step 4/4: Trying ${PUBLIC_SEARXNG_INSTANCES.length} public SearXNG instances...`);
  for (const instance of PUBLIC_SEARXNG_INSTANCES) {
    try {
      return await searchSearxngPublic(instance, query, maxResults);
    } catch (e) {
      recordFailure(`searxng-public(${instance})`, e);
    }
  }

  const errorSummary = errors
    .map((e) => `${e.provider}: ${e.error}`)
    .join(" | ");
  throw new Error(`All search providers failed. Chain: ${errorSummary}`);
}
//# sourceMappingURL=search.js.map

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/alucardeht/isis-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.