DuckDuckGo Search MCP

Overview Schema Related Servers Score Discussions

search.js•11.7 KiB

import axios from 'axios'; import * as cheerio from 'cheerio'; import https from 'https'; // Constants const RESULTS_PER_PAGE = 10; const MAX_CACHE_PAGES = 5; // Rotating User Agents const USER_AGENTS = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Edge/120.0.0.0', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' ]; // Cache results to avoid repeated requests const resultsCache = new Map(); const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes // HTTPS agent configuration to handle certificate chain issues const httpsAgent = new https.Agent({ rejectUnauthorized: true, // Keep security enabled keepAlive: true, timeout: 10000, // Provide fallback for certificate issues while maintaining security secureProtocol: 'TLSv1_2_method' }); /** * Get a random user agent from the list * @returns {string} A random user agent string */ function getRandomUserAgent() { return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)]; } /** * Generate a cache key for a search query and page * @param {string} query - The search query * @param {number} page - The page number * @returns {string} The cache key */ function getCacheKey(query, page) { return `${query}-${page}`; } /** * Clear old entries from the cache */ function clearOldCache() { const now = Date.now(); for (const [key, value] of resultsCache.entries()) { if (now - value.timestamp > CACHE_DURATION) { resultsCache.delete(key); } } } /** * Extract the direct URL from a DuckDuckGo redirect URL * @param {string} duckduckgoUrl - The DuckDuckGo URL to extract from * @returns {string} The direct URL */ function extractDirectUrl(duckduckgoUrl) { try { // Handle relative URLs from DuckDuckGo if (duckduckgoUrl.startsWith('//')) { duckduckgoUrl = 'https:' + duckduckgoUrl; } else if (duckduckgoUrl.startsWith('/')) { duckduckgoUrl = 'https://duckduckgo.com' + duckduckgoUrl; } const url = new URL(duckduckgoUrl); // Extract direct URL from DuckDuckGo redirect if (url.hostname === 'duckduckgo.com' && url.pathname === '/l/') { const uddg = url.searchParams.get('uddg'); if (uddg) { return decodeURIComponent(uddg); } } // Handle ad redirects if (url.hostname === 'duckduckgo.com' && url.pathname === '/y.js') { const u3 = url.searchParams.get('u3'); if (u3) { try { const decodedU3 = decodeURIComponent(u3); const u3Url = new URL(decodedU3); const clickUrl = u3Url.searchParams.get('ld'); if (clickUrl) { return decodeURIComponent(clickUrl); } return decodedU3; } catch { return duckduckgoUrl; } } } return duckduckgoUrl; } catch { // If URL parsing fails, try to extract URL from a basic string match const urlMatch = duckduckgoUrl.match(/https?:\/\/[^\s<>"]+/); if (urlMatch) { return urlMatch[0]; } return duckduckgoUrl; } } /** * Get a favicon URL for a given website URL * @param {string} url - The website URL * @returns {string} The favicon URL */ function getFaviconUrl(url) { try { const urlObj = new URL(url); return `https://www.google.com/s2/favicons?domain=${urlObj.hostname}&sz=32`; } catch { return ''; // Return empty string if URL is invalid } } /** * Scrapes search results from DuckDuckGo HTML * @param {string} query - The search query * @param {number} page - The page number (default: 1) * @param {number} numResults - Number of results to return (default: 10) * @returns {Promise<Array>} - Array of search results */ async function searchDuckDuckGo(query, page = 1, numResults = 10) { try { // Clear old cache entries clearOldCache(); // Calculate start index for pagination const startIndex = (page - 1) * RESULTS_PER_PAGE; // Check cache first const cacheKey = getCacheKey(query, page); const cachedResults = resultsCache.get(cacheKey); if (cachedResults && Date.now() - cachedResults.timestamp < CACHE_DURATION) { return cachedResults.results.slice(0, numResults); } // Get a random user agent const userAgent = getRandomUserAgent(); // Fetch results const response = await axios.get( `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}&s=${startIndex}`, { headers: { 'User-Agent': userAgent }, httpsAgent: httpsAgent } ); if (response.status !== 200) { throw new Error('Failed to fetch search results'); } const html = response.data; // Parse results using cheerio const $ = cheerio.load(html); const results = []; $('.result').each((i, result) => { const $result = $(result); const titleEl = $result.find('.result__title a'); const linkEl = $result.find('.result__url'); const snippetEl = $result.find('.result__snippet'); const title = titleEl.text()?.trim(); const rawLink = titleEl.attr('href'); const description = snippetEl.text()?.trim(); const displayUrl = linkEl.text()?.trim(); const directLink = extractDirectUrl(rawLink || ''); const favicon = getFaviconUrl(directLink); if (title && directLink) { results.push({ title, url: directLink, snippet: description || '', favicon: favicon, displayUrl: displayUrl || '' }); } }); // Get paginated results const paginatedResults = results.slice(0, numResults); // Cache the results resultsCache.set(cacheKey, { results: paginatedResults, timestamp: Date.now() }); // If cache is too big, remove oldest entries if (resultsCache.size > MAX_CACHE_PAGES) { const oldestKey = Array.from(resultsCache.keys())[0]; resultsCache.delete(oldestKey); } return paginatedResults; } catch (error) { console.error('Error searching DuckDuckGo:', error.message); throw error; } } /** * Fetches the content of a URL and returns it as text * @param {string} url - The URL to fetch * @param {Object} options - Options for content extraction * @param {boolean} options.extractMainContent - Whether to attempt to extract main content (default: true) * @param {boolean} options.includeLinks - Whether to include link text (default: true) * @param {boolean} options.includeImages - Whether to include image alt text (default: true) * @param {string[]} options.excludeTags - Tags to exclude from extraction * @returns {Promise<string>} - The content of the URL */ async function fetchUrlContent(url, options = {}) { try { // Default options const { extractMainContent = true, includeLinks = true, includeImages = true, excludeTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'nav', 'footer', 'header', 'aside'] } = options; // Get a random user agent const userAgent = getRandomUserAgent(); const response = await axios.get(url, { headers: { 'User-Agent': userAgent }, timeout: 10000, // 10 second timeout httpsAgent: httpsAgent }); if (response.status !== 200) { throw new Error(`Failed to fetch URL: ${url}`); } // If the content is HTML, extract the text content const contentType = response.headers['content-type'] || ''; if (contentType.includes('text/html')) { const $ = cheerio.load(response.data); // Remove unwanted elements excludeTags.forEach(tag => { $(tag).remove(); }); // Remove ads and other common unwanted elements const unwantedSelectors = [ '[id*="ad"]', '[class*="ad"]', '[id*="banner"]', '[class*="banner"]', '[id*="popup"]', '[class*="popup"]', '[class*="cookie"]', '[id*="cookie"]', '[class*="newsletter"]', '[id*="newsletter"]', '[class*="social"]', '[id*="social"]', '[class*="share"]', '[id*="share"]' ]; unwantedSelectors.forEach(selector => { try { $(selector).remove(); } catch (e) { // Ignore invalid selectors } }); // Handle links and images if (!includeLinks) { $('a').each((i, link) => { $(link).replaceWith($(link).text()); }); } if (!includeImages) { $('img').remove(); } else { // Replace images with their alt text $('img').each((i, img) => { const alt = $(img).attr('alt'); if (alt) { $(img).replaceWith(`[Image: ${alt}]`); } else { $(img).remove(); } }); } // Try to extract main content if requested if (extractMainContent) { // Common content selectors in order of priority const contentSelectors = [ 'article', 'main', '[role="main"]', '.post-content', '.article-content', '.content', '#content', '.post', '.article', '.entry-content', '.page-content', '.post-body', '.post-text', '.story-body' ]; for (const selector of contentSelectors) { const mainContent = $(selector).first(); if (mainContent.length > 0) { // Clean up the content return cleanText(mainContent.text()); } } } // If no main content found or not requested, use the body return cleanText($('body').text()); } // For non-HTML content, return as is return response.data.toString(); } catch (error) { console.error('Error fetching URL content:', error.message); throw error; } } /** * Cleans up text by removing excessive whitespace and normalizing line breaks * @param {string} text - The text to clean * @returns {string} - The cleaned text */ function cleanText(text) { return text .replace(/\s+/g, ' ') // Replace multiple whitespace with single space .replace(/\n\s*\n/g, '\n\n') // Normalize multiple line breaks .replace(/^\s+|\s+$/g, '') // Trim start and end .trim(); } /** * Extracts metadata from a URL (title, description, etc.) * @param {string} url - The URL to extract metadata from * @returns {Promise<Object>} - The metadata */ async function extractUrlMetadata(url) { try { // Get a random user agent const userAgent = getRandomUserAgent(); const response = await axios.get(url, { headers: { 'User-Agent': userAgent }, httpsAgent: httpsAgent }); if (response.status !== 200) { throw new Error(`Failed to fetch URL: ${url}`); } const $ = cheerio.load(response.data); // Extract metadata const title = $('title').text() || ''; const description = $('meta[name="description"]').attr('content') || $('meta[property="og:description"]').attr('content') || ''; const ogImage = $('meta[property="og:image"]').attr('content') || ''; const favicon = $('link[rel="icon"]').attr('href') || $('link[rel="shortcut icon"]').attr('href') || ''; // Resolve relative URLs const resolvedFavicon = favicon ? new URL(favicon, url).href : getFaviconUrl(url); const resolvedOgImage = ogImage ? new URL(ogImage, url).href : ''; return { title, description, ogImage: resolvedOgImage, favicon: resolvedFavicon, url }; } catch (error) { console.error('Error extracting URL metadata:', error.message); throw error; } } export { searchDuckDuckGo, fetchUrlContent, extractUrlMetadata, extractDirectUrl, getFaviconUrl };

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OEvortex/ddg_search'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

search.js•11.7 KiB