
Model Context Protocol (MCP) Server

by infinyte
web-search.js (10.2 kB)
const axios = require('axios');
const cheerio = require('cheerio');
const crypto = require('crypto');
const fs = require('fs');
const path = require('path');

// Simple cache implementation
const cacheDir = path.join(__dirname, '../../cache');
if (!fs.existsSync(cacheDir)) {
  fs.mkdirSync(cacheDir, { recursive: true });
}

/**
 * Creates a hash for the URL to use as a cache key
 * @param {string} url - The URL to hash
 * @returns {string} - The hashed URL
 */
function createCacheKey(url) {
  return crypto.createHash('md5').update(url).digest('hex');
}

/**
 * Check if a URL is cached and not expired
 * @param {string} url - The URL to check
 * @param {number} maxAge - The maximum age of the cache in milliseconds
 * @returns {string|Object|null} - The cached content, or null if not cached or expired
 */
function getFromCache(url, maxAge = 3600000) { // Default: 1 hour
  const cacheKey = createCacheKey(url);
  const cachePath = path.join(cacheDir, `${cacheKey}.json`);

  if (fs.existsSync(cachePath)) {
    try {
      const cacheData = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
      const cacheTime = new Date(cacheData.timestamp);
      const now = new Date();

      if (now - cacheTime < maxAge) {
        return cacheData.content;
      }
    } catch (error) {
      console.error('Error reading from cache:', error);
    }
  }

  return null;
}

/**
 * Save content to cache
 * @param {string} url - The URL to cache
 * @param {string|Object} content - The content to cache
 */
function saveToCache(url, content) {
  const cacheKey = createCacheKey(url);
  const cachePath = path.join(cacheDir, `${cacheKey}.json`);

  try {
    const cacheData = {
      url,
      content,
      timestamp: new Date().toISOString()
    };

    fs.writeFileSync(cachePath, JSON.stringify(cacheData));
  } catch (error) {
    console.error('Error writing to cache:', error);
  }
}

/**
 * Fetch the content of a webpage
 * @param {string} url - The URL to fetch
 * @param {boolean} useCache - Whether to use the cache
 * @param {number} maxAge - The maximum age of the cache in milliseconds
 * @returns {Promise<string>} - The HTML content of the webpage
 */
async function fetchWebpage(url, useCache = true, maxAge = 3600000) {
  // If using cache, check if the URL is cached
  if (useCache) {
    const cachedContent = getFromCache(url, maxAge);
    if (cachedContent) {
      return cachedContent;
    }
  }

  try {
    // Set headers to mimic a browser
    const headers = {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
      'Referer': 'https://www.google.com/'
    };

    const response = await axios.get(url, { headers });
    const html = response.data;

    // Cache the result if using cache
    if (useCache) {
      saveToCache(url, html);
    }

    return html;
  } catch (error) {
    console.error(`Error fetching webpage ${url}:`, error.message);
    throw new Error(`Failed to fetch webpage: ${error.message}`);
  }
}

/**
 * Extract text content from HTML
 * @param {string} html - The HTML content
 * @returns {Object} - The extracted title, description, and content
 */
function extractTextContent(html) {
  const $ = cheerio.load(html);

  // Remove script and style elements
  $('script, style, noscript, iframe, img').remove();

  // Extract title
  const title = $('title').text().trim();

  // Extract meta description
  const metaDescription = $('meta[name="description"]').attr('content') || '';

  // Extract main content - focus on main content areas
  const mainContent = [];

  // Try to find main content areas
  const contentSelectors = [
    'main', 'article', '.content', '.main', '#content', '#main',
    '[role="main"]', '.post', '.entry', '.blog-post'
  ];

  // Get text from specified selectors or fall back to body
  let contentFound = false;
  contentSelectors.forEach(selector => {
    if ($(selector).length && !contentFound) {
      contentFound = true;
      mainContent.push($(selector).text().trim());
    }
  });

  // If no content found with specific selectors, use body
  if (!contentFound) {
    mainContent.push($('body').text().trim());
  }

  // Clean the text
  const cleanedContent = mainContent.join('\n')
    .replace(/\s+/g, ' ')
    .replace(/\n+/g, '\n')
    .trim();

  return {
    title,
    description: metaDescription,
    content: cleanedContent
  };
}

/**
 * Get a summarized version of a URL's content
 * @param {string} url - The URL to fetch and summarize
 * @param {boolean} useCache - Whether to use the cache
 * @returns {Promise<Object>} - The summarized content
 */
async function getWebpageContent(url, useCache = true) {
  try {
    const html = await fetchWebpage(url, useCache);
    const content = extractTextContent(html);

    return {
      url,
      title: content.title,
      description: content.description,
      content: content.content,
      extractedAt: new Date().toISOString()
    };
  } catch (error) {
    console.error(`Error getting webpage content for ${url}:`, error.message);
    throw error;
  }
}

/**
 * Search the web using Google Custom Search API
 * @param {string} query - The search query
 * @param {number} limit - The maximum number of results to return
 * @returns {Promise<Object>} - Search results
 */
async function searchWeb(query, limit = 5) {
  console.log(`Searching web for: ${query} (limit: ${limit})`);

  // Create cache key for this search
  const cacheKey = `search:${query}:${limit}`;

  // Try to get results from cache first
  const cachedResults = getFromCache(cacheKey, 3600000); // 1 hour cache
  if (cachedResults) {
    console.log('Returning cached search results');
    return cachedResults;
  }

  // Check for Google API key
  if (!process.env.GOOGLE_CSE_API_KEY || !process.env.GOOGLE_CSE_ID) {
    console.warn('GOOGLE_CSE_API_KEY or GOOGLE_CSE_ID not set. Using fallback search.');

    // Try Bing search if available
    if (process.env.BING_SEARCH_API_KEY) {
      return await searchBing(query, limit);
    }

    return {
      query,
      searchedAt: new Date().toISOString(),
      message: "Search API credentials not configured. Please set GOOGLE_CSE_API_KEY and GOOGLE_CSE_ID in your environment variables.",
      results: []
    };
  }

  try {
    const apiKey = process.env.GOOGLE_CSE_API_KEY;
    const cseId = process.env.GOOGLE_CSE_ID;
    const url = `https://www.googleapis.com/customsearch/v1?key=${apiKey}&cx=${cseId}&q=${encodeURIComponent(query)}&num=${limit}`;

    const response = await axios.get(url);

    if (response.status !== 200) {
      throw new Error(`Google API error: ${response.status}`);
    }

    const data = response.data;

    // Process and format results
    const results = data.items ?
      data.items.map(item => ({
        title: item.title,
        link: item.link,
        snippet: item.snippet,
        displayLink: item.displayLink,
        source: 'google'
      })) : [];

    const searchResults = {
      query,
      searchedAt: new Date().toISOString(),
      totalResults: data.searchInformation?.totalResults || 0,
      searchTime: data.searchInformation?.searchTime || 0,
      results: results.slice(0, limit)
    };

    // Cache results
    saveToCache(cacheKey, searchResults);

    return searchResults;
  } catch (error) {
    console.error('Google Search API error:', error.message);

    // Try Bing as fallback if available
    if (process.env.BING_SEARCH_API_KEY) {
      console.log('Trying Bing Search API as fallback...');
      return await searchBing(query, limit);
    }

    // Return a graceful error response
    return {
      query,
      searchedAt: new Date().toISOString(),
      error: error.message,
      results: []
    };
  }
}

/**
 * Search the web using Bing Search API (fallback)
 * @param {string} query - The search query
 * @param {number} limit - The maximum number of results to return
 * @returns {Promise<Object>} - Search results
 */
async function searchBing(query, limit = 5) {
  if (!process.env.BING_SEARCH_API_KEY) {
    return {
      query,
      searchedAt: new Date().toISOString(),
      message: "Bing Search API key not configured. Please set BING_SEARCH_API_KEY in your environment variables.",
      results: []
    };
  }

  try {
    const apiKey = process.env.BING_SEARCH_API_KEY;
    const url = `https://api.bing.microsoft.com/v7.0/search?q=${encodeURIComponent(query)}&count=${limit}`;

    const response = await axios.get(url, {
      headers: {
        'Ocp-Apim-Subscription-Key': apiKey
      }
    });

    if (response.status !== 200) {
      throw new Error(`Bing API error: ${response.status}`);
    }

    const data = response.data;

    // Process and format results
    const results = data.webPages?.value ?
      data.webPages.value.map(item => ({
        title: item.name,
        link: item.url,
        snippet: item.snippet,
        displayLink: item.displayUrl,
        source: 'bing'
      })) : [];

    return {
      query,
      searchedAt: new Date().toISOString(),
      totalResults: data.webPages?.totalEstimatedMatches || 0,
      results: results.slice(0, limit)
    };
  } catch (error) {
    console.error('Bing Search API error:', error.message);

    // Return a graceful error response
    return {
      query,
      searchedAt: new Date().toISOString(),
      error: error.message,
      results: []
    };
  }
}

/**
 * Fetch and extract content from multiple URLs in parallel
 * @param {Array<string>} urls - An array of URLs to fetch
 * @param {boolean} useCache - Whether to use the cache
 * @returns {Promise<Array>} - An array of extracted content
 */
async function fetchMultipleUrls(urls, useCache = true) {
  try {
    const promises = urls.map(url => getWebpageContent(url, useCache));
    return await Promise.all(promises);
  } catch (error) {
    console.error('Error fetching multiple URLs:', error.message);
    throw error;
  }
}

module.exports = {
  getWebpageContent,
  searchWeb,
  searchBing,
  fetchMultipleUrls
};
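For orientation, here is a minimal sketch of how the module's exports might be called from another file in the same project. The require path, the caller file name, and the environment setup are assumptions for illustration only; the functions, parameters, and result fields come from the file above.

// usage-example.js - a sketch, assuming web-search.js sits at ./tools/web-search.js
// and GOOGLE_CSE_API_KEY/GOOGLE_CSE_ID (or BING_SEARCH_API_KEY) are set in the environment.
const { searchWeb, getWebpageContent, fetchMultipleUrls } = require('./tools/web-search');

async function main() {
  // Search the web; falls back to Bing or an empty result set if Google credentials are missing.
  const search = await searchWeb('model context protocol', 3);
  console.log(search.results.map(r => r.link));

  // Fetch and extract readable text from the top result (uses the on-disk cache by default).
  if (search.results.length > 0) {
    const page = await getWebpageContent(search.results[0].link);
    console.log(page.title, page.content.slice(0, 200));
  }

  // Fetch several pages in parallel.
  const pages = await fetchMultipleUrls(['https://example.com', 'https://example.org']);
  console.log(pages.map(p => p.title));
}

main().catch(console.error);

Note that fetchMultipleUrls uses Promise.all, so a single failing URL rejects the whole batch; callers that want partial results would need to handle errors per URL themselves.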

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/infinyte/mcp-server'
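The same lookup can be done programmatically. The sketch below simply fetches the endpoint and prints the returned JSON; the response schema is not documented here, so no specific fields are assumed.

// Node 18+ provides a global fetch; the printed structure is whatever the API returns.
const url = 'https://glama.ai/api/mcp/v1/servers/infinyte/mcp-server';

fetch(url)
  .then(res => {
    if (!res.ok) throw new Error(`Request failed: ${res.status}`);
    return res.json();
  })
  .then(data => console.log(JSON.stringify(data, null, 2)))
  .catch(console.error);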

If you have feedback or need assistance with the MCP directory API, please join our Discord server.