Skip to main content
Glama
search.ts13.1 kB
/** * Search namespace implementation */ import axios from 'axios'; import * as cheerio from 'cheerio'; import { MCPServer } from '../core/server.js'; import { MCPTool } from '../types/core.js'; import { SearchEngine, HttpMethod, WebSearchResponse, FetchResponse, ExtractTarget, ExtractRules, ExtractResponse, CrawlResponse } from '../types/search.js'; import { InvalidArgError, ConfigMissingError } from '../core/errors.js'; export class SearchNamespace { private mcpServer: MCPServer; constructor(mcpServer: MCPServer) { this.mcpServer = mcpServer; this.registerTools(); } private registerTools(): void { const registry = this.mcpServer.getRegistry(); registry.registerTool( 'search.web', { name: 'search.web', description: 'Search the web using various search engines', inputSchema: { type: 'object', properties: { query: { type: 'string' }, engine: { type: 'string', enum: ['bing', 'google', 'brave', 'ddg'] }, num: { type: 'number', minimum: 1, maximum: 50 } }, required: ['query'] } }, this.webSearch.bind(this) ); registry.registerTool( 'search.fetch', { name: 'search.fetch', description: 'Fetch a URL with HTTP request options', inputSchema: { type: 'object', properties: { url: { type: 'string' }, method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS'] }, headers: { type: 'object' }, body: { type: 'string' }, follow: { type: 'boolean' } }, required: ['url'] } }, this.fetch.bind(this) ); registry.registerTool( 'search.extract', { name: 'search.extract', description: 'Extract structured data from HTML content', inputSchema: { type: 'object', properties: { target: { oneOf: [ { type: 'object', properties: { url: { type: 'string' } }, required: ['url'] }, { type: 'object', properties: { body_text: { type: 'string' } }, required: ['body_text'] } ] }, rules: { type: 'object', properties: { css: { type: 'array', items: { type: 'string' } }, xpath: { type: 'array', items: { type: 'string' } }, boilerplate: { type: 'boolean' } } } }, required: 
['target'] } }, this.extract.bind(this) ); registry.registerTool( 'search.crawl', { name: 'search.crawl', description: 'Crawl multiple web pages starting from seed URLs', inputSchema: { type: 'object', properties: { seed_urls: { type: 'array', items: { type: 'string' } }, limit: { type: 'number', minimum: 1, maximum: 1000 }, same_origin: { type: 'boolean' }, include: { type: 'array', items: { type: 'string' } }, exclude: { type: 'array', items: { type: 'string' } }, cursor: { type: 'string' } }, required: ['seed_urls'] } }, this.crawl.bind(this) ); } private async webSearch(params: { query: string; engine?: SearchEngine; num?: number; }): Promise<WebSearchResponse> { const { query, engine = 'ddg', num = 10 } = params; switch (engine) { case 'ddg': return await this.searchDuckDuckGo(query, num); case 'brave': return await this.searchBrave(query, num); case 'bing': return await this.searchBing(query, num); case 'google': return await this.searchGoogle(query, num); default: throw new InvalidArgError('engine', `Unsupported search engine: ${engine}`); } } private async searchDuckDuckGo(query: string, num: number): Promise<WebSearchResponse> { try { // DuckDuckGo HTML search (free tier) const response = await axios.get('https://html.duckduckgo.com/html/', { params: { q: query, t: 'h_', ia: 'web' }, headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' }, timeout: 10000 }); const $ = cheerio.load(response.data); const results: any[] = []; $('.result').each((i, element) => { if (i >= num) return false; const $el = $(element); const titleEl = $el.find('.result__title a'); const title = titleEl.text().trim(); const url = titleEl.attr('href'); const snippet = $el.find('.result__snippet').text().trim(); if (title && url) { results.push({ rank: i + 1, title, url: url.startsWith('/') ? 
`https://duckduckgo.com${url}` : url, snippet: snippet || undefined }); } }); return { results, raw: { source: 'duckduckgo', total_results: results.length } }; } catch (error) { throw new Error(`DuckDuckGo search failed: ${error instanceof Error ? error.message : error}`); } } private async searchBrave(query: string, num: number): Promise<WebSearchResponse> { // Brave Search API would require an API key throw new ConfigMissingError('BRAVE_SEARCH_API_KEY'); } private async searchBing(query: string, num: number): Promise<WebSearchResponse> { // Bing Search API would require Azure Cognitive Services key throw new ConfigMissingError('BING_SEARCH_API_KEY'); } private async searchGoogle(query: string, num: number): Promise<WebSearchResponse> { // Google Custom Search API would require API key and Search Engine ID throw new ConfigMissingError('GOOGLE_SEARCH_API_KEY'); } private async fetch(params: { url: string; method?: HttpMethod; headers?: Record<string, string>; body?: string; follow?: boolean; }): Promise<FetchResponse> { const { url, method = 'GET', headers = {}, body, follow = true } = params; try { const response = await axios({ url, method: method.toLowerCase() as any, headers: { 'User-Agent': 'mcp-fullstack/1.0', ...headers }, data: body, maxRedirects: follow ? 
5 : 0, timeout: 30000, validateStatus: () => true, // Don't throw on HTTP errors responseType: 'arraybuffer' }); const isText = this.isTextContent(response.headers['content-type'] || ''); const size = response.data.byteLength; let body_text: string | undefined; let json: any; let binary = false; if (isText && size < 10 * 1024 * 1024) { // Max 10MB for text content body_text = Buffer.from(response.data).toString('utf-8'); // Try to parse as JSON if (response.headers['content-type']?.includes('application/json')) { try { json = JSON.parse(body_text); } catch { // Not valid JSON, keep as text } } } else { binary = true; } return { status: response.status, headers: response.headers as Record<string, string>, body_text, json, binary, size }; } catch (error) { if (axios.isAxiosError(error)) { throw new Error(`HTTP request failed: ${error.message}`); } throw error; } } private isTextContent(contentType: string): boolean { return contentType.startsWith('text/') || contentType.includes('application/json') || contentType.includes('application/xml') || contentType.includes('application/javascript') || contentType.includes('application/x-www-form-urlencoded'); } private async extract(params: { target: ExtractTarget; rules?: ExtractRules; }): Promise<ExtractResponse> { const { target, rules = {} } = params; let html: string; if ('url' in target) { const fetchResponse = await this.fetch({ url: target.url! 
}); if (!fetchResponse.body_text) { throw new Error('Failed to fetch text content from URL'); } html = fetchResponse.body_text; } else { html = target.body_text!; } const $ = cheerio.load(html); const result: ExtractResponse = {}; // Apply boilerplate removal if requested if (rules.boilerplate) { // Remove common boilerplate elements $('script, style, nav, header, footer, aside, .advertisement, .ads').remove(); } // Extract using CSS selectors if (rules.css) { const fields: Record<string, any> = {}; for (const [index, selector] of rules.css.entries()) { const elements = $(selector); if (elements.length === 1) { fields[`css_${index}`] = elements.text().trim(); } else if (elements.length > 1) { fields[`css_${index}`] = elements.map((i, el) => $(el).text().trim()).get(); } } if (Object.keys(fields).length > 0) { result.fields = fields; } } // Extract plain text content const textContent = $('body').text().replace(/\s+/g, ' ').trim(); if (textContent) { result.text = textContent; } // Extract all links const links: Array<{ url: string; title?: string }> = []; $('a[href]').each((i, el) => { const $el = $(el); const href = $el.attr('href'); const title = $el.text().trim() || $el.attr('title'); if (href) { let url = href; // Convert relative URLs to absolute if ('url' in target && !href.startsWith('http')) { try { const baseUrl = new URL(target.url!); url = new URL(href, baseUrl.origin).toString(); } catch { // Skip invalid URLs return; } } links.push({ url, title }); } }); if (links.length > 0) { result.links = links; } return result; } private async crawl(params: { seed_urls: string[]; limit?: number; same_origin?: boolean; include?: string[]; exclude?: string[]; cursor?: string; }): Promise<CrawlResponse> { const { seed_urls, limit = 50, same_origin = true, include = [], exclude = [] } = params; const visited = new Set<string>(); const queue = [...seed_urls]; const pages = []; const origins = same_origin ? 
new Set(seed_urls.map(url => new URL(url).origin)) : null; while (queue.length > 0 && pages.length < limit) { const url = queue.shift()!; if (visited.has(url)) continue; visited.add(url); // Check include/exclude patterns if (include.length > 0 && !include.some(pattern => url.includes(pattern))) { continue; } if (exclude.length > 0 && exclude.some(pattern => url.includes(pattern))) { continue; } try { const fetchResponse = await this.fetch({ url }); const $ = cheerio.load(fetchResponse.body_text || ''); const title = $('title').text().trim(); const textContent = $('body').text().replace(/\s+/g, ' ').trim(); const excerpt = textContent.substring(0, 200) + (textContent.length > 200 ? '...' : ''); pages.push({ url, status: fetchResponse.status, title: title || undefined, text_excerpt: excerpt || undefined }); // Add new URLs to queue if we haven't hit the limit if (pages.length < limit) { $('a[href]').each((i, el) => { const href = $(el).attr('href'); if (!href) return; let newUrl: string; try { newUrl = href.startsWith('http') ? href : new URL(href, url).toString(); } catch { return; // Skip invalid URLs } // Check same origin constraint if (origins && !origins.has(new URL(newUrl).origin)) { return; } if (!visited.has(newUrl) && !queue.includes(newUrl)) { queue.push(newUrl); } }); } } catch (error) { console.warn(`Failed to crawl ${url}:`, error); pages.push({ url, status: 0, text_excerpt: `Error: ${error instanceof Error ? error.message : error}` }); } } return { pages, next_cursor: queue.length > 0 ? Buffer.from(JSON.stringify(queue.slice(0, 10))).toString('base64') : undefined }; } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/JacobFV/mcp-fullstack'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.