server.js (9.65 kB)
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ListToolsRequestSchema
} from '@modelcontextprotocol/sdk/types.js';

// Auto-generate tool definitions
function generateTools() {
  return [
    {
      name: 'web_search',
      description: 'Perform web or news search using Bing search engine. Supports both general web search and news search modes.',
      inputSchema: {
        type: 'object',
        properties: {
          query: {
            type: 'string',
            description: 'Search query, e.g., Node.js tutorial, tech news, political updates, etc.'
          },
          searchType: {
            type: 'string',
            enum: ['web', 'news'],
            description: 'Search type: web (general web search), news (news search) - required'
          },
          maxResults: {
            type: 'number',
            description: 'Maximum number of results',
            default: 10,
            minimum: 1,
            maximum: 20
          },
          timeFilter: {
            type: 'string',
            enum: ['past_hour', 'past_24_hours', 'past_7_days', 'past_30_days'],
            description: 'Time filter (only valid for news search): past 1 hour, 24 hours, 7 days, 30 days',
            default: 'past_24_hours'
          }
        },
        required: ['query', 'searchType']
      }
    },
    {
      name: 'get_webpage_content',
      description: 'Fetch webpage content and convert to specified format. Supports Markdown, HTML, and plain text.',
      inputSchema: {
        type: 'object',
        properties: {
          url: {
            type: 'string',
            description: 'The URL of the webpage to scrape. Must be a valid HTTP/HTTPS link.'
          },
          format: {
            type: 'string',
            enum: ['markdown', 'html', 'text'],
            description: 'Output format: markdown (default), html, text',
            default: 'markdown'
          }
        },
        required: ['url']
      }
    },
    {
      name: 'get_webpage_source',
      description: 'Fetch the raw HTML source code and page information of a webpage.',
      inputSchema: {
        type: 'object',
        properties: {
          url: {
            type: 'string',
            description: 'The URL of the webpage to get source from. Must be a valid HTTP/HTTPS link.'
          }
        },
        required: ['url']
      }
    },
    {
      name: 'batch_webpage_scrape',
      description: 'Batch scrape multiple webpages with concurrent processing support.',
      inputSchema: {
        type: 'object',
        properties: {
          urls: {
            type: 'array',
            items: { type: 'string' },
            description: 'List of webpage URLs to scrape, up to 20.',
            minItems: 1,
            maxItems: 20
          },
          maxConcurrent: {
            type: 'number',
            description: 'Maximum concurrency',
            default: 3,
            minimum: 1,
            maximum: 10
          }
        },
        required: ['urls']
      }
    }
  ];
}

// Create MCP server
const server = new Server(
  { name: 'spider-mcp', version: '1.0.0' },
  { capabilities: { tools: {} } }
);

// Register tool listing and invocation handlers
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return { tools: generateTools() };
});

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  try {
    let result;
    switch (name) {
      case 'web_search':
        result = await handleWebSearch(args);
        break;
      case 'get_webpage_content':
        result = await handleGetWebpageContent(args);
        break;
      case 'get_webpage_source':
        result = await handleGetWebpageSource(args);
        break;
      case 'batch_webpage_scrape':
        result = await handleBatchWebpageScrape(args);
        break;
      default:
        throw new Error(`Unknown tool: ${name}`);
    }

    return {
      content: [{ type: 'text', text: JSON.stringify(result, null, 2) }]
    };
  } catch (error) {
    return {
      content: [{ type: 'text', text: `Error: ${error.message}` }],
      isError: true
    };
  }
});

// Handle web search (supports both general and news search)
async function handleWebSearch(args) {
  const { query, searchType, maxResults = 10, timeFilter = 'past_24_hours' } = args;

  if (!query || typeof query !== 'string') {
    throw new Error('Query parameter is required and must be a string');
  }
  if (!searchType || !['web', 'news'].includes(searchType)) {
    throw new Error('searchType is required and must be either "web" or "news"');
  }
  if (maxResults < 1 || maxResults > 20) {
    throw new Error('maxResults must be between 1 and 20');
  }
  if (searchType === 'news' && !['past_hour', 'past_24_hours', 'past_7_days', 'past_30_days'].includes(timeFilter)) {
    throw new Error('timeFilter must be a valid time filter option');
  }

  const searchService = (await import('../services/searchService.js')).default;

  let results;
  if (searchType === 'news') {
    results = await searchService.searchBingNews(query, maxResults, timeFilter);
  } else {
    results = await searchService.searchBing(query, maxResults);
  }

  return {
    tool: 'web_search',
    searchType,
    query,
    maxResults,
    timeFilter: searchType === 'news' ? timeFilter : undefined,
    results: results.results,
    totalResults: results.totalResults,
    timestamp: results.timestamp
  };
}

// Handle fetching webpage content
async function handleGetWebpageContent(args) {
  const { url, format = 'markdown' } = args;

  if (!url || typeof url !== 'string') {
    throw new Error('URL parameter is required and must be a string');
  }
  try {
    new URL(url);
  } catch {
    throw new Error('Invalid URL format');
  }
  if (!['markdown', 'html', 'text'].includes(format)) {
    throw new Error('format must be one of: markdown, html, text');
  }

  const searchService = (await import('../services/searchService.js')).default;

  let result;
  if (format === 'markdown') {
    result = await searchService.getWebpageMarkdown(url);
  } else {
    result = await searchService.scrapeWebpage(url);
  }

  return {
    tool: 'get_webpage_content',
    url,
    format,
    title: result.title,
    description: result.description,
    content: format === 'markdown' ? result.markdown : result.content,
    timestamp: result.timestamp
  };
}

// Handle fetching webpage source
async function handleGetWebpageSource(args) {
  const { url } = args;

  if (!url || typeof url !== 'string') {
    throw new Error('URL parameter is required and must be a string');
  }
  try {
    new URL(url);
  } catch {
    throw new Error('Invalid URL format');
  }

  const searchService = (await import('../services/searchService.js')).default;
  const result = await searchService.scrapeWebpage(url);

  return {
    tool: 'get_webpage_source',
    url,
    title: result.title,
    description: result.description,
    keywords: result.keywords,
    content: result.content,
    links: result.links,
    timestamp: result.timestamp
  };
}

// Handle batch webpage scraping
async function handleBatchWebpageScrape(args) {
  const { urls, maxConcurrent = 3 } = args;

  if (!Array.isArray(urls) || urls.length === 0) {
    throw new Error('urls must be a non-empty array');
  }
  if (urls.length > 20) {
    throw new Error('A maximum of 20 URLs is supported');
  }
  if (maxConcurrent < 1 || maxConcurrent > 10) {
    throw new Error('maxConcurrent must be between 1 and 10');
  }

  const searchService = (await import('../services/searchService.js')).default;
  const results = [];
  const errors = [];

  // Process URLs in batches of maxConcurrent
  for (let i = 0; i < urls.length; i += maxConcurrent) {
    const batch = urls.slice(i, i + maxConcurrent);
    const batchPromises = batch.map(async (url) => {
      try {
        const result = await searchService.scrapeWebpage(url);
        return { success: true, url, data: result };
      } catch (error) {
        return { success: false, url, error: error.message };
      }
    });

    // The inner try/catch already converts failures into values,
    // but allSettled guards against anything that slips through.
    const batchResults = await Promise.allSettled(batchPromises);
    batchResults.forEach((result) => {
      if (result.status === 'fulfilled') {
        if (result.value.success) {
          results.push(result.value);
        } else {
          errors.push(result.value);
        }
      } else {
        errors.push({ url: 'unknown', error: result.reason?.message || 'Unknown error' });
      }
    });
  }

  return {
    tool: 'batch_webpage_scrape',
    totalUrls: urls.length,
    successful: results.length,
    failed: errors.length,
    maxConcurrent,
    results,
    errors,
    timestamp: new Date().toISOString()
  };
}

// Start the server over stdio; log to stderr so stdout stays
// reserved for the MCP protocol messages.
const transport = new StdioServerTransport();
await server.connect(transport);
console.error('Spider MCP server started');
console.error('Available tools:', generateTools().map(t => t.name).join(', '));
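
Every handler above lazily imports ../services/searchService.js and calls four methods on its default export, but that module is not shown on this page. Inferred purely from the call sites, its contract looks roughly like the sketch below; the return-value fields mirror what the handlers read, while the method bodies (how Bing is actually queried, how HTML becomes Markdown) are placeholder assumptions, not the project's real implementation.

// Hypothetical sketch of ../services/searchService.js, inferred from server.js above.
// Field names come from what the handlers destructure; the bodies are stubs.
export default {
  // web_search with searchType: 'web'
  async searchBing(query, maxResults) {
    return { results: [], totalResults: 0, timestamp: new Date().toISOString() };
  },
  // web_search with searchType: 'news'
  async searchBingNews(query, maxResults, timeFilter) {
    return { results: [], totalResults: 0, timestamp: new Date().toISOString() };
  },
  // get_webpage_content with format: 'markdown'
  async getWebpageMarkdown(url) {
    return { title: '', description: '', markdown: '', timestamp: new Date().toISOString() };
  },
  // get_webpage_content (html/text), get_webpage_source, and batch_webpage_scrape
  async scrapeWebpage(url) {
    return { title: '', description: '', keywords: [], content: '', links: [], timestamp: new Date().toISOString() };
  }
};

Because the transport is stdio, an MCP client launches server.js as a subprocess (for example, node server.js) and exchanges protocol messages over stdin/stdout; this is also why the file logs with console.error, keeping stdout clean for the protocol.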

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Bosegluon2/spider-mcp'
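
The same lookup works from code. Below is a minimal sketch for Node.js 18+ (where fetch is built in and top-level await works in ES modules); the response schema is whatever the API returns and is not documented on this page.

// Fetch this server's entry from the Glama MCP directory API (Node.js 18+).
const res = await fetch('https://glama.ai/api/mcp/v1/servers/Bosegluon2/spider-mcp');
if (!res.ok) throw new Error(`Directory request failed: ${res.status}`);
const entry = await res.json(); // response shape not documented here
console.log(entry);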

If you have feedback or need assistance with the MCP directory API, please join our Discord server.