Metal MCP Server

  • src
#!/usr/bin/env node import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import { CallToolRequestSchema, ErrorCode, ListToolsRequestSchema, McpError } from '@modelcontextprotocol/sdk/types.js'; import { chromium, Browser, Page } from 'playwright'; import TurndownService from 'turndown'; import DeepResearch from './deep-research.js'; interface DeepResearchArgs { topic: string; maxDepth?: number; maxBranching?: number; timeout?: number; minRelevanceScore?: number; } interface ParallelSearchArgs { queries: string[]; maxParallel?: number; } interface VisitPageArgs { url: string; } // Initialize Turndown service for converting HTML to Markdown const turndownService = new TurndownService({ headingStyle: 'atx', hr: '---', bulletListMarker: '-', codeBlockStyle: 'fenced', emDelimiter: '_', strongDelimiter: '**', linkStyle: 'inlined', }); // Custom Turndown rules turndownService.addRule('removeScripts', { filter: ['script', 'style', 'noscript'], replacement: () => '' }); turndownService.addRule('preserveLinks', { filter: 'a', replacement: (content: string, node: Node) => { const element = node as HTMLAnchorElement; const href = element.getAttribute('href'); return href ? `[${content}](${href})` : content; } }); // Redirect console output to stderr to keep stdout clean for MCP communication const originalConsoleLog = console.log; const originalConsoleError = console.error; console.log = (...args) => { process.stderr.write(`[INFO] ${args.join(' ')}\n`); }; console.error = (...args) => { process.stderr.write(`[ERROR] ${args.join(' ')}\n`); }; const deepResearch = new DeepResearch(); let browser: Browser | undefined; let page: Page | undefined; const server = new Server( { name: 'mcp-deepwebresearch', version: '0.3.0' }, { capabilities: { tools: {} } } ); // List available tools server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: 'deep_research', description: 'Perform deep research on a topic with content extraction and analysis', inputSchema: { type: 'object', properties: { topic: { type: 'string', description: 'Research topic or question' }, maxDepth: { type: 'number', description: 'Maximum depth of related content exploration', minimum: 1, maximum: 2 }, maxBranching: { type: 'number', description: 'Maximum number of related paths to explore', minimum: 1, maximum: 3 }, timeout: { type: 'number', description: 'Research timeout in milliseconds', minimum: 30000, maximum: 55000 }, minRelevanceScore: { type: 'number', description: 'Minimum relevance score for including content', minimum: 0, maximum: 1 } }, required: ['topic'] } }, { name: 'parallel_search', description: 'Perform multiple Google searches in parallel', inputSchema: { type: 'object', properties: { queries: { type: 'array', items: { type: 'string' }, description: 'Array of search queries to execute in parallel' }, maxParallel: { type: 'number', description: 'Maximum number of parallel searches', minimum: 1, maximum: 5 } }, required: ['queries'] } }, { name: 'visit_page', description: 'Visit a webpage and extract its content', inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'URL to visit' } }, required: ['url'] } } ] })); // Validate URL format and security function isValidUrl(urlString: string): boolean { try { const url = new URL(urlString); return url.protocol === 'http:' || url.protocol === 'https:'; } catch { return false; } } // Safe page navigation with timeout async function safePageNavigation(page: Page, url: string): Promise<void> { await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 // 10 second timeout }); // Quick check for bot protection or security challenges const validation = await page.evaluate(() => { const botProtectionExists = [ '#challenge-running', '#cf-challenge-running', '#px-captcha', '#ddos-protection', '#waf-challenge-html' ].some(selector => document.querySelector(selector)); const suspiciousTitle = [ 'security check', 'ddos protection', 'please wait', 'just a moment', 'attention required' ].some(phrase => document.title.toLowerCase().includes(phrase)); return { botProtection: botProtectionExists, suspiciousTitle, title: document.title }; }); if (validation.botProtection) { throw new Error('Bot protection detected'); } if (validation.suspiciousTitle) { throw new Error(`Suspicious page title detected: "${validation.title}"`); } } // Extract content as markdown async function extractContentAsMarkdown(page: Page): Promise<string> { const html = await page.evaluate(() => { // Try standard content containers first const contentSelectors = [ 'main', 'article', '[role="main"]', '#content', '.content', '.main', '.post', '.article' ]; for (const selector of contentSelectors) { const element = document.querySelector(selector); if (element) { return element.outerHTML; } } // Fallback to cleaning full body content const body = document.body; const elementsToRemove = [ 'header', 'footer', 'nav', '[role="navigation"]', 'aside', '.sidebar', '[role="complementary"]', '.nav', '.menu', '.header', '.footer', '.advertisement', '.ads', '.cookie-notice' ]; elementsToRemove.forEach(sel => { body.querySelectorAll(sel).forEach(el => el.remove()); }); return body.outerHTML; }); if (!html) { return ''; } try { const markdown = turndownService.turndown(html); return markdown .replace(/\n{3,}/g, '\n\n') .replace(/^- $/gm, '') .replace(/^\s+$/gm, '') .trim(); } catch (error) { console.error('Error converting HTML to Markdown:', error); return html; } } // Ensure browser is initialized async function ensureBrowser(): Promise<Page> { if (!browser) { browser = await chromium.launch({ headless: true }); const context = await browser.newContext(); page = await context.newPage(); } if (!page) { const context = await browser.newContext(); page = await context.newPage(); } return page; } // Handle tool calls server.setRequestHandler(CallToolRequestSchema, async (request) => { try { switch (request.params.name) { case 'deep_research': { const args = request.params.arguments as unknown as DeepResearchArgs; if (!args?.topic) { throw new McpError(ErrorCode.InvalidParams, 'Topic is required'); } console.log(`Starting deep research on topic: ${args.topic}`); const result = await deepResearch.startResearch(args.topic, { maxDepth: Math.min(args.maxDepth || 2, 2), maxBranching: Math.min(args.maxBranching || 3, 3), timeout: Math.min(args.timeout || 55000, 55000), minRelevanceScore: args.minRelevanceScore || 0.7 }); return { content: [ { type: 'text', text: JSON.stringify(result, null, 2) } ] }; } case 'parallel_search': { const args = request.params.arguments as unknown as ParallelSearchArgs; if (!args?.queries) { throw new McpError(ErrorCode.InvalidParams, 'Queries array is required'); } const limitedQueries = args.queries.slice(0, 5); console.log(`Starting parallel search with ${limitedQueries.length} queries`); const result = await deepResearch.parallelSearch.parallelSearch(limitedQueries); return { content: [ { type: 'text', text: JSON.stringify(result, null, 2) } ] }; } case 'visit_page': { const args = request.params.arguments as unknown as VisitPageArgs; if (!args?.url) { throw new McpError(ErrorCode.InvalidParams, 'URL is required'); } if (!isValidUrl(args.url)) { throw new McpError( ErrorCode.InvalidParams, `Invalid URL: ${args.url}. Only http and https protocols are supported.` ); } const page = await ensureBrowser(); try { await safePageNavigation(page, args.url); const title = await page.title(); const content = await extractContentAsMarkdown(page); return { content: [ { type: 'text', text: JSON.stringify({ url: args.url, title, content }, null, 2) } ] }; } catch (error) { throw new McpError( ErrorCode.InternalError, `Failed to visit page: ${(error as Error).message}` ); } } default: throw new McpError( ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}` ); } } catch (error) { console.error('Error executing tool:', error); throw new McpError( ErrorCode.InternalError, error instanceof Error ? error.message : 'Unknown error occurred' ); } }); // Error handling server.onerror = (error) => { console.error('[MCP Error]', error); }; // Handle shutdown process.on('SIGINT', async () => { if (browser) { await browser.close(); } await server.close(); process.exit(0); }); // Start the server const transport = new StdioServerTransport(); server.connect(transport).catch(console.error); console.error('MCP Web Research server running on stdio');