Web Content MCP Server

by amotivv
Verified
  • src
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ErrorCode,
  ListToolsRequestSchema,
  McpError,
} from '@modelcontextprotocol/sdk/types.js';
import { BrowserClient } from './browser-client.js';
import { ContentProcessor } from './content-processor.js';

/** One entry of a tool result's `content` array. */
type ToolContent =
  | { type: 'text'; text: string }
  | { type: 'image'; data: string; mimeType: string };

/** Result shape returned by every tool handler in this file. */
type ToolResult = { content: ToolContent[]; isError?: boolean };

/** Default for `fetch_page`'s `maxContentLength` option. */
const DEFAULT_MAX_CONTENT_LENGTH = 10000;
/** Default for `search_documentation`'s `maxResults` option. */
const DEFAULT_MAX_RESULTS = 3;
/** Default for `summarize_content`'s `maxLength` option. */
const DEFAULT_MAX_SUMMARY_LENGTH = 500;

/** Renders a caught value as a human-readable message. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Logs `error` under `context` and builds the matching `isError` tool result. */
function errorResult(context: string, error: unknown): ToolResult {
  console.error(`${context}:`, error);
  return {
    content: [{ type: 'text', text: `${context}: ${describeError(error)}` }],
    isError: true,
  };
}

/**
 * Narrows raw tool arguments to a plain key/value record.
 *
 * @throws McpError with `InvalidParams` when `args` is not a non-null object.
 */
function requireArgsObject(args: unknown, toolName: string): Record<string, unknown> {
  if (typeof args !== 'object' || args === null) {
    throw new McpError(ErrorCode.InvalidParams, `Invalid arguments for ${toolName}`);
  }
  return args as Record<string, unknown>;
}

/**
 * Reads a required string argument.
 *
 * @throws McpError with `InvalidParams` when the value is missing or not a string.
 */
function requireString(
  args: Record<string, unknown>,
  key: string,
  toolName: string
): string {
  const value = args[key];
  if (typeof value !== 'string') {
    throw new McpError(ErrorCode.InvalidParams, `Invalid arguments for ${toolName}`);
  }
  return value;
}

/**
 * Reads an optional numeric limit, falling back to `fallback` when it is absent.
 *
 * Negative, non-finite and non-numeric values are rejected rather than passed
 * on: `slice(0, -1)` / `substring(0, NaN)` would otherwise silently produce
 * results that look valid but are wrong.
 *
 * @throws McpError with `InvalidParams` when the value is present but unusable.
 */
function readLimit(
  value: unknown,
  fallback: number,
  toolName: string,
  optionName: string
): number {
  if (value === undefined || value === null) {
    return fallback;
  }
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) {
    throw new McpError(
      ErrorCode.InvalidParams,
      `Invalid arguments for ${toolName}: ${optionName} must be a non-negative number`
    );
  }
  return Math.floor(value);
}

/** Cuts `text` to at most `maxLength` characters, appending `...` when it was cut. */
function truncate(text: string, maxLength: number): string {
  return text.length > maxLength ? text.substring(0, maxLength) + '...' : text;
}

/**
 * Cloudflare Browser Rendering MCP Server
 *
 * This server provides tools for fetching and processing web content
 * using Cloudflare Browser Rendering for use as context in LLMs.
 *
 * Only `fetch_page` talks to the browser client; `search_documentation`,
 * `extract_structured_content` and `summarize_content` currently return
 * mock data.
 */
export class BrowserRenderingServer {
  private readonly server: Server;
  private readonly browserClient: BrowserClient;
  private readonly contentProcessor: ContentProcessor;

  /**
   * Creates the MCP server, wires up the tool handlers and installs a SIGINT
   * handler that closes the server before the process exits.
   */
  constructor() {
    this.server = new Server(
      {
        name: 'cloudflare-browser-rendering',
        version: '0.1.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    // Initialize the browser client and content processor
    this.browserClient = new BrowserClient();
    this.contentProcessor = new ContentProcessor();

    // Set up request handlers
    this.setupToolHandlers();

    // Error handling
    this.server.onerror = (error) => console.error('[MCP Error]', error);
    process.on('SIGINT', () => {
      // Always exit, even if closing fails; otherwise a rejected close() would
      // surface as an unhandled rejection and skip process.exit entirely.
      this.server
        .close()
        .catch((error: unknown) => console.error('[MCP Error]', error))
        .finally(() => process.exit(0));
    });
  }

  /**
   * Set up tool handlers for the MCP server: one handler that lists the
   * available tools and one that dispatches tool calls by name.
   */
  private setupToolHandlers(): void {
    // List available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'fetch_page',
          description: 'Fetches and processes a web page for LLM context',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'URL to fetch',
              },
              includeScreenshot: {
                type: 'boolean',
                description: 'Whether to include a screenshot (base64 encoded)',
              },
              maxContentLength: {
                type: 'number',
                description: 'Maximum content length to return',
              },
            },
            required: ['url'],
          },
        },
        {
          name: 'search_documentation',
          description: 'Searches Cloudflare documentation and returns relevant content',
          inputSchema: {
            type: 'object',
            properties: {
              query: {
                type: 'string',
                description: 'Search query',
              },
              maxResults: {
                type: 'number',
                description: 'Maximum number of results to return',
              },
            },
            required: ['query'],
          },
        },
        {
          name: 'extract_structured_content',
          description: 'Extracts structured content from a web page using CSS selectors',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'URL to extract content from',
              },
              selectors: {
                type: 'object',
                description: 'CSS selectors to extract content',
                additionalProperties: {
                  type: 'string',
                },
              },
            },
            required: ['url', 'selectors'],
          },
        },
        {
          name: 'summarize_content',
          description: 'Summarizes web content for more concise LLM context',
          inputSchema: {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'URL to summarize',
              },
              maxLength: {
                type: 'number',
                description: 'Maximum length of the summary',
              },
            },
            required: ['url'],
          },
        },
      ],
    }));

    // Handle tool calls
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      const { name, arguments: args } = request.params;

      try {
        switch (name) {
          case 'fetch_page':
            return await this.handleFetchPage(args);
          case 'search_documentation':
            return await this.handleSearchDocumentation(args);
          case 'extract_structured_content':
            return await this.handleExtractStructuredContent(args);
          case 'summarize_content':
            return await this.handleSummarizeContent(args);
          default:
            throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`);
        }
      } catch (error) {
        // Protocol-level errors (bad params, unknown tool) pass through as-is.
        if (error instanceof McpError) {
          throw error;
        }
        console.error(`Error in tool ${name}:`, error);
        throw new McpError(
          ErrorCode.InternalError,
          `Error executing tool ${name}: ${describeError(error)}`
        );
      }
    });
  }

  /**
   * Handle the fetch_page tool: fetch a page, process it for LLM use,
   * truncate it to `maxContentLength` and optionally attach a screenshot.
   *
   * @param args Raw tool arguments: `url` (required string),
   *   `includeScreenshot` (optional, interpreted by truthiness) and
   *   `maxContentLength` (optional non-negative number, default 10000).
   * @returns The processed text, plus an image entry when a screenshot was
   *   requested and produced; an `isError` result if fetching fails.
   * @throws McpError with `InvalidParams` when the arguments are malformed.
   */
  private async handleFetchPage(args: unknown): Promise<ToolResult> {
    // Validate arguments
    const params = requireArgsObject(args, 'fetch_page');
    const url = requireString(params, 'url', 'fetch_page');
    const maxContentLength = readLimit(
      params.maxContentLength,
      DEFAULT_MAX_CONTENT_LENGTH,
      'fetch_page',
      'maxContentLength'
    );
    const includeScreenshot = Boolean(params.includeScreenshot);

    try {
      // Fetch the page content and process it for LLM
      const html = await this.browserClient.fetchContent(url);
      const processedContent = this.contentProcessor.processForLLM(html, url);

      const content: ToolContent[] = [
        { type: 'text', text: truncate(processedContent, maxContentLength) },
      ];

      // Get screenshot if requested
      if (includeScreenshot) {
        const screenshot = await this.browserClient.takeScreenshot(url);
        if (screenshot) {
          // MCP image content carries its payload in `data` with a `mimeType`.
          // NOTE(review): assumes takeScreenshot() resolves to a base64 PNG
          // string (the tool description says "base64 encoded") - confirm
          // against BrowserClient.
          content.push({ type: 'image', data: screenshot, mimeType: 'image/png' });
        }
      }

      return { content };
    } catch (error) {
      return errorResult('Error fetching page', error);
    }
  }

  /**
   * Handle the search_documentation tool. Currently returns up to
   * `maxResults` entries from a fixed list of mock results.
   *
   * @param args Raw tool arguments: `query` (required string) and
   *   `maxResults` (optional non-negative number, default 3).
   * @returns A markdown-formatted list of results for the query.
   * @throws McpError with `InvalidParams` when the arguments are malformed.
   */
  private async handleSearchDocumentation(args: unknown): Promise<ToolResult> {
    // Validate arguments
    const params = requireArgsObject(args, 'search_documentation');
    const query = requireString(params, 'query', 'search_documentation');
    const maxResults = readLimit(
      params.maxResults,
      DEFAULT_MAX_RESULTS,
      'search_documentation',
      'maxResults'
    );

    try {
      // In a real implementation, you would:
      // 1. Use Cloudflare Browser Rendering to navigate to the docs
      // 2. Use the search functionality on the docs site
      // 3. Extract the search results

      // For this simulation, we'll return mock results
      const mockResults = [
        {
          title: 'Browser Rendering API Overview',
          url: 'https://developers.cloudflare.com/browser-rendering/',
          snippet:
            'Cloudflare Browser Rendering is a serverless headless browser service that allows execution of browser actions within Cloudflare Workers.',
        },
        {
          title: 'REST API Reference',
          url: 'https://developers.cloudflare.com/browser-rendering/rest-api/',
          snippet:
            'The REST API provides simple endpoints for common browser tasks like fetching content, taking screenshots, and generating PDFs.',
        },
        {
          title: 'Workers Binding API Reference',
          url: 'https://developers.cloudflare.com/browser-rendering/workers-binding/',
          snippet:
            'For more advanced use cases, you can use the Workers Binding API with Puppeteer to automate browser interactions.',
        },
      ].slice(0, maxResults);

      // Format the results
      const formattedResults = mockResults
        .map((result) => `## [${result.title}](${result.url})\n${result.snippet}\n`)
        .join('\n');

      return {
        content: [
          {
            type: 'text',
            text: `# Search Results for "${query}"\n\n${formattedResults}`,
          },
        ],
      };
    } catch (error) {
      return errorResult('Error searching documentation', error);
    }
  }

  /**
   * Handle the extract_structured_content tool. Currently returns a mock
   * extraction line for every string-valued selector; non-string selector
   * values are skipped.
   *
   * @param args Raw tool arguments: `url` (required string) and `selectors`
   *   (required non-null object mapping a label to a CSS selector).
   * @returns A markdown section per selector label.
   * @throws McpError with `InvalidParams` when the arguments are malformed.
   */
  private async handleExtractStructuredContent(args: unknown): Promise<ToolResult> {
    // Validate arguments
    const params = requireArgsObject(args, 'extract_structured_content');
    const url = requireString(params, 'url', 'extract_structured_content');
    const selectors = params.selectors;
    // `typeof null` is 'object', so null has to be rejected explicitly.
    if (typeof selectors !== 'object' || selectors === null) {
      throw new McpError(
        ErrorCode.InvalidParams,
        'Invalid arguments for extract_structured_content'
      );
    }

    try {
      // In a real implementation, you would:
      // 1. Use Cloudflare Browser Rendering to fetch the page
      // 2. Use the /scrape endpoint to extract content based on selectors

      // For this simulation, we'll return mock results
      const mockResults: Record<string, string> = {};
      for (const [key, selector] of Object.entries(selectors)) {
        if (typeof selector === 'string') {
          // Simulate extraction based on selector
          mockResults[key] = `Extracted content for selector "${selector}"`;
        }
      }

      // Format the results
      const formattedResults = Object.entries(mockResults)
        .map(([key, value]) => `## ${key}\n${value}`)
        .join('\n\n');

      return {
        content: [
          {
            type: 'text',
            text: `# Structured Content from ${url}\n\n${formattedResults}`,
          },
        ],
      };
    } catch (error) {
      return errorResult('Error extracting structured content', error);
    }
  }

  /**
   * Handle the summarize_content tool. Currently returns a fixed mock
   * summary, truncated to `maxLength`; the `url` is validated but not fetched.
   *
   * @param args Raw tool arguments: `url` (required string) and `maxLength`
   *   (optional non-negative number, default 500).
   * @returns The (possibly truncated) summary text.
   * @throws McpError with `InvalidParams` when the arguments are malformed.
   */
  private async handleSummarizeContent(args: unknown): Promise<ToolResult> {
    // Validate arguments
    const params = requireArgsObject(args, 'summarize_content');
    requireString(params, 'url', 'summarize_content');
    const maxLength = readLimit(
      params.maxLength,
      DEFAULT_MAX_SUMMARY_LENGTH,
      'summarize_content',
      'maxLength'
    );

    try {
      // In a real implementation, you would:
      // 1. Fetch the page content using Cloudflare Browser Rendering
      // 2. Process the content for LLM
      // 3. Call an LLM API to summarize the content

      // For this simulation, we'll return a mock summary
      const mockSummary = [
        '# Browser Rendering API Summary',
        '',
        'Cloudflare Browser Rendering is a serverless headless browser service for Cloudflare Workers that enables:',
        '',
        '1. Rendering JavaScript-heavy websites',
        '2. Taking screenshots and generating PDFs',
        '3. Extracting structured data',
        '4. Automating browser interactions',
        '',
        'It offers two main interfaces:',
        '',
        '- **REST API**: Simple endpoints for common tasks',
        '- **Workers Binding API**: Advanced integration with Puppeteer',
        '',
        "The service runs within Cloudflare's network, providing low-latency access to browser capabilities without managing infrastructure.",
      ].join('\n');

      return {
        content: [
          {
            type: 'text',
            text: truncate(mockSummary, maxLength),
          },
        ],
      };
    } catch (error) {
      return errorResult('Error summarizing content', error);
    }
  }

  /**
   * Run the MCP server by connecting it to a stdio transport.
   */
  async run(): Promise<void> {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
  }
}