// Scraper.is MCP Server

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  Tool,
  CallToolRequestSchema,
  ListToolsRequestSchema,
  ReadResourceRequestSchema
} from '@modelcontextprotocol/sdk/types.js';
import { ScraperAPI } from '../lib/scraper-api.js';
import { sendLoggingMessage } from '../utils/index.js';
import fetch from 'node-fetch';

/** URI prefix under which screenshot URLs are exposed as MCP resources. */
const SCREENSHOT_URI_SCHEME = 'scraperis_screenshot://';

/**
 * Converts any thrown value into a printable message.
 * @param error The caught value
 * @returns `error.message` for `Error` instances, `String(error)` otherwise
 */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Extracts the screenshot URL embedded in a `scraperis_screenshot://` resource URI.
 * @param resourceUri The URI received in a resources/read request
 * @returns The embedded URL, or `undefined` when the URI does not start with the
 *          screenshot prefix or the remainder is not a well-formed http(s) URL
 */
function extractScreenshotUrl(resourceUri: string): string | undefined {
  if (!resourceUri.startsWith(SCREENSHOT_URI_SCHEME)) {
    return undefined;
  }
  const target = resourceUri.slice(SCREENSHOT_URI_SCHEME.length);
  try {
    const { protocol } = new URL(target);
    return protocol === 'http:' || protocol === 'https:' ? target : undefined;
  } catch {
    // `new URL` throws on malformed input; treat that as "no such resource".
    return undefined;
  }
}

/**
 * Builds a single-text-item tool result.
 * @param text The text payload
 * @param isError Whether the result reports a failure
 */
function textResult(text: string, isError = false) {
  return {
    content: [{ type: 'text' as const, text }],
    isError
  };
}

/**
 * ScraperMCPServer class for handling MCP server operations
 */
export class ScraperMCPServer {
  private server: Server;
  private scraperApi: ScraperAPI;
  private screenshots: Map<string, string>;
  private tools: Tool[];

  /**
   * Creates a new ScraperMCPServer
   * @param apiKey The API key for Scraper.is
   * @param apiBase The base URL for the Scraper.is API
   * @param version The server version
   */
  constructor(
    apiKey: string,
    apiBase: string = 'https://scraper.is/api',
    version: string = '0.1.0'
  ) {
    this.scraperApi = new ScraperAPI(apiKey, apiBase);
    this.screenshots = new Map<string, string>();

    // Initialize MCP server
    this.server = new Server(
      {
        name: 'scraperis-mcp',
        version: version,
      },
      {
        capabilities: { tools: {}, resources: {}, prompts: {}, logging: {} },
      }
    );

    // Define tools
    this.tools = [
      {
        description:
          'Scrape a single webpage with advanced options for content extraction. \n' +
          'Always returns both markdown content and visual screenshot for rich context. \n' +
          'Supports various formats including markdown, HTML, screenshots, JSON, and quick. \n' +
          'The prompt should include the website URL and what data you want to extract. \n' +
          "For example: 'Get me the top 10 products from producthunt.com' or \n" +
          "'Extract all article titles and authors from techcrunch.com/news'",
        name: 'scrape',
        type: 'function',
        inputSchema: {
          type: 'object',
          properties: {
            prompt: {
              type: 'string',
              description: 'The prompt describing what to scrape, including the URL'
            },
            format: {
              type: 'string',
              enum: ['markdown', 'html', 'screenshot', 'json', 'quick'],
              description: 'The format to return the content in'
            }
          },
          required: ['prompt', 'format']
        },
      },
      // Uncomment to enable screenshot tool
      // {
      //   description: 'Take a screenshot of a webpage',
      //   name: 'screenshot',
      //   type: 'function',
      //   inputSchema: {
      //     type: 'object',
      //     properties: {
      //       url: {
      //         type: 'string',
      //         description: 'The URL to take a screenshot of'
      //       }
      //     },
      //     required: ['url']
      //   },
      // }
    ];

    // Set up request handlers
    this.setupRequestHandlers();
  }

  /**
   * Sets up the request handlers for the MCP server
   * (tool listing, screenshot resource reads, and tool calls).
   */
  private setupRequestHandlers(): void {
    // Handler for listing available tools
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: this.tools,
    }));

    // Handler for reading resources (screenshots)
    this.server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
      this.log('info', `ReadResourceRequestSchema: ${JSON.stringify(request.params)}`);

      const requestedUri = request.params.uri;
      const screenshotURL = extractScreenshotUrl(requestedUri);
      this.log('info', `screenshotURL: ${screenshotURL ?? ''}`);

      if (!screenshotURL) {
        this.log('error', 'Resource not found: No valid screenshot URL');
        throw new Error('Resource not found');
      }

      // NOTE(review): the target URL comes from the client-supplied resource URI.
      // Only the scheme is validated here; consider restricting reads to URLs
      // recorded in `this.screenshots` if arbitrary fetches are a concern.
      try {
        const response = await fetch(screenshotURL);
        if (!response.ok) {
          // Without this check an HTTP error page would be returned as image data.
          throw new Error(`HTTP ${response.status} ${response.statusText}`);
        }

        // MCP binary resources carry their payload as a base64 string in `blob`.
        const base64Image = Buffer.from(await response.arrayBuffer()).toString('base64');

        return {
          contents: [
            {
              uri: requestedUri,
              mimeType: 'image/png',
              blob: base64Image
            }
          ],
        };
      } catch (error) {
        this.log('error', `Error fetching screenshot: ${errorMessage(error)}`);
        throw new Error(`Failed to fetch screenshot: ${errorMessage(error)}`);
      }
    });

    // Handler for tool calls. Failures are reported to the client as an
    // `isError: true` result rather than as a thrown protocol error.
    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      try {
        const { name, arguments: args } = request.params;
        this.log('info', `Received request for tool: ${name}`);

        if (name === 'scrape') {
          return await this.handleScrape(args, request.params._meta?.progressToken);
        }

        // Handle screenshot tool (if enabled)
        if (name === 'screenshot') {
          return await this.handleScreenshot(args);
        }

        throw new Error(`Unknown tool: ${name}`);
      } catch (error) {
        this.log('error', `Error in tool operation: ${error}`);
        return textResult(`Error: ${errorMessage(error)}`, true);
      }
    });
  }

  /**
   * Runs the `scrape` tool and shapes the API response for the requested format.
   * @param args Raw tool arguments; must contain string `prompt` and `format`
   * @param progressToken Token from the request metadata; when present, progress
   *                      notifications are forwarded to the client
   * @returns A text tool result (markdown, screenshot resource pointer, fenced
   *          JSON, or the serialized raw response as a fallback)
   * @throws Error when `prompt` or `format` is missing or not a string
   */
  private async handleScrape(
    args: Record<string, unknown> | undefined,
    progressToken: string | number | undefined
  ) {
    const { prompt, format } = args ?? {};
    if (typeof prompt !== 'string' || prompt.trim() === '') {
      throw new Error("Invalid arguments for 'scrape': 'prompt' must be a non-empty string");
    }
    if (typeof format !== 'string') {
      throw new Error("Invalid arguments for 'scrape': 'format' must be a string");
    }

    // Create progress callback
    const onProgress = progressToken !== undefined
      ? async (progress: number) => {
          await this.server.notification({
            method: 'notifications/progress',
            params: {
              progress: progress,
              total: 100,
              progressToken: progressToken
            },
          });
        }
      : undefined;

    // Call the scraper API
    const handlerData = await this.scraperApi.scrape(prompt, format, onProgress);
    this.log('info', `Scrape completed for prompt: "${prompt.substring(0, 50)}${prompt.length > 50 ? '...' : ''}"`);

    // Handle different format responses
    if (format === 'markdown' && handlerData.markdown) {
      return textResult(handlerData.markdown);
    }

    if (format === 'screenshot' && handlerData.screenshot?.url) {
      this.log('info', `Screenshot URL: ${handlerData.screenshot.url}`);
      this.screenshots.set(handlerData.url || '', handlerData.screenshot.url);
      await this.notifyResourceListChanged();

      const resourceUri = `${SCREENSHOT_URI_SCHEME}${handlerData.screenshot.url}`;
      this.log('info', `Screenshot available at: ${handlerData.screenshot.url}`);

      return textResult(
        `Screenshot taken successfully. You can view it via *MCP Resources* (Paperclip icon) @ URI: ${resourceUri}`
      );
    }

    if (format === 'json' && handlerData.data) {
      return textResult('JSON Data:\n```json\n' + JSON.stringify(handlerData.data, null, 2) + '\n```');
    }

    // Default response
    return textResult(JSON.stringify(handlerData));
  }

  /**
   * Runs the `screenshot` tool and returns the serialized API response.
   * @param args Raw tool arguments; must contain a string `url`
   * @throws Error when `url` is missing or not a string, or when the API call fails
   */
  private async handleScreenshot(args: Record<string, unknown> | undefined) {
    const { url } = args ?? {};
    if (typeof url !== 'string' || url.trim() === '') {
      throw new Error("Invalid arguments for 'screenshot': 'url' must be a non-empty string");
    }

    try {
      const handlerData = await this.scraperApi.screenshot(url);
      this.log('info', `Screenshot taken successfully for URL: ${url}`);
      return textResult(JSON.stringify(handlerData));
    } catch (error) {
      this.log('error', `Error taking screenshot for URL ${url}: ${errorMessage(error)}`);
      throw error;
    }
  }

  /**
   * Tells the client the resource list changed. Best-effort: a failed
   * notification is logged and does not fail the tool call that triggered it.
   */
  private async notifyResourceListChanged(): Promise<void> {
    try {
      await this.server.notification({
        method: 'notifications/resources/list_changed',
      });
    } catch (error) {
      this.log('error', `Failed to send resources/list_changed notification: ${errorMessage(error)}`);
    }
  }

  /**
   * Forwards a log message to `sendLoggingMessage` for this server
   * @param level The log level ('error' or 'info')
   * @param message The message to log
   */
  private log(level: 'error' | 'info', message: string): void {
    sendLoggingMessage(this.server, level, message);
  }

  /**
   * Starts the MCP server on a stdio transport.
   * On connection failure the error is logged and the process exits with code 1.
   */
  async start(): Promise<void> {
    try {
      // console.error writes to stderr, away from the stdout stream the stdio transport uses.
      console.error('Initializing Scraperis MCP Server...');

      const transport = new StdioServerTransport();
      await this.server.connect(transport);

      this.log('info', 'Scraperis MCP Server connected to stdio');
      this.log('info', 'Scraperis MCP Server initialized successfully');
      // NOTE(review): bracket access reads a field of ScraperAPI that is not
      // declared in this file — confirm `apiBase` exists on that class.
      this.log('info', `Configuration: API URL: ${this.scraperApi['apiBase']}`);
    } catch (error) {
      this.log('error', `Fatal error running server: ${errorMessage(error)}`);
      console.error('Fatal error running server:', error);
      process.exit(1);
    }
  }
}