// scrapeBalanced.ts
import type { ToolDefinition } from '../config.js';
import type { ScraperBaseParams, ScraperResponse } from '../types.js';
import * as prysm from '@pinkpixel/prysm-llm';
import { config } from '../config.js';
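
// MCP tool definition for the "balanced" scraping profile: standard
// extraction mode (`focused` and `deep` are disabled), optional image
// capture, and a hard timeout so a slow page cannot stall the client.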
export const scrapeBalanced: ToolDefinition = {
  name: 'scrapeBalanced',
  description: 'Balanced web scraping approach with good coverage and reasonable speed',
  parameters: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'URL of the webpage to scrape'
      },
      maxScrolls: {
        type: 'number',
        description: 'Maximum number of scroll attempts (default: 10)'
      },
      scrollDelay: {
        type: 'number',
        description: 'Delay between scrolls in milliseconds (default: 2000)'
      },
      pages: {
        type: 'number',
        description: 'Number of pages to scrape when pagination is present (default: 1)'
      },
      scrapeImages: {
        type: 'boolean',
        description: 'Whether to include images in the scrape result (default: false)'
      },
      downloadImages: {
        type: 'boolean',
        description: 'Whether to download images locally (implies scrapeImages; default: false)'
      },
      maxImages: {
        type: 'number',
        description: 'Maximum number of images to extract (default: 50)'
      },
      minImageSize: {
        type: 'number',
        description: 'Minimum width/height for images, in pixels (default: 100)'
      },
      timeout: {
        type: 'number',
        description: 'Maximum time in milliseconds for the scrape operation (default: 30000)'
      },
      output: {
        type: 'string',
        description: 'Output directory for general results (falls back to the configured default)'
      },
      imageOutput: {
        type: 'string',
        description: 'Output directory for downloaded images (falls back to the configured default)'
      }
    },
    required: ['url']
  },
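  // Runs the scrape with the options below, enforces the timeout locally,
  // and trims oversized results so the response stays manageable for the
  // MCP client.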
  handler: async (params: ScraperBaseParams & { timeout?: number }): Promise<ScraperResponse> => {
    const {
      url, maxScrolls = 10, scrollDelay = 2000, pages = 1, scrapeImages = false,
      downloadImages = false, maxImages = 50, minImageSize = 100, timeout = 30000,
      output, imageOutput
    } = params;
    try {
      // Build the options object for the scraper
      const options = {
        maxScrolls,
        scrollDelay,
        pages,
        focused: false,
        standard: true, // standard mode gives the balanced extraction profile
        deep: false,
        scrapeImages: scrapeImages || downloadImages, // downloading implies scraping
        downloadImages,
        maxImages,
        minImageSize,
        timeout, // also forwarded to the scraper itself
        output: output || config.serverOptions.defaultOutputDir, // fall back to configured default
        imageOutput: imageOutput || config.serverOptions.defaultImageOutputDir // fall back to configured default
      };
      // Race the scrape against a rejection timer so a hung page cannot block the tool
      const scrapePromise = prysm.scrape(url, options);
      let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
      const timeoutPromise = new Promise<never>((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error(`Scraping timed out after ${timeout}ms`)), timeout);
      });
      const result = await Promise.race([scrapePromise, timeoutPromise])
        // Clear the timer whichever side settles first, so it cannot keep the event loop alive
        .finally(() => { if (timeoutHandle) clearTimeout(timeoutHandle); }) as ScraperResponse;
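      // Design note: `timeout` is passed to prysm.scrape as well, but the
      // local race guarantees an upper bound on this call regardless of how
      // the library interprets that option.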
      // Limit content size to avoid overwhelming the MCP client
      if (result.content && result.content.length > 0) {
        // Cap the number of content sections
        if (result.content.length > 20) {
          result.content = result.content.slice(0, 20);
          result.content.push("(Content truncated due to size limitations)");
        }
        // Cap the size of each content section
        result.content = result.content.map(section => {
          if (section.length > 5000) {
            return section.substring(0, 5000) + "... (truncated)";
          }
          return section;
        });
      }
      // Cap the number of images returned
      if (result.images && result.images.length > 20) {
        result.images = result.images.slice(0, 20);
      }
      return result;
    } catch (error) {
      console.error(`Error scraping ${url}:`, error);
      // Return a structured error result in the MCP response shape
      return {
        title: "Scraping Error",
        content: [`Failed to scrape ${url}: ${error instanceof Error ? error.message : String(error)}`],
        images: [],
        metadata: { error: true },
        url: url,
        structureType: "error",
        paginationType: "none",
        extractionMethod: "none"
      };
    }
  }
};
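
// Usage sketch (hypothetical direct invocation; in practice the MCP server
// calls the handler after validating arguments against the schema above):
//
//   const result = await scrapeBalanced.handler({
//     url: 'https://example.com/articles',
//     pages: 2,
//     scrapeImages: true
//   });
//   console.log(result.title, result.content.length);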