Puppeteer Vision MCP Server

webpage-scraper.ts•2.98 KiB

import puppeteerExtraImport from 'puppeteer-extra';
import StealthPluginImport from 'puppeteer-extra-plugin-stealth';
import fs from 'fs';
import { handlePageInteractions } from '../ai/page-interactions.js';
import { processHtmlContent } from './content-processor.js';
import { ScrapeResult, WebpageScrapeOptions } from '../types/index.js';
import { config } from '../config.js';

// Work around TypeScript issues with puppeteer-extra: the default exports are
// cast to `any` so that `.use()` / `.launch()` and calling the plugin factory
// type-check.
const puppeteerExtra = puppeteerExtraImport as any;
const StealthPlugin = StealthPluginImport as any;

// Apply stealth plugin (module-level side effect: runs once on import).
puppeteerExtra.use(StealthPlugin());

/**
 * Visits a webpage, optionally handles interactions, and extracts its main
 * content converted via `processHtmlContent`.
 *
 * A fresh browser is launched for every call and is always closed before the
 * function settles, whether the scrape succeeded or failed.
 *
 * @param options Configuration options for the scraping operation
 * @param options.url Address passed to `page.goto`
 * @param options.autoInteract When true (default), `handlePageInteractions`
 *   is run on the page before content is extracted
 * @param options.maxInteractionAttempts Forwarded to `handlePageInteractions`
 *   (default 3)
 * @param options.waitForNetworkIdle When true (default), navigation waits
 *   with `waitUntil: 'networkidle2'`; otherwise `'domcontentloaded'`
 * @returns `{ data }` holding the processed content on success, or
 *   `{ error: { message } }` when any step after browser launch fails.
 *   A failure of the launch itself is not caught and rejects the promise.
 */
export async function visitWebPage({
  url,
  autoInteract = true,
  maxInteractionAttempts = 3,
  waitForNetworkIdle = true,
}: WebpageScrapeOptions): Promise<ScrapeResult> {
  // Launch puppeteer with stealth plugin and respect headless configuration
  const browser = await puppeteerExtra.launch({
    headless: config.headless ? "new" : false, // Use config.headless setting
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    console.log(`Visiting webpage: ${url}`);
    const page = await browser.newPage();

    // Set viewport to a standard desktop size
    await page.setViewport({ width: 1280, height: 800 });

    // Navigate to the URL
    await page.goto(url, {
      waitUntil: waitForNetworkIdle ? 'networkidle2' : 'domcontentloaded',
    });

    // Allow initial page load to complete (fixed 2 second pause)
    await new Promise(resolve => setTimeout(resolve, 2000));

    // Handle page interactions if enabled
    if (autoInteract) {
      console.log("Checking for interactive elements that need handling...");
      await handlePageInteractions(page, maxInteractionAttempts);
    }

    // Extract content after handling interactions. The callback is executed
    // inside the page, so it may only reference browser globals.
    const htmlContent: string = await page.evaluate(() => {
      // Try to select the main content area, fallback to the body if no specific selector
      const main =
        document.querySelector('main') ||
        document.querySelector('article') ||
        document.querySelector('.content') ||
        document.querySelector('#content') ||
        document.body;
      return main.innerHTML;
    });

    // Process the HTML content
    const markdown = await processHtmlContent(htmlContent);

    console.log(`Successfully scraped and converted to markdown: ${url}`);
    return { data: markdown };
  } catch (error: unknown) {
    if (error instanceof Error) {
      console.error(`Error scraping ${url}:`, error.message);
      return { error: { message: error.message } };
    }

    console.error(`Unknown error scraping ${url}`);
    return { error: { message: "An unknown error occurred" } };
  } finally {
    // Close exactly once on every path. A failure to close is logged rather
    // than thrown so it can neither turn a successful scrape into an error
    // nor replace the error result produced above with a rejection.
    try {
      await browser.close();
    } catch (closeError: unknown) {
      console.error(
        `Failed to close browser after scraping ${url}:`,
        closeError instanceof Error ? closeError.message : closeError,
      );
    }
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/djannot/puppeteer-vision-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

webpage-scraper.ts•2.98 KiB