Skip to main content
Glama

MCP Webscan Server

by bsmi021
FetchPageService.ts (4.91 kB)
import { fetchHtml, htmlToMarkdown } from '../utils/index.js';
import { logger } from '../utils/index.js';
import { FetchPageResult } from '../types/fetchPageTypes.js'; // Use specific type file
import { ServiceError, ValidationError, NotFoundError } from '../utils/index.js';

// Placeholder for potential future configuration
// import { ConfigurationManager } from '../config/ConfigurationManager.js';
// import { FetchPageServiceConfig } from '../types/fetchPageTypes.js';

/**
 * Service that downloads a web page and turns (part of) its HTML into Markdown.
 *
 * The class currently holds no state; the commented-out lines sketch how a
 * configuration object could be wired in later.
 */
export class FetchPageService {
    // private readonly config: Required<FetchPageServiceConfig>;

    /**
     * Creates the service. No configuration is consumed yet; construction only
     * emits a debug log entry.
     */
    constructor(/* config: Partial<FetchPageServiceConfig> = {} */) {
        // const configManager = ConfigurationManager.getInstance();
        // const defaultConfig = configManager.getFetchPageServiceConfig(); // Assuming this method exists
        // this.config = { ...defaultConfig, ...config };
        // logger.debug("FetchPageService initialized", { config: this.config });
        logger.debug("FetchPageService initialized");
    }

    /**
     * Fetches a page, picks the HTML to convert, and returns it as Markdown.
     *
     * HTML selection rules:
     * - no selector: the inner HTML of `<body>` is used;
     * - selector with at least one match: the inner HTML of the first match is used;
     * - selector with no match: a warning is logged and `<body>` is used instead,
     *   in which case `selectorUsed` in the result is `undefined`.
     *
     * @param pageUrl - The URL of the page to fetch.
     * @param selector - Optional CSS selector narrowing the content to convert.
     * @returns The Markdown content, the source URL, and the selector that was
     *   actually applied (or `undefined` when the body was used).
     * @throws {ValidationError} If `pageUrl` is empty or not a string, or if a
     *   truthy `selector` is not a string.
     * @throws {NotFoundError} If a selector was supplied and the extracted HTML
     *   (including the body fallback) is null or blank.
     * @throws {ServiceError} If no selector was supplied and the body HTML is null
     *   or blank, or if any other error occurs while fetching or converting
     *   (such errors are wrapped; the three custom error types are re-thrown as-is).
     */
    public async fetchAndConvertToMarkdown(pageUrl: string, selector?: string): Promise<FetchPageResult> {
        // Input guards run before the try block, so they are not logged by the catch handler.
        if (typeof pageUrl !== 'string' || !pageUrl) {
            throw new ValidationError('Invalid input: pageUrl string is required.');
        }
        if (selector && typeof selector !== 'string') {
            throw new ValidationError('Invalid input: selector must be a string if provided.');
        }

        logger.info(`Starting page fetch for Markdown conversion: ${pageUrl}`, { selector });

        try {
            const { $ } = await fetchHtml(pageUrl);
            logger.debug(`Successfully fetched HTML for ${pageUrl}`);

            // Tracks which selector (if any) really produced the HTML we convert.
            let effectiveSelector = selector;
            let targetHtml: string | null;

            if (!selector) {
                logger.debug(`No selector provided, using body content.`);
                targetHtml = $('body').html();
            } else {
                const firstMatch = $(selector).first();
                if (firstMatch.length > 0) {
                    targetHtml = firstMatch.html();
                    logger.debug(`Using selector "${selector}"`);
                } else {
                    logger.warn(`Selector "${selector}" did not match any elements on ${pageUrl}. Falling back to body.`);
                    effectiveSelector = undefined;
                    targetHtml = $('body').html();
                }
            }

            if (targetHtml === null || targetHtml.trim() === '') {
                // The error type is keyed on the caller-supplied selector, not on
                // whether the body fallback was taken.
                const errorMsg = selector
                    ? `Content extraction failed: Selector "${selector}" yielded empty content.`
                    : `Content extraction failed: Body content is empty or could not be retrieved.`;
                logger.error(errorMsg, { pageUrl, selector });
                throw selector ? new NotFoundError(errorMsg) : new ServiceError(errorMsg);
            }

            const markdown = htmlToMarkdown(targetHtml);
            logger.info(`Successfully converted content from ${pageUrl} to Markdown.`);

            return {
                markdownContent: markdown,
                sourceUrl: pageUrl,
                selectorUsed: effectiveSelector,
            };
        } catch (error) {
            // Every failure inside the try block is logged here, including the
            // custom errors thrown above.
            const reason = error instanceof Error ? error.message : String(error);
            logger.error(`Error during fetch/conversion for ${pageUrl}`, { error: reason, pageUrl, selector });

            const isKnownError =
                error instanceof ValidationError ||
                error instanceof NotFoundError ||
                error instanceof ServiceError;
            if (isKnownError) {
                throw error;
            }

            // Anything else is wrapped, passing the original error as the second argument.
            throw new ServiceError(`Failed to fetch or convert page ${pageUrl}: ${reason}`, error);
        }
    }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/bsmi021/mcp-server-webscan'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.