/**
* WEB FETCH TOOL
*
* Implements the `web_fetch` MCP tool by delegating to EnriProxy.
*
* @module tools/WebFetchTool
*/
import type { EnriProxyClient } from "../client/EnriProxyClient.js";
import {
assertHttpUrl,
assertNonEmptyString,
assertObject,
optionalInt,
optionalString
} from "../shared/validation.js";
/**
 * Default number of characters to include in the human-readable MCP output.
 *
 * @remarks
 * The full fetched payload is still available in `structuredContent.content`,
 * but MCP clients may enforce tool-result token limits. Keeping the human
 * output short avoids duplication and reduces the chance of overflows.
 * Consumed by {@link WebFetchTool.formatOutput} when sizing the text preview.
 */
const DEFAULT_TEXT_PREVIEW_CHARS = 2000;
/**
 * Tool parameters for `web_fetch`.
 *
 * @remarks
 * At least one of {@link url} or {@link cursor} must be provided. Property
 * names are camelCase here; the wire-level MCP arguments use snake_case keys
 * (`max_chars`, `offset_chars`, `limit_chars`).
 */
export interface WebFetchToolParams {
  /**
   * URL to fetch. Required when {@link cursor} is absent.
   */
  readonly url?: string;
  /**
   * Cursor identifier returned by a previous call. When present, the fetch is
   * a continuation read and pagination parameters are honored.
   */
  readonly cursor?: string;
  /**
   * Optional prompt for extraction.
   */
  readonly prompt?: string;
  /**
   * Maximum content length in characters. Must be positive when provided.
   */
  readonly maxChars?: number;
  /**
   * Offset in characters for cursor pagination. Must be non-negative;
   * ignored unless {@link cursor} is set.
   */
  readonly offsetChars?: number;
  /**
   * Limit in characters for cursor pagination. Zero is treated as "no
   * limit"; ignored unless {@link cursor} is set.
   */
  readonly limitChars?: number;
}
/**
 * Tool result for `web_fetch`.
 *
 * @remarks
 * The first five fields are always present. `offset_chars` and `limit_chars`
 * are only populated on cursor continuation reads; the remaining optional
 * fields are forwarded from the proxy response when available.
 */
export interface WebFetchToolResult extends Record<string, unknown> {
  /**
   * Fetched content.
   */
  readonly content: string;
  /**
   * HTTP status code.
   */
  readonly status: number;
  /**
   * Content type of the response.
   */
  readonly content_type: string;
  /**
   * Whether content was truncated.
   */
  readonly truncated: boolean;
  /**
   * URL that was fetched.
   */
  readonly url: string;
  /**
   * Cursor identifier for pagination (when available).
   */
  readonly cursor?: string;
  /**
   * Offset in characters (cursor reads only).
   */
  readonly offset_chars?: number;
  /**
   * Limit in characters (cursor reads only).
   */
  readonly limit_chars?: number;
  /**
   * Total captured characters (cursor reads).
   */
  readonly total_chars?: number;
  /**
   * Whether more content exists beyond this slice.
   */
  readonly has_more?: boolean;
  /**
   * Whether content was reduced into an excerpt pack.
   */
  readonly reduced?: boolean;
  /**
   * Whether the upstream fetch was truncated.
   */
  readonly fetched_truncated?: boolean;
}
/**
 * Dependencies for {@link WebFetchTool}.
 */
export interface WebFetchToolDeps {
  /**
   * Creates an EnriProxy client with a base URL, API key, and timeout.
   * Invoked once per {@link WebFetchTool.execute} call.
   *
   * @param serverUrl - EnriProxy URL
   * @param apiKey - EnriProxy API key
   * @param timeoutMs - Timeout in ms
   * @returns Client instance
   */
  readonly createClient: (serverUrl: string, apiKey: string, timeoutMs: number) => EnriProxyClient;
  /**
   * Default EnriProxy server URL. Validated as an http(s) URL at execute time.
   */
  readonly defaultServerUrl: string;
  /**
   * Default EnriProxy API key. Must be non-empty at execute time.
   */
  readonly defaultApiKey: string;
  /**
   * Default timeout in milliseconds.
   */
  readonly defaultTimeoutMs: number;
  /**
   * Default maximum content length in characters returned by the tool when
   * `max_chars` is not provided.
   */
  readonly defaultMaxChars: number;
}
/**
 * MCP tool that fetches URL content via EnriProxy.
 *
 * @remarks
 * Two modes are supported: a fresh fetch by `url` and a continuation read by
 * `cursor` (with optional character offset/limit). npm package pages receive
 * a best-effort enrichment that combines registry metadata with the GitHub
 * README when one can be located.
 */
export class WebFetchTool {
  /**
   * Readme file candidates commonly used in GitHub repositories.
   */
  private static readonly README_FILENAMES: readonly string[] = [
    "README.md",
    "readme.md"
  ];
  /**
   * Default branches to try when resolving GitHub raw README URLs.
   */
  private static readonly README_BRANCHES: readonly string[] = ["main", "master"];
  /**
   * Tool dependencies.
   */
  private readonly deps: WebFetchToolDeps;
  /**
   * Creates a new {@link WebFetchTool}.
   *
   * @param deps - Tool dependencies
   */
  public constructor(deps: WebFetchToolDeps) {
    this.deps = deps;
  }
  /**
   * Gets the configured default max chars for web fetch results.
   *
   * @returns Default max chars
   */
  public getDefaultMaxChars(): number {
    return this.deps.defaultMaxChars;
  }
  /**
   * Validates raw MCP tool arguments.
   *
   * @param raw - Raw tool arguments
   * @returns Validated parameters
   * @throws Error when neither `url` nor `cursor` is provided, or when a
   * numeric argument is out of range
   */
  public parseParams(raw: unknown): WebFetchToolParams {
    const obj = assertObject(raw, "arguments");
    const cursorRaw = optionalString(obj["cursor"]);
    const cursor = cursorRaw?.trim() ? cursorRaw.trim() : undefined;
    const urlRaw = optionalString(obj["url"]);
    const url = urlRaw?.trim() ? assertHttpUrl(urlRaw.trim(), "url") : undefined;
    if (!cursor && !url) {
      throw new Error("web_fetch requires either 'url' or 'cursor'.");
    }
    const prompt = optionalString(obj["prompt"]);
    const maxChars = optionalInt(obj["max_chars"]);
    // Accept both snake_case keys and the shorter legacy aliases.
    const offsetCharsRaw = optionalInt(obj["offset_chars"]) ?? optionalInt(obj["offset"]);
    const limitCharsRaw = optionalInt(obj["limit_chars"]) ?? optionalInt(obj["limit"]);
    if (maxChars !== undefined && maxChars < 1) {
      throw new Error("max_chars must be positive.");
    }
    // Pagination arguments only apply to cursor continuation reads; drop them
    // silently for fresh URL fetches.
    const offsetChars = cursor ? offsetCharsRaw : undefined;
    let limitChars: number | undefined = cursor ? limitCharsRaw : undefined;
    if (offsetChars !== undefined && offsetChars < 0) {
      throw new Error("offset must be non-negative.");
    }
    if (limitChars !== undefined) {
      if (limitChars < 0) {
        // Zero is tolerated below as "no limit"; only negatives are rejected,
        // so the message must say non-negative (was wrongly "positive").
        throw new Error("limit must be non-negative.");
      }
      if (limitChars === 0) {
        limitChars = undefined;
      }
    }
    return {
      url,
      cursor,
      prompt,
      maxChars,
      offsetChars,
      limitChars
    };
  }
  /**
   * Executes the web fetch tool.
   *
   * @param params - Validated parameters
   * @returns Tool result
   * @throws Error when no URL is available and no cursor was provided
   */
  public async execute(params: WebFetchToolParams): Promise<WebFetchToolResult> {
    const serverUrl = assertHttpUrl(this.deps.defaultServerUrl, "ENRIPROXY_URL");
    const apiKey = assertNonEmptyString(this.deps.defaultApiKey, "ENRIPROXY_API_KEY");
    const client = this.deps.createClient(serverUrl, apiKey, this.deps.defaultTimeoutMs);
    const effectiveMaxChars =
      typeof params.maxChars === "number" ? params.maxChars : this.deps.defaultMaxChars;
    // Cursor continuation: read the next slice of a previously captured fetch.
    if (typeof params.cursor === "string" && params.cursor.trim()) {
      const response = await client.webFetch({
        cursor: params.cursor.trim(),
        offsetChars: params.offsetChars,
        limitChars: params.limitChars,
        maxChars: effectiveMaxChars
      });
      const resolvedUrl = response.url ?? params.url ?? "(cursor)";
      return {
        content: response.content,
        status: response.status,
        content_type: response.content_type,
        truncated: response.truncated,
        url: resolvedUrl,
        cursor: response.cursor,
        offset_chars: response.offset_chars,
        limit_chars: response.limit_chars,
        total_chars: response.total_chars,
        has_more: response.has_more,
        reduced: response.reduced,
        fetched_truncated: response.fetched_truncated
      };
    }
    if (!params.url) {
      throw new Error("web_fetch requires a URL when cursor is not provided.");
    }
    const url: string = params.url;
    const urlParams: WebFetchToolParams & { readonly url: string } = {
      ...params,
      url
    };
    // npm package pages get a best-effort enrichment (registry metadata plus
    // README). Enrichment failures must not fail the whole tool call, so fall
    // through to a plain fetch of the requested URL on any error.
    try {
      const npmResult = await this.tryExecuteNpmPackageFetch(
        urlParams,
        client,
        effectiveMaxChars
      );
      if (npmResult) {
        return npmResult;
      }
    } catch {
      // Ignore and fall back to a direct fetch below.
    }
    const response = await client.webFetch({
      url,
      prompt: params.prompt,
      maxChars: effectiveMaxChars
    });
    return {
      content: response.content,
      status: response.status,
      content_type: response.content_type,
      truncated: response.truncated,
      url: response.url ?? url,
      cursor: response.cursor,
      total_chars: response.total_chars,
      has_more: response.has_more,
      reduced: response.reduced,
      fetched_truncated: response.fetched_truncated
    };
  }
  /**
   * Attempts to provide a higher-quality fetch for npm package pages by
   * combining registry metadata with the repository README.
   *
   * @param params - Tool parameters
   * @param client - EnriProxy client
   * @param maxChars - Maximum content length to return
   * @returns Tool result if the URL is an npm package page, otherwise null
   */
  private async tryExecuteNpmPackageFetch(
    params: WebFetchToolParams & { readonly url: string },
    client: EnriProxyClient,
    maxChars: number
  ): Promise<WebFetchToolResult | null> {
    const requestedUrl = new URL(params.url);
    const packageName = this.tryParseNpmPackageName(requestedUrl);
    if (!packageName) {
      return null;
    }
    const metadataUrl = `https://registry.npmjs.org/${packageName}/latest`;
    // Cap the metadata read: the JSON document is small relative to content.
    const metadataResponse = await client.webFetch({
      url: metadataUrl,
      maxChars: Math.min(maxChars, 20000)
    });
    if (metadataResponse.status < 200 || metadataResponse.status >= 300) {
      return null;
    }
    const metadata = this.tryParseJsonObject(metadataResponse.content);
    if (!metadata) {
      return null;
    }
    const name = this.tryGetString(metadata["name"]) ?? packageName;
    const version = this.tryGetString(metadata["version"]);
    const description = this.tryGetString(metadata["description"]);
    const license = this.tryGetString(metadata["license"]);
    const repositoryUrl = this.tryGetRepositoryUrl(metadata["repository"]);
    const homepageUrl = this.tryGetString(metadata["homepage"]);
    let gitHubRepoUrl: string | null = null;
    if (repositoryUrl) {
      gitHubRepoUrl = this.tryNormalizeGitHubRepoUrl(repositoryUrl);
    }
    let readmeText: string | null = null;
    let readmeTruncated = false;
    if (gitHubRepoUrl) {
      const readmeResult = await this.tryFetchGitHubReadme(
        client,
        gitHubRepoUrl,
        maxChars
      );
      if (readmeResult) {
        readmeText = readmeResult.content;
        readmeTruncated = readmeResult.truncated;
      }
    }
    // Assemble a compact markdown summary; sections are omitted when absent.
    const lines: string[] = [];
    lines.push(`# ${name}`);
    lines.push("");
    lines.push(`Requested URL: ${params.url}`);
    lines.push("");
    if (description) {
      lines.push(`Description: ${description}`);
    }
    if (version) {
      lines.push(`Latest version: ${version}`);
    }
    if (license) {
      lines.push(`License: ${license}`);
    }
    if (homepageUrl) {
      lines.push(`Homepage: ${homepageUrl}`);
    }
    if (gitHubRepoUrl) {
      lines.push(`Repository: ${gitHubRepoUrl}`);
    } else if (repositoryUrl) {
      lines.push(`Repository: ${repositoryUrl}`);
    }
    if (readmeText) {
      lines.push("");
      lines.push("## README");
      lines.push("");
      lines.push(readmeText);
    }
    const combined = lines.join("\n").trim() + "\n";
    const shouldTrim = combined.length > maxChars;
    const content = shouldTrim ? combined.slice(0, maxChars) : combined;
    return {
      content,
      status: 200,
      content_type: "text/markdown",
      truncated: shouldTrim || readmeTruncated || metadataResponse.truncated,
      url: params.url
    };
  }
  /**
   * Attempts to parse an npm package name from an npmjs.com package page URL.
   *
   * @param url - Parsed URL
   * @returns npm package name (e.g. "chalk" or "@scope/name") or null
   */
  private tryParseNpmPackageName(url: URL): string | null {
    const hostname = url.hostname.toLowerCase();
    if (hostname !== "www.npmjs.com" && hostname !== "npmjs.com") {
      return null;
    }
    const segments = url.pathname.split("/").filter(Boolean);
    if (segments.length < 2) {
      return null;
    }
    if (segments[0] !== "package") {
      return null;
    }
    const first = segments[1];
    if (!first) {
      return null;
    }
    // Scoped packages span two path segments: /package/@scope/name.
    if (first.startsWith("@")) {
      const second = segments[2];
      if (!second) {
        return null;
      }
      return `${first}/${second}`;
    }
    return first;
  }
  /**
   * Tries to parse a JSON object from a string.
   *
   * @param input - JSON string
   * @returns Parsed object or null (non-objects and arrays are rejected)
   */
  private tryParseJsonObject(input: string): Record<string, unknown> | null {
    try {
      const parsed: unknown = JSON.parse(input);
      if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        return null;
      }
      return parsed as Record<string, unknown>;
    } catch {
      return null;
    }
  }
  /**
   * Extracts a string from an unknown value if possible.
   *
   * @param value - Unknown input
   * @returns Trimmed non-empty string or null
   */
  private tryGetString(value: unknown): string | null {
    if (typeof value !== "string") {
      return null;
    }
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : null;
  }
  /**
   * Extracts a repository URL from npm metadata, which may be either a plain
   * string or an object of the form `{ type, url }`.
   *
   * @param repository - Repository field value
   * @returns Normalized URL string or null
   */
  private tryGetRepositoryUrl(repository: unknown): string | null {
    if (typeof repository === "string") {
      return this.normalizeRepositoryUrl(repository);
    }
    if (typeof repository === "object" && repository !== null && !Array.isArray(repository)) {
      const record = repository as Record<string, unknown>;
      const rawUrl = this.tryGetString(record["url"]);
      if (!rawUrl) {
        return null;
      }
      return this.normalizeRepositoryUrl(rawUrl);
    }
    return null;
  }
  /**
   * Normalizes common git repository URL schemes into an https URL.
   *
   * @param rawUrl - Raw repository URL from metadata (e.g. "git+https://…",
   * "git://…", possibly with a ".git" suffix)
   * @returns Normalized URL string or null when no http(s) URL results
   */
  private normalizeRepositoryUrl(rawUrl: string): string | null {
    let urlText = rawUrl.trim();
    if (urlText.startsWith("git+")) {
      urlText = urlText.slice("git+".length);
    }
    if (urlText.startsWith("git://")) {
      urlText = `https://${urlText.slice("git://".length)}`;
    }
    if (urlText.endsWith(".git")) {
      urlText = urlText.slice(0, -".git".length);
    }
    try {
      const parsed = new URL(urlText);
      if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
        return null;
      }
      return parsed.toString();
    } catch {
      return null;
    }
  }
  /**
   * Normalizes a GitHub repository URL to the canonical https form.
   *
   * @param repositoryUrl - Repository URL
   * @returns Canonical GitHub repo URL (https://github.com/{owner}/{repo}) or null
   */
  private tryNormalizeGitHubRepoUrl(repositoryUrl: string): string | null {
    try {
      const parsed = new URL(repositoryUrl);
      if (parsed.hostname.toLowerCase() !== "github.com") {
        return null;
      }
      const segments = parsed.pathname.split("/").filter(Boolean);
      if (segments.length < 2) {
        return null;
      }
      const owner = segments[0];
      const repo = segments[1];
      if (!owner || !repo) {
        return null;
      }
      return `https://github.com/${owner}/${repo}`;
    } catch {
      return null;
    }
  }
  /**
   * Attempts to fetch a GitHub repository README via raw.githubusercontent.com,
   * probing each default branch and README filename candidate in turn.
   *
   * @param client - EnriProxy client
   * @param githubRepoUrl - Canonical GitHub repo URL
   * @param maxChars - Maximum content length
   * @returns README content if found, otherwise null
   */
  private async tryFetchGitHubReadme(
    client: EnriProxyClient,
    githubRepoUrl: string,
    maxChars: number
  ): Promise<{ content: string; truncated: boolean } | null> {
    const parsed = new URL(githubRepoUrl);
    const segments = parsed.pathname.split("/").filter(Boolean);
    if (segments.length < 2) {
      return null;
    }
    const owner = segments[0];
    const repo = segments[1];
    if (!owner || !repo) {
      return null;
    }
    for (const branch of WebFetchTool.README_BRANCHES) {
      for (const filename of WebFetchTool.README_FILENAMES) {
        // BUGFIX: the URL previously interpolated the literal text
        // "$(unknown)" instead of the README filename candidate, so no
        // probe could ever resolve. Use the loop's filename.
        const url = `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${filename}`;
        const response = await client.webFetch({
          url,
          maxChars
        });
        if (response.status >= 200 && response.status < 300 && response.content.trim().length > 0) {
          return {
            content: response.content,
            truncated: response.truncated
          };
        }
      }
    }
    return null;
  }
  /**
   * Formats results for MCP text output: a one-line header followed by the
   * full content, or a bounded preview when the content is long.
   *
   * @param result - Tool result
   * @returns Formatted text
   */
  public formatOutput(result: WebFetchToolResult): string {
    const truncatedNote = result.truncated ? " [TRUNCATED]" : "";
    const previewChars = Math.min(DEFAULT_TEXT_PREVIEW_CHARS, result.content.length);
    const preview = result.content.slice(0, previewChars);
    const header = `Fetched ${result.url} (${result.content_type}, ${result.content.length} chars)${truncatedNote}.`;
    const previewNote =
      previewChars < result.content.length
        ? `\n\nPreview (first ${previewChars} chars):\n\n`
        : "\n\nContent:\n\n";
    return header + previewNote + preview;
  }
}