MCP Fetch

by JeremyNixon
#!/usr/bin/env node
import { Server } from "@modelcontextprotocol/sdk/server/index.js"
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
import { z } from "zod"
import { zodToJsonSchema } from "zod-to-json-schema"
import fetch from "node-fetch"
import { JSDOM } from "jsdom"
import { Readability } from "@mozilla/readability"
import TurndownService from "turndown"
import { exec } from "node:child_process"
import { promisify } from "node:util"
import sharp from "sharp"

const execAsync = promisify(exec)

function sleep(ms: number) {
  return new Promise((resolve) => setTimeout(resolve, ms))
}

interface Image {
  src: string
  alt: string
  data?: Buffer
}

interface ExtractedContent {
  markdown: string
  images: Image[]
}

const DEFAULT_USER_AGENT_AUTONOMOUS =
  "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
const DEFAULT_USER_AGENT_MANUAL =
  "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"

const FetchArgsSchema = z.object({
  url: z.string().url(),
  maxLength: z.number().positive().max(1000000).default(20000),
  startIndex: z.number().min(0).default(0),
  raw: z.boolean().default(false),
})

const ListToolsSchema = z.object({
  method: z.literal("tools/list"),
})

const CallToolSchema = z.object({
  method: z.literal("tools/call"),
  params: z.object({
    name: z.string(),
    arguments: z.record(z.unknown()).optional(),
  }),
})

// Run Readability over the page and convert the simplified article to markdown
function extractContentFromHtml(
  html: string,
  url: string,
): ExtractedContent | string {
  const dom = new JSDOM(html, { url })
  const reader = new Readability(dom.window.document)
  const article = reader.parse()

  if (!article || !article.content) {
    return "<e>Page failed to be simplified from HTML</e>"
  }

  // Extract images from the article content only
  const articleDom = new JSDOM(article.content)
  const imgElements = Array.from(
    articleDom.window.document.querySelectorAll("img"),
  )

  const images: Image[] = imgElements.map((img) => {
    const src = img.src
    const alt = img.alt || ""
    return { src, alt }
  })

  const turndownService = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  })
  const markdown = turndownService.turndown(article.content)

  return { markdown, images }
}

// Download every referenced image; animated GIFs are reduced to their first frame
async function fetchImages(
  images: Image[],
): Promise<(Image & { data: Buffer })[]> {
  const fetchedImages = []
  for (const img of images) {
    const response = await fetch(img.src)
    if (!response.ok) {
      throw new Error(
        `Failed to fetch image ${img.src}: status ${response.status}`,
      )
    }
    const buffer = await response.arrayBuffer()
    const imageBuffer = Buffer.from(buffer)

    // Check if the image is a GIF and extract first frame if animated
    if (img.src.toLowerCase().endsWith(".gif")) {
      try {
        const metadata = await sharp(imageBuffer).metadata()
        if (metadata.pages && metadata.pages > 1) {
          // Extract first frame of animated GIF
          const firstFrame = await sharp(imageBuffer, { page: 0 })
            .png()
            .toBuffer()
          fetchedImages.push({
            ...img,
            data: firstFrame,
          })
          continue
        }
      } catch (error) {
        console.warn(`Warning: Failed to process GIF image ${img.src}:`, error)
      }
    }

    fetchedImages.push({
      ...img,
      data: imageBuffer,
    })
  }
  return fetchedImages
}

async function commandExists(cmd: string): Promise<boolean> {
  try {
    await execAsync(`which ${cmd}`)
    return true
  } catch {
    return false
  }
}

async function getImageDimensions(
  buffer: Buffer,
): Promise<{ width: number; height: number; size: number }> {
  const metadata = await sharp(buffer).metadata()
  return {
    width: metadata.width || 0,
    height: metadata.height || 0,
    size: buffer.length,
  }
}

// Merge images into vertical groups and copy each group to the macOS clipboard via osascript
async function addImagesToClipboard(
  images: (Image & { data: Buffer })[],
): Promise<void> {
  if (images.length === 0) return

  const hasPbcopy = await commandExists("pbcopy")
  const hasOsascript = await commandExists("osascript")

  if (!hasPbcopy) {
    throw new Error(
      "'pbcopy' command not found. This tool works on macOS only by default.",
    )
  }
  if (!hasOsascript) {
    throw new Error(
      "'osascript' command not found. Required to set clipboard with images.",
    )
  }

  const MAX_HEIGHT = 8000
  const MAX_SIZE_BYTES = 30 * 1024 * 1024 // 30MB
  const MAX_IMAGES_PER_GROUP = 6 // Maximum number of images per group
  const tempDir = "/tmp/mcp-fetch-images"

  await execAsync(`mkdir -p ${tempDir} && rm -f ${tempDir}/*.png`)

  // Process the images in groups
  let currentGroup: Buffer[] = []
  let currentHeight = 0
  let currentSize = 0

  const processGroup = async (group: Buffer[]) => {
    if (group.length === 0) return

    // Merge the images vertically
    const mergedImagePath = `${tempDir}/merged_${Date.now()}.png`
    await sharp({
      create: {
        width: Math.max(
          ...(await Promise.all(
            group.map(async (buffer) => {
              const metadata = await sharp(buffer).metadata()
              return metadata.width || 0
            }),
          )),
        ),
        height: (
          await Promise.all(
            group.map(async (buffer) => {
              const metadata = await sharp(buffer).metadata()
              return metadata.height || 0
            }),
          )
        ).reduce((a, b) => a + b, 0),
        channels: 4,
        background: { r: 255, g: 255, b: 255, alpha: 1 },
      },
    })
      .composite(
        await Promise.all(
          group.map(async (buffer, index) => {
            const previousHeights = await Promise.all(
              group.slice(0, index).map(async (b) => {
                const metadata = await sharp(b).metadata()
                return metadata.height || 0
              }),
            )
            const top = previousHeights.reduce((a, b) => a + b, 0)
            return {
              input: buffer,
              top,
              left: 0,
            }
          }),
        ),
      )
      .png()
      .toFile(mergedImagePath)

    const { stderr } = await execAsync(
      `osascript -e 'set the clipboard to (read (POSIX file "${mergedImagePath}") as «class PNGf»)'`,
    )
    if (stderr?.trim()) {
      const lines = stderr.trim().split("\n")
      const nonWarningLines = lines.filter((line) => !line.includes("WARNING:"))
      if (nonWarningLines.length > 0) {
        throw new Error("Failed to copy merged image to clipboard.")
      }
    }

    await sleep(500)

    const pasteScript = `osascript -e 'tell application "System Events" to keystroke "v" using command down'`
    const { stderr: pasteStderr } = await execAsync(pasteScript)
    if (pasteStderr?.trim()) {
      const lines = pasteStderr.trim().split("\n")
      const nonWarningLines = lines.filter((line) => !line.includes("WARNING:"))
      if (nonWarningLines.length > 0) {
        console.warn("Failed to paste merged image.")
      }
    }

    await sleep(500)
  }

  for (const img of images) {
    const { height, size } = await getImageDimensions(img.data)

    if (
      currentGroup.length >= MAX_IMAGES_PER_GROUP ||
      currentHeight + height > MAX_HEIGHT ||
      currentSize + size > MAX_SIZE_BYTES
    ) {
      // Process the current group
      await processGroup(currentGroup)
      // Start a new group
      currentGroup = [img.data]
      currentHeight = height
      currentSize = size
    } else {
      currentGroup.push(img.data)
      currentHeight += height
      currentSize += size
    }
  }

  // Process the remaining group
  await processGroup(currentGroup)

  await execAsync(`rm -rf ${tempDir}`)
}

interface FetchResult {
  content: string
  prefix: string
  imageUrls?: string[]
}

// Fetch a URL and, unless raw output is requested, simplify HTML to markdown
async function fetchUrl(
  url: string,
  userAgent: string,
  forceRaw = false,
): Promise<FetchResult> {
  const response = await fetch(url, {
    headers: { "User-Agent": userAgent },
  })

  if (!response.ok) {
    throw new Error(`Failed to fetch ${url} - status code ${response.status}`)
  }

  const contentType = response.headers.get("content-type") || ""
  const text = await response.text()
  const isHtml =
    text.toLowerCase().includes("<html") || contentType.includes("text/html")

  if (isHtml && !forceRaw) {
    const result = extractContentFromHtml(text, url)
    if (typeof result === "string") {
      return {
        content: result,
        prefix: "",
      }
    }

    const { markdown, images } = result
    const fetchedImages = await fetchImages(images)
    const imageUrls = fetchedImages.map((img) => img.src)

    if (fetchedImages.length > 0) {
      try {
        await addImagesToClipboard(fetchedImages)
        return {
          content: markdown,
          prefix: `Found and processed ${fetchedImages.length} images. Images have been merged vertically (max 6 images per group) and copied to your clipboard. Please paste (Cmd+V) to combine with the retrieved content.\n`,
          imageUrls,
        }
      } catch (err) {
        return {
          content: markdown,
          prefix: `Found ${fetchedImages.length} images but failed to copy them to the clipboard.\nError: ${err instanceof Error ? err.message : String(err)}\n`,
          imageUrls,
        }
      }
    }

    return {
      content: markdown,
      prefix: "",
      imageUrls,
    }
  }

  return {
    content: text,
    prefix: `Content type ${contentType} cannot be simplified to markdown, but here is the raw content:\n`,
  }
}

// Server setup
const server = new Server(
  {
    name: "mcp-fetch",
    version: "1.0.0",
  },
  {
    capabilities: {
      tools: {},
    },
  },
)

interface RequestHandlerExtra {
  signal: AbortSignal
}

server.setRequestHandler(
  ListToolsSchema,
  async (request: { method: "tools/list" }, extra: RequestHandlerExtra) => {
    const tools = [
      {
        name: "fetch",
        description:
          "Retrieves URLs from the Internet and extracts their content as markdown. If images are found, they are merged vertically (max 6 images per group, max height 8000px, max size 30MB per group) and copied to the clipboard of the user's host machine. You will need to paste (Cmd+V) to insert the images.",
        inputSchema: zodToJsonSchema(FetchArgsSchema),
      },
    ]
    return { tools }
  },
)

server.setRequestHandler(
  CallToolSchema,
  async (
    request: {
      method: "tools/call"
      params: { name: string; arguments?: Record<string, unknown> }
    },
    extra: RequestHandlerExtra,
  ) => {
    try {
      const { name, arguments: args } = request.params

      if (name !== "fetch") {
        throw new Error(`Unknown tool: ${name}`)
      }

      const parsed = FetchArgsSchema.safeParse(args)
      if (!parsed.success) {
        throw new Error(`Invalid arguments: ${parsed.error}`)
      }

      const { content, prefix, imageUrls } = await fetchUrl(
        parsed.data.url,
        DEFAULT_USER_AGENT_AUTONOMOUS,
        parsed.data.raw,
      )

      let finalContent = content
      if (finalContent.length > parsed.data.maxLength) {
        finalContent = finalContent.slice(
          parsed.data.startIndex,
          parsed.data.startIndex + parsed.data.maxLength,
        )
        finalContent += `\n\n<e>Content truncated. Call the fetch tool with a start_index of ${
          parsed.data.startIndex + parsed.data.maxLength
        } to get more content.</e>`
      }

      let imagesSection = ""
      if (imageUrls && imageUrls.length > 0) {
        imagesSection =
          "\n\nImages found in article:\n" +
          imageUrls.map((url) => `- ${url}`).join("\n")
      }

      return {
        content: [
          {
            type: "text",
            text: `${prefix}Contents of ${parsed.data.url}:\n${finalContent}${imagesSection}`,
          },
        ],
      }
    } catch (error) {
      return {
        content: [
          {
            type: "text",
            text: `Error: ${error instanceof Error ? error.message : String(error)}`,
          },
        ],
        isError: true,
      }
    }
  },
)

// Start server
async function runServer() {
  const transport = new StdioServerTransport()
  await server.connect(transport)
}

runServer().catch((error) => {
  process.stderr.write(`Fatal error running server: ${error}\n`)
  process.exit(1)
})
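
For reference, here is a minimal sketch of how a client could exercise the fetch tool over stdio using the MCP TypeScript client SDK. This is not part of the server above: the entry-point path (dist/index.js), the client name/version, and the example URL are placeholders you would adjust to your own build and use case.

import { Client } from "@modelcontextprotocol/sdk/client/index.js"
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"

async function main() {
  // Spawn the server as a child process and talk to it over stdio.
  // "dist/index.js" is a placeholder for wherever the compiled server lives.
  const transport = new StdioClientTransport({
    command: "node",
    args: ["dist/index.js"],
  })

  const client = new Client(
    { name: "mcp-fetch-example-client", version: "0.0.1" },
    { capabilities: {} },
  )
  await client.connect(transport)

  // Call the fetch tool with arguments matching FetchArgsSchema.
  const result = await client.callTool({
    name: "fetch",
    arguments: { url: "https://example.com", maxLength: 5000 },
  })
  console.log(result)

  await client.close()
}

main().catch((error) => {
  console.error(error)
  process.exit(1)
})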