Skip to main content
Glama

mcp-web

by TsFreddie
index.ts (23.7 kB)
#!/usr/bin/env bun
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import * as cheerio from "cheerio";
import * as pureimage from "pureimage";
import { Readable, PassThrough } from "stream";
import { readFile } from "fs/promises";
import path from "path";
import http from "http";
import { randomBytes } from "crypto";
import { parseHTML } from "linkedom";
import { Defuddle } from "defuddle/node";

/** Maps the region names exposed by the `search` tool to the value sent as the `kl` form field. */
const supportedRegions: Record<string, string> = {
  global: "",
  argentina: "ar-es",
  australia: "au-en",
  austria: "at-de",
  belgium_fr: "be-fr",
  belgium_nl: "be-nl",
  brazil: "br-pt",
  bulgaria: "bg-bg",
  canada_en: "ca-en",
  canada_fr: "ca-fr",
  catalonia: "ct-ca",
  chile: "cl-es",
  china: "cn-zh",
  colombia: "co-es",
  croatia: "hr-hr",
  czech_republic: "cz-cs",
  denmark: "dk-da",
  estonia: "ee-et",
  finland: "fi-fi",
  france: "fr-fr",
  germany: "de-de",
  greece: "gr-el",
  hong_kong: "hk-tzh",
  hungary: "hu-hu",
  iceland: "is-is",
  india_en: "in-en",
  indonesia_en: "id-en",
  ireland: "ie-en",
  israel_en: "il-en",
  italy: "it-it",
  japan: "jp-jp",
  korea: "kr-kr",
  latvia: "lv-lv",
  lithuania: "lt-lt",
  malaysia_en: "my-en",
  mexico: "mx-es",
  netherlands: "nl-nl",
  new_zealand: "nz-en",
  norway: "no-no",
  pakistan_en: "pk-en",
  peru: "pe-es",
  philippines_en: "ph-en",
  poland: "pl-pl",
  portugal: "pt-pt",
  romania: "ro-ro",
  russia: "ru-ru",
  saudi_arabia: "xa-ar",
  singapore: "sg-en",
  slovakia: "sk-sk",
  slovenia: "sl-sl",
  south_africa: "za-en",
  spain_ca: "es-ca",
  spain_es: "es-es",
  sweden: "se-sv",
  switzerland_de: "ch-de",
  switzerland_fr: "ch-fr",
  taiwan: "tw-tzh",
  thailand_en: "th-en",
  turkey: "tr-tr",
  us_english: "us-en",
  us_spanish: "us-es",
  ukraine: "ua-uk",
  united_kingdom: "uk-en",
  vietnam_en: "vn-en",
};

/** Maps the date-frame names exposed by the `search` tool to the value sent as the `df` form field. */
const supportedDateFrame: Record<string, string> = {
  any: "",
  past_day: "d",
  past_week: "w",
  past_month: "m",
  past_year: "y",
};

/** Pagination state shared between the `search`, `search_next` and `solve_captcha` tools. */
type SearchState = {
  currentQuery: string | null;
  currentPage: number;
  /** URL-encoded body of the "Next" form from the last parsed result page, if any. */
  nextFormData: string | null;
};

/** Details of the CAPTCHA form most recently seen in a DuckDuckGo response. */
type CaptchaState = {
  challenge: string | null;
  images: string[] | null;
  action: string | null;
  submitValue: string | null;
  checkboxNames: string[] | null;
};

/** Content item shapes returned by the tool handlers in this file. */
type ToolContent =
  | { type: "text"; text: string }
  | { type: "image"; data: string; mimeType: string };

/** Result object returned by every tool handler in this file. */
type ToolResult = { content: ToolContent[]; isError?: boolean };

/** Builds a CAPTCHA state in which no challenge is active. */
const emptyCaptchaState = (): CaptchaState => ({
  challenge: null,
  images: null,
  action: null,
  submitValue: null,
  checkboxNames: null,
});

// Search state management
let searchState: SearchState = {
  currentQuery: null,
  currentPage: 1,
  nextFormData: null,
};

// CAPTCHA challenge state
let captchaState: CaptchaState = emptyCaptchaState();

// Rendered CAPTCHA grids, keyed by the URL path they are served under.
const captchaImages = new Map<string, Buffer>();

/** Wraps a single text message in a tool result; `isError` is only set when requested. */
const textResult = (text: string, isError = false): ToolResult =>
  isError
    ? { content: [{ type: "text", text }], isError: true }
    : { content: [{ type: "text", text }] };

/** Extracts a printable message from a value caught in a `catch` clause. */
const errorMessageOf = (error: unknown): string =>
  error instanceof Error ? error.message : "Unknown error occurred";

/** Creates a readable stream that yields `buffer` as one chunk and then ends. */
const bufferToStream = (buffer: Buffer): Readable => {
  const readable = new Readable();
  readable.push(buffer);
  readable.push(null);
  return readable;
};

/**
 * Starts the local CAPTCHA image server, registers the `fetch`, `search`,
 * `search_next` and `solve_captcha` tools and connects the MCP server over stdio.
 */
const main = async () => {
  const hostname = "127.0.0.1";

  const captchaServer = await new Promise<http.Server>((resolve, reject) => {
    // Basic HTTP server just for serving the CAPTCHA image
    const server = http.createServer((req, res) => {
      const image = captchaImages.get(req.url || "");
      if (image) {
        res.statusCode = 200;
        res.setHeader("Content-Type", "image/png");
        res.end(image);
      } else {
        res.statusCode = 404;
        res.end();
      }
    });
    // Without this a failed listen() would leave the promise pending forever.
    server.once("error", reject);
    // Port 0 asks the OS for any free port.
    server.listen(0, hostname, () => {
      resolve(server);
    });
  });

  /** Returns the TCP port the CAPTCHA image server is bound to. */
  const captchaServerPort = (): number => {
    const address = captchaServer.address();
    if (address === null || typeof address === "string") {
      throw new Error("CAPTCHA image server is not listening on a TCP port.");
    }
    return address.port;
  };

  const locale = Intl.DateTimeFormat().resolvedOptions().locale;
  const lang = locale.split("-")[0];

  const defaultHeaders = {
    "User-Agent":
      "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": !lang
      ? "en-US,en;q=0.9"
      : `${locale},${lang};q=0.9`,
    "Accept-Encoding": "gzip, deflate, br, zstd",
    Referer: "https://html.duckduckgo.com/",
    "Sec-GPC": "1",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    DNT: "1",
    Priority: "u=0, i",
    Pragma: "no-cache",
    "Cache-Control": "no-cache",
    TE: "trailers",
  };

  /** POSTs a URL-encoded form body with the default headers plus the DuckDuckGo origin. */
  const postForm = (url: string | URL, body: string) =>
    fetch(url, {
      method: "POST",
      headers: {
        ...defaultHeaders,
        "Content-Type": "application/x-www-form-urlencoded",
        Origin: "https://html.duckduckgo.com",
      },
      body,
    });

  /**
   * Classifies a DuckDuckGo HTML response and extracts what each page kind carries.
   *
   * Side effects: overwrites `captchaState` when a challenge form is present and
   * stores the "Next" form body in `searchState.nextFormData` when a next page exists.
   *
   * @param html Raw HTML of the response.
   * @returns An object tagged by `mode`: "captcha", "captcha-failed",
   *   "captcha-success", "unknown" or "results".
   */
  async function parseSearchResults(html: string) {
    // Parse the HTML with Cheerio
    const $ = cheerio.load(html);

    // Extract search results
    const results: { title: string; url: string; description: string }[] = [];

    // Check if we got a CAPTCHA challenge
    const challengeForm = $("#challenge-form");
    if (challengeForm.length > 0) {
      // Extract CAPTCHA details
      const action = challengeForm.attr("action") || "";
      const submitButton = challengeForm.find(
        "button[name='challenge-submit']"
      );
      const submitValue = submitButton.attr("value") || "";

      // Extract image URLs and checkbox names
      const images: string[] = [];
      const checkboxNames: string[] = [];
      challengeForm.find(".anomaly-modal__image").each((_, elem) => {
        const src = $(elem).attr("src") || "";
        // Convert relative path to absolute URL
        const absoluteUrl = new URL(src, "https://duckduckgo.com").href;
        images.push(absoluteUrl);

        // Extract checkbox name from the image filename
        const filename = src.split("/").pop()?.replace(".jpg", "") || "";
        if (filename) {
          checkboxNames.push(`image-check_${filename}`);
        }
      });

      const challenge = challengeForm
        .find(".anomaly-modal__instructions")
        .text();

      // Store CAPTCHA state
      captchaState = {
        challenge,
        images,
        action,
        submitValue,
        checkboxNames,
      };

      return {
        mode: "captcha" as const,
        error: "Search blocked by CAPTCHA challenge.",
        captcha: {
          challenge,
          images,
        },
      } as const;
    }

    // Check if we got an error form
    const errorForm = $("#error-form");
    if (errorForm.length > 0) {
      const instruction = errorForm
        .find(".anomaly-modal__error-instructions")
        .first();
      return {
        mode: "captcha-failed" as const,
        error: instruction.text().trim(),
        next: errorForm.attr("action") || "",
      } as const;
    }

    // Check if we got a success form
    const successForm = $("#success-form");
    if (successForm.length > 0) {
      return {
        mode: "captcha-success" as const,
      } as const;
    }

    const resultsElement = $(".results");
    if (resultsElement.length === 0) {
      // Not a result page, can't display results
      return {
        mode: "unknown" as const,
      } as const;
    }

    $(".result").each((_, elem) => {
      const $result = $(elem);
      // The result link supplies both the title text and the target URL.
      const linkElement = $result.find(".result__a").first();
      const descriptionElement = $result.find(".result__snippet").first();

      if (linkElement.length > 0) {
        const title = linkElement.text().trim();
        const url = linkElement.attr("href") || "";
        const description =
          descriptionElement.length > 0
            ? descriptionElement.text().trim()
            : "";

        if (title) {
          results.push({
            title,
            url,
            description,
          });
        }
      }
    });

    // Look for pagination forms
    const next = $("form>[value='Next']").parent();
    const hasMore = next.length;

    if (!hasMore) {
      return {
        mode: "results" as const,
        hasMore,
        results,
      } as const;
    }

    // Remember the next-page form so `search_next` can replay it
    const inputs = next.find("input");
    const formData = new URLSearchParams();
    for (const input of inputs) {
      if (input.attribs.name) {
        formData.append(input.attribs.name, input.attribs.value || "");
      }
    }
    searchState.nextFormData = formData.toString();

    return {
      mode: "results" as const,
      hasMore,
      results,
    } as const;
  }

  /** Loads and decodes `assets/captcha-overlay.png` from next to this module. */
  const overlay = async () => {
    // load image
    const file = await readFile(
      path.join(import.meta.dir, "assets/captcha-overlay.png")
    );
    return pureimage.decodePNGFromStream(bufferToStream(file));
  };

  /** Downloads one CAPTCHA tile; returns `null` (after logging) when it cannot be fetched. */
  const downloadImage = async (imageUrl: string): Promise<Buffer | null> => {
    if (!imageUrl) return null;
    try {
      const response = await fetch(imageUrl);
      if (response.ok) {
        const arrayBuffer = await response.arrayBuffer();
        return Buffer.from(arrayBuffer);
      }
    } catch (error) {
      console.error(`Error downloading image ${imageUrl}:`, error);
    }
    return null;
  };

  /**
   * Draws nine JPEG tiles into a 3x3 grid, applies the overlay image on top and
   * encodes the canvas as PNG.
   *
   * @param imageBuffers Exactly nine JPEG buffers in row-major order.
   * @returns The encoded PNG.
   */
  const renderCaptchaGrid = async (imageBuffers: Buffer[]): Promise<Buffer> => {
    const gridSize = 3;
    // Every tile is scaled to this size regardless of the source image size.
    const tileSize = 256;

    // Create a blank canvas for the composite image
    const canvas = pureimage.make(tileSize * gridSize, tileSize * gridSize);
    const ctx = canvas.getContext("2d");

    // Fill with white background
    ctx.fillStyle = "white";
    ctx.fillRect(0, 0, tileSize * gridSize, tileSize * gridSize);

    // Composite images onto canvas
    for (let row = 0; row < gridSize; row++) {
      for (let col = 0; col < gridSize; col++) {
        const index = row * gridSize + col;
        const img = await pureimage.decodeJPEGFromStream(
          bufferToStream(imageBuffers[index])
        );

        ctx.drawImage(
          img,
          0,
          0,
          img.width,
          img.height,
          col * tileSize,
          row * tileSize,
          tileSize,
          tileSize
        );

        // Draw index number
        // NOTE(review): this is the 0-based index drawn at negative offsets from
        // the tile origin, while the user is asked for numbers 1 to 9; presumably
        // the overlay image provides the visible labels — confirm.
        ctx.fillStyle = "red";
        ctx.font = "20px Arial";
        ctx.fillText(
          index.toString(),
          col * tileSize - 5,
          row * tileSize - 25
        );
      }
    }

    // Overlay image
    const overlayImage = await overlay();
    ctx.drawImage(
      overlayImage,
      0,
      0,
      tileSize * gridSize,
      tileSize * gridSize
    );

    // Convert to PNG buffer
    const passThrough = new PassThrough();
    const chunks: Buffer[] = [];
    passThrough.on("data", (chunk: Buffer) => chunks.push(chunk));
    await pureimage.encodePNGToStream(canvas, passThrough);
    passThrough.end();

    return Buffer.concat(chunks);
  };

  /**
   * Converts a parsed DuckDuckGo page into the tool result shown to the model.
   *
   * For a CAPTCHA page the nine tiles are downloaded, rendered into one grid
   * image and published on the local image server for five minutes.
   *
   * @param data Output of `parseSearchResults`.
   * @param query Query string echoed back in the result summary.
   */
  const dataToResult = async (
    data: Awaited<ReturnType<typeof parseSearchResults>>,
    query: string
  ): Promise<ToolResult> => {
    if (data.mode === "captcha-failed") {
      return textResult(
        `Captcha failed. Notify user to retry giving the captcha result.\n\n${data.error}.`,
        true
      );
    }

    if (data.mode === "captcha-success") {
      // search again
      return textResult(
        `Captcha successfully solved. You should call "search" tool again.`
      );
    }

    if (data.mode === "unknown") {
      // The page was neither a result list nor a known CAPTCHA form.
      return textResult(
        `Search can not be performed due to unknown error.`,
        true
      );
    }

    if (data.mode === "captcha") {
      // Download all images in parallel, dropping the ones that failed
      const downloaded = await Promise.all(
        data.captcha.images.map(downloadImage)
      );
      const imageBuffers = downloaded.filter(
        (buffer): buffer is Buffer => buffer !== null
      );

      // The grid can only be rendered when all 9 tiles are available
      if (imageBuffers.length !== 9) {
        return textResult(
          `Search blocked by DuckDuckGo, this search cannot be performed.`,
          true
        );
      }

      const buffer = await renderCaptchaGrid(imageBuffers);

      // A random path keeps the image URL unguessable.
      const imageUrl = `/${randomBytes(16).toString("hex")}.png`;
      captchaImages.set(imageUrl, buffer);

      // available for 5 minutes
      setTimeout(() => captchaImages.delete(imageUrl), 5 * 60 * 1000);

      return textResult(
        `Search blocked by CAPTCHA challenge.\n\nAsk user to solve CAPTCHA: "${
          data.captcha.challenge
        }". Inform user that the search requires CAPTCHA to be solved. Inform user to check this image "http://${hostname}:${captchaServerPort()}${imageUrl}" and reply the result numbers selected from 1 to 9. Do not attempt to solve it for user or provide suggestions.\n\nThen use the "solve_captcha" tool with the list of result numbers.`
      );
    }

    const results = data.results ?? [];
    // Each entry is a Markdown link followed by its snippet.
    const listing = results
      .map((r) => `[${r.title}](${r.url})\n${r.description}`)
      .join("\n\n");

    return textResult(
      `Found ${results.length} results for query "${query}". ${
        data.hasMore
          ? "More pages available via search_next tool. Only use it if you have yet to obtain enough information."
          : "No more pages available."
      }\n\nResults:\n${listing || "No results found."}`
    );
  };

  // Create an MCP server with stdio transport
  const server = new McpServer({
    name: "mcp-web",
    version: "1.0.0",
  });

  // Register the fetch tool
  server.registerTool(
    "fetch",
    {
      title: "Fetch URL",
      description:
        "Fetches the content of a URL and returns the extracted article content. Only supports GET requests. Not suitable for debugging HTML.",
      inputSchema: {
        url: z.string().url().describe("The URL to read about"),
        headers: z
          .record(z.string())
          .optional()
          .describe("Optional headers to include in the request"),
      },
    },
    async ({ url, headers }) => {
      try {
        // Parse url
        const origin = new URL(url).origin;

        // Merge custom headers with default headers; caller-supplied values win
        const mergedHeaders = { ...defaultHeaders, Origin: origin, ...headers };

        // Make the fetch request
        // NOTE(review): the body is processed even for non-2xx responses.
        const response = await fetch(url, {
          method: "GET",
          headers: mergedHeaders,
        });

        // Get the HTML content
        const html = await response.text();
        const dom = parseHTML(html);

        // remove all iframes
        const iframes = dom.document.querySelectorAll("iframe");
        for (const iframe of iframes) {
          iframe.remove();
        }

        // remove all svgs
        const svgs = dom.document.querySelectorAll("svg");
        for (const svg of svgs) {
          svg.remove();
        }

        // blank out all data URIs
        const dataURIs = dom.document.querySelectorAll("[src^='data:']");
        for (const dataURI of dataURIs) {
          dataURI.setAttribute("src", "");
        }

        const article = await Defuddle(dom as any, url, {
          markdown: true,
        });

        return {
          content: [
            {
              type: "text",
              text: article.content,
            },
          ],
        };
      } catch (error) {
        return textResult(`Error fetching URL: ${errorMessageOf(error)}`, true);
      }
    }
  );

  // Register the search tool
  server.registerTool(
    "search",
    {
      title: "Search DuckDuckGo",
      description:
        "Searches DuckDuckGo and returns parsed results. Convert user's natrual language to keywords if needed. If user specifies a region, make sure to search keywords in that region's language. Starts from page 1 every time it is called. Use the fetch tool on result URLs to read more about them.",
      inputSchema: {
        query: z.string().describe("The search query"),
        region: z
          .enum(Object.keys(supportedRegions) as [string, ...string[]])
          .optional()
          .describe("The region to search in"),
        dateFrame: z
          .enum(Object.keys(supportedDateFrame) as [string, ...string[]])
          .optional()
          .describe("The date frame to search in"),
      },
    },
    async ({ query, region, dateFrame }) => {
      try {
        // Reset search state for new search
        searchState = {
          currentQuery: query,
          currentPage: 1,
          nextFormData: null,
        };

        const searchParams = new URLSearchParams();
        searchParams.set("q", query);

        // Entries mapped to "" (global / any) are intentionally not sent
        if (region && supportedRegions[region]) {
          searchParams.set("kl", supportedRegions[region]);
        }

        if (dateFrame && supportedDateFrame[dateFrame]) {
          searchParams.set("df", supportedDateFrame[dateFrame]);
        }

        searchParams.set("b", "");

        // Make the search request with proper headers
        const response = await postForm(
          "https://html.duckduckgo.com/html/",
          searchParams.toString()
        );

        if (!response.ok) {
          return textResult(
            `Error fetching search results: ${response.status} ${response.statusText}`,
            true
          );
        }

        const html = await response.text();

        // Parse search results
        const data = await parseSearchResults(html);
        return await dataToResult(data, query);
      } catch (error) {
        return textResult(
          `Error performing search: ${errorMessageOf(error)}`,
          true
        );
      }
    }
  );

  // Register the search_next tool
  server.registerTool(
    "search_next",
    {
      title: "Next Search Page",
      description:
        "Navigates to the next page of search results. Warning: Do not use more than 3 times as search quality degrades with excessive pagination.",
    },
    async () => {
      if (!searchState.nextFormData || searchState.currentPage > 5) {
        return textResult("No next page available.", true);
      }

      try {
        searchState.currentPage++;

        // Consume the stored form; parsing the response stores a new one
        // when a further page exists.
        const body = searchState.nextFormData;
        searchState.nextFormData = null;

        // Make the search request with proper headers
        const response = await postForm(
          "https://html.duckduckgo.com/html/",
          body
        );

        if (!response.ok) {
          return textResult(
            `Error fetching search results: ${response.status} ${response.statusText}`,
            true
          );
        }

        const html = await response.text();

        // Parse search results
        const data = await parseSearchResults(html);
        return await dataToResult(data, searchState.currentQuery || "");
      } catch (error) {
        return textResult(
          `Error performing search: ${errorMessageOf(error)}`,
          true
        );
      }
    }
  );

  // Register the solve_captcha tool
  server.registerTool(
    "solve_captcha",
    {
      title: "Solve CAPTCHA Challenge",
      description:
        "Solves a CAPTCHA challenge by providing the indices of images containing the requested object",
      inputSchema: {
        indices: z
          .array(z.number())
          .describe(
            "Array of indices (1-9) of images that contain the requested object"
          ),
      },
    },
    async ({ indices }) => {
      try {
        // Check if we have a CAPTCHA challenge
        if (
          !captchaState.challenge ||
          !captchaState.images ||
          !captchaState.action ||
          !captchaState.submitValue ||
          !captchaState.checkboxNames
        ) {
          return textResult("No CAPTCHA challenge is currently active.", true);
        }

        // Build form data
        const formData = new URLSearchParams();

        // Add the submit value
        formData.append("challenge-submit", captchaState.submitValue);

        // Add selected image checkboxes (indices are 1-based; others are ignored)
        for (const index of indices) {
          if (index >= 1 && index <= captchaState.checkboxNames.length) {
            const checkboxName = captchaState.checkboxNames[index - 1];
            if (checkboxName) {
              formData.append(checkboxName, "1");
            }
          }
        }

        const actionUrl = new URL(
          captchaState.action,
          "https://duckduckgo.com"
        );

        // Make the POST request to solve the CAPTCHA
        const response = await postForm(actionUrl, formData.toString());

        if (!response.ok) {
          return textResult(
            `Error solving CAPTCHA: ${response.status} ${response.statusText}`,
            true
          );
        }

        const html = await response.text();

        // Parse search results
        const data = await parseSearchResults(html);

        // Reset CAPTCHA state once the challenge has been accepted, so it
        // cannot be submitted again. A new challenge page has already replaced
        // the state during parsing, and a failed attempt keeps it for a retry.
        if (data.mode === "captcha-success" || data.mode === "results") {
          captchaState = emptyCaptchaState();
        }

        return await dataToResult(data, searchState.currentQuery || "");
      } catch (error) {
        return textResult(
          `Error solving CAPTCHA: ${errorMessageOf(error)}`,
          true
        );
      }
    }
  );

  // Connect via stdio transport
  const transport = new StdioServerTransport();
  await server.connect(transport);
};

// stdout carries the MCP protocol, so startup failures are reported on stderr.
main().catch((error: unknown) => {
  console.error("mcp-web failed to start:", error);
  process.exit(1);
});

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/TsFreddie/mcp-web'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.