SearXNG Server

Overview Schema Related Servers Score Discussions

mcp-searxng
src

url-reader.ts•7.96 KiB

import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { NodeHtmlMarkdown } from "node-html-markdown";
import { createProxyAgent } from "./proxy.js";
import { logMessage } from "./logging.js";
import { urlCache } from "./cache.js";
import {
  createURLFormatError,
  createNetworkError,
  createServerError,
  createContentError,
  createConversionError,
  createTimeoutError,
  createEmptyContentWarning,
  createUnexpectedError,
  type ErrorContext
} from "./error-handler.js";

/**
 * Optional filters applied to converted markdown before it is returned.
 *
 * When several options are set they are applied in this order:
 * `readHeadings` (short-circuits everything else), `section`,
 * `paragraphRange`, then `startChar` / `maxLength`.
 */
interface PaginationOptions {
  /** Zero-based character offset at which the returned slice starts. */
  startChar?: number;
  /** Maximum number of characters to return; omitted or 0 means "no limit". */
  maxLength?: number;
  /** Text to look for (case-insensitively) inside a markdown heading line. */
  section?: string;
  /** One-based paragraph selector: "3", "1-5" or "10-". */
  paragraphRange?: string;
  /** When true, only the heading lines of the document are returned. */
  readHeadings?: boolean;
}

/**
 * Escapes every character that has a special meaning in a regular expression
 * so that `text` can be embedded in a pattern and matched literally.
 */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Returns true when `error` looks like the rejection produced by an aborted
 * request (its `name` is "AbortError").
 */
function isAbortError(error: unknown): boolean {
  return (
    typeof error === "object" &&
    error !== null &&
    (error as { name?: unknown }).name === "AbortError"
  );
}

/**
 * Returns the slice of `content` that starts at `startChar` and is at most
 * `maxLength` characters long.
 *
 * @param content   Text to slice.
 * @param startChar Zero-based start offset; negative values are clamped to 0.
 * @param maxLength Maximum slice length. A falsy value (omitted or 0) means
 *                  the slice runs to the end of `content`.
 * @returns The requested slice, or "" when `startChar` is at or past the end.
 */
function applyCharacterPagination(content: string, startChar: number = 0, maxLength?: number): string {
  if (startChar >= content.length) {
    return "";
  }

  const start = Math.max(0, startChar);
  const end = maxLength ? Math.min(content.length, start + maxLength) : content.length;
  return content.slice(start, end);
}

/**
 * Extracts one section of a markdown document: the first heading line that
 * contains `sectionHeading` (case-insensitive, matched literally) and every
 * line after it up to, but not including, the next heading of the same or a
 * higher level.
 *
 * @param markdownContent Markdown text to search.
 * @param sectionHeading  Text that must appear in the heading line.
 * @returns The section including its heading line, or "" if no heading matches.
 */
function extractSection(markdownContent: string, sectionHeading: string): string {
  const lines = markdownContent.split('\n');

  // The heading text is user input, so it is escaped before being embedded in
  // the pattern; otherwise characters such as "(", "+" or "?" would either
  // throw a SyntaxError or silently change what is matched.
  const sectionRegex = new RegExp(`^#{1,6}\\s*.*${escapeRegExp(sectionHeading)}.*$`, 'i');

  // Find the section start
  const startIndex = lines.findIndex(line => sectionRegex.test(line));
  if (startIndex === -1) {
    return "";
  }

  // Heading level = number of leading '#' characters on the matched line.
  const currentLevel = (lines[startIndex].match(/^#+/) || [''])[0].length;

  // Find the section end (next heading of same or higher level)
  let endIndex = lines.length;
  for (let i = startIndex + 1; i < lines.length; i++) {
    const match = lines[i].match(/^#+/);
    if (match && match[0].length <= currentLevel) {
      endIndex = i;
      break;
    }
  }

  return lines.slice(startIndex, endIndex).join('\n');
}

/**
 * Selects paragraphs (blocks separated by a blank line) by one-based position.
 *
 * Supported forms of `range`:
 *  - "3"   → only paragraph 3
 *  - "1-5" → paragraphs 1 through 5 inclusive
 *  - "10-" → paragraph 10 through the last paragraph
 *
 * @param markdownContent Markdown text to split into paragraphs.
 * @param range           Range expression as described above.
 * @returns The selected paragraphs joined by blank lines, or "" when the
 *          range is malformed, starts out of bounds, or selects nothing.
 */
function extractParagraphRange(markdownContent: string, range: string): string {
  const paragraphs = markdownContent.split('\n\n').filter(p => p.trim().length > 0);

  // Parse range (e.g., "1-5", "3", "10-")
  const rangeMatch = range.match(/^(\d+)(?:-(\d*))?$/);
  if (!rangeMatch) {
    return "";
  }

  const start = parseInt(rangeMatch[1], 10) - 1; // Convert to 0-based index
  const endStr = rangeMatch[2];

  if (start < 0 || start >= paragraphs.length) {
    return "";
  }

  if (endStr === undefined) {
    // Single paragraph (e.g., "3")
    return paragraphs[start] || "";
  }

  if (endStr === "") {
    // Range to end (e.g., "10-")
    return paragraphs.slice(start).join('\n\n');
  }

  // Specific range (e.g., "1-5"); the one-based inclusive end doubles as the
  // zero-based exclusive end expected by slice().
  const end = parseInt(endStr, 10);
  return paragraphs.slice(start, end).join('\n\n');
}

/**
 * Returns only the heading lines (one to six '#' followed by whitespace) of a
 * markdown document, one per line, or a fixed notice when there are none.
 */
function extractHeadings(markdownContent: string): string {
  const headings = markdownContent
    .split('\n')
    .filter(line => /^#{1,6}\s/.test(line));

  if (headings.length === 0) {
    return "No headings found in the content.";
  }

  return headings.join('\n');
}

/**
 * Applies the requested pagination / filtering options to markdown content.
 *
 * `readHeadings` wins over everything else. Otherwise the section filter, the
 * paragraph filter and the character window are applied in that order, each
 * operating on the output of the previous step. When a section or paragraph
 * filter selects nothing, a human-readable message is returned instead.
 *
 * @param markdownContent Full markdown text.
 * @param options         Filters to apply; an empty object returns the input.
 * @returns The filtered text or an explanatory message.
 */
function applyPaginationOptions(markdownContent: string, options: PaginationOptions): string {
  // Apply heading extraction first if requested
  if (options.readHeadings) {
    return extractHeadings(markdownContent);
  }

  let result = markdownContent;

  // Apply section extraction
  if (options.section) {
    result = extractSection(result, options.section);
    if (result === "") {
      return `Section "${options.section}" not found in the content.`;
    }
  }

  // Apply paragraph range filtering
  if (options.paragraphRange) {
    result = extractParagraphRange(result, options.paragraphRange);
    if (result === "") {
      return `Paragraph range "${options.paragraphRange}" is invalid or out of bounds.`;
    }
  }

  // Apply character-based pagination last
  if (options.startChar !== undefined || options.maxLength !== undefined) {
    result = applyCharacterPagination(result, options.startChar, options.maxLength);
  }

  return result;
}

/**
 * Fetches a URL, converts the returned HTML to markdown and applies the given
 * pagination options.
 *
 * A cache hit for `url` skips the network entirely. Successful conversions are
 * stored in the cache; empty conversions are not.
 *
 * @param server            Server instance passed through to `logMessage`.
 * @param url               Address to fetch.
 * @param timeoutMs         Milliseconds after which the request is aborted.
 * @param paginationOptions Filters applied to the markdown before returning.
 * @returns The (filtered) markdown, or the value produced by
 *          `createEmptyContentWarning` when the conversion yields no content.
 * @throws The error built by the matching `create*Error` helper for an invalid
 *         URL, a network failure, a non-OK HTTP status, unreadable or empty
 *         content, a conversion failure, a timeout, or any unexpected error.
 */
export async function fetchAndConvertToMarkdown(
  server: Server,
  url: string,
  timeoutMs: number = 10000,
  paginationOptions: PaginationOptions = {}
) {
  const startTime = Date.now();
  logMessage(server, "info", `Fetching URL: ${url}`);

  // Check cache first
  const cachedEntry = urlCache.get(url);
  if (cachedEntry) {
    logMessage(server, "info", `Using cached content for URL: ${url}`);
    const result = applyPaginationOptions(cachedEntry.markdownContent, paginationOptions);
    const duration = Date.now() - startTime;
    logMessage(server, "info", `Processed cached URL: ${url} (${result.length} chars in ${duration}ms)`);
    return result;
  }

  // Validate URL format; the parsed value itself is not needed, only the
  // fact that construction does not throw.
  try {
    new URL(url);
  } catch {
    logMessage(server, "error", `Invalid URL format: ${url}`);
    throw createURLFormatError(url);
  }

  // Create an AbortController instance
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    // Prepare request options with proxy support
    const requestOptions: RequestInit = {
      signal: controller.signal,
    };

    // Add proxy dispatcher if proxy is configured
    // Node.js fetch uses 'dispatcher' option for proxy, not 'agent'
    const proxyAgent = createProxyAgent();
    if (proxyAgent) {
      (requestOptions as any).dispatcher = proxyAgent;
    }

    let response: Response;
    try {
      // Fetch the URL with the abort signal
      response = await fetch(url, requestOptions);
    } catch (error: any) {
      // An abort here is our own timeout firing. Let it propagate unchanged so
      // the outer handler reports it as a timeout instead of a network error.
      if (isAbortError(error)) {
        throw error;
      }
      const context: ErrorContext = {
        url,
        proxyAgent: !!proxyAgent,
        timeout: timeoutMs
      };
      throw createNetworkError(error, context);
    }

    if (!response.ok) {
      let responseBody: string;
      try {
        responseBody = await response.text();
      } catch {
        // Best effort: the status is reported even if the body is unreadable.
        responseBody = '[Could not read response body]';
      }

      const context: ErrorContext = { url };
      throw createServerError(response.status, response.statusText, responseBody, context);
    }

    // Retrieve HTML content
    let htmlContent: string;
    try {
      htmlContent = await response.text();
    } catch (error: any) {
      // The timeout can also fire while the body is streaming; keep it a timeout.
      if (isAbortError(error)) {
        throw error;
      }
      throw createContentError(
        `Failed to read website content: ${error.message || 'Unknown error reading content'}`,
        url
      );
    }

    if (!htmlContent || htmlContent.trim().length === 0) {
      throw createContentError("Website returned empty content.", url);
    }

    // Convert HTML to Markdown
    let markdownContent: string;
    try {
      markdownContent = NodeHtmlMarkdown.translate(htmlContent);
    } catch (error: any) {
      throw createConversionError(error, url, htmlContent);
    }

    if (!markdownContent || markdownContent.trim().length === 0) {
      logMessage(server, "warning", `Empty content after conversion: ${url}`);
      // DON'T cache empty/failed conversions - return warning directly
      return createEmptyContentWarning(url, htmlContent.length, htmlContent);
    }

    // Only cache successful markdown conversion
    urlCache.set(url, htmlContent, markdownContent);

    // Apply pagination options
    const result = applyPaginationOptions(markdownContent, paginationOptions);

    const duration = Date.now() - startTime;
    logMessage(server, "info", `Successfully fetched and converted URL: ${url} (${result.length} chars in ${duration}ms)`);
    return result;
  } catch (error: any) {
    if (isAbortError(error)) {
      logMessage(server, "error", `Timeout fetching URL: ${url} (${timeoutMs}ms)`);
      throw createTimeoutError(timeoutMs, url);
    }

    // Re-throw our enhanced errors
    if (error.name === 'MCPSearXNGError') {
      logMessage(server, "error", `Error fetching URL: ${url} - ${error.message}`);
      throw error;
    }

    // Catch any unexpected errors
    logMessage(server, "error", `Unexpected error fetching URL: ${url}`, error);
    const context: ErrorContext = { url };
    throw createUnexpectedError(error, context);
  } finally {
    // Clean up the timeout to prevent memory leaks
    clearTimeout(timeoutId);
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ihor-sokoliuk/mcp-searxng'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

url-reader.ts•7.96 KiB