WOL MCP Server

wolService.ts•9.86 KiB

import * as cheerio from "cheerio";
import {
  type SearchOptions,
  type Document,
  type Publication,
  type PublicationType,
  type WOLError,
  PUBLICATION_NAMES,
  type SearchResponse,
} from "./types";
import {
  URLBuilder,
  SearchOperatorParser,
  ContentParser,
  createWOLError,
} from "./utils";

/**
 * Stateless service for searching WOL, fetching documents and listing
 * publication types.
 *
 * Every public method reports failures by throwing an error built with
 * `createWOLError`; unexpected failures are wrapped with the
 * `NETWORK_ERROR` code.
 */
export class WOLService {
  private static readonly MAX_RETRIES = 3;
  private static readonly TIMEOUT = 30000; // 30 seconds

  /**
   * Runs a search and returns key publications followed by document results.
   *
   * @param query - Raw search query.
   * @param options - Search options. `limit` caps only the document results
   *   (key publications are always returned in full) and is applied only when
   *   it is a positive number; `page` is echoed back for a 404 response.
   * @returns The parsed results and pagination. A 404 response yields an
   *   empty result set rather than an error.
   * @throws An `INVALID_QUERY` error when `options.useOperators` is `true`
   *   and the query fails operator validation, `SERVICE_UNAVAILABLE` on HTTP
   *   502/503, and `NETWORK_ERROR` for any other failure.
   */
  static async search(
    query: string,
    options: SearchOptions = {},
  ): Promise<SearchResponse> {
    try {
      if (
        !SearchOperatorParser.validateOperators(query) &&
        options.useOperators === true
      ) {
        throw createWOLError(
          "INVALID_QUERY",
          "Invalid characters or operators in query",
          { query },
        );
      }

      const searchUrl = URLBuilder.buildSearchURL(query, options);
      // Diagnostics go to stderr: when an MCP server runs over the stdio
      // transport, stdout carries protocol messages and must stay clean.
      console.error("searchUrl", searchUrl, query);

      const response = await WOLService.fetchWithRetry(searchUrl);
      const html = await response.text();

      if (response.status === 404) {
        return {
          results: [],
          pagination: {
            totalResults: 0,
            pageSize: 40,
            totalPages: 1,
            currentPage: options.page ?? 1,
          },
        };
      }
      WOLService.assertServiceAvailable(response);

      const parsed = ContentParser.parseSearchResults(html);

      // Separate key publications and document results
      const keyPublications = parsed.results.filter(
        (r) => r.resultType === "key_publication",
      );
      const documentResults = parsed.results.filter(
        (r) => r.resultType === "document_result",
      );

      // Apply limit only to document results. A missing, zero or negative
      // limit means "no limit"; a negative value passed to slice() would
      // otherwise silently drop results from the end of the list.
      const limit = options.limit;
      const limitedDocuments =
        typeof limit === "number" && limit > 0
          ? documentResults.slice(0, limit)
          : documentResults;

      return {
        results: [...keyPublications, ...limitedDocuments],
        pagination: parsed.pagination,
      };
    } catch (error) {
      if (WOLService.isWOLError(error)) {
        throw error;
      }
      throw createWOLError("NETWORK_ERROR", `Search failed: ${error}`, {
        query,
        options,
      });
    }
  }

  /**
   * Fetches a document and returns it with its content converted to the
   * requested format.
   *
   * @param url - Document URL; validated and normalized by `URLBuilder`.
   * @param format - `"markdown"` (default) or `"plain"`. Any other value
   *   leaves the content as returned by the parser, with only non-breaking
   *   spaces normalized.
   * @returns The parsed document, with `url` set to the normalized URL.
   * @throws A `NOT_FOUND` error on HTTP 404, `SERVICE_UNAVAILABLE` on HTTP
   *   502/503, and `NETWORK_ERROR` for any other failure.
   */
  static async getDocumentByUrl(
    url: string,
    format: string = "markdown",
  ): Promise<Document> {
    try {
      const documentUrl = URLBuilder.validateAndNormalizeDocumentURL(url);
      const response = await WOLService.fetchWithRetry(documentUrl);

      if (response.status === 404) {
        throw createWOLError("NOT_FOUND", `Document not found`, {
          url: documentUrl,
        });
      }
      WOLService.assertServiceAvailable(response);

      const html = await response.text();
      const document = ContentParser.parseDocument(html, documentUrl);
      document.url = documentUrl;

      // Normalize non-breaking spaces in the raw HTML/content before
      // formatting. Handles the HTML entity (named and numeric forms) as well
      // as the literal Unicode NBSP character.
      document.content = document.content
        .replace(/&nbsp;|&#0*160;|&#x0*a0;/gi, " ")
        .replace(/\u00A0/g, " ");

      // Format content based on requested format
      if (format === "markdown") {
        document.content = WOLService.convertToMarkdown(document.content);
      } else if (format === "plain") {
        document.content = WOLService.convertToPlainText(document.content);
      }

      return document;
    } catch (error) {
      if (WOLService.isWOLError(error)) {
        throw error;
      }
      throw createWOLError(
        "NETWORK_ERROR",
        `Document retrieval failed: ${error}`,
        { url },
      );
    }
  }

  /**
   * Lists publication types from the static `PUBLICATION_NAMES` table; no
   * network request is made.
   *
   * @param type - When given, only that publication type is returned.
   * @param language - Language tag copied onto each entry (default `"en"`).
   * @param year - When given together with `type`, reported as the entry's
   *   only year; ignored when `type` is omitted.
   * @returns One entry for `type`, or one entry per known publication type.
   * @throws A `NETWORK_ERROR` error if building the list fails.
   */
  static async browsePublications(
    type?: PublicationType,
    language: string = "en",
    year?: number,
  ): Promise<Publication[]> {
    try {
      if (type) {
        const single: Publication = {
          code: type,
          name: PUBLICATION_NAMES[type],
          description: `Browse ${PUBLICATION_NAMES[type]} publications`,
          language,
          years: year ? [year] : undefined,
        };
        return [single];
      }

      // Return all available publication types
      return Object.entries(PUBLICATION_NAMES).map(
        ([code, name]): Publication => ({
          code: code as PublicationType,
          name,
          description: `Browse ${name} publications`,
          language,
        }),
      );
    } catch (error) {
      throw createWOLError(
        "NETWORK_ERROR",
        `Publication browsing failed: ${error}`,
        { type, language, year },
      );
    }
  }

  /**
   * Tells errors produced by `createWOLError` apart from everything else.
   *
   * The code must be a string: a bare truthiness check would also accept a
   * timeout `AbortError`, whose legacy `DOMException.code` is the number 20,
   * and let it escape without being wrapped as `NETWORK_ERROR`.
   * NOTE(review): assumes WOLError codes are always strings, as every
   * `createWOLError` call in this file passes one - confirm against ./types.
   */
  private static isWOLError(error: unknown): error is Error & WOLError {
    return (
      error instanceof Error &&
      typeof (error as { code?: unknown }).code === "string"
    );
  }

  /** Throws `SERVICE_UNAVAILABLE` when the response status is 502 or 503. */
  private static assertServiceAvailable(response: Response): void {
    if (response.status === 502 || response.status === 503) {
      throw createWOLError(
        "SERVICE_UNAVAILABLE",
        "WOL service temporarily unavailable",
        { status: response.status },
      );
    }
  }

  /**
   * Builds the Accept-Language header value for a request.
   *
   * Prefers the language from the first URL path segment (e.g. `/pt/`) and
   * falls back to English when the URL has no such segment.
   */
  private static acceptLanguageFor(url: string): string {
    const lang = /https?:\/\/[^/]+\/([a-z-]+)\//i.exec(url)?.[1];
    return lang
      ? `${lang},${lang}-US;q=0.9,en-US,en;q=0.5`
      : "en-US,en;q=0.5";
  }

  /**
   * Performs a GET request, retrying when `fetch` itself rejects (network
   * failure or timeout). HTTP error statuses are returned to the caller and
   * are not retried.
   *
   * @param url - Absolute URL to fetch.
   * @param retries - Maximum number of attempts.
   * @returns The first response received.
   * @throws The last fetch error once all attempts have failed, or
   *   `Error("Max retries exceeded")` when `retries` is not positive.
   */
  private static async fetchWithRetry(
    url: string,
    retries: number = WOLService.MAX_RETRIES,
  ): Promise<Response> {
    const headers = {
      "User-Agent": "Mozilla/5.0 (compatible; WOL-MCP-Server/1.0)",
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": WOLService.acceptLanguageFor(url),
      "Accept-Encoding": "gzip, deflate",
      Connection: "keep-alive",
      "Upgrade-Insecure-Requests": "1",
    };

    for (let attempt = 0; attempt < retries; attempt++) {
      const controller = new AbortController();
      const timeoutId = setTimeout(
        () => controller.abort(),
        WOLService.TIMEOUT,
      );

      try {
        return await fetch(url, { signal: controller.signal, headers });
      } catch (error) {
        console.warn(`Fetch attempt ${attempt + 1} failed:`, error);
        if (attempt === retries - 1) {
          throw error;
        }
      } finally {
        // Always release the abort timer, including when fetch rejects;
        // otherwise each failed attempt leaves a pending 30 s timer behind.
        // The timer only guards the wait for the response itself; reading
        // the body afterwards is not covered by it.
        clearTimeout(timeoutId);
      }

      // Exponential backoff
      await new Promise((resolve) => setTimeout(resolve, 2 ** attempt * 1000));
    }

    throw new Error("Max retries exceeded");
  }

  /**
   * Makes rendered cell content safe to place inside a Markdown table row:
   * line breaks are flattened to single spaces and pipes are escaped, since
   * either one would otherwise split or terminate the row.
   */
  private static escapeTableCell(text: string): string {
    return text.replace(/\s*\n\s*/g, " ").replace(/\|/g, "\\|");
  }

  /**
   * Converts an HTML fragment to Markdown by walking the parsed DOM.
   *
   * Handles headings, paragraphs, emphasis, links, images, lists,
   * blockquotes, figures and tables; form controls, scripts, styles and SVG
   * are dropped, and unknown tags contribute only their children. If the
   * result is shorter than 20 characters, falls back to the plain text of
   * `article#article` (or `<body>`).
   */
  private static convertToMarkdown(html: string): string {
    const $ = cheerio.load(html, { xml: false });
    const skipTags = new Set([
      "script",
      "style",
      "noscript",
      "input",
      "button",
      "textarea",
      "select",
      "fieldset",
      "svg",
    ]);

    // DOM nodes are handled untyped here, as cheerio hands them out.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    type DomNode = any;

    const convertNode = (node: DomNode): string => {
      if (node.type === "text") {
        return (node.data || "").replace(/\s+/g, " ");
      }
      if (node.type !== "tag" && node.type !== "root") return "";

      const tag: string = (node.name || "").toLowerCase();
      if (skipTags.has(tag)) return "";

      const childNodes: DomNode[] = node.children || [];

      // Rendered on demand: lists and tables render their items themselves,
      // so eagerly rendering children as well would repeat the work at every
      // nesting level.
      const renderChildren = (): string =>
        childNodes.map((c) => convertNode(c)).join("");

      // Renders the direct <li> children, one per line, with the given marker.
      const renderListItems = (marker: (index: number) => string): string =>
        childNodes
          .filter((c) => c.type === "tag" && c.name === "li")
          .map((li, i) => `${marker(i)} ${convertNode(li).trim()}`)
          .join("\n");

      switch (tag) {
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6": {
          const hashes = "#".repeat(Number(tag.charAt(1)));
          return `\n\n${hashes} ${renderChildren().trim()}\n\n`;
        }
        case "p":
          return `${renderChildren().trim()}\n\n`;
        case "strong":
        case "b": {
          const t = renderChildren().trim();
          return t ? `**${t}**` : "";
        }
        case "em":
        case "i": {
          const t = renderChildren().trim();
          return t ? `*${t}*` : "";
        }
        case "a": {
          const href = node.attribs?.href || "";
          const text = renderChildren().trim();
          return href && text ? `[${text}](${href})` : text;
        }
        case "img": {
          const src = node.attribs?.src || "";
          const alt = node.attribs?.alt || "";
          return src ? `![${alt}](${src})` : "";
        }
        case "br":
          return "\n";
        case "hr":
          return "\n\n---\n\n";
        case "ul":
          return `\n${renderListItems(() => "-")}\n\n`;
        case "ol": {
          const start = parseInt(node.attribs?.start || "1", 10) || 1;
          return `\n${renderListItems((i) => `${start + i}.`)}\n\n`;
        }
        case "li":
          return renderChildren();
        case "blockquote": {
          const lines = renderChildren()
            .trim()
            .split("\n")
            .map((l: string) => `> ${l}`)
            .join("\n");
          return `\n${lines}\n\n`;
        }
        case "figure":
          return `\n${renderChildren().trim()}\n\n`;
        case "figcaption":
          return `*${renderChildren().trim()}*\n`;
        case "table": {
          const rows: string[][] = [];
          $(node)
            .find("tr")
            .each((_: number, tr: DomNode) => {
              const cells: string[] = [];
              $(tr)
                .find("td, th")
                .each((__: number, cell: DomNode) => {
                  cells.push(
                    WOLService.escapeTableCell(convertNode(cell).trim()),
                  );
                });
              rows.push(cells);
            });
          if (rows.length === 0) return renderChildren();

          const colCount = Math.max(...rows.map((r) => r.length));
          const lines: string[] = [];
          rows.forEach((row, rowIndex) => {
            // Pad short rows so every line has the same number of columns.
            const padded = row.concat(Array(colCount - row.length).fill(""));
            lines.push(`| ${padded.join(" | ")} |`);
            if (rowIndex === 0) {
              // The first row is treated as the header; add the delimiter row.
              lines.push(`| ${Array(colCount).fill("---").join(" | ")} |`);
            }
          });
          return `\n${lines.join("\n")}\n\n`;
        }
        case "div":
        case "section":
        case "article":
        case "header":
        case "footer":
        case "main":
        case "aside":
        case "nav":
          return `\n${renderChildren()}\n`;
        default:
          return renderChildren();
      }
    };

    const root = $.root()[0];
    let result = convertNode(root);

    // Collapse excessive newlines and trim
    result = result.replace(/\n{3,}/g, "\n\n").trim();

    // Fallback: if conversion produced empty/very short output, extract text
    if (result.length < 20) {
      const $article = $("article#article");
      const $fallback = $article.length > 0 ? $article : $("body");
      result = $fallback
        .text()
        .replace(/[\t ]*\n[\t ]*(\n[\t ]*)*/g, "\n\n")
        .trim();
    }

    return result;
  }

  /**
   * Strips markup from an HTML fragment, dropping script/style/noscript
   * content, and collapses all whitespace runs to single spaces.
   */
  private static convertToPlainText(html: string): string {
    const $ = cheerio.load(html);
    $("script, style, noscript").remove();
    return $.root().text().replace(/\s+/g, " ").trim();
  }
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/LeomaiaJr/wol-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

wolService.ts•9.86 KiB