Skip to main content
Glama

MTS MCP Server

by CalvinMagezi
documentation-scraper.ts (2.72 kB)
import puppeteer from "puppeteer";
import * as cheerio from "cheerio";
import { NodeHtmlMarkdown } from "node-html-markdown";
import URLParse from "url-parse";

/**
 * Crawls a documentation site in a headless browser, starting at a base URL
 * and following same-hostname links in first-in/first-out order, and converts
 * the main content of every page it loads to Markdown.
 */
export class DocumentationScraper {
  private baseUrl: string;
  /** Fragment-free keys of every URL already dequeued for processing. */
  private visited: Set<string> = new Set();
  /** URLs waiting to be loaded, processed in FIFO order. */
  private queue: string[] = [];
  /** Hostname of the base URL; only links on this hostname are followed. */
  private domain: string;

  /**
   * @param baseUrl Page the crawl starts from; its hostname bounds the crawl.
   */
  constructor(baseUrl: string) {
    this.baseUrl = baseUrl;
    const parsedUrl = new URLParse(baseUrl);
    this.domain = parsedUrl.hostname;
  }

  /**
   * Crawls from the base URL until the queue is empty.
   *
   * A page that fails to load or parse is logged with `console.error` and
   * skipped; it does not abort the crawl. The browser is always closed, even
   * if the crawl throws.
   *
   * @returns One entry per page that produced non-empty Markdown content.
   */
  async scrapeDocumentation(): Promise<{ url: string; content: string }[]> {
    const browser = await puppeteer.launch({ headless: "new" });
    const results: { url: string; content: string }[] = [];

    try {
      this.queue.push(this.baseUrl);

      while (this.queue.length > 0) {
        const url = this.queue.shift();
        if (url === undefined) break;

        const key = this.visitKey(url);
        if (this.visited.has(key)) continue;
        // Mark as visited up front so a page that fails to load is not
        // re-queued (and re-loaded) each time another page links to it.
        this.visited.add(key);

        const page = await browser.newPage();
        try {
          await page.goto(url, { waitUntil: "networkidle0" });
          const html = await page.content();
          const $ = cheerio.load(html);

          // Collect links BEFORE extracting content: extractContent() removes
          // nav/header/footer from this same document, and those elements
          // commonly hold the site's navigation links. Relative hrefs are
          // resolved against the page's final URL (after any redirect).
          const links = this.extractLinks($, page.url());
          this.queueNewLinks(links);

          const content = this.extractContent($);
          if (content) {
            results.push({ url, content });
          }
        } catch (error) {
          console.error(`Error processing ${url}:`, error);
        } finally {
          await page.close();
        }
      }
    } finally {
      await browser.close();
    }

    return results;
  }

  /**
   * Converts the page's main content area to Markdown.
   *
   * Note: this mutates `$` by removing script, style, nav, footer and header
   * elements from the document.
   *
   * @returns The Markdown text, or an empty string if no content was found.
   */
  private extractContent($: cheerio.CheerioAPI): string {
    // Remove unwanted elements
    $("script, style, nav, footer, header").remove();

    // Find main content area, falling back from the most specific container
    // to the whole body.
    const mainContent =
      $("main").html() ||
      $("article").html() ||
      $(".content").html() ||
      $(".documentation").html() ||
      $("body").html();

    if (!mainContent) return "";

    // Convert HTML to Markdown
    return NodeHtmlMarkdown.translate(mainContent);
  }

  /**
   * Collects absolute, fragment-free URLs of every anchor on the page whose
   * hostname matches the crawl's hostname.
   *
   * @param $ Parsed document to read anchors from.
   * @param pageUrl URL that relative hrefs are resolved against; defaults to
   *   the base URL.
   */
  private extractLinks(
    $: cheerio.CheerioAPI,
    pageUrl: string = this.baseUrl
  ): string[] {
    const links: string[] = [];

    $("a").each((_, element) => {
      const href = $(element).attr("href");
      if (!href) return;

      try {
        const url = new URL(href, pageUrl);
        if (url.hostname === this.domain) {
          // A fragment addresses a position inside a page, not a different
          // page; drop it so "/a#x" and "/a#y" are not crawled separately.
          url.hash = "";
          links.push(url.href);
        }
      } catch (error) {
        // Invalid URL, skip
      }
    });

    return links;
  }

  /** Appends each link that is neither visited nor already queued. */
  private queueNewLinks(links: string[]) {
    for (const link of links) {
      if (!this.visited.has(link) && !this.queue.includes(link)) {
        this.queue.push(link);
      }
    }
  }

  /**
   * Returns the key under which a URL is tracked in `visited`: its serialized
   * form without the fragment. Input that cannot be parsed as an absolute URL
   * is returned unchanged.
   */
  private visitKey(url: string): string {
    try {
      const parsed = new URL(url);
      parsed.hash = "";
      return parsed.href;
    } catch (error) {
      return url;
    }
  }
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/CalvinMagezi/mts-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.