import axios from 'axios';
import * as cheerio from 'cheerio';
import TurndownService from 'turndown';
import { promises as fs } from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// Initialize Turndown service
const turndownService = new TurndownService({
  headingStyle: 'atx', // Use '#' for headings
  codeBlockStyle: 'fenced', // Use '```' for code blocks
});
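// For reference (hypothetical input, not from any scraped site): with these
// options, turndownService.turndown('<h1>Docs</h1>') yields '# Docs' instead
// of Turndown's default setext-style underlined heading.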

// Define __dirname for ES modules
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Base path for all documentation output
const DOCS_BASE_OUTPUT_PATH = path.join(__dirname, '..', 'content', 'docs');
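// Assuming this script lives one directory below the project root (e.g. in
// scripts/), output lands under <project-root>/content/docs. Adjust the
// relative segments above if your layout differs.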

/**
 * Scrapes a webpage, extracts the main content, converts it to Markdown, and saves it.
 * @param {string} url - The URL to scrape.
 * @param {string} outputPath - The directory to save the Markdown file in.
 * @param {string} contentSelector - Cheerio selector for the main content area.
 * @param {string} filename - The name of the output Markdown file (without the .md extension).
 * @param {string} siteName - A friendly name for the site being scraped (used in logging).
 */
async function scrapeAndSave(url, outputPath, contentSelector, filename, siteName) {
  console.log(`Scraping ${siteName} page: ${filename} from ${url}`);
  try {
    const { data: html } = await axios.get(url);
    const $ = cheerio.load(html);
    const mainContentHtml = $(contentSelector).html();

    if (!mainContentHtml) {
      console.error(`Could not find content with selector "${contentSelector}" on ${url}`);
      return;
    }

    const markdown = turndownService.turndown(mainContentHtml);
    const filePath = path.join(outputPath, `${filename}.md`);

    // Ensure the target directory exists; creating the file's own directory
    // (rather than outputPath) also handles slugs that contain "/" and would
    // otherwise nest the output file in a directory that was never created.
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    await fs.writeFile(filePath, markdown, 'utf-8');
    console.log(`Successfully saved ${siteName} doc: ${filename}.md to ${filePath}`);
  } catch (error) {
    console.error(`Error scraping ${siteName} page ${filename} from ${url}:`, error.message || error);
    if (error.response) {
      console.error(`Status: ${error.response.status}`);
    }
  }
}
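
// Usage sketch for calling scrapeAndSave in isolation. The URL, selector,
// and names below are hypothetical placeholders, not taken from any real
// config:
//
//   await scrapeAndSave(
//     'https://example.com/docs/getting-started',
//     path.join(DOCS_BASE_OUTPUT_PATH, 'example-docs'),
//     'main.article-content',
//     'getting-started',
//     'Example Docs'
//   );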

async function main() {
  console.log('Starting documentation scraping process...');
  const configFile = await fs.readFile(path.join(__dirname, 'scraping-config.json'), 'utf-8');
  const config = JSON.parse(configFile);

  for (const site of config.sites) {
    console.log(`\nProcessing site: ${site.siteName}`);
    const siteOutputPath = path.join(DOCS_BASE_OUTPUT_PATH, site.outputDir);

    for (const pageSlug of site.pages) {
      const pageUrl = site.baseURL + pageSlug;
      await scrapeAndSave(
        pageUrl,
        siteOutputPath,
        site.contentSelector,
        pageSlug,
        site.siteName
      );
    }
  }

  console.log('\nDocumentation scraping process finished.');
}

main().catch((error) => {
  console.error('An error occurred during the scraping script execution:', error);
  process.exit(1);
});