Vega-Lite MCP Server

scraper.ts•7.42 kB

#!/usr/bin/env node
/**
 * Vega-Lite Documentation Scraper
 *
 * This utility scrapes documentation from the official Vega-Lite website
 * (and the Deneb guide) and saves it in a structured format for the MCP
 * server to use.
 */

import { promises as fs } from "fs";
import path from "path";
import { fileURLToPath } from "url";
import fetch from "node-fetch";
import * as cheerio from "cheerio";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/** One scraped documentation page. */
interface DocSection {
  title: string;
  url: string;
  content: string;
  category: string;
  source?: string;
}

/** One example specification (not yet produced by the scraper; see scrapeExamples). */
interface VegaLiteExample {
  name: string;
  description: string;
  category: string;
  spec: Record<string, unknown>;
  url: string;
}

/** A page to scrape: `path` is appended to a docs base URL, `category` is stored with the result. */
interface DocPage {
  path: string;
  category: string;
}

const VEGA_LITE_BASE_URL = "https://vega.github.io/vega-lite";
const VEGA_LITE_DOCS_URL = `${VEGA_LITE_BASE_URL}/docs/`;
const DENEB_BASE_URL = "https://deneb.guide";
const DENEB_DOCS_URL = `${DENEB_BASE_URL}/docs/`;

/** Maximum number of characters of page text kept per documentation section. */
const MAX_CONTENT_LENGTH = 5000;

/** Pause between consecutive requests, in milliseconds. */
const REQUEST_DELAY_MS = 1000;

// Key Vega-Lite documentation pages to scrape
const VEGA_LITE_DOC_PAGES: readonly DocPage[] = [
  { path: "", category: "introduction" },
  { path: "mark.html", category: "marks" },
  { path: "encoding.html", category: "encoding" },
  { path: "type.html", category: "data-types" },
  { path: "scale.html", category: "scales" },
  { path: "axis.html", category: "axes" },
  { path: "legend.html", category: "legends" },
  { path: "transform.html", category: "transforms" },
  { path: "aggregate.html", category: "transforms" },
  { path: "bin.html", category: "transforms" },
  { path: "timeunit.html", category: "transforms" },
  { path: "filter.html", category: "transforms" },
  { path: "selection.html", category: "interaction" },
  { path: "condition.html", category: "interaction" },
  { path: "layer.html", category: "composition" },
  { path: "concat.html", category: "composition" },
  { path: "facet.html", category: "composition" },
  { path: "repeat.html", category: "composition" },
];

// Key Deneb documentation pages to scrape
const DENEB_DOC_PAGES: readonly DocPage[] = [
  // Introduction and Getting Started
  { path: "", category: "introduction" },
  { path: "getting-started", category: "getting-started" },
  { path: "simple-example", category: "getting-started" },
  { path: "visual-editor", category: "getting-started" },
  { path: "dataset", category: "getting-started" },
  { path: "keyboard", category: "getting-started" },
  // Deeper Concepts
  { path: "formatting", category: "concepts" },
  { path: "scrolling-overflow", category: "concepts" },
  { path: "schemes", category: "concepts" },
  { path: "pattern-fills", category: "concepts" },
  { path: "templates", category: "concepts" },
  { path: "performance", category: "concepts" },
  // Interactivity Features
  { path: "interactivity-overview", category: "interactivity" },
  { path: "interactivity-tooltips", category: "interactivity" },
  { path: "interactivity-context-menu", category: "interactivity" },
  { path: "interactivity-selection", category: "interactivity" },
  { path: "interactivity-selection-advanced", category: "interactivity" },
  { path: "interactivity-highlight", category: "interactivity" },
  // Other
  { path: "changelog", category: "other" },
];

// Example categories to scrape
const EXAMPLE_CATEGORIES = [
  "bar",
  "line",
  "area",
  "scatter",
  "histogram",
  "circle",
  "tick",
  "rect",
  "interactive",
  "composite",
];

/** Resolve after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Scrape a documentation page (generic)
 *
 * @param baseUrl - Docs base URL; `pagePath` is appended to it verbatim.
 * @param pagePath - Page path relative to `baseUrl` (may be empty).
 * @param category - Category label stored on the returned section.
 * @param source - Source label stored on the returned section and used in log output.
 * @returns The scraped section, or `null` when the request fails, the server
 *   answers with a non-2xx status, or the page cannot be processed. Failures
 *   are logged, never thrown.
 */
async function scrapeDocPage(
  baseUrl: string,
  pagePath: string,
  category: string,
  source: string
): Promise<DocSection | null> {
  const url = `${baseUrl}${pagePath}`;

  try {
    console.log(`Scraping ${source}: ${url}...`);
    const response = await fetch(url);

    // fetch() resolves on HTTP error statuses too; without this check a
    // 404/500 error page would be stored as if it were documentation.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`);
    }

    const html = await response.text();
    const $ = cheerio.load(html);

    // Prefer the first <h1>; fall back to the document <title>.
    const title = $("h1").first().text().trim() || $("title").text().trim();

    // Try to get the main content area, falling back to the whole body.
    const mainContent = $("main, article, .content, #content").first();
    const container = mainContent.length ? mainContent : $("body");

    // Strip script and style tags so their source does not end up in the text
    // (applied to the body fallback as well as to the main content area).
    container.find("script, style").remove();

    // Collapse whitespace runs and cap the stored text length.
    const content = container
      .text()
      .trim()
      .replace(/\s+/g, " ")
      .substring(0, MAX_CONTENT_LENGTH);

    return {
      title,
      url,
      content,
      category,
      source,
    };
  } catch (error) {
    console.error(`Error scraping ${url}:`, error);
    return null;
  }
}

/**
 * Scrape a list of pages sequentially from one site, pausing between requests.
 *
 * @param baseUrl - Docs base URL passed through to scrapeDocPage.
 * @param pages - Pages to fetch, in order.
 * @param source - Source label passed through to scrapeDocPage.
 * @returns The sections that were scraped successfully (failed pages are skipped).
 */
async function scrapePages(
  baseUrl: string,
  pages: readonly DocPage[],
  source: string
): Promise<DocSection[]> {
  const docs: DocSection[] = [];

  for (const [index, page] of pages.entries()) {
    const doc = await scrapeDocPage(baseUrl, page.path, page.category, source);
    if (doc) {
      docs.push(doc);
    }

    // Be nice to the server: wait between requests (no need after the last one).
    if (index < pages.length - 1) {
      await delay(REQUEST_DELAY_MS);
    }
  }

  return docs;
}

/**
 * Scrape examples (placeholder - would need actual implementation)
 *
 * @returns Currently always an empty array.
 */
async function scrapeExamples(): Promise<VegaLiteExample[]> {
  // This is a placeholder. In a real implementation, you would:
  // 1. Scrape the examples gallery page
  // 2. Extract example specifications
  // 3. Categorize them
  console.log("Example scraping not fully implemented - using fallback data");
  return [];
}

/**
 * Main scraper function
 *
 * Scrapes the Vega-Lite and Deneb documentation pages and writes
 * `documentation.json`, `deneb-documentation.json` and `examples.json`
 * into the `data` directory located one level above this file.
 */
async function scrapeAll(): Promise<void> {
  console.log("Starting Vega-Lite and Deneb documentation scraper...\n");

  // Create data directory if it doesn't exist
  const dataDir = path.join(__dirname, "..", "data");
  await fs.mkdir(dataDir, { recursive: true });

  // Scrape Vega-Lite documentation pages
  console.log("=== Scraping Vega-Lite Documentation ===\n");
  const vegaLiteDocs = await scrapePages(VEGA_LITE_DOCS_URL, VEGA_LITE_DOC_PAGES, "vega-lite");

  // Save Vega-Lite documentation
  const vegaLiteDocsPath = path.join(dataDir, "documentation.json");
  await fs.writeFile(vegaLiteDocsPath, JSON.stringify(vegaLiteDocs, null, 2));
  console.log(`\n✅ Saved ${vegaLiteDocs.length} Vega-Lite documentation sections to ${vegaLiteDocsPath}\n`);

  // Scrape Deneb documentation pages
  console.log("=== Scraping Deneb Documentation ===\n");
  const denebDocs = await scrapePages(DENEB_DOCS_URL, DENEB_DOC_PAGES, "deneb");

  // Save Deneb documentation
  const denebDocsPath = path.join(dataDir, "deneb-documentation.json");
  await fs.writeFile(denebDocsPath, JSON.stringify(denebDocs, null, 2));
  console.log(`\n✅ Saved ${denebDocs.length} Deneb documentation sections to ${denebDocsPath}\n`);

  // Scrape examples
  const examples = await scrapeExamples();
  const examplesPath = path.join(dataDir, "examples.json");
  await fs.writeFile(examplesPath, JSON.stringify(examples, null, 2));
  console.log(`✅ Saved ${examples.length} examples to ${examplesPath}\n`);

  console.log("=== Scraping Complete! ===");
  console.log(`Total Vega-Lite docs: ${vegaLiteDocs.length}`);
  console.log(`Total Deneb docs: ${denebDocs.length}`);
  console.log(`Total examples: ${examples.length}`);
}

// Run scraper if called directly.
// Compare filesystem paths rather than building a "file://" string by hand:
// import.meta.url is percent-encoded and uses "file:///C:/..." on Windows, so
// a hand-built URL never matches for such paths.
const invokedPath = process.argv[1];
if (invokedPath !== undefined && path.resolve(invokedPath) === __filename) {
  scrapeAll().catch((error) => {
    console.error("Fatal error:", error);
    process.exit(1);
  });
}

export { scrapeAll, scrapeDocPage, scrapeExamples };

Latest Blog Posts

Model Context Protocol Proxies: Enabling Enterprise Control with Virtual MCPs
By Om-Shree-0709 on December 9, 2025.
AI Security
Virtual MCP
Kubernetes Operator
The State of MCP in 2025: Who's Building What and Why It Matters
By punkpeye on December 7, 2025.
mcp
startups
MCP hosting with persistent storage
By punkpeye on December 6, 2025.
changelog

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/inteligencianegociosmmx/vegaLite_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.