Gemini Docs MCP Server

parser.ts•6.05 KiB

/**
 * HTML parsing utilities for Gemini API documentation.
 *
 * Provides helpers to download a documentation page, extract its structure
 * (title, description, sections, code examples, plain text) and render the
 * result as Markdown or JSON.
 */

import * as cheerio from "cheerio";
import { request } from "undici";
import { USER_AGENT } from "../constants.js";

/** Maximum number of code examples kept by `parseDocPage`. */
const MAX_PARSED_CODE_EXAMPLES = 10;
/** Maximum number of characters of `fullText` kept by `parseDocPage`. */
const MAX_PARSED_FULL_TEXT_LENGTH = 30000;
/** Maximum number of code examples rendered by `formatAsMarkdown`. */
const MAX_MARKDOWN_CODE_EXAMPLES = 5;
/** Maximum number of characters rendered per code example in Markdown. */
const MAX_MARKDOWN_CODE_LENGTH = 2000;
/** Maximum number of characters kept per section in JSON output. */
const MAX_JSON_SECTION_LENGTH = 2000;
/** Maximum number of code examples kept in JSON output. */
const MAX_JSON_CODE_EXAMPLES = 5;
/** Maximum number of characters kept per code example in JSON output. */
const MAX_JSON_CODE_LENGTH = 1500;

/** Structured content extracted from a single documentation page. */
export interface DocContent {
  /** URL the page was loaded from (passed through unchanged). */
  url: string;
  /** Text of the first `<h1>`, or "Untitled" when there is none. */
  title: string;
  /** Text of the first paragraph of the article, or an empty string. */
  description: string;
  /** One entry per non-empty heading found in the article. */
  sections: Array<{
    /** Heading level taken from the tag name (1 for `<h1>`, 2 for `<h2>`, ...). */
    level: number;
    /** Trimmed heading text. */
    title: string;
    /** Text of the sibling elements that follow the heading, code excluded. */
    content: string;
  }>;
  /** Code samples found on the page. */
  codeExamples: Array<{
    /** Language detected from the element's class, or "text". */
    language: string;
    /** Trimmed source text of the sample. */
    code: string;
  }>;
  /** Whitespace-collapsed article text with code blocks removed. */
  fullText: string;
}

/**
 * Fetches HTML content from a URL.
 *
 * @param url - Address of the page to download.
 * @returns The response body as text.
 * @throws Error when the response status is anything other than 200; the
 *   message contains the status code and the first 200 characters of the body.
 */
export async function fetchHtml(url: string): Promise<string> {
  // NOTE(review): USER_AGENT is imported by this module but the request sends
  // the hard-coded browser-like string below - confirm which one is intended.
  const { statusCode, body } = await request(url, {
    method: "GET",
    headers: {
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
    },
  });

  if (statusCode !== 200) {
    // Read the body even on failure so a snippet can be included in the error.
    const text = await body.text();
    throw new Error(`Failed to fetch ${url}: ${statusCode} - ${text.substring(0, 200)}`);
  }

  return body.text();
}

/**
 * Parses a Gemini API documentation page and extracts structured content.
 *
 * @param html - Raw HTML of the page.
 * @param url - URL of the page; copied into the result as-is.
 * @returns The extracted content, with at most 10 code examples and at most
 *   30000 characters of full text.
 */
export function parseDocPage(html: string, url: string): DocContent {
  const $ = cheerio.load(html);

  // Extract title
  const title = $("h1").first().text().trim() || "Untitled";

  // Extract description (first paragraph of the article)
  const description =
    $("article p").first().text().trim() ||
    $(".devsite-article-body p").first().text().trim() ||
    "";

  // Extract sections with headings
  const sections: DocContent["sections"] = [];
  const article = $("article, .devsite-article-body").first();

  if (article.length) {
    article.find("h1, h2, h3, h4").each((_, heading) => {
      const $heading = $(heading);
      // The selector only matches h1-h4, so the character after "h" is the level.
      const level = Number.parseInt(heading.tagName.slice(1), 10);
      const sectionTitle = $heading.text().trim();

      // Collect the text of following siblings until the next heading,
      // skipping elements that are themselves code blocks.
      let content = "";
      let nextEl = $heading.next();
      while (nextEl.length && !nextEl.is("h1, h2, h3, h4")) {
        if (!nextEl.is("pre, code, .devsite-code-block")) {
          content += nextEl.text().trim() + "\n";
        }
        nextEl = nextEl.next();
      }

      if (sectionTitle) {
        sections.push({
          level,
          title: sectionTitle,
          content: content.trim(),
        });
      }
    });
  }

  // Extract code examples
  const codeExamples: DocContent["codeExamples"] = [];
  $("pre code, .devsite-code-block code").each((_, codeEl) => {
    const $code = $(codeEl);
    const code = $code.text().trim();

    // Try to detect language from class ("language-xyz" or "xyz-code")
    const classes = $code.attr("class") || "";
    const langMatch = classes.match(/language-(\w+)|(\w+)-code/);
    const language = langMatch ? (langMatch[1] || langMatch[2]) : "text";

    if (code) {
      codeExamples.push({ language, code });
    }
  });

  // Extract full text content (without code blocks for cleaner text).
  // Work on a clone so the removals do not affect the parsed document.
  const articleClone = article.clone();
  articleClone.find("pre, code, script, style, nav").remove();
  const fullText = articleClone.text().replace(/\s+/g, " ").trim();

  return {
    url,
    title,
    description,
    sections,
    codeExamples: codeExamples.slice(0, MAX_PARSED_CODE_EXAMPLES), // Limit code examples
    fullText: fullText.substring(0, MAX_PARSED_FULL_TEXT_LENGTH), // Limit full text
  };
}

/**
 * Normalizes a section level to an integer >= 1 so it can be passed to
 * `String.prototype.repeat` without throwing on 0, negative or NaN input.
 */
function normalizeLevel(level: number): number {
  return Number.isFinite(level) ? Math.max(1, Math.trunc(level)) : 1;
}

/**
 * Returns a backtick fence that cannot be closed by the code itself: at least
 * three backticks, and one more than the longest backtick run in `code`.
 */
function codeFenceFor(code: string): string {
  const runs = code.match(/`+/g) ?? [];
  const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
  return "`".repeat(Math.max(3, longest + 1));
}

/**
 * Formats DocContent as Markdown.
 *
 * The output contains the title, URL, optional description, a nested table of
 * contents, every section (headings shifted one level down, capped at h6) and
 * up to 5 code examples truncated to 2000 characters each.
 *
 * @param doc - Parsed page content.
 * @returns The Markdown document as a single string.
 */
export function formatAsMarkdown(doc: DocContent): string {
  const lines: string[] = [];

  lines.push(`# ${doc.title}`);
  lines.push("");
  lines.push(`**URL**: ${doc.url}`);
  lines.push("");

  if (doc.description) {
    lines.push(doc.description);
    lines.push("");
  }

  lines.push("## Table of Contents");
  lines.push("");
  for (const section of doc.sections) {
    // Two spaces per level: a sub-list must be indented to the content column
    // of its parent "- " item to be rendered as nested.
    const indent = "  ".repeat(normalizeLevel(section.level) - 1);
    lines.push(`${indent}- ${section.title}`);
  }
  lines.push("");

  lines.push("## Content");
  lines.push("");
  for (const section of doc.sections) {
    // Shift headings down by one because "#" is used for the page title.
    const headingPrefix = "#".repeat(Math.min(normalizeLevel(section.level) + 1, 6));
    lines.push(`${headingPrefix} ${section.title}`);
    lines.push("");
    if (section.content) {
      lines.push(section.content);
      lines.push("");
    }
  }

  if (doc.codeExamples.length > 0) {
    lines.push("## Code Examples");
    lines.push("");
    for (const example of doc.codeExamples.slice(0, MAX_MARKDOWN_CODE_EXAMPLES)) {
      const code = example.code.substring(0, MAX_MARKDOWN_CODE_LENGTH);
      // Use a fence longer than any backtick run in the sample so that code
      // containing ``` cannot terminate the block early.
      const fence = codeFenceFor(code);
      lines.push(`${fence}${example.language}`);
      lines.push(code);
      lines.push(fence);
      lines.push("");
    }
  }

  return lines.join("\n");
}

/**
 * Sanitizes a string by removing or replacing control characters.
 *
 * Tab, line feed and carriage return survive the first pass; tabs are then
 * replaced with a space and CRLF / CR line endings are normalized to LF.
 */
function sanitizeString(str: string): string {
  return str
    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "") // Remove control chars
    .replace(/\t/g, " ") // Replace tabs with spaces
    .replace(/\r\n/g, "\n") // Normalize line endings
    .replace(/\r/g, "\n");
}

/**
 * Recursively sanitizes all strings in an object.
 *
 * Strings are passed through `sanitizeString`, arrays are mapped element by
 * element, other objects are rebuilt from their own enumerable entries, and
 * every other value is returned unchanged.
 */
function sanitizeObject<T>(obj: T): T {
  if (typeof obj === "string") {
    return sanitizeString(obj) as T;
  }
  if (Array.isArray(obj)) {
    // Explicit arrow so map's index/array arguments are not forwarded.
    return obj.map((item) => sanitizeObject(item)) as T;
  }
  if (obj && typeof obj === "object") {
    const result: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(obj)) {
      result[key] = sanitizeObject(value);
    }
    return result as T;
  }
  return obj;
}

/**
 * Truncates DocContent to fit within size limits.
 *
 * @param doc - Content to truncate; it is not modified.
 * @param maxFullTextLength - Maximum length of `fullText` (default 20000).
 * @returns A copy with `fullText` cut to `maxFullTextLength`, each section's
 *   content cut to 2000 characters, and at most 5 code examples of at most
 *   1500 characters each.
 */
function truncateDocContent(doc: DocContent, maxFullTextLength: number = 20000): DocContent {
  return {
    ...doc,
    fullText: doc.fullText.substring(0, maxFullTextLength),
    sections: doc.sections.map((s) => ({
      ...s,
      content: s.content.substring(0, MAX_JSON_SECTION_LENGTH), // Limit section content
    })),
    codeExamples: doc.codeExamples.slice(0, MAX_JSON_CODE_EXAMPLES).map((e) => ({
      ...e,
      code: e.code.substring(0, MAX_JSON_CODE_LENGTH), // Limit code examples
    })),
  };
}

/**
 * Formats DocContent as JSON.
 *
 * The content is truncated first, then every string in it is sanitized, and
 * the result is serialized with two-space indentation.
 *
 * @param doc - Parsed page content.
 * @returns Pretty-printed JSON text.
 */
export function formatAsJson(doc: DocContent): string {
  const truncated = truncateDocContent(doc);
  const sanitized = sanitizeObject(truncated);
  return JSON.stringify(sanitized, null, 2);
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/LeomaiaJr/gemini-docs-skills'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

parser.ts•6.05 KiB