Skip to main content
Glama

documcp

by tosin2013
check-documentation-links.ts (19.3 kB)
import { z } from "zod";
import { readFile, readdir, stat } from "fs/promises";
import { join, extname, resolve, relative, dirname } from "path";
import { MCPToolResponse } from "../types/api.js";

// Input validation schema
const LinkCheckInputSchema = z.object({
  documentation_path: z.string().default("./docs"),
  check_external_links: z.boolean().default(true),
  check_internal_links: z.boolean().default(true),
  check_anchor_links: z.boolean().default(true),
  timeout_ms: z.number().min(1000).max(30000).default(5000),
  max_concurrent_checks: z.number().min(1).max(20).default(5),
  allowed_domains: z.array(z.string()).default([]),
  ignore_patterns: z.array(z.string()).default([]),
  fail_on_broken_links: z.boolean().default(false),
  output_format: z.enum(["summary", "detailed", "json"]).default("detailed"),
});

type LinkCheckInput = z.infer<typeof LinkCheckInputSchema>;

/** Category assigned to a link by {@link determineLinkType}. */
type LinkType = "internal" | "external" | "anchor" | "mailto" | "tel";

/** A link found in a documentation file, before it has been checked. */
interface ExtractedLink {
  url: string;
  /** Path of the containing file relative to the documentation root, using "/" separators. */
  sourceFile: string;
  /** 1-based line number within `sourceFile`. */
  lineNumber: number;
  linkType: LinkType;
}

interface LinkCheckResult {
  url: string;
  status: "valid" | "broken" | "warning" | "skipped";
  statusCode?: number;
  error?: string;
  responseTime?: number;
  sourceFile: string;
  lineNumber?: number;
  linkType: LinkType;
}

interface LinkCheckReport {
  summary: {
    totalLinks: number;
    validLinks: number;
    brokenLinks: number;
    warningLinks: number;
    skippedLinks: number;
    executionTime: number;
    filesScanned: number;
  };
  results: LinkCheckResult[];
  recommendations: string[];
  configuration: {
    checkExternalLinks: boolean;
    checkInternalLinks: boolean;
    checkAnchorLinks: boolean;
    timeoutMs: number;
    maxConcurrentChecks: number;
  };
}

const TOOL_VERSION = "1.0.0";

/** Builds the response metadata; `executionTime` is measured from `startTime` (ms since epoch). */
function buildMetadata(
  startTime: number,
): NonNullable<MCPToolResponse<LinkCheckReport>["metadata"]> {
  return {
    toolVersion: TOOL_VERSION,
    executionTime: Date.now() - startTime,
    timestamp: new Date().toISOString(),
  };
}

/**
 * Builds a {@link LinkCheckResult} for `link`, stamping `responseTime` as the
 * time elapsed since `startTime`. Only the keys present in `extra` are added,
 * so absent optional fields stay absent rather than being set to `undefined`.
 */
function makeResult(
  link: ExtractedLink,
  status: LinkCheckResult["status"],
  startTime: number,
  extra: Pick<LinkCheckResult, "statusCode" | "error"> = {},
): LinkCheckResult {
  return {
    url: link.url,
    status,
    ...extra,
    sourceFile: link.sourceFile,
    lineNumber: link.lineNumber,
    linkType: link.linkType,
    responseTime: Date.now() - startTime,
  };
}

/**
 * Scans a documentation directory for markdown files, extracts their links,
 * checks them according to the supplied options and returns a report.
 *
 * The function does not throw: input-validation and unexpected failures are
 * returned as a `LINK_CHECK_ERROR` response.
 *
 * @param input - Partial options; missing fields take the defaults declared in
 *   `LinkCheckInputSchema`.
 * @returns A response with `success: true` and the report, or `success: false`
 *   with one of the error codes `NO_DOCUMENTATION_FILES`, `BROKEN_LINKS_FOUND`
 *   (only when `fail_on_broken_links` is set; the report is still attached as
 *   `data`) or `LINK_CHECK_ERROR`.
 */
export async function checkDocumentationLinks(
  input: Partial<LinkCheckInput>,
): Promise<MCPToolResponse<LinkCheckReport>> {
  const startTime = Date.now();

  try {
    // Validate input with defaults
    const validatedInput = LinkCheckInputSchema.parse(input);
    const {
      documentation_path,
      check_external_links,
      check_internal_links,
      check_anchor_links,
      timeout_ms,
      max_concurrent_checks,
      allowed_domains,
      ignore_patterns,
      fail_on_broken_links,
    } = validatedInput;

    // Scan documentation files
    const documentationFiles = await scanDocumentationFiles(documentation_path);

    if (documentationFiles.length === 0) {
      return {
        success: false,
        error: {
          code: "NO_DOCUMENTATION_FILES",
          message: "No documentation files found in the specified path",
          details: `Searched in: ${documentation_path}`,
          resolution:
            "Verify the documentation_path parameter points to a directory containing markdown files",
        },
        metadata: buildMetadata(startTime),
      };
    }

    // Extract all links from documentation files
    const allLinks = await extractLinksFromFiles(
      documentationFiles,
      documentation_path,
    );

    // Filter links based on configuration
    const filteredLinks = filterLinks(allLinks, {
      checkExternalLinks: check_external_links,
      checkInternalLinks: check_internal_links,
      checkAnchorLinks: check_anchor_links,
      ignorePatterns: ignore_patterns,
    });

    // Check links with concurrency control
    const linkResults = await checkLinksWithConcurrency(filteredLinks, {
      timeoutMs: timeout_ms,
      maxConcurrent: max_concurrent_checks,
      allowedDomains: allowed_domains,
      documentationPath: documentation_path,
    });

    // Generate report
    const report = generateLinkCheckReport(linkResults, {
      checkExternalLinks: check_external_links,
      checkInternalLinks: check_internal_links,
      checkAnchorLinks: check_anchor_links,
      timeoutMs: timeout_ms,
      maxConcurrentChecks: max_concurrent_checks,
      filesScanned: documentationFiles.length,
      executionTime: Date.now() - startTime,
    });

    // Check if we should fail on broken links
    if (fail_on_broken_links && report.summary.brokenLinks > 0) {
      return {
        success: false,
        error: {
          code: "BROKEN_LINKS_FOUND",
          message: `Found ${report.summary.brokenLinks} broken links`,
          details: `${report.summary.brokenLinks} out of ${report.summary.totalLinks} links are broken`,
          resolution: "Fix the broken links or set fail_on_broken_links to false",
        },
        data: report,
        metadata: buildMetadata(startTime),
      };
    }

    return {
      success: true,
      data: report,
      metadata: buildMetadata(startTime),
    };
  } catch (error) {
    return {
      success: false,
      error: {
        code: "LINK_CHECK_ERROR",
        message: "Failed to check documentation links",
        details:
          error instanceof Error ? error.message : "Unknown error occurred",
        resolution:
          "Check the documentation path and ensure files are accessible",
      },
      metadata: buildMetadata(startTime),
    };
  }
}

/**
 * Recursively collects `.md`, `.mdx` and `.markdown` files under `basePath`.
 *
 * Hidden directories (names starting with ".") and `node_modules` are not
 * descended into. Unreadable directories and entries that cannot be stat'ed
 * are skipped individually, so one bad entry does not hide its siblings.
 *
 * @returns Paths built by joining `basePath` with each entry name.
 */
async function scanDocumentationFiles(basePath: string): Promise<string[]> {
  const markdownExtensions = [".md", ".mdx", ".markdown"];
  const files: string[] = [];

  async function scanDirectory(dirPath: string): Promise<void> {
    let entries: string[];
    try {
      entries = await readdir(dirPath);
    } catch {
      // Skip directories we can't read
      return;
    }

    for (const entry of entries) {
      const fullPath = join(dirPath, entry);
      try {
        const stats = await stat(fullPath);

        if (stats.isDirectory()) {
          // Skip node_modules and hidden directories
          if (!entry.startsWith(".") && entry !== "node_modules") {
            await scanDirectory(fullPath);
          }
        } else if (stats.isFile()) {
          if (markdownExtensions.includes(extname(entry).toLowerCase())) {
            files.push(fullPath);
          }
        }
      } catch {
        // Skip this entry only (e.g. a dangling symlink) and keep scanning
        // the rest of the directory.
      }
    }
  }

  await scanDirectory(basePath);
  return files;
}

/**
 * Reduces a raw markdown link destination to the bare URL.
 *
 * Removes a trailing link title (`url "Title"`, `url 'Title'`, `url (Title)`)
 * and unwraps an angle-bracketed destination (`<url>`). Anything else is
 * returned trimmed but otherwise untouched.
 */
function normalizeMarkdownDestination(rawDestination: string): string {
  let destination = rawDestination.trim();

  const titled = /^(<[^>]*>|\S+)\s+(?:"[^"]*"|'[^']*'|\([^)]*\))$/.exec(
    destination,
  );
  const withoutTitle = titled?.[1];
  if (withoutTitle !== undefined) {
    destination = withoutTitle;
  }

  if (
    destination.length > 1 &&
    destination.startsWith("<") &&
    destination.endsWith(">")
  ) {
    destination = destination.slice(1, -1).trim();
  }

  return destination;
}

/**
 * Reads each file and extracts inline markdown links, HTML `<a href>` links
 * and reference-style link definitions, line by line.
 *
 * Empty destinations and anchor-only destinations (starting with "#") are
 * dropped here, so they are never passed on to the checkers. Files that cannot
 * be read are skipped.
 *
 * @param files - File paths as returned by `scanDocumentationFiles`.
 * @param basePath - Documentation root; `sourceFile` is reported relative to it.
 */
async function extractLinksFromFiles(
  files: string[],
  basePath: string,
): Promise<ExtractedLink[]> {
  const allLinks: ExtractedLink[] = [];

  // Regex patterns for different link types, applied to every line in this
  // order. `urlGroup` is the capture group holding the destination;
  // `markdownDestination` marks syntaxes that may carry a title or <...>.
  const linkPatterns: ReadonlyArray<{
    regex: RegExp;
    urlGroup: number;
    markdownDestination: boolean;
  }> = [
    // Markdown links [text](url)
    {
      regex: /\[([^\]]*)\]\(([^)]+)\)/g,
      urlGroup: 2,
      markdownDestination: true,
    },
    // HTML links
    {
      regex: /<a[^>]+href=["']([^"']+)["'][^>]*>/gi,
      urlGroup: 1,
      markdownDestination: false,
    },
    // Reference links; the (?!\^) guard excludes footnote definitions such as
    // "[^1]: text", whose body is prose rather than a URL.
    {
      regex: /\[(?!\^)([^\]]+)\]:\s*(.+)/g,
      urlGroup: 2,
      markdownDestination: true,
    },
  ];

  const absoluteBasePath = resolve(basePath);

  for (const file of files) {
    try {
      const content = await readFile(file, "utf-8");
      const lines = content.split("\n");

      // Create proper relative file path
      const relativeFile = relative(absoluteBasePath, resolve(file)).replace(
        /\\/g,
        "/",
      );

      lines.forEach((line, index) => {
        for (const pattern of linkPatterns) {
          // matchAll works on a copy of the regex, so no lastIndex state
          // leaks between lines or files.
          for (const match of line.matchAll(pattern.regex)) {
            const rawUrl = match[pattern.urlGroup] ?? "";
            const url = pattern.markdownDestination
              ? normalizeMarkdownDestination(rawUrl)
              : rawUrl.trim();

            // Skip empty and anchor-only links
            if (url && !url.startsWith("#")) {
              allLinks.push({
                url,
                sourceFile: relativeFile,
                lineNumber: index + 1,
                linkType: determineLinkType(url),
              });
            }
          }
        }
      });
    } catch {
      // Skip files we can't read
    }
  }

  return allLinks;
}

/**
 * Classifies a URL by its prefix: `mailto:`, `tel:`, `#` (anchor),
 * `http://` / `https://` (external); everything else is treated as internal.
 */
function determineLinkType(url: string): LinkType {
  if (url.startsWith("mailto:")) return "mailto";
  if (url.startsWith("tel:")) return "tel";
  if (url.startsWith("#")) return "anchor";
  if (url.startsWith("http://") || url.startsWith("https://")) return "external";
  return "internal";
}

/**
 * Keeps only the links that should be checked.
 *
 * A link is dropped when its URL contains any of `ignorePatterns` as a plain
 * substring, when its type is disabled in `options`, or when it is a `mailto`
 * or `tel` link.
 */
function filterLinks(
  links: ExtractedLink[],
  options: {
    checkExternalLinks: boolean;
    checkInternalLinks: boolean;
    checkAnchorLinks: boolean;
    ignorePatterns: string[];
  },
) {
  return links.filter((link) => {
    // Check if link should be ignored based on patterns
    if (options.ignorePatterns.some((pattern) => link.url.includes(pattern))) {
      return false;
    }

    // Filter by link type
    switch (link.linkType) {
      case "external":
        return options.checkExternalLinks;
      case "internal":
        return options.checkInternalLinks;
      case "anchor":
        return options.checkAnchorLinks;
      case "mailto":
      case "tel":
        return false; // Skip these for now
      default:
        return true;
    }
  });
}

/**
 * Checks links in consecutive batches of at most `options.maxConcurrent`,
 * awaiting each batch before starting the next. Results are returned in the
 * same order as `links`.
 *
 * A checker that throws yields a "broken" result for that link instead of
 * rejecting the whole run.
 */
async function checkLinksWithConcurrency(
  links: ExtractedLink[],
  options: {
    timeoutMs: number;
    maxConcurrent: number;
    allowedDomains: string[];
    documentationPath: string;
  },
): Promise<LinkCheckResult[]> {
  const results: LinkCheckResult[] = [];

  async function checkSingleLink(link: ExtractedLink): Promise<LinkCheckResult> {
    const startTime = Date.now();

    try {
      if (link.linkType === "internal") {
        return await checkInternalLink(link, options.documentationPath);
      } else if (link.linkType === "external") {
        return await checkExternalLink(
          link,
          options.timeoutMs,
          options.allowedDomains,
        );
      } else if (link.linkType === "anchor") {
        return await checkAnchorLink(link, options.documentationPath);
      }

      return makeResult(link, "skipped", startTime);
    } catch (error) {
      return makeResult(link, "broken", startTime, {
        error: error instanceof Error ? error.message : "Unknown error",
      });
    }
  }

  // Process links with concurrency control. The batch size is clamped to at
  // least 1 so the loop always advances.
  const batchSize = Math.max(1, Math.floor(options.maxConcurrent) || 1);
  for (let offset = 0; offset < links.length; offset += batchSize) {
    const batch = links.slice(offset, offset + batchSize);
    const batchResults = await Promise.all(
      batch.map((link) => checkSingleLink(link)),
    );
    results.push(...batchResults);
  }

  return results;
}

/**
 * Checks that the file or directory an internal link points to exists.
 *
 * The fragment (`#...`) is ignored. Destinations starting with "/" are
 * resolved against the documentation root; all others against the directory
 * of the source file. The literal path is tried first; if it does not exist,
 * the path with its query string removed and percent-escapes decoded is tried.
 *
 * @returns "valid" when a candidate path exists, otherwise "broken" with
 *   error "File not found".
 */
async function checkInternalLink(
  link: ExtractedLink,
  documentationPath: string,
): Promise<LinkCheckResult> {
  const startTime = Date.now();

  try {
    // Remove anchor if present
    const [filePath = ""] = link.url.split("#");

    // Web-style form of the same path: no query string, escapes decoded.
    const [withoutQuery = ""] = filePath.split("?");
    let decodedPath = withoutQuery;
    try {
      decodedPath = decodeURIComponent(withoutQuery);
    } catch {
      // Malformed percent-escape: fall back to the undecoded text.
    }
    const candidates =
      decodedPath === filePath ? [filePath] : [filePath, decodedPath];

    // Handle relative paths properly using Node.js path resolution
    const absoluteDocPath = resolve(documentationPath);
    const sourceDir = dirname(resolve(absoluteDocPath, link.sourceFile));

    const toAbsolute = (candidate: string): string =>
      candidate.startsWith("/")
        ? // Absolute path from documentation root
          resolve(absoluteDocPath, candidate.substring(1))
        : // "./", "../" and bare paths all resolve against the source file directory
          resolve(sourceDir, candidate);

    for (const candidate of candidates) {
      try {
        await stat(toAbsolute(candidate));
        return makeResult(link, "valid", startTime);
      } catch {
        // Not found under this spelling; try the next candidate.
      }
    }

    return makeResult(link, "broken", startTime, { error: "File not found" });
  } catch (error) {
    return makeResult(link, "broken", startTime, {
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}

/**
 * Checks an external link with an HTTP HEAD request.
 *
 * When `allowedDomains` is non-empty, links whose hostname is neither one of
 * the listed domains nor a subdomain of one are returned as "skipped" without
 * any request. A request aborted after `timeoutMs` is reported as "warning";
 * a non-OK response or any other failure (including an unparsable URL) as
 * "broken".
 */
async function checkExternalLink(
  link: ExtractedLink,
  timeoutMs: number,
  allowedDomains: string[],
): Promise<LinkCheckResult> {
  const startTime = Date.now();

  try {
    // Check if domain is in allowed list (if specified)
    if (allowedDomains.length > 0) {
      const url = new URL(link.url);
      const isAllowed = allowedDomains.some(
        (domain) =>
          url.hostname === domain || url.hostname.endsWith("." + domain),
      );

      if (!isAllowed) {
        return makeResult(link, "skipped", startTime, {
          error: "Domain not in allowed list",
        });
      }
    }

    // Simple HEAD request to check if URL is accessible
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(link.url, {
        method: "HEAD",
        signal: controller.signal,
        headers: {
          "User-Agent": "DocuMCP Link Checker 1.0",
        },
      });

      if (response.ok) {
        return makeResult(link, "valid", startTime, {
          statusCode: response.status,
        });
      }

      return makeResult(link, "broken", startTime, {
        statusCode: response.status,
        error: `HTTP ${response.status}: ${response.statusText}`,
      });
    } catch (fetchError) {
      if (fetchError instanceof Error && fetchError.name === "AbortError") {
        return makeResult(link, "warning", startTime, {
          error: "Request timeout",
        });
      }
      throw fetchError;
    } finally {
      // Always release the timer, whichever way the request ended.
      clearTimeout(timeoutId);
    }
  } catch (error) {
    return makeResult(link, "broken", startTime, {
      error: error instanceof Error ? error.message : "Unknown error",
    });
  }
}

/**
 * Placeholder anchor check: every anchor link is reported as "valid".
 *
 * A fuller implementation would parse the target file and verify that the
 * anchor exists; `_documentationPath` is accepted for that purpose but unused.
 */
async function checkAnchorLink(
  link: ExtractedLink,
  _documentationPath: string,
): Promise<LinkCheckResult> {
  const startTime = Date.now();

  // For now, just mark anchor links as valid
  return makeResult(link, "valid", startTime);
}

/**
 * Aggregates individual results into a report: per-status counts,
 * human-readable recommendations and an echo of the effective configuration.
 */
function generateLinkCheckReport(
  results: LinkCheckResult[],
  config: {
    checkExternalLinks: boolean;
    checkInternalLinks: boolean;
    checkAnchorLinks: boolean;
    timeoutMs: number;
    maxConcurrentChecks: number;
    filesScanned: number;
    executionTime: number;
  },
): LinkCheckReport {
  const countWithStatus = (status: LinkCheckResult["status"]): number =>
    results.filter((result) => result.status === status).length;

  const summary = {
    totalLinks: results.length,
    validLinks: countWithStatus("valid"),
    brokenLinks: countWithStatus("broken"),
    warningLinks: countWithStatus("warning"),
    skippedLinks: countWithStatus("skipped"),
    executionTime: config.executionTime,
    filesScanned: config.filesScanned,
  };

  const recommendations: string[] = [];

  if (summary.brokenLinks > 0) {
    recommendations.push(
      `🔴 Fix ${summary.brokenLinks} broken links to improve documentation quality`,
    );
  }

  if (summary.warningLinks > 0) {
    recommendations.push(
      `🟡 Review ${summary.warningLinks} warning links that may need attention`,
    );
  }

  if (summary.validLinks === summary.totalLinks) {
    recommendations.push(
      "✅ All links are valid - excellent documentation quality!",
    );
  }

  if (summary.totalLinks > 100) {
    recommendations.push(
      "📊 Consider implementing automated link checking in CI/CD pipeline",
    );
  }

  return {
    summary,
    results,
    recommendations,
    configuration: {
      checkExternalLinks: config.checkExternalLinks,
      checkInternalLinks: config.checkInternalLinks,
      checkAnchorLinks: config.checkAnchorLinks,
      timeoutMs: config.timeoutMs,
      maxConcurrentChecks: config.maxConcurrentChecks,
    },
  };
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/documcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.