Skip to main content
Glama

documcp

by tosin2013
check-documentation-links.ts19.1 kB
import { z } from 'zod'; import { readFile, readdir, stat } from 'fs/promises'; import { join, extname, resolve, relative, dirname } from 'path'; import { MCPToolResponse } from '../types/api.js'; // Input validation schema const LinkCheckInputSchema = z.object({ documentation_path: z.string().default('./docs'), check_external_links: z.boolean().default(true), check_internal_links: z.boolean().default(true), check_anchor_links: z.boolean().default(true), timeout_ms: z.number().min(1000).max(30000).default(5000), max_concurrent_checks: z.number().min(1).max(20).default(5), allowed_domains: z.array(z.string()).default([]), ignore_patterns: z.array(z.string()).default([]), fail_on_broken_links: z.boolean().default(false), output_format: z.enum(['summary', 'detailed', 'json']).default('detailed'), }); type LinkCheckInput = z.infer<typeof LinkCheckInputSchema>; interface LinkCheckResult { url: string; status: 'valid' | 'broken' | 'warning' | 'skipped'; statusCode?: number; error?: string; responseTime?: number; sourceFile: string; lineNumber?: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; } interface LinkCheckReport { summary: { totalLinks: number; validLinks: number; brokenLinks: number; warningLinks: number; skippedLinks: number; executionTime: number; filesScanned: number; }; results: LinkCheckResult[]; recommendations: string[]; configuration: { checkExternalLinks: boolean; checkInternalLinks: boolean; checkAnchorLinks: boolean; timeoutMs: number; maxConcurrentChecks: number; }; } export async function checkDocumentationLinks( input: Partial<LinkCheckInput>, ): Promise<MCPToolResponse<LinkCheckReport>> { const startTime = Date.now(); try { // Validate input with defaults const validatedInput = LinkCheckInputSchema.parse(input); const { documentation_path, check_external_links, check_internal_links, check_anchor_links, timeout_ms, max_concurrent_checks, allowed_domains, ignore_patterns, fail_on_broken_links, } = validatedInput; // Scan documentation files const documentationFiles = await scanDocumentationFiles(documentation_path); if (documentationFiles.length === 0) { return { success: false, error: { code: 'NO_DOCUMENTATION_FILES', message: 'No documentation files found in the specified path', details: `Searched in: ${documentation_path}`, resolution: 'Verify the documentation_path parameter points to a directory containing markdown files', }, metadata: { toolVersion: '1.0.0', executionTime: Date.now() - startTime, timestamp: new Date().toISOString(), }, }; } // Extract all links from documentation files const allLinks = await extractLinksFromFiles(documentationFiles, documentation_path); // Filter links based on configuration const filteredLinks = filterLinks(allLinks, { checkExternalLinks: check_external_links, checkInternalLinks: check_internal_links, checkAnchorLinks: check_anchor_links, ignorePatterns: ignore_patterns, }); // Check links with concurrency control const linkResults = await checkLinksWithConcurrency(filteredLinks, { timeoutMs: timeout_ms, maxConcurrent: max_concurrent_checks, allowedDomains: allowed_domains, documentationPath: documentation_path, }); // Generate report const report = generateLinkCheckReport(linkResults, { checkExternalLinks: check_external_links, checkInternalLinks: check_internal_links, checkAnchorLinks: check_anchor_links, timeoutMs: timeout_ms, maxConcurrentChecks: max_concurrent_checks, filesScanned: documentationFiles.length, executionTime: Date.now() - startTime, }); // Check if we should fail on broken links if (fail_on_broken_links && report.summary.brokenLinks > 0) { return { success: false, error: { code: 'BROKEN_LINKS_FOUND', message: `Found ${report.summary.brokenLinks} broken links`, details: `${report.summary.brokenLinks} out of ${report.summary.totalLinks} links are broken`, resolution: 'Fix the broken links or set fail_on_broken_links to false', }, data: report, metadata: { toolVersion: '1.0.0', executionTime: Date.now() - startTime, timestamp: new Date().toISOString(), }, }; } return { success: true, data: report, metadata: { toolVersion: '1.0.0', executionTime: Date.now() - startTime, timestamp: new Date().toISOString(), }, }; } catch (error) { return { success: false, error: { code: 'LINK_CHECK_ERROR', message: 'Failed to check documentation links', details: error instanceof Error ? error.message : 'Unknown error occurred', resolution: 'Check the documentation path and ensure files are accessible', }, metadata: { toolVersion: '1.0.0', executionTime: Date.now() - startTime, timestamp: new Date().toISOString(), }, }; } } async function scanDocumentationFiles(basePath: string): Promise<string[]> { const files: string[] = []; async function scanDirectory(dirPath: string): Promise<void> { try { const entries = await readdir(dirPath); for (const entry of entries) { const fullPath = join(dirPath, entry); const stats = await stat(fullPath); if (stats.isDirectory()) { // Skip node_modules and hidden directories if (!entry.startsWith('.') && entry !== 'node_modules') { await scanDirectory(fullPath); } } else if (stats.isFile()) { const ext = extname(entry).toLowerCase(); if (['.md', '.mdx', '.markdown'].includes(ext)) { files.push(fullPath); } } } } catch (error) { // Skip directories we can't read } } await scanDirectory(basePath); return files; } async function extractLinksFromFiles( files: string[], basePath: string, ): Promise< Array<{ url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }> > { const allLinks: Array<{ url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }> = []; // Regex patterns for different link types const markdownLinkRegex = /\[([^\]]*)\]\(([^)]+)\)/g; const htmlLinkRegex = /<a[^>]+href=["']([^"']+)["'][^>]*>/gi; const refLinkRegex = /\[([^\]]+)\]:\s*(.+)/g; for (const file of files) { try { const content = await readFile(file, 'utf-8'); const lines = content.split('\n'); // Create proper relative file path const absoluteBasePath = resolve(basePath); const absoluteFilePath = resolve(file); const relativeFile = relative(absoluteBasePath, absoluteFilePath).replace(/\\/g, '/'); // Extract markdown links lines.forEach((line, index) => { let match; // Markdown links [text](url) while ((match = markdownLinkRegex.exec(line)) !== null) { const url = match[2].trim(); if (url && !url.startsWith('#')) { // Skip empty and anchor-only links allLinks.push({ url, sourceFile: relativeFile, lineNumber: index + 1, linkType: determineLinkType(url), }); } } // HTML links while ((match = htmlLinkRegex.exec(line)) !== null) { const url = match[1].trim(); if (url && !url.startsWith('#')) { allLinks.push({ url, sourceFile: relativeFile, lineNumber: index + 1, linkType: determineLinkType(url), }); } } // Reference links while ((match = refLinkRegex.exec(line)) !== null) { const url = match[2].trim(); if (url && !url.startsWith('#')) { allLinks.push({ url, sourceFile: relativeFile, lineNumber: index + 1, linkType: determineLinkType(url), }); } } }); } catch (error) { // Skip files we can't read } } return allLinks; } function determineLinkType(url: string): 'internal' | 'external' | 'anchor' | 'mailto' | 'tel' { if (url.startsWith('mailto:')) return 'mailto'; if (url.startsWith('tel:')) return 'tel'; if (url.startsWith('#')) return 'anchor'; if (url.startsWith('http://') || url.startsWith('https://')) return 'external'; return 'internal'; } function filterLinks( links: Array<{ url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }>, options: { checkExternalLinks: boolean; checkInternalLinks: boolean; checkAnchorLinks: boolean; ignorePatterns: string[]; }, ) { return links.filter((link) => { // Check if link should be ignored based on patterns if (options.ignorePatterns.some((pattern) => link.url.includes(pattern))) { return false; } // Filter by link type switch (link.linkType) { case 'external': return options.checkExternalLinks; case 'internal': return options.checkInternalLinks; case 'anchor': return options.checkAnchorLinks; case 'mailto': case 'tel': return false; // Skip these for now default: return true; } }); } async function checkLinksWithConcurrency( links: Array<{ url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }>, options: { timeoutMs: number; maxConcurrent: number; allowedDomains: string[]; documentationPath: string; }, ): Promise<LinkCheckResult[]> { const results: LinkCheckResult[] = []; async function checkSingleLink(link: { url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }): Promise<LinkCheckResult> { const startTime = Date.now(); try { if (link.linkType === 'internal') { return await checkInternalLink(link, options.documentationPath); } else if (link.linkType === 'external') { return await checkExternalLink(link, options.timeoutMs, options.allowedDomains); } else if (link.linkType === 'anchor') { return await checkAnchorLink(link, options.documentationPath); } return { url: link.url, status: 'skipped', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } catch (error) { return { url: link.url, status: 'broken', error: error instanceof Error ? error.message : 'Unknown error', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } } // Process links with concurrency control const chunks = []; for (let i = 0; i < links.length; i += options.maxConcurrent) { chunks.push(links.slice(i, i + options.maxConcurrent)); } for (const chunk of chunks) { const chunkResults = await Promise.all(chunk.map(checkSingleLink)); results.push(...chunkResults); } return results; } async function checkInternalLink( link: { url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }, documentationPath: string, ): Promise<LinkCheckResult> { const startTime = Date.now(); try { let targetPath = link.url; // Remove anchor if present const [filePath] = targetPath.split('#'); // Handle relative paths properly using Node.js path resolution const absoluteDocPath = resolve(documentationPath); const sourceFileAbsolutePath = resolve(absoluteDocPath, link.sourceFile); const sourceDir = dirname(sourceFileAbsolutePath); if (filePath.startsWith('./')) { // Current directory reference - resolve relative to source file directory targetPath = resolve(sourceDir, filePath.substring(2)); } else if (filePath.startsWith('../')) { // Parent directory reference - resolve relative to source file directory targetPath = resolve(sourceDir, filePath); } else if (filePath.startsWith('/')) { // Absolute path from documentation root targetPath = resolve(absoluteDocPath, filePath.substring(1)); } else { // Relative path - resolve relative to source file directory targetPath = resolve(sourceDir, filePath); } try { await stat(targetPath); return { url: link.url, status: 'valid', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } catch { return { url: link.url, status: 'broken', error: 'File not found', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } } catch (error) { return { url: link.url, status: 'broken', error: error instanceof Error ? error.message : 'Unknown error', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } } async function checkExternalLink( link: { url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }, timeoutMs: number, allowedDomains: string[], ): Promise<LinkCheckResult> { const startTime = Date.now(); try { // Check if domain is in allowed list (if specified) if (allowedDomains.length > 0) { const url = new URL(link.url); const isAllowed = allowedDomains.some( (domain) => url.hostname === domain || url.hostname.endsWith('.' + domain), ); if (!isAllowed) { return { url: link.url, status: 'skipped', error: 'Domain not in allowed list', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } } // Simple HEAD request to check if URL is accessible const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), timeoutMs); try { const response = await fetch(link.url, { method: 'HEAD', signal: controller.signal, headers: { 'User-Agent': 'DocuMCP Link Checker 1.0', }, }); clearTimeout(timeoutId); if (response.ok) { return { url: link.url, status: 'valid', statusCode: response.status, sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } else { return { url: link.url, status: 'broken', statusCode: response.status, error: `HTTP ${response.status}: ${response.statusText}`, sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } } catch (fetchError) { clearTimeout(timeoutId); if (fetchError instanceof Error && fetchError.name === 'AbortError') { return { url: link.url, status: 'warning', error: 'Request timeout', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } throw fetchError; } } catch (error) { return { url: link.url, status: 'broken', error: error instanceof Error ? error.message : 'Unknown error', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } } async function checkAnchorLink( link: { url: string; sourceFile: string; lineNumber: number; linkType: 'internal' | 'external' | 'anchor' | 'mailto' | 'tel'; }, _documentationPath: string, ): Promise<LinkCheckResult> { const startTime = Date.now(); // For now, just mark anchor links as valid // In a more sophisticated implementation, we would parse the target file // and check if the anchor exists return { url: link.url, status: 'valid', sourceFile: link.sourceFile, lineNumber: link.lineNumber, linkType: link.linkType, responseTime: Date.now() - startTime, }; } function generateLinkCheckReport( results: LinkCheckResult[], config: { checkExternalLinks: boolean; checkInternalLinks: boolean; checkAnchorLinks: boolean; timeoutMs: number; maxConcurrentChecks: number; filesScanned: number; executionTime: number; }, ): LinkCheckReport { const summary = { totalLinks: results.length, validLinks: results.filter((r) => r.status === 'valid').length, brokenLinks: results.filter((r) => r.status === 'broken').length, warningLinks: results.filter((r) => r.status === 'warning').length, skippedLinks: results.filter((r) => r.status === 'skipped').length, executionTime: config.executionTime, filesScanned: config.filesScanned, }; const recommendations: string[] = []; if (summary.brokenLinks > 0) { recommendations.push( `🔴 Fix ${summary.brokenLinks} broken links to improve documentation quality`, ); } if (summary.warningLinks > 0) { recommendations.push(`🟡 Review ${summary.warningLinks} warning links that may need attention`); } if (summary.validLinks === summary.totalLinks) { recommendations.push('✅ All links are valid - excellent documentation quality!'); } if (summary.totalLinks > 100) { recommendations.push('📊 Consider implementing automated link checking in CI/CD pipeline'); } return { summary, results, recommendations, configuration: { checkExternalLinks: config.checkExternalLinks, checkInternalLinks: config.checkInternalLinks, checkAnchorLinks: config.checkAnchorLinks, timeoutMs: config.timeoutMs, maxConcurrentChecks: config.maxConcurrentChecks, }, }; }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tosin2013/documcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server