Documentation MCP Server

your-docs-mcp
scripts

validate_docs.py•27.4 KiB

#!/usr/bin/env python3 """ Documentation Validation Script Validates markdown documentation files against project standards: - YAML frontmatter validation - Markdown syntax checks - Link validation - Line length limits - Code block language tags - Content quality checks Usage: python scripts/validate_docs.py [paths...] [options] Examples: python scripts/validate_docs.py docs/ python scripts/validate_docs.py docs/guides/getting-started.md python scripts/validate_docs.py docs/ --fix python scripts/validate_docs.py docs/ --report=validation-report.md """ import argparse import re import sys from dataclasses import dataclass, field from enum import Enum from pathlib import Path try: import yaml except ImportError: print("Error: PyYAML is required. Install with: pip install pyyaml") sys.exit(1) class Severity(Enum): """Issue severity levels.""" CRITICAL = "critical" # Must fix WARNING = "warning" # Should fix INFO = "info" # Optional @dataclass class ValidationIssue: """Represents a validation issue found in documentation.""" severity: Severity file_path: str line_number: int | None issue: str current: str | None = None fix: str | None = None suggestion: str | None = None def __str__(self) -> str: """Format issue as human-readable string.""" emoji = {Severity.CRITICAL: "❌", Severity.WARNING: "⚠️", Severity.INFO: "ℹ️"} result = f"{emoji[self.severity]} {self.severity.value.upper()}: {self.issue}\n" result += f" Location: {self.file_path}:{self.line_number or 0}\n" if self.current: result += f" Current: {self.current[:80]}\n" if self.fix: result += f" Fix: {self.fix[:80]}\n" if self.suggestion: result += f" Suggestion: {self.suggestion}\n" return result @dataclass class ValidationResult: """Results of validating a documentation file.""" file_path: str issues: list[ValidationIssue] = field(default_factory=list) @property def critical_count(self) -> int: return sum(1 for i in self.issues if i.severity == Severity.CRITICAL) @property def warning_count(self) -> int: return 
sum(1 for i in self.issues if i.severity == Severity.WARNING) @property def info_count(self) -> int: return sum(1 for i in self.issues if i.severity == Severity.INFO) @property def has_critical(self) -> bool: return self.critical_count > 0 @property def passed(self) -> bool: return self.critical_count == 0 class DocumentValidator: """Validates markdown documentation files.""" REQUIRED_FRONTMATTER = ["title", "category", "tags", "order"] VALID_CATEGORIES = [ "Guides", "API Reference", "Architecture", "Configuration", "Security", "Development", "Tutorials", ] MAX_LINE_LENGTH = 100 def __init__(self, docs_root: Path): self.docs_root = docs_root self.results: list[ValidationResult] = [] def validate_file(self, file_path: Path) -> ValidationResult: """Validate a single markdown file.""" result = ValidationResult(file_path=str(file_path)) try: with open(file_path, encoding="utf-8") as f: content = f.read() lines = content.splitlines() except Exception as e: result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=None, issue=f"Failed to read file: {e}", ) ) return result # Run validation checks self._validate_frontmatter(file_path, content, result) self._validate_headers(file_path, lines, result) self._validate_code_blocks(file_path, lines, result) self._validate_line_length(file_path, lines, result) self._validate_links(file_path, lines, result) self._validate_lists(file_path, lines, result) self._validate_content_structure(file_path, content, result) self.results.append(result) return result def _validate_frontmatter( self, file_path: Path, content: str, result: ValidationResult ) -> None: """Validate YAML frontmatter.""" if not content.startswith("---"): result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=1, issue="Missing YAML frontmatter", fix="Add YAML frontmatter at the beginning of the file", ) ) return parts = content.split("---", 2) if len(parts) < 3: 
result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=1, issue="Incomplete YAML frontmatter (missing closing ---)", fix="Add closing --- after frontmatter", ) ) return frontmatter_text = parts[1] # Try to parse YAML try: frontmatter = yaml.safe_load(frontmatter_text) except yaml.YAMLError as e: result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=self._get_yaml_error_line(str(e)), issue=f"Invalid YAML syntax: {e}", suggestion="Check for proper quoting, indentation, and syntax", ) ) return if not isinstance(frontmatter, dict): result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=2, issue="Frontmatter must be a dictionary", fix="Use key: value pairs in frontmatter", ) ) return # Check required fields for field_name in self.REQUIRED_FRONTMATTER: if field_name not in frontmatter: result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=None, issue=f"Missing required frontmatter field: {field_name}", fix=f"Add '{field_name}:' to frontmatter", ) ) # Validate field types and values if "title" in frontmatter: if not isinstance(frontmatter["title"], str): result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=None, issue="'title' must be a string", current=str(frontmatter["title"]), fix='title: "Your Title Here"', ) ) if "category" in frontmatter: if not isinstance(frontmatter["category"], str): result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=None, issue="'category' should be a string", current=str(frontmatter["category"]), ) ) elif frontmatter["category"] not in self.VALID_CATEGORIES: result.issues.append( ValidationIssue( severity=Severity.INFO, file_path=str(file_path), line_number=None, issue=f"Unusual category: {frontmatter['category']}", suggestion=f"Common 
categories: {', '.join(self.VALID_CATEGORIES)}", ) ) if "tags" in frontmatter: if not isinstance(frontmatter["tags"], list): result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=None, issue="'tags' must be an array", current=str(frontmatter["tags"]), fix='tags: ["tag1", "tag2"]', ) ) elif len(frontmatter["tags"]) == 0: result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=None, issue="'tags' array is empty", suggestion="Add relevant tags for searchability", ) ) if "order" in frontmatter: if not isinstance(frontmatter["order"], int): result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=None, issue="'order' must be an integer", current=str(frontmatter["order"]), fix="order: 1", ) ) def _validate_headers( self, file_path: Path, lines: list[str], result: ValidationResult ) -> None: """Validate markdown headers.""" headers = [] h1_count = 0 in_frontmatter = False frontmatter_closed = False in_code_block = False for i, line in enumerate(lines, 1): # Track code blocks if line.strip().startswith("```"): in_code_block = not in_code_block continue if in_code_block: continue # Track frontmatter if line.strip() == "---": if not in_frontmatter and not frontmatter_closed: in_frontmatter = True continue elif in_frontmatter: in_frontmatter = False frontmatter_closed = True continue if in_frontmatter: continue if line.startswith("#"): # Check for space after # if not re.match(r"^#+\s", line): result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=i, issue="Missing space after # in header", current=line, fix=line.replace("#", "# ", 1), ) ) level = len(line) - len(line.lstrip("#")) level = min(level, line.index(" ") if " " in line else level) headers.append((i, level, line)) if level == 1: h1_count += 1 # Check H1 count if h1_count == 0: result.issues.append( ValidationIssue( 
severity=Severity.WARNING, file_path=str(file_path), line_number=None, issue="No H1 header found", suggestion="Add a main title with # Header", ) ) elif h1_count > 1: result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=None, issue=f"Multiple H1 headers found ({h1_count})", fix="Use only one H1 header per document", ) ) # Check header hierarchy for i in range(1, len(headers)): prev_line, prev_level, prev_text = headers[i - 1] curr_line, curr_level, curr_text = headers[i] if curr_level > prev_level + 1: result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=curr_line, issue=f"Skipped header level (H{prev_level} -> H{curr_level})", current=curr_text, fix=f"Use H{prev_level + 1} instead", ) ) def _validate_code_blocks( self, file_path: Path, lines: list[str], result: ValidationResult ) -> None: """Validate code blocks have language specifiers.""" in_code_block = False for i, line in enumerate(lines, 1): if line.strip().startswith("```"): if not in_code_block: # Opening code block lang = line.strip()[3:].strip() if not lang: result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=i, issue="Code block missing language specifier", current=line, fix="```python (or appropriate language)", ) ) in_code_block = True else: # Closing code block in_code_block = False def _validate_line_length( self, file_path: Path, lines: list[str], result: ValidationResult ) -> None: """Validate line length limits.""" in_code_block = False in_table = False for i, line in enumerate(lines, 1): # Skip code blocks if line.strip().startswith("```"): in_code_block = not in_code_block continue if in_code_block: continue # Skip tables if "|" in line: in_table = True elif in_table and not line.strip(): in_table = False if in_table: continue # Skip URLs if "http://" in line or "https://" in line: continue if len(line) > self.MAX_LINE_LENGTH: 
result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=i, issue=f"Line exceeds {self.MAX_LINE_LENGTH} characters ({len(line)} chars)", current=line[:80] + "...", suggestion="Break line at logical point (punctuation, conjunction)", ) ) def _validate_links(self, file_path: Path, lines: list[str], result: ValidationResult) -> None: """Validate internal and external links.""" link_pattern = r"\[([^\]]+)\]\(([^\)]+)\)" for i, line in enumerate(lines, 1): for match in re.finditer(link_pattern, line): link_text = match.group(1) link_url = match.group(2) # Check for poor link text if link_text.lower() in ["here", "click here", "this", "link"]: result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=i, issue=f"Non-descriptive link text: '{link_text}'", current=match.group(0), suggestion="Use descriptive text that explains the destination", ) ) # Validate internal links if link_url.startswith("./") or link_url.startswith("../"): # Extract path (remove anchor) link_path = link_url.split("#")[0] # Check for .md extension if not link_path.endswith(".md") and not link_path.endswith("/"): result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=i, issue=f"Internal link missing .md extension: {link_path}", current=link_url, fix=link_url.replace(link_path, link_path + ".md"), ) ) # Check if target exists if link_path.endswith(".md"): target_path = (file_path.parent / link_path).resolve() if not target_path.exists(): result.issues.append( ValidationIssue( severity=Severity.CRITICAL, file_path=str(file_path), line_number=i, issue=f"Broken internal link: {link_url}", current=match.group(0), suggestion=f"Target file does not exist: {target_path}", ) ) # Check for absolute paths elif link_url.startswith("/") and not link_url.startswith("//"): result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), 
line_number=i, issue=f"Absolute path used for internal link: {link_url}", current=match.group(0), suggestion="Use relative paths (./file.md or ../parent/file.md)", ) ) def _validate_lists(self, file_path: Path, lines: list[str], result: ValidationResult) -> None: """Validate list formatting.""" in_code_block = False bullet_chars = set() for i, line in enumerate(lines, 1): # Track code blocks if line.strip().startswith("```"): in_code_block = not in_code_block continue if in_code_block: continue # Check for unordered list items if re.match(r"^[\s]*[-*+]\s", line): # Extract bullet character bullet = re.match(r"^[\s]*([-*+])", line).group(1) bullet_chars.add(bullet) # Check indentation (should be multiple of 2) indent = len(line) - len(line.lstrip()) if indent % 2 != 0: result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=i, issue=f"List item indentation should be multiple of 2 (found {indent})", current=line, suggestion="Use 2-space indentation for nested lists", ) ) # Check for inconsistent bullet characters if len(bullet_chars) > 1: result.issues.append( ValidationIssue( severity=Severity.WARNING, file_path=str(file_path), line_number=None, issue=f"Inconsistent bullet characters: {bullet_chars}", suggestion="Use consistent bullet character throughout (prefer '-')", ) ) def _validate_content_structure( self, file_path: Path, content: str, result: ValidationResult ) -> None: """Validate overall content structure.""" lines = content.splitlines() # Skip frontmatter if content.startswith("---"): parts = content.split("---", 2) if len(parts) >= 3: content_without_frontmatter = parts[2] lines = content_without_frontmatter.splitlines() # Count sections (H2 headers) section_count = sum(1 for line in lines if line.startswith("## ")) # Check for table of contents has_toc = any("table of contents" in line.lower() for line in lines) if section_count > 3 and not has_toc: result.issues.append( ValidationIssue( 
severity=Severity.INFO, file_path=str(file_path), line_number=None, issue=f"Document has {section_count} sections but no table of contents", suggestion="Add table of contents for better navigation", ) ) # Check for examples section in guides/tutorials if "guide" in str(file_path).lower() or "tutorial" in str(file_path).lower(): has_examples = any("example" in line.lower() for line in lines) if not has_examples: result.issues.append( ValidationIssue( severity=Severity.INFO, file_path=str(file_path), line_number=None, issue="Guide/tutorial missing examples section", suggestion="Add practical examples to help readers", ) ) @staticmethod def _get_yaml_error_line(error_msg: str) -> int | None: """Extract line number from YAML error message.""" match = re.search(r"line (\d+)", error_msg) return int(match.group(1)) if match else None def generate_report(self, output_path: Path | None = None) -> str: """Generate validation report.""" report_lines = ["# Documentation Validation Report\n"] total_critical = sum(r.critical_count for r in self.results) total_warning = sum(r.warning_count for r in self.results) total_info = sum(r.info_count for r in self.results) total_issues = total_critical + total_warning + total_info files_with_issues = sum(1 for r in self.results if r.issues) files_passed = sum(1 for r in self.results if r.passed) # Summary report_lines.append("## Summary\n") report_lines.append(f"**Total Files:** {len(self.results)}") report_lines.append(f"**Files Passed:** {files_passed} ✅") report_lines.append(f"**Files with Issues:** {files_with_issues}") report_lines.append(f"**Total Issues:** {total_issues}") report_lines.append(f"- Critical: {total_critical} ❌") report_lines.append(f"- Warnings: {total_warning} ⚠️") report_lines.append(f"- Info: {total_info} ℹ️\n") # Results by severity if total_critical > 0: report_lines.append("## Critical Issues (Must Fix)\n") for result in self.results: critical = [i for i in result.issues if i.severity == Severity.CRITICAL] if 
critical: report_lines.append(f"### {result.file_path}\n") for issue in critical: report_lines.append(f"**Line {issue.line_number or 'N/A'}:** {issue.issue}") if issue.current: report_lines.append(f"```\nCurrent: {issue.current}\n```") if issue.fix: report_lines.append(f"```\nFix: {issue.fix}\n```") report_lines.append("") if total_warning > 0: report_lines.append("## Warnings (Should Fix)\n") for result in self.results: warnings = [i for i in result.issues if i.severity == Severity.WARNING] if warnings: report_lines.append(f"### {result.file_path}\n") for issue in warnings[:5]: # Limit to 5 per file report_lines.append(f"**Line {issue.line_number or 'N/A'}:** {issue.issue}") if issue.suggestion: report_lines.append(f"*Suggestion: {issue.suggestion}*") report_lines.append("") report = "\n".join(report_lines) if output_path: with open(output_path, "w", encoding="utf-8") as f: f.write(report) return report def main(): """Main entry point for the validation script.""" parser = argparse.ArgumentParser( description="Validate markdown documentation against project standards" ) parser.add_argument( "paths", nargs="+", help="Paths to markdown files or directories to validate" ) parser.add_argument("--report", help="Output detailed report to file") parser.add_argument( "--fix", action="store_true", help="Attempt to auto-fix issues where possible (not implemented yet)", ) parser.add_argument("--no-warnings", action="store_true", help="Only report critical issues") args = parser.parse_args() # Collect all markdown files to validate files_to_validate = [] for path_str in args.paths: path = Path(path_str) if not path.exists(): print(f"Error: Path does not exist: {path}", file=sys.stderr) continue if path.is_file() and path.suffix == ".md": files_to_validate.append(path) elif path.is_dir(): files_to_validate.extend(path.rglob("*.md")) if not files_to_validate: print("No markdown files found to validate", file=sys.stderr) return 1 # Validate files validator = 
DocumentValidator(docs_root=Path.cwd()) print(f"Validating {len(files_to_validate)} markdown files...\n") for file_path in sorted(files_to_validate): result = validator.validate_file(file_path) if result.passed: print(f"✅ {file_path}") else: print( f"❌ {file_path} ({result.critical_count} critical, " f"{result.warning_count} warnings, {result.info_count} info)" ) if not args.no_warnings: for issue in result.issues: print(f" {issue}") # Generate report print("\n" + "=" * 80) report = validator.generate_report(output_path=Path(args.report) if args.report else None) print(report) # Exit with error if critical issues found has_critical = any(r.has_critical for r in validator.results) if has_critical: print("\n❌ Validation failed: Critical issues must be fixed") return 1 else: print("\n✅ Validation passed: No critical issues found") return 0 if __name__ == "__main__": sys.exit(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/esola-thomas/your-docs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

validate_docs.py•27.4 KiB