# check_documentation_links.py (6.03 kB)
#!/usr/bin/env python3
"""
Script to check for broken internal links in markdown files.
Checks relative links to files within the repository.
Usage:
    python scripts/check_documentation_links.py
    python scripts/check_documentation_links.py --verbose
    python scripts/check_documentation_links.py --fix-suggestions
"""
import os
import re
import argparse
from pathlib import Path
from typing import List, Tuple, Dict
def find_markdown_files(root_dir: str) -> List[Path]:
    """Find all markdown files below ``root_dir``.

    Files inside a ``.venv``, ``venv`` or ``node_modules`` directory located
    *below* ``root_dir`` are skipped. Only the path components relative to
    ``root_dir`` are inspected, so a repository that itself lives under a
    directory with one of those names is still scanned.

    Args:
        root_dir: Directory to search recursively.

    Returns:
        Paths of the ``*.md`` files found, sorted so that callers get a
        deterministic order.
    """
    root = Path(root_dir)
    skipped_dirs = {".venv", "venv", "node_modules"}

    md_files = []
    for path in root.rglob("*.md"):
        # Look only at components below the root: the root's own ancestors say
        # nothing about whether a file belongs to a vendored/virtualenv tree.
        if skipped_dirs.intersection(path.relative_to(root).parts):
            continue
        md_files.append(path)

    return sorted(md_files)
def extract_links(content: str) -> List[Tuple[str, str]]:
    """Extract inline markdown links (``[text](url)``) from ``content``.

    The destination is normalized so that it can be used as a path/URL:
    surrounding whitespace is removed, an optional quoted title
    (``[text](url "Title")``) is dropped, and ``<...>`` around the
    destination is stripped.

    Args:
        content: Markdown source text.

    Returns:
        ``(text, url)`` tuples in order of appearance.
    """
    # Pattern for markdown links: [text](url)
    link_pattern = r'\[([^\]]*)\]\(([^)]+)\)'
    # Destination followed by whitespace and a single- or double-quoted title.
    titled_pattern = re.compile(r"""^(<[^<>]*>|\S+)\s+(?:"[^"]*"|'[^']*')$""")

    links = []
    for text, raw_target in re.findall(link_pattern, content):
        # Keep a whitespace-only target untouched so it is still reported
        # downstream instead of silently turning into an empty path.
        target = raw_target.strip() or raw_target

        titled = titled_pattern.match(target)
        if titled:
            target = titled.group(1)

        # "<my file.md>" is the markdown way to write destinations with spaces.
        if len(target) >= 2 and target.startswith('<') and target.endswith('>'):
            target = target[1:-1]

        links.append((text, target))

    return links  # (text, url) tuples
def is_internal_link(link: str) -> bool:
    """Check if a link is internal (a relative path inside the repository).

    A link is considered external, and therefore not checked on disk, when it
    is a pure anchor (``#section``), a protocol-relative URL (``//host/...``)
    or starts with a URL scheme such as ``http:``, ``https:``, ``mailto:``,
    ``ftp:`` or ``tel:`` (matched case-insensitively).

    Args:
        link: The link destination as written in the markdown source.

    Returns:
        True if the link should be resolved as a file path, False otherwise.
    """
    if link.startswith(('#', '//')):
        return False
    # Generic URL scheme: a letter followed by letters/digits/"+"/"."/"-" and
    # a colon. At least two characters are required so that a Windows drive
    # letter ("C:...") is not mistaken for a scheme.
    return re.match(r'[A-Za-z][A-Za-z0-9+.\-]+:', link) is None
def resolve_link_path(md_file_path: Path, link: str) -> Path:
    """Resolve a relative link to an absolute path.

    The anchor fragment (everything from the first ``#``) is dropped and
    percent-escapes in the remaining path are decoded, so ``My%20File.md``
    resolves to ``My File.md``.

    Args:
        md_file_path: Path of the markdown file that contains the link.
        link: Link destination as written in the markdown source.

    Returns:
        The resolved path, relative to the directory of ``md_file_path``.
    """
    from urllib.parse import unquote

    # Split before decoding so an escaped "%23" stays part of the file name
    # instead of being treated as the start of a fragment.
    link_path = unquote(link.split('#', 1)[0])

    # Resolve relative to the markdown file's directory
    return (md_file_path.parent / link_path).resolve()
def suggest_fixes(broken_link: str, repo_root: Path) -> List[str]:
    """Suggest possible replacement targets for a broken link.

    The file name of the broken link (without any ``#anchor``) is compared
    case-insensitively with the names of the markdown files found under
    ``repo_root``. Exact name matches are listed before files whose name
    merely contains the broken name.

    Args:
        broken_link: Link destination as written in the markdown source.
        repo_root: Repository root used for the search and for making the
            suggestions relative.

    Returns:
        At most three paths relative to ``repo_root``; an empty list when the
        link has no file name component to search for.
    """
    # Drop the anchor: "guide.md#setup" must be matched as "guide.md".
    filename = Path(broken_link.split('#', 1)[0]).name.lower()
    if not filename:
        # An empty name is a substring of every file name and would "match"
        # the whole repository.
        return []

    exact_matches = []
    partial_matches = []
    for md_file in find_markdown_files(str(repo_root)):
        candidate = md_file.name.lower()
        if candidate == filename:
            exact_matches.append(str(md_file.relative_to(repo_root)))
        elif filename in candidate:
            partial_matches.append(str(md_file.relative_to(repo_root)))

    # Rank exact matches first so they survive the top-3 cut.
    return (exact_matches + partial_matches)[:3]
def check_links_in_file(md_file: Path, repo_root: Path) -> List[Tuple[str, str, str, bool]]:
    """Check every internal link found in one markdown file.

    Args:
        md_file: Markdown file to scan.
        repo_root: Repository root (accepted for interface compatibility; not
            used by the check itself).

    Returns:
        One ``(link_text, link, target, exists)`` tuple per internal link,
        where ``target`` is the resolved path as a string, or an
        ``"Error resolving: ..."`` message if resolution failed. An empty list
        is returned, after printing an error, when the file cannot be read.
    """
    try:
        with open(md_file, 'r', encoding='utf-8') as handle:
            text = handle.read()
    except Exception as e:
        # Best effort: an unreadable file is reported and skipped.
        print(f"Error reading {md_file}: {e}")
        return []

    candidates = [
        (label, url)
        for label, url in extract_links(text)
        if is_internal_link(url)
    ]

    report = []
    for label, url in candidates:
        try:
            resolved = resolve_link_path(md_file, url)
            found = resolved.exists()
        except Exception as e:
            entry = (label, url, f"Error resolving: {e}", False)
        else:
            entry = (label, url, str(resolved), found)
        report.append(entry)

    return report
def main():
    """Check all markdown files of the repository and print a report.

    The repository root is taken to be the parent of the directory that
    contains this script. Broken links are printed per file, followed by a
    summary; with ``--verbose`` working links are listed too, and with
    ``--fix-suggestions`` possible replacement targets are printed for every
    broken link.

    Only the text report is implemented. Requesting another ``--format`` is
    accepted for compatibility but triggers a warning on stderr.

    Returns:
        1 if at least one broken link was found, otherwise 0.
    """
    import sys  # only needed for the stderr warning below

    parser = argparse.ArgumentParser(description='Check for broken internal links in markdown documentation')
    parser.add_argument('--verbose', '-v', action='store_true', help='Show all links, not just broken ones')
    parser.add_argument('--fix-suggestions', '-s', action='store_true', help='Suggest fixes for broken links')
    parser.add_argument('--format', choices=['text', 'markdown', 'json'], default='text', help='Output format')

    args = parser.parse_args()

    if args.format != 'text':
        # Do not silently ignore the option: tell the user what they will get.
        print(
            f"Warning: --format {args.format} is not implemented; using text output.",
            file=sys.stderr,
        )

    repo_root = Path(__file__).parent.parent
    md_files = find_markdown_files(str(repo_root))

    print(f"Checking {len(md_files)} markdown files for broken links...\n")

    broken_links = []
    total_links = 0

    for md_file in sorted(md_files):
        rel_path = md_file.relative_to(repo_root)
        link_results = check_links_in_file(md_file, repo_root)

        if link_results:
            # Print the file header only when something will be listed under it.
            if args.verbose or any(not exists for _, _, _, exists in link_results):
                print(f"\n[FILE] {rel_path}")

            for link_text, link, target, exists in link_results:
                total_links += 1
                status = "[OK]" if exists else "[ERROR]"

                if args.verbose or not exists:
                    print(f"  {status} [{link_text}]({link})")
                    if not exists:
                        print(f"     -> Target: {target}")
                        broken_links.append((str(rel_path), link_text, link, target))

    # Summary
    print("\n" + "=" * 60)
    print("SUMMARY:")
    print(f"Total internal links checked: {total_links}")
    print(f"Broken links found: {len(broken_links)}")

    if broken_links:
        print("\n❌ BROKEN LINKS:")
        for file_path, link_text, link, target in broken_links:
            print(f"\n  📄 {file_path}")
            print(f"     Text: {link_text}")
            print(f"     Link: {link}")
            print(f"     Target: {target}")

            if args.fix_suggestions:
                suggestions = suggest_fixes(link, repo_root)
                if suggestions:
                    print("     💡 Suggestions:")
                    for suggestion in suggestions:
                        print(f"        - {suggestion}")

    # Exit with error code if broken links found
    exit_code = 1 if broken_links else 0

    if broken_links:
        message = f"\n⚠️  Found {len(broken_links)} broken links."
        if not args.fix_suggestions:
            # Only advertise the flag when the user has not already passed it.
            message += " Use --fix-suggestions for repair ideas."
        print(message)
    else:
        print("\n✅ All documentation links are working correctly!")

    return exit_code
if __name__ == "__main__":
    # The ``exit`` builtin is installed by the ``site`` module for interactive
    # use and is missing when Python runs with ``-S``; raising SystemExit
    # propagates main()'s return code as the process exit status everywhere.
    raise SystemExit(main())