#!/usr/bin/env python3
"""
Laravel Documentation Updater
This module handles automatic fetching and updating of Laravel documentation
from the official GitHub repository.
"""
import sys
import logging
import argparse
import shutil
import tempfile
import re
from pathlib import Path
from typing import Dict, List, Optional, Union
import urllib.request
import urllib.error
import zipfile
import json
import time
from enum import Enum
import random
import html
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("laravel-docs-updater")
# GitHub API URLs
GITHUB_API_URL = "https://api.github.com"
LARAVEL_DOCS_REPO = "laravel/docs"
USER_AGENT = "Laravel-MCP-Companion (+https://github.com/brianirish/laravel-mcp-companion)"
def get_supported_versions() -> list[str]:
    """Get supported Laravel versions dynamically from GitHub API.
    
    Returns:
        List of supported version branches (e.g., ['6.x', '7.x', '8.x', ...])
    """
    logger.debug("Fetching supported Laravel versions from GitHub API")
    
    url = f"{GITHUB_API_URL}/repos/{LARAVEL_DOCS_REPO}/branches"
    
    try:
        request = urllib.request.Request(
            url,
            headers={
                "User-Agent": USER_AGENT,
                "Accept": "application/vnd.github.v3+json"
            }
        )
        
        with urllib.request.urlopen(request) as response:
            branches = json.loads(response.read().decode())
            
            # Filter for version branches (X.x format) starting from 6.x
            version_branches = []
            for branch in branches:
                name = branch["name"]
                if re.match(r'^\d+\.x$', name):
                    major_version = int(name.split('.')[0])
                    if major_version >= 6:
                        version_branches.append(name)
            
            # Sort versions numerically
            version_branches.sort(key=lambda v: int(v.split('.')[0]))
            
            if not version_branches:
                logger.warning("No version branches found, falling back to hardcoded list")
                return ["6.x", "7.x", "8.x", "9.x", "10.x", "11.x", "12.x"]
            
            logger.debug(f"Found {len(version_branches)} supported versions: {', '.join(version_branches)}")
            return version_branches
            
    except Exception as e:
        logger.warning(f"Error fetching versions from GitHub API: {str(e)}, falling back to hardcoded list")
        return ["6.x", "7.x", "8.x", "9.x", "10.x", "11.x", "12.x"]
# Cache supported versions to avoid repeated API calls
_SUPPORTED_VERSIONS_CACHE = None
def get_cached_supported_versions() -> list[str]:
    """Get cached supported versions or fetch them if not cached."""
    global _SUPPORTED_VERSIONS_CACHE
    if _SUPPORTED_VERSIONS_CACHE is None:
        _SUPPORTED_VERSIONS_CACHE = get_supported_versions()
    return _SUPPORTED_VERSIONS_CACHE
SUPPORTED_VERSIONS = get_cached_supported_versions()
DEFAULT_VERSION = SUPPORTED_VERSIONS[-1]  # Always use the latest version as default
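# At import time this resolves to something like SUPPORTED_VERSIONS ==
# ["6.x", "7.x", ..., "12.x"] and DEFAULT_VERSION == "12.x" (whichever
# version branch is newest when the API is queried).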
class DocumentationSourceType(Enum):
    """Types of documentation sources supported."""
    GITHUB_REPO = "github_repo"
    DIRECT_URL = "direct_url"
    LARAVEL_SERVICE = "laravel_service"
    COMMUNITY_PACKAGE = "community_package"
class DocumentationAutoDiscovery:
    """Handles automatic discovery of documentation sections from Laravel services."""
    
    def __init__(self, max_retries: int = 3, request_delay: float = 1.0):
        """
        Initialize the documentation auto-discovery system.
        
        Args:
            max_retries: Maximum number of retry attempts for failed requests
            request_delay: Base delay, in seconds, used for exponential backoff between retry attempts
        """
        self.max_retries = max_retries
        self.request_delay = request_delay
        
        # Common asset file extensions and patterns to exclude
        self.asset_extensions = {'.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.woff', '.woff2', '.ttf', '.eot'}
        self.asset_patterns = {'/_next/', '/static/', '/assets/', '/images/', '/fonts/', '/favicon'}
        
    def discover_sections(self, service: str, service_config: Dict) -> List[str]:
        """
        Discover documentation sections for a given service.
        
        Args:
            service: Service name (forge, vapor, envoyer, nova)
            service_config: Service configuration dictionary
            
        Returns:
            List of discovered section paths
        """
        if not service_config.get("auto_discovery", False):
            logger.debug(f"Auto-discovery disabled for {service}")
            return []
        
        discovery_rules = service_config.get("discovery_rules", {})
        discovered_sections = []
        
        try:
            logger.info(f"Starting auto-discovery for {service}")
            
            if service == "forge":
                discovered_sections = self._discover_forge_sections(service_config, discovery_rules)
            elif service == "nova":
                discovered_sections = self._discover_nova_sections(service_config, discovery_rules)
            elif service == "vapor":
                discovered_sections = self._discover_vapor_sections(service_config, discovery_rules)
            elif service == "envoyer":
                discovered_sections = self._discover_envoyer_sections(service_config, discovery_rules)
            else:
                logger.warning(f"No discovery method available for service: {service}")
                
            logger.info(f"Auto-discovery completed for {service}: found {len(discovered_sections)} sections")
            return discovered_sections
            
        except Exception as e:
            logger.error(f"Error during auto-discovery for {service}: {str(e)}")
            return []
    
    def _is_asset_file(self, path: str) -> bool:
        """
        Check if a path represents an asset file (CSS, JS, images, etc.).
        
        Args:
            path: URL path to check
            
        Returns:
            True if the path is an asset file, False otherwise
        """
        # Remove query parameters for extension check
        clean_path = path.split('?')[0].lower()
        
        # Check if path contains common asset directories
        for pattern in self.asset_patterns:
            if pattern in clean_path:
                return True
        
        # Check file extension
        for ext in self.asset_extensions:
            if clean_path.endswith(ext):
                return True
                
        return False
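    # A few illustrative inputs for the checks above (hypothetical paths):
    #   _is_asset_file("/docs/app.css?v=3")  -> True   (asset extension)
    #   _is_asset_file("/_next/chunk.js")    -> True   (asset directory)
    #   _is_asset_file("/docs/servers/php")  -> False  (documentation page)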
    
    def _discover_forge_sections(self, config: Dict, rules: Dict) -> List[str]:
        """Discover Forge documentation sections by parsing the docs index page."""
        base_url = config["base_url"]
        sections = []
        
        try:
            # Fetch the main docs page
            content_bytes = self._retry_request(f"{base_url}")
            content = content_bytes.decode('utf-8')
            
            # Extract href="/docs/*" links using regex
            doc_links = re.findall(r'href="(/docs/[^"]*)"', content, re.IGNORECASE)
            
            for link in doc_links:
                # Remove query parameters if present
                clean_link = link.split('?')[0]
                
                # Check if this is an asset file (CSS, JS, images, etc.)
                if self._is_asset_file(clean_link):
                    continue
                    
                # Remove the /docs/ prefix to get the section name
                section = clean_link.replace('/docs/', '')
                if section and section not in sections:
                    sections.append(section)
                    
            # Sort sections to maintain consistent ordering
            sections.sort()
            logger.debug(f"Discovered {len(sections)} Forge sections")
            
        except Exception as e:
            logger.warning(f"Error discovering Forge sections: {str(e)}")
            
        return sections
    
    def _discover_nova_sections(self, config: Dict, rules: Dict) -> List[str]:
        """Discover Nova documentation sections and auto-detect version."""
        base_url = config["base_url"]
        sections = []
        
        try:
            # First, try to detect the latest version
            nova_base = "https://nova.laravel.com/docs"
            
            # Try to find version links or check if current version is still valid
            content_bytes = self._retry_request(f"{nova_base}")
            content = content_bytes.decode('utf-8')
            
            # Look for version links like /docs/v6, /docs/v5, etc.
            version_matches = re.findall(r'/docs/(v\d+)', content)
            if version_matches:
                latest_version = max(version_matches, key=lambda v: int(v[1:]))
                actual_base_url = f"{nova_base}/{latest_version}"
                logger.info(f"Auto-detected Nova version: {latest_version}")
            else:
                actual_base_url = base_url
            
            # Fetch the navigation/index page
            nav_content_bytes = self._retry_request(actual_base_url)
            nav_content = nav_content_bytes.decode('utf-8')
            
            # Extract navigation links - Nova typically uses relative links
            nav_links = re.findall(r'href="(/docs/[^"]*)"', nav_content, re.IGNORECASE)
            
            for link in nav_links:
                # Extract section after version (e.g., /docs/v5/installation -> installation)
                section_match = re.search(r'/docs/v\d+/(.+)', link)
                if section_match:
                    section = section_match.group(1)
                    if section and section not in sections:
                        sections.append(section)
            
            sections.sort()
            logger.debug(f"Discovered {len(sections)} Nova sections")
            
        except Exception as e:
            logger.warning(f"Error discovering Nova sections: {str(e)}")
            
        return sections
    
    def _discover_vapor_sections(self, config: Dict, rules: Dict) -> List[str]:
        """Discover Vapor documentation sections by parsing Mintlify navigation."""
        base_url = config["base_url"]
        sections = []
        
        try:
            # Vapor uses Mintlify, which often has a special navigation structure
            content_bytes = self._retry_request(base_url)
            content = content_bytes.decode('utf-8')
            
            # Look for Mintlify navigation patterns
            # Try multiple patterns that Mintlify commonly uses
            nav_patterns = [
                r'href="(/[^"]*)"[^>]*>([^<]+)</a>',  # General link pattern
                r'"href":"(/[^"]*)"',  # JSON-style navigation
                r'data-href="(/[^"]*)"',  # Data attribute pattern
            ]
            
            for pattern in nav_patterns:
                links = re.findall(pattern, content, re.IGNORECASE)
                for link in links:
                    if isinstance(link, tuple):
                        path = link[0]
                    else:
                        path = link
                    
                    # Filter for documentation paths (exclude external links, assets, etc.)
                    if (path.startswith('/') and 
                        not path.startswith('//') and 
                        not self._is_asset_file(path) and
                        path != '/'):
                        
                        section = path.lstrip('/')
                        if section and section not in sections:
                            sections.append(section)
            
            # Remove duplicates and sort
            sections = list(set(sections))
            sections.sort()
            logger.debug(f"Discovered {len(sections)} Vapor sections")
            
        except Exception as e:
            logger.warning(f"Error discovering Vapor sections: {str(e)}")
            
        return sections
    
    def _discover_envoyer_sections(self, config: Dict, rules: Dict) -> List[str]:
        """Discover Envoyer documentation sections, handling category redirects."""
        base_url = config["base_url"]
        sections = []
        
        try:
            # Fetch the main docs page
            content_bytes = self._retry_request(base_url)
            content = content_bytes.decode('utf-8')
            
            # Extract documentation links
            doc_links = re.findall(r'href="(/docs/[^"]*)"', content, re.IGNORECASE)
            
            for link in doc_links:
                # Remove the /docs/ prefix
                section = link.replace('/docs/', '')
                if section and section not in sections:
                    # Test if this is a real page (not a redirect)
                    try:
                        test_url = f"{base_url}/{section}"
                        test_content_bytes = self._retry_request(test_url)
                        test_content = test_content_bytes.decode('utf-8')
                        
                        # Check if this is actual documentation content
                        if self._is_valid_envoyer_content(test_content, section):
                            sections.append(section)
                        else:
                            logger.debug(f"Skipping {section} - appears to be redirect or invalid content")
                            
                    except Exception as test_e:
                        logger.debug(f"Skipping {section} - error testing content: {str(test_e)}")
                        continue
            
            sections.sort()
            logger.debug(f"Discovered {len(sections)} Envoyer sections")
            
        except Exception as e:
            logger.warning(f"Error discovering Envoyer sections: {str(e)}")
            
        return sections
    
    def _is_valid_envoyer_content(self, content: str, section: str) -> bool:
        """Check if Envoyer content is actual documentation (not redirect page)."""
        # Look for common documentation indicators
        doc_indicators = [
            'envoyer', 'deployment', 'zero downtime', 'project', 
            'server', 'hook', 'notification', 'repository'
        ]
        
        # Look for redirect indicators (things that suggest this isn't real content)
        redirect_indicators = [
            'window.location', 'http-equiv="refresh"', 'redirecting',
            'please wait', 'loading...', 'not found'
        ]
        
        content_lower = content.lower()
        
        # Check for documentation indicators
        doc_score = sum(1 for indicator in doc_indicators if indicator in content_lower)
        
        # Check for redirect indicators
        redirect_score = sum(1 for indicator in redirect_indicators if indicator in content_lower)
        
        # Must have at least one documentation indicator, no redirect indicators, and substantial content
        return doc_score >= 1 and redirect_score == 0 and len(content.strip()) > 500
    
    def _retry_request(self, url: str, headers: Optional[Dict] = None) -> bytes:
        """
        Make a request with retry logic and respectful delays.
        
        Args:
            url: URL to request
            headers: Optional headers to include
            
        Returns:
            Response content as bytes
        """
        if headers is None:
            headers = {"User-Agent": USER_AGENT}
        
        last_exception: Optional[Exception] = None
        
        for attempt in range(self.max_retries + 1):
            try:
                # Add respectful delay between requests
                if attempt > 0:
                    time.sleep(self.request_delay * (2 ** (attempt - 1)))
                
                request = urllib.request.Request(url, headers=headers)
                with urllib.request.urlopen(request) as response:
                    return response.read()
                    
            except urllib.error.HTTPError as e:
                last_exception = e
                if e.code == 404:
                    # Don't retry 404 errors
                    raise
                elif e.code == 429 or (e.code == 403 and "rate limit" in str(e.reason).lower()):
                    # Rate limiting - wait longer
                    if attempt < self.max_retries:
                        wait_time = min(300, (2 ** attempt) * 10 + random.uniform(0, 5))
                        logger.warning(f"Rate limited on attempt {attempt + 1}, waiting {wait_time:.1f}s")
                        time.sleep(wait_time)
                    else:
                        raise
                elif e.code >= 500 and attempt < self.max_retries:
                    # Server errors are worth retrying
                    wait_time = min(60, (2 ** attempt) + random.uniform(0, 2))
                    logger.warning(f"Server error {e.code} on attempt {attempt + 1}, retrying in {wait_time:.1f}s")
                    time.sleep(wait_time)
                else:
                    raise
                    
            except Exception as e:
                last_exception = e
                if attempt < self.max_retries:
                    wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
                    logger.warning(f"Request error on attempt {attempt + 1}, retrying in {wait_time:.1f}s: {str(e)}")
                    time.sleep(wait_time)
                else:
                    raise
        
        # This should never be reached, but just in case
        if last_exception:
            raise last_exception
        else:
            raise RuntimeError(f"Failed to fetch {url} after {self.max_retries + 1} attempts")
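    # With the defaults (max_retries=3, request_delay=1.0), the pre-attempt
    # sleep above backs off as 1s, 2s, 4s before retries 1-3; rate-limit and
    # server-error handling layer additional randomized waits on top.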
class ExternalDocsFetcher:
    """Handles fetching documentation from external Laravel services and packages."""
    
    def __init__(self, target_dir: Path, cache_duration: int = 86400, max_retries: int = 3):
        """
        Initialize the external documentation fetcher.
        
        Args:
            target_dir: Directory where external docs should be stored
            cache_duration: Cache duration in seconds (default: 24 hours)
            max_retries: Maximum number of retry attempts for failed requests
        """
        self.target_dir = target_dir
        self.cache_duration = cache_duration
        self.max_retries = max_retries
        self.external_dir = target_dir / "external"
        self.external_dir.mkdir(parents=True, exist_ok=True)
        
        # Initialize auto-discovery system
        self.auto_discovery = DocumentationAutoDiscovery(max_retries=max_retries)
        
        # Laravel services documentation sources
        self.laravel_services = {
            "forge": {
                "name": "Laravel Forge",
                "type": DocumentationSourceType.LARAVEL_SERVICE,
                "base_url": "https://forge.laravel.com/docs",
                "auto_discovery": True,
                "discovery_rules": {
                    "index_url": "https://forge.laravel.com/docs",
                    "link_pattern": r'href="(/docs/[^"]*)"',
                    "nested_sections": ["accounts", "servers", "sites", "resources"],
                    "exclude_patterns": ["#", "javascript:", "mailto:"]
                },
                "sections": [
                    # Get Started (manual fallback)
                    "introduction", "cli", "sdk",
                    # Accounts  
                    "accounts/your-account", "accounts/circles", "accounts/source-control", "accounts/ssh", "accounts/api",
                    # Servers
                    "servers/providers", "servers/types", "servers/management", "servers/provisioning-process",
                    "servers/ssh", "servers/php", "servers/packages", "servers/recipes", "servers/load-balancing",
                    "servers/nginx-templates", "servers/backups", "servers/monitoring", "servers/cookbook",
                    # Sites
                    "sites/the-basics", "sites/applications", "sites/deployments", "sites/commands",
                    "sites/packages", "sites/queues", "sites/security-rules", "sites/redirects",
                    "sites/ssl", "sites/user-isolation", "sites/cookbook",
                    # Resources
                    "resources/daemons", "resources/databases", "resources/caches", "resources/network",
                    "resources/scheduler", "resources/integrations", "resources/cookbook"
                ]
            },
            "vapor": {
                "name": "Laravel Vapor", 
                "type": DocumentationSourceType.LARAVEL_SERVICE,
                "base_url": "https://docs.vapor.build",
                "auto_discovery": True,
                "discovery_rules": {
                    "index_url": "https://docs.vapor.build",
                    "navigation_patterns": [
                        r'href="(/[^"]*)"[^>]*>([^<]+)</a>',
                        r'"href":"(/[^"]*)"',
                        r'data-href="(/[^"]*)"'
                    ],
                    "exclude_extensions": [".css", ".js", ".png", ".jpg", ".svg"],
                    "min_content_length": 500
                },
                "sections": [
                    # Manual fallback sections
                    "introduction", "projects/the-basics", "projects/environments",
                    "projects/deployments", "resources/queues", "resources/storage",
                    "resources/databases", "resources/caches"
                ]
            },
            "envoyer": {
                "name": "Laravel Envoyer",
                "type": DocumentationSourceType.LARAVEL_SERVICE,
                "base_url": "https://docs.envoyer.io",
                "auto_discovery": True,
                "discovery_rules": {
                    "index_url": "https://docs.envoyer.io",
                    "link_pattern": r'href="(/docs/[^"]*)"',
                    "validate_content": True,
                    "content_indicators": ["envoyer", "deployment", "zero downtime", "project"],
                    "redirect_indicators": ["window.location", "redirecting", "loading..."]
                },
                "sections": [
                    # Manual fallback sections
                    "introduction", "quick-start",
                    "accounts/source-control", "accounts/your-account",
                    "projects/management", "projects/servers", "projects/deployment-hooks",
                    "projects/heartbeats", "projects/notifications", "projects/collaborators"
                ]
            },
            "nova": {
                "name": "Laravel Nova",
                "type": DocumentationSourceType.LARAVEL_SERVICE,
                "base_url": "https://nova.laravel.com/docs/v5",
                "auto_discovery": True,
                "discovery_rules": {
                    "base_url": "https://nova.laravel.com/docs",
                    "version_detection": True,
                    "version_pattern": r'/docs/(v\d+)',
                    "link_pattern": r'href="(/docs/[^"]*)"',
                    "section_pattern": r'/docs/v\d+/(.+)',
                    "navigation_sections": ["Get Started", "Resources", "Search", "Filters", "Lenses", "Actions", "Metrics", "Digging Deeper"]
                },
                "sections": [
                    # Manual fallback sections
                    # Get Started
                    "installation", "releases", "upgrade",
                    # Resources
                    "resources/the-basics", "resources/fields", "resources/dependent-fields",
                    "resources/date-fields", "resources/file-fields", "resources/repeater-fields",
                    "resources/panels", "resources/relationships", "resources/validation", "resources/authorization",
                    # Search
                    "search/the-basics", "search/global-search", "search/scout-integration", 
                    # Filters
                    "filters/defining-filters", "filters/registering-filters",
                    # Lenses  
                    "lenses/defining-lenses", "lenses/registering-lenses",
                    # Actions
                    "actions/defining-actions", "actions/registering-actions",
                    # Metrics
                    "metrics/defining-metrics", "metrics/registering-metrics",
                    # Digging Deeper (Customization)
                    "customization/dashboards", "customization/menus", "customization/notifications",
                    "customization/authentication", "customization/impersonation", "customization/tools",
                    "customization/resource-tools", "customization/cards", "customization/fields",
                    "customization/filters", "customization/frontend", "customization/assets",
                    "customization/localization", "customization/stubs"
                ]
            }
        }
    
    def get_service_cache_path(self, service: str) -> Path:
        """Get the cache directory path for a service."""
        service_dir = self.external_dir / service
        service_dir.mkdir(exist_ok=True)
        return service_dir
    
    def get_cache_metadata_path(self, service: str) -> Path:
        """Get the metadata file path for a service."""
        return self.get_service_cache_path(service) / ".cache_metadata.json"
    
    def is_cache_valid(self, service: str) -> bool:
        """Check if the cached documentation for a service is still valid."""
        metadata_path = self.get_cache_metadata_path(service)
        
        if not metadata_path.exists():
            return False
        
        try:
            # Use file modification time instead of stored cached_at
            cache_time = metadata_path.stat().st_mtime
            return (time.time() - cache_time) < self.cache_duration
        except Exception as e:
            logger.warning(f"Error reading cache metadata for {service}: {str(e)}")
            return False
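    # Example: with the default cache_duration of 86400 seconds, a service is
    # re-fetched at most once per day; the metadata file's mtime serves as the
    # cache timestamp.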
    
    def save_cache_metadata(self, service: str, metadata: Dict) -> None:
        """Save cache metadata for a service."""
        metadata_path = self.get_cache_metadata_path(service)
        
        try:
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2)
        except Exception as e:
            logger.error(f"Error saving cache metadata for {service}: {str(e)}")
    
    def fetch_laravel_service_docs(self, service: str) -> bool:
        """
        Fetch documentation for a Laravel service.
        
        Args:
            service: Service name (forge, vapor, envoyer, nova)
            
        Returns:
            True if successful, False otherwise
        """
        if service not in self.laravel_services:
            logger.error(f"Unknown Laravel service: {service}")
            return False
        
        # Check if cache is valid
        if self.is_cache_valid(service):
            logger.debug(f"Using cached documentation for {service}")
            return True
        
        service_config = self.laravel_services[service]
        service_dir = self.get_service_cache_path(service)
        
        logger.info(f"Fetching documentation for {service_config['name']}")
        
        try:
            if service_config["type"] == DocumentationSourceType.LARAVEL_SERVICE:
                return self._fetch_service_documentation(service, service_config, service_dir)
            elif service_config["type"] == DocumentationSourceType.GITHUB_REPO:
                return self._fetch_github_documentation(service, service_config, service_dir)
            else:
                logger.error(f"Unsupported documentation source type for {service}")
                return False
        except Exception as e:
            logger.error(f"Error fetching documentation for {service}: {str(e)}")
            return False
    
    def _fetch_service_documentation(self, service: str, config: Dict, target_dir: Path) -> bool:
        """Fetch documentation from Laravel service websites."""
        base_url = config["base_url"]
        
        # Try auto-discovery first, fallback to manual sections
        discovered_sections = []
        if config.get("auto_discovery", False):
            try:
                discovered_sections = self.auto_discovery.discover_sections(service, config)
                logger.info(f"Auto-discovery found {len(discovered_sections)} sections for {service}")
            except Exception as e:
                logger.warning(f"Auto-discovery failed for {service}: {str(e)}, falling back to manual sections")
        
        # Use discovered sections if available, otherwise use manual sections
        if discovered_sections:
            sections = discovered_sections
            discovery_method = "auto-discovery"
        else:
            sections = config.get("sections", [])
            discovery_method = "manual configuration"
            
        logger.info(f"Using {discovery_method} for {service}: {len(sections)} sections")
        
        # All configured services are now publicly accessible
        # No longer creating placeholder documentation
        
        fetched_sections = []
        
        for section in sections:
            # Double-check that this isn't an asset file
            if self.auto_discovery._is_asset_file(section):
                logger.debug(f"Skipping asset file: {section}")
                continue
                
            section_url = f"{base_url}/{section}"
            section_file = target_dir / f"{section}.md"
            
            # Create parent directories if needed for nested sections
            section_file.parent.mkdir(parents=True, exist_ok=True)
            
            try:
                logger.debug(f"Fetching {section} documentation from {section_url}")
                content_bytes = self._retry_request(section_url)
                content = content_bytes.decode('utf-8')
                
                # Extract the main content from the HTML and convert it to a
                # markdown-style document (parsing is generic rather than
                # service-specific, so some pages may need manual review)
                processed_content = self._process_service_html(content, service, section)
                
                with open(section_file, 'w', encoding='utf-8') as f:
                    f.write(processed_content)
                
                fetched_sections.append(section)
                logger.debug(f"Successfully fetched {section} documentation")
                
            except urllib.error.HTTPError as e:
                if e.code == 404:
                    logger.info(f"Section {section} not found (404) - may not be available")
                else:
                    logger.warning(f"Failed to fetch {section} documentation: HTTP {e.code}")
                continue
            except Exception as e:
                logger.warning(f"Failed to fetch {section} documentation: {str(e)}")
                continue
        
        if fetched_sections:
            # Save metadata about what was fetched
            metadata = {
                "service": service,
                "fetched_sections": fetched_sections,
                "total_sections": len(sections),
                "success_rate": len(fetched_sections) / len(sections),
                "discovery_method": discovery_method,
                "auto_discovery_enabled": config.get("auto_discovery", False),
                "discovered_count": len(discovered_sections) if discovered_sections else 0,
                "manual_fallback": discovery_method == "manual configuration" and config.get("auto_discovery", False)
            }
            self.save_cache_metadata(service, metadata)
            logger.info(f"Successfully fetched {len(fetched_sections)}/{len(sections)} sections for {service} using {discovery_method}")
            return True
        
        return False
    
    def _create_placeholder_documentation(self, service: str, config: Dict, target_dir: Path) -> bool:
        """Create placeholder documentation for services that require authentication."""
        logger.info(f"Creating placeholder documentation for {service} (authentication required)")
        
        sections = config.get("sections", [])
        service_name = config.get("name", service.title())
        base_url = config.get("base_url", "")
        
        for section in sections:
            section_file = target_dir / f"{section}.md"
            # Create parent directories for nested sections (e.g. "projects/management")
            section_file.parent.mkdir(parents=True, exist_ok=True)
            
            content = f"# {service_name} - {section.replace('-', ' ').title()}\n\n"
            content += f"*Note: {service_name} documentation requires authentication to access.*\n\n"
            content += "## Overview\n\n"
            content += f"This section covers {section.replace('-', ' ')} functionality in {service_name}.\n\n"
            content += "## Documentation Access\n\n"
            content += f"To access the complete {service_name} documentation:\n\n"
            content += f"1. Visit [{service_name}]({base_url.replace('/docs/1.0', '')})\n"
            content += "2. Sign in to your account\n"
            content += "3. Navigate to the documentation section\n\n"
            content += "## Common Use Cases\n\n"
            
            if service == "vapor":
                if section == "getting-started":
                    content += "- Setting up serverless Laravel applications\n"
                    content += "- Configuring AWS Lambda deployment\n"
                elif section == "projects":
                    content += "- Creating and managing Vapor projects\n"
                    content += "- Environment configuration\n"
                elif section == "deployments":
                    content += "- Deploying Laravel applications to AWS Lambda\n"
                    content += "- Managing deployment rollbacks\n"
            elif service == "envoyer":
                if section == "getting-started":
                    content += "- Setting up zero-downtime deployment\n"
                    content += "- Connecting your repositories\n"
                elif section == "projects":
                    content += "- Creating deployment projects\n"
                    content += "- Managing project settings\n"
                elif section == "deployments":
                    content += "- Configuring deployment hooks\n"
                    content += "- Managing deployment history\n"
            
            content += f"\n*For detailed information, please visit the official {service_name} documentation.*\n"
            
            with open(section_file, 'w', encoding='utf-8') as f:
                f.write(content)
        
        # Save metadata
        metadata = {
            "service": service,
            "fetched_sections": sections,
            "total_sections": len(sections),
            "success_rate": 1.0,
            "type": "placeholder"
        }
        self.save_cache_metadata(service, metadata)
        
        logger.info(f"Created placeholder documentation for {service} with {len(sections)} sections")
        return True
    
    def _fetch_github_documentation(self, service: str, config: Dict, target_dir: Path) -> bool:
        """Fetch documentation from GitHub repositories."""
        repo = config["repo"]
        branch = config.get("branch", "main")
        
        # Use similar logic to the main DocsUpdater but for external repos
        archive_url = f"https://github.com/{repo}/archive/refs/heads/{branch}.zip"
        
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)
                zip_path = temp_path / f"{service}_docs.zip"
                
                logger.debug(f"Downloading {service} documentation from {archive_url}")
                content_bytes = self._retry_request(archive_url)
                
                with open(zip_path, 'wb') as out_file:
                    out_file.write(content_bytes)
                
                # Extract the zip file
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    zip_ref.extractall(temp_path)
                
                # Find the extracted directory
                extracted_dirs = [d for d in temp_path.iterdir() if d.is_dir() and d.name.startswith(repo.split('/')[-1])]
                
                if not extracted_dirs:
                    raise FileNotFoundError(f"Could not find extracted {service} documentation directory")
                
                extracted_dir = extracted_dirs[0]
                
                # Clear the target directory
                if target_dir.exists():
                    shutil.rmtree(target_dir)
                target_dir.mkdir(parents=True)
                
                # Copy documentation files
                for item in extracted_dir.iterdir():
                    if item.is_dir():
                        shutil.copytree(item, target_dir / item.name)
                    else:
                        shutil.copy2(item, target_dir / item.name)
                
                # Save metadata
                metadata = {
                    "service": service,
                    "repo": repo,
                    "branch": branch,
                    "fetch_method": "github_archive"
                }
                self.save_cache_metadata(service, metadata)
                
                logger.info(f"Successfully fetched GitHub documentation for {service}")
                return True
                
        except Exception as e:
            logger.error(f"Error fetching GitHub documentation for {service}: {str(e)}")
            return False
    
    def _process_service_html(self, html_content: str, service: str, section: str) -> str:
        """
        Process HTML content from Laravel services to extract documentation.
        Extracts main content from HTML and converts to markdown-like format.
        """
        # Basic HTML content extraction
        processed_content = f"# {service.title()} - {section.title()}\n\n"
        processed_content += f"*Source: {self.laravel_services[service]['base_url']}/{section}*\n\n"
        processed_content += "---\n\n"
        
        try:
            # Try to extract meaningful content from HTML
            content_text = self._extract_html_content(html_content)
            if len(content_text.strip()) > 100:  # Simplified validation for testing
                processed_content += content_text
            else:
                # Log warning but don't create fake content
                logger.warning(f"Content extraction failed for {service}/{section} - content too short or invalid")
                processed_content += f"*Content extraction failed for {service.title()} {section}.*\n"
                processed_content += "*This may indicate a URL redirect or parsing issue.*\n\n"
                processed_content += "*Please visit the official documentation at the source URL above.*\n\n"
                processed_content += f"<!-- Content length: {len(content_text.strip()) if content_text else 0} characters -->\n"
        except Exception as e:
            logger.warning(f"Error processing HTML content for {service}/{section}: {str(e)}")
            processed_content += f"*Content processing error: {str(e)}*\n\n"
            processed_content += "*Please visit the official documentation at the source URL above.*\n\n"
        
        return processed_content
    
    def _extract_html_content(self, html_content: str) -> str:
        """
        Extract readable content from HTML.
        Locates the main content area with BeautifulSoup, then converts it to
        Markdown via _html_to_text (which uses markdownify).
        """
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            logger.warning("BeautifulSoup not installed, using simple extraction")
            # Fallback to just converting the entire HTML
            text_content = self._html_to_text(html_content)
            return text_content[:10000] if len(text_content) > 10000 else text_content
        
        # Parse HTML with BeautifulSoup for better content extraction
        soup = BeautifulSoup(html_content, 'html.parser')
        
        # Try to find main content areas
        content_areas = [
            soup.find('main'),
            soup.find('article'),
            soup.find('div', class_=re.compile(r'prose|content|documentation|markdown', re.I)),
            soup.find('div', id=re.compile(r'content|docs|documentation', re.I)),
            soup.find('section', class_=re.compile(r'content|docs', re.I)),
        ]
        
        # Use the first valid content area found
        content_html = None
        for area in content_areas:
            if area and len(str(area)) > 200:  # Ensure it has substantial content
                content_html = str(area)
                break
        
        # If no specific content area found, use the body
        if not content_html:
            body = soup.find('body')
            content_html = str(body) if body else html_content
        
        # Convert to markdown
        text_content = self._html_to_text(content_html)
        
        # Limit length to prevent extremely long outputs
        if len(text_content) > 10000:
            text_content = text_content[:10000] + "\n\n*[Content truncated for length]*"
        
        return text_content
    
    def _is_valid_content(self, content: str, service: str, section: str) -> bool:
        """
        Validate that extracted content is actually documentation.
        Enhanced version with better quality scoring.
        
        Args:
            content: Extracted text content
            service: Service name (forge, vapor, etc.)
            section: Section name
            
        Returns:
            True if content appears to be valid documentation
        """
        if not content or len(content.strip()) < 200:
            return False
        
        # Check for service-specific keywords that indicate real documentation
        service_keywords = {
            "forge": ["server", "deployment", "laravel", "forge", "provision", "ssh", "nginx", "database"],
            "vapor": ["serverless", "lambda", "aws", "vapor", "deployment", "environment", "queue", "cache"],
            "envoyer": ["zero downtime", "deployment", "envoyer", "rollback", "hook", "notification", "repository"],
            "nova": ["admin", "resource", "nova", "eloquent", "dashboard", "field", "filter", "lens", "action", "metric"]
        }
        
        # Enhanced section-specific keywords
        section_keywords = {
            "introduction": ["overview", "getting started", "what is", "welcome"],
            "installation": ["install", "composer", "requirements", "setup"],
            "deployment": ["deploy", "release", "production", "build"],
            "backups": ["backup", "database", "restore", "snapshot"],
            "ssl": ["certificate", "https", "tls", "ssl", "encryption"],
            "authentication": ["auth", "login", "user", "password", "token"],
            "configuration": ["config", "settings", "environment", "env"],
            "database": ["mysql", "postgresql", "migration", "schema", "query"]
        }
        
        content_lower = content.lower()
        
        # Quality scoring system
        quality_score = 0
        
        # Service keyword matching (higher weight)
        service_matches = 0
        if service in service_keywords:
            service_matches = sum(1 for keyword in service_keywords[service] if keyword in content_lower)
            quality_score += service_matches * 2
        
        # Section keyword matching
        section_base = section.split('/')[-1].replace('-', ' ')
        if section_base in section_keywords:
            section_matches = sum(1 for keyword in section_keywords[section_base] if keyword in content_lower)
            quality_score += section_matches
        
        # Generic documentation indicators
        doc_indicators = [
            "documentation", "guide", "tutorial", "reference", "api", "configuration", 
            "deploy", "server", "application", "framework", "laravel", "php",
            "example", "usage", "method", "class", "function", "parameter"
        ]
        doc_matches = sum(1 for indicator in doc_indicators if indicator in content_lower)
        quality_score += doc_matches
        
        # Structural indicators (signs of well-structured documentation)
        structure_indicators = ["# ", "## ", "### ", "```", "**", "*", "1.", "2.", "-", "•"]
        structure_matches = sum(1 for indicator in structure_indicators if indicator in content)
        quality_score += min(structure_matches, 5)  # Cap at 5 to avoid over-weighting
        
        # Negative indicators (things that suggest this isn't documentation)
        bad_indicators = [
            "search...", "⌘k", "dashboard", "login", "sign in", "register", 
            "404", "not found", "error", "loading...", "please wait",
            "window.location", "redirect", "javascript:", "mailto:",
            "cookie", "privacy policy", "terms of service"
        ]
        bad_matches = sum(1 for indicator in bad_indicators if indicator in content_lower)
        quality_score -= bad_matches * 2
        
        # Length bonus (longer content is generally better documentation)
        if len(content) > 1000:
            quality_score += 2
        elif len(content) > 500:
            quality_score += 1
        
        # Content must have a minimum quality score to be considered valid
        min_score = 3
        is_valid = quality_score >= min_score
        
        if not is_valid:
            logger.debug(f"Content validation failed for {service}/{section}: score {quality_score} < {min_score}")
        
        return is_valid
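    # Worked example of the scoring above (hypothetical Forge page): "server"
    # and "nginx" hit the service keywords (+4), "documentation" and "example"
    # hit the generic indicators (+2), two structural markers such as "# " and
    # "## " add +2, and a body over 1000 characters adds +2 -- a score of 10,
    # well above min_score (3).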
    
    def _html_to_text(self, html_content: str) -> str:
        """
        Convert HTML to Markdown using markdownify.
        """
        try:
            from markdownify import markdownify as md
            from bs4 import BeautifulSoup
        except ImportError:
            logger.error("markdownify not installed. Please install it with: pip install markdownify")
            # Fallback to basic conversion
            return re.sub(r'<[^>]+>', '', html_content)
        
        # Parse HTML and remove script and style elements completely
        soup = BeautifulSoup(html_content, 'html.parser')
        
        # Remove all script and style tags and their contents
        for tag in soup(['script', 'style']):
            tag.decompose()
        
        # Get the cleaned HTML
        cleaned_html = str(soup)
        
        # Convert HTML to Markdown with specific options
        markdown = md(
            cleaned_html,
            strip=['nav', 'header', 'footer', 'aside', 'meta', 'link'],
            heading_style='ATX',  # Use # style headings
            bullets='-',  # Use - for unordered lists
            code_language='',  # Don't assume code language
            escape_asterisks=False,  # Don't escape asterisks
            escape_underscores=False,  # Don't escape underscores
            escape_misc=False,  # Don't escape other special chars
            autolinks=True,  # Convert URLs to links automatically
        )
        
        # Post-process to handle CloudFlare email protection links
        # Replace all email protection links with [email protected]
        markdown = re.sub(
            r'\[\[email protected\]\]\(/cdn-cgi/l/email-protection#[a-f0-9]+\)',
            '[email protected]',
            markdown
        )
        # Handle Support links with email protection
        markdown = re.sub(
            r'\[Support\]\(/cdn-cgi/l/email-protection[^)]+\)',
            'Support',
            markdown
        )
        
        # Remove any remaining inline JavaScript patterns
        markdown = re.sub(r'\(self\.__next_s=self\.__next_s\|\|\[\]\)\.push[^\n]+', '', markdown)
        markdown = re.sub(r'\(function\s+[a-zA-Z]\([^)]*\)\s*\{[^}]+\}\)[^\n]*', '', markdown)
        
        # Remove CSS blocks that might have been left
        markdown = re.sub(r'h1,\s*h2,\s*h3,\s*h4\s*\{[^}]+\}', '', markdown)
        markdown = re.sub(r'\.[a-zA-Z0-9-]+\s*\{[^}]+\}', '', markdown)
        markdown = re.sub(r'#[a-zA-Z0-9-]+\s*>\s*[^{]+\{[^}]+\}', '', markdown)
        
        # Clean up excessive newlines
        markdown = re.sub(r'\n\s*\n\s*\n', '\n\n', markdown)
        
        return markdown.strip()
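    # Rough illustration of the conversion (assumed input):
    #   "<h2>Deployments</h2><ul><li>Push to deploy</li></ul>"
    # becomes, with the ATX/'-' options above, approximately:
    #   "## Deployments\n\n- Push to deploy"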
    
    def fetch_all_services(self, force: bool = False) -> Dict[str, bool]:
        """
        Fetch documentation for all configured Laravel services.
        
        Args:
            force: Force refresh even if cache is valid
            
        Returns:
            Dictionary mapping service names to success status
        """
        results = {}
        
        for service in self.laravel_services.keys():
            if force or not self.is_cache_valid(service):
                results[service] = self.fetch_laravel_service_docs(service)
            else:
                results[service] = True
                logger.debug(f"Skipping {service} (cache valid)")
        
        return results
    
    def list_available_services(self) -> List[str]:
        """List all available Laravel services."""
        return list(self.laravel_services.keys())
    
    def get_service_info(self, service: str) -> Optional[Dict]:
        """Get information about a specific service."""
        return self.laravel_services.get(service)
    
    def _retry_request(self, url: str, headers: Optional[Dict] = None, max_retries: Optional[int] = None) -> bytes:
        """
        Make a request with retry logic and exponential backoff.
        
        Args:
            url: URL to request
            headers: Optional headers to include
            max_retries: Override default max_retries
            
        Returns:
            Response content as bytes
            
        Raises:
            urllib.error.URLError: If all retry attempts fail
        """
        if headers is None:
            headers = {"User-Agent": USER_AGENT}
        
        retries = max_retries if max_retries is not None else self.max_retries
        last_exception: Optional[Exception] = None
        
        for attempt in range(retries + 1):
            try:
                request = urllib.request.Request(url, headers=headers)
                with urllib.request.urlopen(request) as response:
                    return response.read()
            except urllib.error.HTTPError as e:
                last_exception = e
                if e.code == 404:
                    # Don't retry 404 errors
                    raise
                elif e.code == 403 and "rate limit" in str(e.reason).lower():
                    # For rate limiting, wait longer
                    wait_time = min(300, (2 ** attempt) * 5 + random.uniform(0, 5))
                    logger.warning(f"Rate limited on attempt {attempt + 1}/{retries + 1}, waiting {wait_time:.1f}s")
                    time.sleep(wait_time)
                elif e.code >= 500:
                    # Server errors are worth retrying
                    if attempt < retries:
                        wait_time = min(60, (2 ** attempt) + random.uniform(0, 2))
                        logger.warning(f"Server error {e.code} on attempt {attempt + 1}/{retries + 1}, retrying in {wait_time:.1f}s")
                        time.sleep(wait_time)
                    else:
                        raise
                else:
                    # Other HTTP errors shouldn't be retried
                    raise
            except urllib.error.URLError as e:
                last_exception = e
                if attempt < retries:
                    wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
                    logger.warning(f"Network error on attempt {attempt + 1}/{retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
                    time.sleep(wait_time)
                else:
                    raise
            except Exception as e:
                last_exception = e
                if attempt < retries:
                    wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
                    logger.warning(f"Unexpected error on attempt {attempt + 1}/{retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
                    time.sleep(wait_time)
                else:
                    raise
        
        # This should never be reached, but just in case
        if last_exception:
            raise last_exception
        else:
            raise RuntimeError(f"Failed to fetch {url} after {retries + 1} attempts")
class DocsUpdater:
    """Handles downloading and updating Laravel documentation from GitHub."""
    
    def __init__(self, target_dir: Path, version: str = DEFAULT_VERSION):
        """
        Initialize the documentation updater.
        
        Args:
            target_dir: Directory where docs should be stored
            version: Laravel version branch to pull documentation from (e.g., "12.x")
        """
        self.target_dir = target_dir
        self.version = version
        self.github_api_url = GITHUB_API_URL
        self.repo = LARAVEL_DOCS_REPO
        
        # Create version-specific directory
        self.version_dir = target_dir / version
        self.version_dir.mkdir(parents=True, exist_ok=True)
        
        # Create metadata directory if it doesn't exist
        self.metadata_dir = self.version_dir / ".metadata"
        self.metadata_dir.mkdir(exist_ok=True)
        self.metadata_file = self.metadata_dir / "sync_info.json"
    
    def get_latest_commit(self, max_retries: int = 3) -> Dict[str, str]:
        """Get information about the latest commit on the specified branch."""
        logger.debug(f"Getting latest commit info for {self.repo} on branch {self.version}")
        
        url = f"{self.github_api_url}/repos/{self.repo}/branches/{self.version}"
        
        last_exception: Optional[Exception] = None
        for attempt in range(max_retries + 1):
            try:
                request = urllib.request.Request(
                    url,
                    headers={
                        "User-Agent": USER_AGENT,
                        "Accept": "application/vnd.github.v3+json"
                    }
                )
                
                with urllib.request.urlopen(request) as response:
                    data = json.loads(response.read().decode())
                    return {
                        "sha": data["commit"]["sha"],
                        "date": data["commit"]["commit"]["committer"]["date"],
                        "message": data["commit"]["commit"]["message"],
                        "url": data["commit"]["html_url"]
                    }
            except urllib.error.HTTPError as e:
                last_exception = e
                if e.code == 403 and "rate limit" in str(e.reason).lower():
                    if attempt < max_retries:
                        wait_time = min(300, (2 ** attempt) * 30)
                        logger.warning(f"GitHub API rate limit exceeded on attempt {attempt + 1}/{max_retries + 1}, waiting {wait_time}s")
                        time.sleep(wait_time)
                        continue
                    else:
                        logger.error("GitHub API rate limit exceeded. Try again later.")
                        raise
                elif e.code == 404:
                    logger.error(f"Branch {self.version} not found in repository {self.repo}")
                    raise
                else:
                    if attempt < max_retries and e.code >= 500:
                        wait_time = min(60, (2 ** attempt) + random.uniform(0, 2))
                        logger.warning(f"GitHub API error {e.code} on attempt {attempt + 1}/{max_retries + 1}, retrying in {wait_time:.1f}s")
                        time.sleep(wait_time)
                        continue
                    else:
                        logger.error(f"HTTP error {e.code}: {e.reason}")
                        raise
            except Exception as e:
                last_exception = e
                if attempt < max_retries:
                    wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
                    logger.warning(f"Error fetching commit info on attempt {attempt + 1}/{max_retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
                    time.sleep(wait_time)
                else:
                    logger.error(f"Error fetching latest commit info: {str(e)}")
                    raise
        
        # This should never be reached, but just in case
        if last_exception:
            raise last_exception
        else:
            raise RuntimeError(f"Failed to get latest commit after {max_retries + 1} attempts")
    
    def read_local_metadata(self) -> Dict:
        """Read local metadata about the last sync."""
        if not self.metadata_file.exists():
            return {}
        
        try:
            with open(self.metadata_file, 'r') as f:
                return json.load(f)
        except Exception as e:
            logger.warning(f"Error reading metadata file: {str(e)}")
            return {}
    
    def write_local_metadata(self, data: Dict) -> None:
        """Write local metadata about the current sync."""
        try:
            with open(self.metadata_file, 'w') as f:
                json.dump(data, f, indent=2)
        except Exception as e:
            logger.error(f"Error writing metadata file: {str(e)}")
    
    def download_documentation(self) -> Path:
        """
        Download the Laravel documentation as a zip file.
        
        Returns:
            Path to the downloaded and extracted documentation directory
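
        Example (illustrative; the directory contains the branch's markdown files):
            docs_dir = updater.download_documentation()
            page_names = sorted(p.name for p in docs_dir.glob("*.md"))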
        """
        logger.info(f"Downloading documentation for Laravel {self.version}")
        
        # GitHub archive URL for the specific branch
        archive_url = f"https://github.com/{self.repo}/archive/refs/heads/{self.version}.zip"
        
        try:
            # Create the temporary directory manually: TemporaryDirectory's
            # delete= keyword only exists on Python 3.12+, and the extracted
            # files must outlive this method anyway. update() removes the
            # directory once the files have been copied.
            temp_path = Path(tempfile.mkdtemp())
            zip_path = temp_path / "laravel_docs.zip"

            # Download the zip file, retrying with exponential backoff
            logger.debug(f"Downloading from {archive_url}")
            max_retries = 3

            for attempt in range(max_retries + 1):
                try:
                    request = urllib.request.Request(
                        archive_url,
                        headers={"User-Agent": USER_AGENT}
                    )

                    with urllib.request.urlopen(request) as response, open(zip_path, 'wb') as out_file:
                        shutil.copyfileobj(response, out_file)
                    break  # Success, exit retry loop
                except Exception as e:
                    if attempt < max_retries:
                        wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
                        logger.warning(f"Download failed on attempt {attempt + 1}/{max_retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
                        time.sleep(wait_time)
                    else:
                        logger.error(f"Failed to download after {max_retries + 1} attempts: {str(e)}")
                        raise

            # Extract the zip file
            logger.debug(f"Extracting archive to {temp_path}")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(temp_path)

            # Find the extracted directory (GitHub names it like "docs-12.x")
            repo_name = self.repo.split('/')[-1]
            extracted_dirs = [d for d in temp_path.iterdir() if d.is_dir() and d.name.startswith(f"{repo_name}-")]

            if not extracted_dirs:
                raise FileNotFoundError("Could not find extracted documentation directory")

            extracted_dir = extracted_dirs[0]
            logger.debug(f"Found extracted directory: {extracted_dir}")

            # Return the directory containing the branch's markdown files
            return extracted_dir
        except Exception as e:
            logger.error(f"Error downloading documentation: {str(e)}")
            raise
    
    def needs_update(self) -> bool:
        """Check if documentation needs to be updated based on remote commits."""
        try:
            # Get the latest commit info
            latest_commit = self.get_latest_commit()
            
            # Get local metadata
            local_meta = self.read_local_metadata()
            
            # Check if we already have the latest version
            if local_meta.get("version") == self.version and local_meta.get("commit_sha") == latest_commit["sha"]:
                logger.debug("Documentation is already up to date.")
                return False
            
            # If we reach here, an update is needed
            return True
        except Exception as e:
            logger.error(f"Error checking for updates: {str(e)}")
            logger.info("Assuming update is needed due to error")
            return True
    
    def update(self, force: bool = False) -> bool:
        """
        Update the documentation if needed or if forced.
        
        Args:
            force: Force update even if already up to date
            
        Returns:
            True if update was performed, False otherwise
        """
        if not force and not self.needs_update():
            return False
        
        try:
            # Get the latest commit info for metadata
            latest_commit = self.get_latest_commit()
            
            # Download the documentation
            source_dir = self.download_documentation()
            
            # Clear the version directory (except .metadata)
            for item in self.version_dir.iterdir():
                if item.name != ".metadata":
                    if item.is_dir():
                        shutil.rmtree(item)
                    else:
                        item.unlink()
            
            # Copy files to the version directory
            for item in source_dir.iterdir():
                if item.is_dir():
                    shutil.copytree(item, self.version_dir / item.name)
                else:
                    shutil.copy2(item, self.version_dir / item.name)
            
            # Update metadata
            metadata = {
                "version": self.version,
                "commit_sha": latest_commit["sha"],
                "commit_date": latest_commit["date"],
                "commit_message": latest_commit["message"],
                "commit_url": latest_commit["url"],
                "sync_time": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
            }
            self.write_local_metadata(metadata)
            shutil.rmtree(source_dir.parent)  # Remove the temporary directory
            logger.debug(f"Removed temporary directory: {source_dir.parent}")
            
            logger.info(f"Documentation updated successfully to {self.version} ({latest_commit['sha'][:7]})")
            return True
        except Exception as e:
            logger.error(f"Error updating documentation: {str(e)}")
            raise
class CommunityPackageFetcher:
    """Handles fetching documentation from community Laravel packages."""
    
    def __init__(self, target_dir: Path, cache_duration: int = 86400, max_retries: int = 3):
        """
        Initialize the community package documentation fetcher.
        
        Args:
            target_dir: Directory where package docs should be stored
            cache_duration: Cache duration in seconds (default: 24 hours)
            max_retries: Maximum number of retry attempts for failed requests
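
        Example (illustrative usage; a one-hour cache):
            fetcher = CommunityPackageFetcher(Path("./docs"), cache_duration=3600)
            fetcher.fetch_package_docs("livewire")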
        """
        self.target_dir = target_dir
        self.cache_duration = cache_duration
        self.max_retries = max_retries
        self.packages_dir = target_dir / "packages"
        self.packages_dir.mkdir(parents=True, exist_ok=True)
        
        # Community packages documentation sources
        self.community_packages = {
            "spatie": {
                "name": "Spatie Packages",
                "type": DocumentationSourceType.COMMUNITY_PACKAGE,
                "base_url": "https://spatie.be/docs",
                "packages": {
                    "laravel-permission": {
                        "name": "Laravel Permission",
                        "docs_url": "https://spatie.be/docs/laravel-permission/v6/introduction",
                        "version_pattern": r'/v(\d+)/',
                        "sections": [
                            "introduction", "installation-laravel", "basic-usage/basic-usage",
                            "basic-usage/role-permissions", "basic-usage/direct-permissions",
                            "basic-usage/multiple-guards", "basic-usage/teams-permissions",
                            "basic-usage/blade-directives", "basic-usage/artisan",
                            "basic-usage/middleware", "basic-usage/wildcard-permissions",
                            "advanced-usage/cache", "advanced-usage/extending",
                            "advanced-usage/exceptions", "advanced-usage/seeding",
                            "advanced-usage/testing", "api/models", "api/traits"
                        ]
                    },
                    "laravel-medialibrary": {
                        "name": "Laravel Media Library",
                        "docs_url": "https://spatie.be/docs/laravel-medialibrary/v11/introduction",
                        "version_pattern": r'/v(\d+)/',
                        "sections": [
                            "introduction", "installation-setup", "basic-usage/associating-files",
                            "basic-usage/retrieving-media", "converting-images/defining-conversions",
                            "converting-images/retrieving-converted-images", "responsive-images",
                            "downloading-media/downloading-a-single-file", "advanced-usage/using-s3"
                        ]
                    },
                    "laravel-backup": {
                        "name": "Laravel Backup",
                        "docs_url": "https://spatie.be/docs/laravel-backup/v9/introduction",
                        "version_pattern": r'/v(\d+)/',
                        "sections": [
                            "introduction", "installation-and-setup", "backing-up/overview",
                            "backing-up/events", "cleaning-up-old-backups/overview",
                            "sending-notifications/overview", "monitoring-health/overview"
                        ]
                    }
                }
            },
            "livewire": {
                "name": "Livewire",
                "type": DocumentationSourceType.COMMUNITY_PACKAGE,
                "base_url": "https://livewire.laravel.com/docs",
                "sections": [
                    "quickstart", "installation", "components", "properties", "actions",
                    "forms", "lifecycle-hooks", "nesting", "events", "security",
                    "uploads", "downloads", "validation", "pagination",
                    "redirecting", "wire-model", "wire-click", "wire-submit", 
                    "wire-loading", "wire-transition", "wire-poll", "wire-init", 
                    "wire-dirty", "wire-offline", "alpine", "morphing", "teleport",
                    "lazy", "locked", "computed-properties", "url", "navigate", 
                    "offline", "testing", "troubleshooting", "javascript"
                ]
            },
            "inertia": {
                "name": "Inertia.js",
                "type": DocumentationSourceType.GITHUB_REPO,
                "repo": "inertiajs/inertiajs.com",
                "branch": "master",
                "docs_path": "resources/js/Pages",
                "sections": [
                    "how-it-works", "who-is-it-for", "the-protocol",
                    "server-side-setup", "client-side-setup", "pages", "responses", 
                    "redirects", "routing", "title-and-meta", "links", "manual-visits", 
                    "forms", "file-uploads", "validation", "shared-data", "partial-reloads", 
                    "scroll-management", "authentication", "authorization", "csrf-protection",
                    "error-handling", "asset-versioning", "progress-indicators",
                    "remembering-state", "server-side-rendering", "testing"
                ]
            },
            "filament": {
                "name": "Filament",
                "type": DocumentationSourceType.COMMUNITY_PACKAGE,
                "base_url": "https://filamentphp.com/docs",
                "version": "3.x",
                "sections": [
                    "panels/installation", "panels/configuration", "panels/resources/getting-started",
                    "panels/resources/listing-records", "panels/resources/creating-records",
                    "panels/resources/editing-records", "panels/resources/viewing-records",
                    "panels/resources/deleting-records", "panels/resources/custom-pages",
                    "panels/resources/relation-managers", "panels/resources/widgets",
                    "panels/pages", "panels/dashboard", "panels/navigation",
                    "panels/users", "panels/tenancy", "panels/plugins",
                    "forms/fields/getting-started", "forms/fields/text-input",
                    "forms/fields/select", "forms/fields/checkbox", "forms/fields/toggle",
                    "forms/fields/radio", "forms/fields/date-time-picker",
                    "forms/fields/file-upload", "forms/fields/rich-editor",
                    "forms/fields/markdown-editor", "forms/fields/repeater",
                    "forms/fields/builder", "forms/fields/tags-input",
                    "forms/fields/textarea", "forms/fields/key-value",
                    "forms/fields/color-picker", "forms/fields/hidden",
                    "forms/fields/placeholder", "forms/fields/fieldset",
                    "forms/layout/getting-started", "forms/layout/grid",
                    "forms/layout/tabs", "forms/layout/wizard",
                    "forms/validation", "forms/advanced",
                    "tables/columns/getting-started", "tables/columns/text",
                    "tables/columns/icon", "tables/columns/image", "tables/columns/badge",
                    "tables/columns/tags", "tables/columns/toggle",
                    "tables/filters", "tables/actions", "tables/bulk-actions",
                    "tables/summaries", "tables/grouping", "tables/advanced",
                    "actions/overview", "actions/prebuilt-actions", "actions/modals",
                    "notifications/overview", "notifications/sending-notifications",
                    "notifications/database-notifications", "widgets/overview"
                ]
            },
            "debugbar": {
                "name": "Laravel Debugbar",
                "type": DocumentationSourceType.COMMUNITY_PACKAGE,
                "base_url": "https://laraveldebugbar.com",
                "sections": [
                    "installation", "usage", "features", "collectors"
                ]
            },
            "ide-helper": {
                "name": "Laravel IDE Helper",
                "type": DocumentationSourceType.GITHUB_REPO,
                "repo": "barryvdh/laravel-ide-helper",
                "branch": "master",
                "file": "README.md"
            }
        }
    
    def get_package_cache_path(self, package: str, subpackage: Optional[str] = None) -> Path:
        """Get the cache directory path for a package."""
        if subpackage:
            package_dir = self.packages_dir / package / subpackage
        else:
            package_dir = self.packages_dir / package
        package_dir.mkdir(parents=True, exist_ok=True)
        return package_dir
    
    def get_cache_metadata_path(self, package: str, subpackage: Optional[str] = None) -> Path:
        """Get the cache metadata file path for a package."""
        cache_dir = self.get_package_cache_path(package, subpackage)
        return cache_dir / ".metadata" / "cache.json"
    
    def is_cache_valid(self, package: str, subpackage: Optional[str] = None) -> bool:
        """Check if the cache for a package is still valid."""
        metadata_path = self.get_cache_metadata_path(package, subpackage)
        
        if not metadata_path.exists():
            return False
        
        try:
            # Use file modification time instead of stored cache_time
            cache_time = metadata_path.stat().st_mtime
            current_time = time.time()
            
            if current_time - cache_time > self.cache_duration:
                logger.debug(f"Cache expired for {package}/{subpackage or 'all'}")
                return False
            
            return True
        except Exception as e:
            logger.warning(f"Error reading cache metadata for {package}: {str(e)}")
            return False
    
    def fetch_package_docs(self, package: str, force: bool = False) -> bool:
        """
        Fetch documentation for a community package.
        
        Args:
            package: Package name (e.g., spatie, livewire, inertia, filament, debugbar, ide-helper)
            force: Force refresh even if cache is valid
            
        Returns:
            True if successful, False otherwise
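
        Example (illustrative):
            if fetcher.fetch_package_docs("spatie", force=True):
                print("Spatie docs refreshed")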
        """
        if package not in self.community_packages:
            logger.error(f"Unknown package: {package}")
            return False
        
        # Check cache validity
        if not force and self.is_cache_valid(package):
            logger.info(f"Using cached documentation for {package}")
            return True
        
        logger.info(f"Fetching documentation for {package}")
        package_config = self.community_packages[package]
        
        try:
            if package == "spatie":
                return self._fetch_spatie_docs(package_config)
            elif package == "livewire":
                return self._fetch_livewire_docs(package_config)
            elif package == "inertia":
                return self._fetch_inertia_docs(package_config)
            elif package == "filament":
                return self._fetch_filament_docs(package_config)
            elif package == "debugbar":
                return self._fetch_debugbar_docs(package_config)
            elif package == "ide-helper":
                return self._fetch_ide_helper_docs(package_config)
            else:
                logger.error(f"No fetch method implemented for package: {package}")
                return False
        except Exception as e:
            logger.error(f"Error fetching {package} documentation: {str(e)}")
            return False
    
    def _fetch_spatie_docs(self, config: Dict) -> bool:
        """Fetch documentation for Spatie packages."""
        success_count = 0
        packages = config.get("packages", {})
        
        for package_key, package_info in packages.items():
            try:
                logger.info(f"Fetching Spatie {package_info['name']} documentation")
                package_dir = self.get_package_cache_path("spatie", package_key)
                
                base_url = package_info["docs_url"].rsplit('/', 1)[0]
                sections = package_info.get("sections", [])
                
                fetched_sections = 0
                for section in sections:
                    section_url = f"{base_url}/{section}"
                    content = self._fetch_and_process_content(section_url, "spatie", section)
                    
                    if content:
                        # Save the processed content
                        file_path = package_dir / f"{section.replace('/', '-')}.md"
                        file_path.parent.mkdir(parents=True, exist_ok=True)
                        
                        with open(file_path, 'w', encoding='utf-8') as f:
                            f.write(content)
                        
                        fetched_sections += 1
                
                if fetched_sections > 0:
                    # Update cache metadata
                    metadata = {
                        "package": package_key,
                        "name": package_info['name'],
                        "sections_count": fetched_sections,
                        "base_url": base_url
                    }
                    
                    metadata_path = self.get_cache_metadata_path("spatie", package_key)
                    metadata_path.parent.mkdir(parents=True, exist_ok=True)
                    
                    with open(metadata_path, 'w') as f:
                        json.dump(metadata, f, indent=2)
                    
                    success_count += 1
                    logger.info(f"Successfully fetched {fetched_sections} sections for Spatie {package_info['name']}")
                
            except Exception as e:
                logger.error(f"Error fetching Spatie {package_key} documentation: {str(e)}")
        
        return success_count > 0
    
    def _fetch_livewire_docs(self, config: Dict) -> bool:
        """Fetch Livewire documentation."""
        base_url = config["base_url"]
        sections = config.get("sections", [])
        package_dir = self.get_package_cache_path("livewire")
        
        fetched_sections = 0
        for section in sections:
            try:
                section_url = f"{base_url}/{section}"
                content = self._fetch_and_process_content(section_url, "livewire", section)
                
                if content:
                    file_path = package_dir / f"{section}.md"
                    file_path.parent.mkdir(parents=True, exist_ok=True)
                    
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(content)
                    
                    fetched_sections += 1
            except Exception as e:
                logger.warning(f"Error fetching Livewire section {section}: {str(e)}")
        
        if fetched_sections > 0:
            # Update cache metadata
            metadata = {
                "package": "livewire",
                "name": config['name'],
                "sections_count": fetched_sections,
                "base_url": base_url
            }
            
            metadata_path = self.get_cache_metadata_path("livewire")
            metadata_path.parent.mkdir(parents=True, exist_ok=True)
            
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2)
            
            logger.info(f"Successfully fetched {fetched_sections} sections for Livewire")
            return True
        
        return False
    
    def _fetch_inertia_docs(self, config: Dict) -> bool:
        """Fetch Inertia.js documentation from GitHub repository."""
        repo = config["repo"]
        branch = config["branch"]
        docs_path = config["docs_path"]
        sections = config.get("sections", [])
        package_dir = self.get_package_cache_path("inertia")
        
        fetched_sections = 0
        for section in sections:
            try:
                # Map section names to JSX file names
                jsx_filename = f"{section}.jsx"
                github_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{docs_path}/{jsx_filename}"
                
                logger.debug(f"Fetching {section} from {github_url}")
                
                request = urllib.request.Request(
                    github_url,
                    headers={"User-Agent": "Laravel-MCP-Companion/1.0"}
                )
                
                with urllib.request.urlopen(request) as response:
                    jsx_content = response.read().decode('utf-8')
                
                # Extract content from JSX file and convert to markdown
                markdown_content = self._process_jsx_to_markdown(jsx_content, section)
                
                if markdown_content:
                    file_path = package_dir / f"{section}.md"
                    file_path.parent.mkdir(parents=True, exist_ok=True)
                    
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(f"# Inertia - {section.replace('-', ' ').title()}\n\n")
                        f.write(f"Source: https://inertiajs.com/{section}\n\n")
                        f.write(markdown_content)
                    
                    fetched_sections += 1
                    logger.debug(f"Successfully processed {section}")
                else:
                    logger.warning(f"No content extracted from {section}")
                    
            except urllib.error.HTTPError as e:
                if e.code == 404:
                    logger.warning(f"Inertia section {section} not found (404)")
                else:
                    logger.warning(f"HTTP error fetching Inertia section {section}: {e}")
            except Exception as e:
                logger.warning(f"Error fetching Inertia section {section}: {str(e)}")
        
        if fetched_sections > 0:
            # Update cache metadata
            metadata = {
                "package": "inertia",
                "name": config['name'],
                "sections_count": fetched_sections,
                "source_type": "github_repo",
                "repo": repo,
                "branch": branch,
                "docs_path": docs_path
            }
            
            metadata_path = self.get_cache_metadata_path("inertia")
            metadata_path.parent.mkdir(parents=True, exist_ok=True)
            
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2)
            
            logger.info(f"Successfully fetched {fetched_sections} sections for Inertia.js from GitHub")
            return True
        
        return False
    
    def _process_jsx_to_markdown(self, jsx_content: str, section: str) -> Optional[str]:
        """
        Process JSX content and extract documentation text to markdown.
        
        Args:
            jsx_content: Raw JSX file content
            section: Section name for context
            
        Returns:
            Extracted markdown content or None if no content found
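
        Example (illustrative):
            Given '<H1>Forms</H1><P>Submitting forms.</P>', the result is
            roughly "# Forms" followed by "Submitting forms." as a paragraph.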
        """
        try:
            # Remove import statements and React component structure
            content = jsx_content
            
            # Remove imports
            content = re.sub(r'^import\s+.*?from\s+.*?[;\n]', '', content, flags=re.MULTILINE)
            
            # Remove export statements
            content = re.sub(r'^export\s+.*?[;\n]', '', content, flags=re.MULTILINE)
            
            # Extract text content from JSX elements
            text_content = []
            
            # Extract headings (both standard and custom components)
            heading_patterns = [
                r'<[Hh]([1-6])[^>]*>(.*?)</[Hh][1-6]>',  # Standard h1-h6
                r'<H([1-6])[^>]*>(.*?)</H[1-6]>',         # Custom H1-H6 components
            ]
            
            for pattern in heading_patterns:
                headings = re.findall(pattern, content, re.DOTALL)
                for level, text in headings:
                    clean_text = self._clean_jsx_text(text)
                    if clean_text.strip():
                        text_content.append(f"{'#' * int(level)} {clean_text}\n")
            
            # Extract paragraphs (both standard and custom components)
            paragraph_patterns = [
                r'<[Pp][^>]*>(.*?)</[Pp]>',  # Standard p tags
                r'<P[^>]*>(.*?)</P>',        # Custom P components
            ]
            
            for pattern in paragraph_patterns:
                paragraphs = re.findall(pattern, content, re.DOTALL)
                for para in paragraphs:
                    clean_text = self._clean_jsx_text(para)
                    if clean_text.strip():
                        text_content.append(f"{clean_text}\n")
            
            # Extract code blocks (both standard and custom components)
            code_patterns = [
                r'<pre[^>]*><code[^>]*>(.*?)</code></pre>',  # Standard code blocks
                r'<Code[^>]*>(.*?)</Code>',                   # Custom Code components
                r'<code[^>]*>(.*?)</code>',                   # Inline code
            ]
            
            for pattern in code_patterns:
                code_blocks = re.findall(pattern, content, re.DOTALL)
                for code in code_blocks:
                    clean_code = self._clean_jsx_text(code)
                    if clean_code.strip():
                        if '\n' in clean_code:
                            text_content.append(f"```\n{clean_code}\n```\n")
                        else:
                            text_content.append(f"`{clean_code}`")
            
            # Extract list items
            list_items = re.findall(r'<li[^>]*>(.*?)</li>', content, re.DOTALL)
            for item in list_items:
                clean_text = self._clean_jsx_text(item)
                if clean_text.strip():
                    text_content.append(f"- {clean_text}")
            
            # Extract strong/bold text
            strong_patterns = [
                r'<strong[^>]*>(.*?)</strong>',
                r'<Strong[^>]*>(.*?)</Strong>',
                r'<b[^>]*>(.*?)</b>',
            ]
            
            for pattern in strong_patterns:
                strong_texts = re.findall(pattern, content, re.DOTALL)
                for text in strong_texts:
                    clean_text = self._clean_jsx_text(text)
                    if clean_text.strip():
                        text_content.append(f"**{clean_text}**")
            
            # Extract links
            link_patterns = [
                r'<a[^>]*href=["\']([^"\']*)["\'][^>]*>(.*?)</a>',
                r'<A[^>]*href=["\']([^"\']*)["\'][^>]*>(.*?)</A>',
            ]
            
            for pattern in link_patterns:
                links = re.findall(pattern, content, re.DOTALL)
                for href, link_text in links:
                    clean_text = self._clean_jsx_text(link_text)
                    if clean_text.strip():
                        text_content.append(f"[{clean_text}]({href})")
            
            # Extract any remaining text content from string literals (but exclude JSX markup)
            # Look for strings that appear to be documentation content
            string_content = re.findall(r'["`\']([^"`\']{30,})["`\']', content)
            for text in string_content:
                # Skip if it looks like code, imports, JSX, or other non-documentation content
                if not any(pattern in text.lower() for pattern in [
                    'import', 'export', 'from', 'require', 'function', 'const', 'let', 'var', 
                    '===', '!==', '=>', 'return', 'props', 'component', '</', '/>', 'jsx', 'react'
                ]):
                    clean_text = text.strip()
                    if clean_text and len(clean_text.split()) > 5:  # More than five words
                        # Don't add if it's already covered by component extraction
                        if not any(clean_text in existing for existing in text_content):
                            text_content.append(f"{clean_text}\n")
            
            if text_content:
                # Remove duplicates while preserving order
                seen = set()
                unique_content = []
                for item in text_content:
                    item_clean = item.strip()
                    if item_clean and item_clean not in seen:
                        # Skip if it looks like JSX remnants
                        if not any(jsx_marker in item_clean for jsx_marker in ['<', '>', '{', '}', 'return (', '=>', 'export default']):
                            seen.add(item_clean)
                            unique_content.append(item)
                
                return '\n'.join(unique_content)
            else:
                logger.debug(f"No extractable content found in {section} JSX file")
                return None
                
        except Exception as e:
            logger.warning(f"Error processing JSX content for {section}: {str(e)}")
            return None
    
    def _clean_jsx_text(self, text: str) -> str:
        """Clean JSX text content of React syntax and HTML entities."""
        
        # Remove JSX curly braces and expressions
        text = re.sub(r'\{[^}]*\}', '', text)
        
        # Remove HTML tags
        text = re.sub(r'<[^>]*>', '', text)
        
        # Decode HTML entities
        text = html.unescape(text)
        
        # Clean up whitespace
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        
        return text
    
    def _fetch_filament_docs(self, config: Dict) -> bool:
        """Fetch Filament documentation."""
        base_url = config["base_url"]
        version = config.get("version", "3.x")
        sections = config.get("sections", [])
        package_dir = self.get_package_cache_path("filament")
        
        fetched_sections = 0
        for section in sections:
            try:
                section_url = f"{base_url}/{version}/{section}"
                content = self._fetch_and_process_content(section_url, "filament", section)
                
                if content:
                    file_path = package_dir / f"{section.replace('/', '-')}.md"
                    file_path.parent.mkdir(parents=True, exist_ok=True)
                    
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(content)
                    
                    fetched_sections += 1
            except Exception as e:
                logger.warning(f"Error fetching Filament section {section}: {str(e)}")
        
        if fetched_sections > 0:
            # Update cache metadata
            metadata = {
                "package": "filament",
                "name": config['name'],
                "version": version,
                "sections_count": fetched_sections,
                "base_url": base_url
            }
            
            metadata_path = self.get_cache_metadata_path("filament")
            metadata_path.parent.mkdir(parents=True, exist_ok=True)
            
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2)
            
            logger.info(f"Successfully fetched {fetched_sections} sections for Filament")
            return True
        
        return False
    
    def _fetch_debugbar_docs(self, config: Dict) -> bool:
        """Fetch Laravel Debugbar documentation from website."""
        base_url = config["base_url"]
        sections = config.get("sections", [])
        package_dir = self.get_package_cache_path("debugbar")
        
        fetched_sections = 0
        for section in sections:
            try:
                section_url = f"{base_url}/{section}/"
                content = self._fetch_and_process_content(section_url, "debugbar", section)
                
                if content:
                    file_path = package_dir / f"{section}.md"
                    file_path.parent.mkdir(parents=True, exist_ok=True)
                    
                    with open(file_path, 'w', encoding='utf-8') as f:
                        f.write(content)
                    
                    fetched_sections += 1
            except Exception as e:
                logger.warning(f"Error fetching Debugbar section {section}: {str(e)}")
        
        if fetched_sections > 0:
            # Update cache metadata
            metadata = {
                "package": "debugbar",
                "name": config['name'],
                "sections_count": fetched_sections,
                "base_url": base_url
            }
            
            metadata_path = self.get_cache_metadata_path("debugbar")
            metadata_path.parent.mkdir(parents=True, exist_ok=True)
            
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2)
            
            logger.info(f"Successfully fetched {fetched_sections} sections for Laravel Debugbar")
            return True
        
        return False
    
    def _fetch_ide_helper_docs(self, config: Dict) -> bool:
        """Fetch Laravel IDE Helper documentation from GitHub README."""
        repo = config["repo"]
        branch = config.get("branch", "master")
        file = config.get("file", "README.md")
        package_dir = self.get_package_cache_path("ide-helper")
        
        try:
            # Fetch README from GitHub
            github_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{file}"
            
            logger.info(f"Fetching IDE Helper documentation from {github_url}")
            
            request = urllib.request.Request(
                github_url,
                headers={"User-Agent": USER_AGENT}
            )
            
            with urllib.request.urlopen(request) as response:
                content = response.read().decode('utf-8')
            
            # Process the README content
            if content:
                # Add header
                header = f"# {config['name']}\n\n"
                header += f"Source: https://github.com/{repo}\n\n"
                header += "---\n\n"
                
                # Save the processed content
                file_path = package_dir / "readme.md"
                file_path.parent.mkdir(parents=True, exist_ok=True)
                
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(header + content)
                
                # Update cache metadata
                metadata = {
                    "package": "ide-helper",
                    "name": config['name'],
                    "source_type": "github_readme",
                    "repo": repo,
                    "branch": branch,
                    "file": file
                }
                
                metadata_path = self.get_cache_metadata_path("ide-helper")
                metadata_path.parent.mkdir(parents=True, exist_ok=True)
                
                with open(metadata_path, 'w') as f:
                    json.dump(metadata, f, indent=2)
                
                logger.info("Successfully fetched documentation for Laravel IDE Helper")
                return True
                
        except urllib.error.HTTPError as e:
            if e.code == 404:
                logger.error(f"IDE Helper README not found at {github_url}")
            else:
                logger.error(f"HTTP error fetching IDE Helper documentation: {e}")
        except Exception as e:
            logger.error(f"Error fetching IDE Helper documentation: {str(e)}")
        
        return False
    
    def _fetch_and_process_content(self, url: str, package: str, section: str) -> Optional[str]:
        """Fetch and process content from a URL."""
        try:
            # Use markdownify for HTML to Markdown conversion
            from markdownify import markdownify as md
            from bs4 import BeautifulSoup
            
            request = urllib.request.Request(
                url,
                headers={
                    "User-Agent": USER_AGENT,
                    "Accept": "text/html,application/xhtml+xml"
                }
            )
            
            with urllib.request.urlopen(request, timeout=30) as response:
                content_bytes = response.read()
                content = content_bytes.decode('utf-8')
            
            # Parse with BeautifulSoup
            soup = BeautifulSoup(content, 'html.parser')
            
            # Remove navigation, header, footer elements
            for tag in soup.find_all(['nav', 'header', 'footer', 'aside']):
                tag.decompose()
            
            # Remove stats/metrics elements that contain dynamic numbers
            # For Spatie packages, remove elements that contain download counts and issue numbers
            if package == "spatie":
                # Find and remove the stats section that appears at the top of Spatie docs
                # This typically contains Repository, Open Issues, and download counts
                stats_removed = False
                
                # Look for the pattern: Repository -> Open Issues -> large numbers
                for element in soup.find_all(string=re.compile(r'^Repository$', re.I)):
                    # Find the container that holds this stats section
                    container = element.parent
                    while container and container.name not in ['body', 'html', 'main', 'article']:
                        # Check if this container has "Open Issues" and large numbers
                        text_content = container.get_text()
                        if 'Open Issues' in text_content and re.search(r'\d{3,}', text_content):
                            # This looks like the stats container
                            container.decompose()
                            stats_removed = True
                            logger.debug(f"Removed stats container from {url}")
                            break
                        # Try parent container
                        if container.parent and container.parent.name in ['div', 'section', 'aside', 'header']:
                            container = container.parent
                        else:
                            break
                    if stats_removed:
                        break
                
                # Also remove any standalone large numbers that might be stats
                for tag in soup.find_all(string=re.compile(r'^\s*[\d,]+\s*$')):
                    if tag.parent:
                        num_str = str(tag).strip().replace(',', '')
                        try:
                            # Remove numbers larger than 1000 (likely stats, not code examples)
                            if num_str.isdigit() and int(num_str) > 1000:
                                # Don't remove if it's inside a code block
                                if not any(p.name in ['code', 'pre'] for p in tag.parents):
                                    tag.parent.extract()
                        except Exception:
                            pass
                
                # Remove any divs or sections that look like stats containers
                for tag in soup.find_all(['div', 'section'], class_=re.compile(r'stats|metrics|numbers|count', re.I)):
                    tag.decompose()
            
            # Try to find main content area
            main_content = None
            
            # Package-specific selectors
            if package == "inertia":
                # Inertia uses div with id="top" for main content
                main_content = soup.find('div', id='top')
                if not main_content:
                    logger.debug(f"Could not find #top div for Inertia on {url}")
            elif package == "filament":
                # Filament might use different selectors
                main_content = soup.select_one('.docs-content, .prose, main')
            elif package == "debugbar":
                # Debugbar documentation site selectors
                main_content = soup.select_one('.prose, .content, main, article')
            
            # If no package-specific selector worked, try common selectors
            if not main_content:
                content_selectors = [
                    '#top',  # Try #top first as it seems common
                    'main', 'article', '[role="main"]', '.content', '.docs-content',
                    '.documentation', '#content', '.prose', '.markdown-body'
                ]
                
                for selector in content_selectors:
                    main_content = soup.select_one(selector)
                    if main_content:
                        logger.debug(f"Found content using selector: {selector}")
                        break
            
            if not main_content:
                logger.warning(f"Could not find main content area for {url}, using body")
                main_content = soup.find('body') or soup
            
            # Convert to markdown
            markdown_content = md(str(main_content), strip=['a'], code_language='php')
            
            # Clean up the content
            markdown_content = self._clean_markdown_content(markdown_content)
            
            # Check if we got any actual content
            if len(markdown_content.strip()) < 50:
                logger.warning(f"Very little content extracted from {url} (len: {len(markdown_content.strip())})")
                # Log the first 500 characters of the HTML to aid debugging
                html_preview = str(main_content)[:500]
                logger.debug(f"HTML preview: {html_preview}")
            
            # Add metadata header
            header = f"# {package.title()} - {section.replace('-', ' ').title()}\n\n"
            header += f"Source: {url}\n\n"
            
            return header + markdown_content
            
        except Exception as e:
            logger.error(f"Error fetching content from {url}: {str(e)}")
            return None
    
    def _clean_markdown_content(self, content: str) -> str:
        """Clean up markdown content."""
        # Remove excessive blank lines
        content = re.sub(r'\n{3,}', '\n\n', content)
        
        # Fix code blocks
        content = re.sub(r'```\s*\n', '```\n', content)
        
        # Remove CloudFlare email protection links
        content = re.sub(
            r'\[\[email protected\]\]\(/cdn-cgi/l/email-protection#[a-f0-9]+\)',
            '[email protected]',
            content
        )
        
        # Remove trailing whitespace
        content = '\n'.join(line.rstrip() for line in content.split('\n'))
        
        return content.strip()
    
    def list_available_packages(self) -> List[str]:
        """List all available community packages."""
        return list(self.community_packages.keys())
    
    def fetch_all_packages(self, force: bool = False) -> Dict[str, bool]:
        """
        Fetch documentation for all community packages.
        
        Args:
            force: Force refresh even if cache is valid
            
        Returns:
            Dictionary mapping package names to success status
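
        Example (illustrative return value):
            {"spatie": True, "livewire": True, "inertia": False, ...}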
        """
        results = {}
        
        for package in self.list_available_packages():
            logger.info(f"Processing community package: {package}")
            results[package] = self.fetch_package_docs(package, force=force)
        
        # Log summary
        success_count = sum(1 for success in results.values() if success)
        total_count = len(results)
        logger.info(f"Community package documentation update complete: {success_count}/{total_count} packages")
        
        return results
class MultiSourceDocsUpdater:
    """Handles updating documentation from multiple sources including core Laravel and external services."""
    
    def __init__(self, target_dir: Path, version: str = DEFAULT_VERSION):
        """
        Initialize the multi-source documentation updater.
        
        Args:
            target_dir: Directory where all docs should be stored
            version: Laravel version for core documentation
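
        Example (illustrative usage):
            updater = MultiSourceDocsUpdater(Path("./docs"), version="12.x")
            results = updater.update_all()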
        """
        self.target_dir = target_dir
        self.version = version
        
        # Initialize core Laravel docs updater
        self.core_updater = DocsUpdater(target_dir, version)
        
        # Initialize external docs fetcher
        self.external_fetcher = ExternalDocsFetcher(target_dir)
        
        # Initialize community package fetcher
        self.package_fetcher = CommunityPackageFetcher(target_dir)
    
    def update_core_docs(self, force: bool = False) -> bool:
        """Update core Laravel documentation."""
        logger.info("Updating core Laravel documentation")
        return self.core_updater.update(force=force)
    
    def update_external_docs(self, services: Optional[List[str]] = None, force: bool = False) -> Dict[str, bool]:
        """
        Update external Laravel services documentation.
        
        Args:
            services: List of specific services to update. If None, updates all.
            force: Force refresh even if cache is valid
            
        Returns:
            Dictionary mapping service names to success status
        """
        logger.info("Updating external Laravel services documentation")
        
        if services:
            results = {}
            for service in services:
                if service in self.external_fetcher.laravel_services:
                    results[service] = self.external_fetcher.fetch_laravel_service_docs(service)
                else:
                    logger.error(f"Unknown service: {service}")
                    results[service] = False
            return results
        else:
            return self.external_fetcher.fetch_all_services(force=force)
    
    def update_package_docs(self, packages: Optional[List[str]] = None, force: bool = False) -> Dict[str, bool]:
        """
        Update community package documentation.
        
        Args:
            packages: List of specific packages to update. If None, updates all.
            force: Force refresh even if cache is valid
            
        Returns:
            Dictionary mapping package names to success status
        """
        logger.info("Updating community package documentation")
        
        if packages:
            results = {}
            for package in packages:
                if package in self.package_fetcher.community_packages:
                    results[package] = self.package_fetcher.fetch_package_docs(package, force=force)
                else:
                    logger.error(f"Unknown package: {package}")
                    results[package] = False
            return results
        else:
            return self.package_fetcher.fetch_all_packages(force=force)
    
    def update_all(self, force_core: bool = False, force_external: bool = False, force_packages: bool = False) -> Dict[str, object]:
        """
        Update all documentation sources.
        
        Args:
            force_core: Force update of core documentation
            force_external: Force update of external documentation
            force_packages: Force update of community packages
            
        Returns:
            Dictionary with results for core, external, and package updates
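
        Example (illustrative shape; actual service and package keys vary):
            {"core": True, "external": {"<service>": True, ...}, "packages": {"livewire": True, ...}}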
        """
        logger.info("Starting comprehensive documentation update")
        
        results: Dict[str, object] = {
            "core": False,
            "external": {},
            "packages": {}
        }
        
        try:
            # Update core Laravel documentation
            results["core"] = self.update_core_docs(force=force_core)
            
            # Update external services documentation
            results["external"] = self.update_external_docs(force=force_external)
            
            # Update community package documentation
            results["packages"] = self.update_package_docs(force=force_packages)
            
            # Log summary
            core_status = "updated" if results["core"] else "up-to-date"
            external_results = results["external"]
            package_results = results["packages"]
            
            if isinstance(external_results, dict):
                external_count = sum(1 for success in external_results.values() if success)
                total_external = len(external_results)
            else:
                external_count = 0
                total_external = 0
            
            if isinstance(package_results, dict):
                package_count = sum(1 for success in package_results.values() if success)
                total_packages = len(package_results)
            else:
                package_count = 0
                total_packages = 0
            
            logger.info(f"Documentation update complete: Core {core_status}, External {external_count}/{total_external} services, Packages {package_count}/{total_packages}")
            
        except Exception as e:
            logger.error(f"Error during comprehensive documentation update: {str(e)}")
        
        return results
    
    def get_all_documentation_status(self) -> Dict[str, Dict]:
        """Get status information for all documentation sources."""
        status: Dict[str, Dict] = {
            "core": {},
            "external": {},
            "packages": {}
        }
        
        # Get core documentation status
        try:
            core_metadata = self.core_updater.read_local_metadata()
            status["core"] = {
                "version": self.version,
                "available": bool(core_metadata),
                "last_updated": core_metadata.get("sync_time", "unknown"),
                "commit_sha": core_metadata.get("commit_sha", "unknown")
            }
        except Exception as e:
            status["core"] = {"error": str(e)}
        
        # Get external documentation status
        for service in self.external_fetcher.list_available_services():
            try:
                cache_valid = self.external_fetcher.is_cache_valid(service)
                service_info = self.external_fetcher.get_service_info(service)
                if service_info is None:
                    continue
                
                # Try to read cache metadata
                metadata_path = self.external_fetcher.get_cache_metadata_path(service)
                if metadata_path.exists():
                    try:
                        with open(metadata_path, 'r') as f:
                            metadata = json.load(f)
                    except Exception:
                        metadata = {}
                else:
                    metadata = {}
                
                status["external"][service] = {
                    "name": service_info.get("name", service),
                    "type": service_info.get("type", "unknown").value if hasattr(service_info.get("type"), 'value') else str(service_info.get("type", "unknown")),
                    "cache_valid": cache_valid,
                    "last_fetched": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(metadata_path.stat().st_mtime)) if metadata_path.exists() else "never",
                    "success_rate": metadata.get("success_rate", "unknown")
                }
            except Exception as e:
                status["external"][service] = {"error": str(e)}
        
        # Get community package documentation status
        for package in self.package_fetcher.list_available_packages():
            try:
                cache_valid = self.package_fetcher.is_cache_valid(package)
                package_info = self.package_fetcher.community_packages.get(package, {})
                
                # Try to read cache metadata
                metadata_path = self.package_fetcher.get_cache_metadata_path(package)
                if metadata_path.exists():
                    try:
                        with open(metadata_path, 'r') as f:
                            metadata = json.load(f)
                    except Exception:
                        metadata = {}
                else:
                    metadata = {}
                
                # Get the type value safely
                package_type = package_info.get("type", DocumentationSourceType.COMMUNITY_PACKAGE)
                if hasattr(package_type, 'value'):
                    type_value = package_type.value
                else:
                    type_value = str(package_type) if package_type else "community_package"
                
                status["packages"][package] = {
                    "name": package_info.get("name", package),
                    "type": type_value,
                    "cache_valid": cache_valid,
                    "last_fetched": metadata.get("cache_time", "never"),
                    "sections_count": metadata.get("sections_count", 0)
                }
                
                # For Spatie, include sub-packages
                if package == "spatie" and "packages" in package_info:
                    status["packages"][package]["sub_packages"] = {}
                    packages_dict = package_info.get("packages", {})
                    if isinstance(packages_dict, dict):
                        for sub_pkg, sub_info in packages_dict.items():
                            sub_metadata_path = self.package_fetcher.get_cache_metadata_path("spatie", sub_pkg)
                            if sub_metadata_path.exists():
                                try:
                                    with open(sub_metadata_path, 'r') as f:
                                        sub_metadata = json.load(f)
                                    status["packages"][package]["sub_packages"][sub_pkg] = {
                                        "name": sub_info.get("name", sub_pkg),
                                        "sections_count": sub_metadata.get("sections_count", 0)
                                    }
                                except Exception:
                                    # Skip sub-packages whose cached metadata cannot be read
                                    pass
                
            except Exception as e:
                status["packages"][package] = {"error": str(e)}
        
        return status
    
    def needs_update(self, check_external: bool = True, check_packages: bool = True) -> Dict[str, Union[bool, Dict[str, bool]]]:
        """
        Check which documentation sources need updating.
        
        Args:
            check_external: Whether to check external services
            check_packages: Whether to check community packages
            
        Returns:
            Dictionary indicating which sources need updates
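            e.g. {"core": True, "external": {"<service>": False}, "packages": {"<package>": True}}
            (the service/package keys shown here are illustrative placeholders)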
        """
        needs_update: Dict[str, Union[bool, Dict[str, bool]]] = {
            "core": False,
            "external": {},
            "packages": {}
        }
        
        # Check core documentation
        try:
            needs_update["core"] = self.core_updater.needs_update()
        except Exception as e:
            logger.warning(f"Error checking core documentation update status: {str(e)}")
            needs_update["core"] = True
        
        # Check external documentation
        if check_external:
            external_dict = needs_update["external"]
            if isinstance(external_dict, dict):
                for service in self.external_fetcher.list_available_services():
                    try:
                        external_dict[service] = not self.external_fetcher.is_cache_valid(service)
                    except Exception as e:
                        logger.warning(f"Error checking {service} documentation status: {str(e)}")
                        external_dict[service] = True
        
        # Check community package documentation
        if check_packages:
            packages_dict = needs_update["packages"]
            if isinstance(packages_dict, dict):
                for package in self.package_fetcher.list_available_packages():
                    try:
                        packages_dict[package] = not self.package_fetcher.is_cache_valid(package)
                    except Exception as e:
                        logger.warning(f"Error checking {package} documentation status: {str(e)}")
                        packages_dict[package] = True
        
        return needs_update
def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description="Laravel Documentation Updater"
    )
    parser.add_argument(
        "--target-dir", 
        type=str,
        default="./docs",
        help="Path to store documentation (default: ./docs)"
    )
    parser.add_argument(
        "--version",
        type=str,
        default=DEFAULT_VERSION,
        help=f"Laravel version branch to use (default: {DEFAULT_VERSION}). Supported: {', '.join(SUPPORTED_VERSIONS)}"
    )
    parser.add_argument(
        "--all-versions",
        action="store_true",
        help="Update documentation for all supported versions"
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force update even if already up to date"
    )
    parser.add_argument(
        "--check-only",
        action="store_true",
        help="Only check if update is needed, don't perform update"
    )
    parser.add_argument(
        "--update",
        action="store_true",
        help="Update all documentation (Laravel core, services, and community packages)"
    )
    parser.add_argument(
        "--external-only",
        action="store_true",
        help="Only update external Laravel services documentation (deprecated: use --update)"
    )
    parser.add_argument(
        "--core-only",
        action="store_true",
        help="Only update core Laravel documentation (deprecated: use --update)"
    )
    parser.add_argument(
        "--packages-only",
        action="store_true",
        help="Only update community package documentation (deprecated: use --update)"
    )
    parser.add_argument(
        "--services",
        type=str,
        nargs="+",
        help="Specific Laravel services to update (deprecated: use --update)"
    )
    parser.add_argument(
        "--packages",
        type=str,
        nargs="+",
        help="Specific community packages to update (deprecated: use --update)"
    )
    parser.add_argument(
        "--list-services",
        action="store_true",
        help="List all available Laravel services"
    )
    parser.add_argument(
        "--list-packages",
        action="store_true",
        help="List all available community packages"
    )
    parser.add_argument(
        "--status",
        action="store_true",
        help="Show status of all documentation sources"
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="INFO",
        help="Logging level (default: INFO)"
    )
    
    return parser.parse_args()
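# Example invocations (illustrative; uses only the flags defined above):
#   python docs_updater.py --update                     # refresh core, services, and packages
#   python docs_updater.py --check-only --version 11.x  # exit 1 if anything needs updating
#   python docs_updater.py --status --log-level DEBUG   # print per-source cache status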
def update_version(target_dir: Path, version: str, force: bool, check_only: bool) -> tuple[bool, bool]:
    """Update documentation for a single version.
    
    Returns:
        (success, updated): success indicates if operation completed without error,
                           updated indicates if files were actually updated
    """
    try:
        updater = DocsUpdater(target_dir, version)
        
        if check_only:
            needs_update = updater.needs_update()
            logger.info(f"Version {version}: {'needs' if needs_update else 'does not need'} updating.")
            return True, needs_update
        else:
            updated = updater.update(force=force)
            if updated:
                logger.info(f"Version {version}: Updated successfully")
            else:
                logger.info(f"Version {version}: Already up to date")
            return True, updated
    except Exception as e:
        logger.error(f"Version {version}: Update failed - {str(e)}")
        return False, False
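# Example (illustrative): update core docs for a single version and inspect the result.
#   success, updated = update_version(Path("./docs"), "12.x", force=False, check_only=False)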
def handle_update_command(args, updater):
    """Handle the unified --update command - updates everything."""
    # Just update all documentation
    results = updater.update_all(force_core=args.force, force_external=args.force, force_packages=args.force)
    
    core_success = results["core"]
    external_results = results["external"]
    package_results = results.get("packages", {})
    external_success_count = sum(1 for success in external_results.values() if success)
    external_total = len(external_results)
    package_success_count = sum(1 for success in package_results.values() if success)
    package_total = len(package_results)
    
    logger.info(f"Complete documentation update: Core {'successful' if core_success else 'failed'}, External {external_success_count}/{external_total}, Packages {package_success_count}/{package_total}")
    
    # Treat the run as successful if core succeeded and at least one external
    # service or package succeeded (empty result sets also count as success)
    overall_success = core_success and ((external_success_count > 0 or external_total == 0) or (package_success_count > 0 or package_total == 0))
    return 0 if overall_success else 1
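# Note: the 0/1 value returned here propagates through main() to the process
# exit code via sys.exit(main()) at the bottom of this module.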
def main():
    """Main entry point for the Laravel Docs Updater."""
    args = parse_arguments()
    
    # Set logging level
    logger.setLevel(getattr(logging, args.log_level))
    
    # Create target directory if it doesn't exist
    target_dir = Path(args.target_dir).resolve()
    target_dir.mkdir(parents=True, exist_ok=True)
    
    # Initialize multi-source updater
    updater = MultiSourceDocsUpdater(target_dir, args.version)
    
    try:
        # Handle list services command
        if args.list_services:
            services = updater.external_fetcher.list_available_services()
            print("Available Laravel Services:")
            for service in services:
                info = updater.external_fetcher.get_service_info(service)
                print(f"  {service}: {info.get('name', service)}")
            return 0
        
        # Handle list packages command
        if args.list_packages:
            packages = updater.package_fetcher.list_available_packages()
            print("Available Community Packages:")
            for package in packages:
                info = updater.package_fetcher.community_packages.get(package, {})
                print(f"  {package}: {info.get('name', package)}")
                # Show sub-packages for Spatie
                if package == "spatie" and "packages" in info:
                    for sub_pkg, sub_info in info["packages"].items():
                        print(f"    - {sub_pkg}: {sub_info.get('name', sub_pkg)}")
            return 0
        
        # Handle status command
        if args.status:
            status = updater.get_all_documentation_status()
            print("Documentation Status:")
            print(f"\nCore Laravel Documentation ({args.version}):")
            core_status = status["core"]
            if "error" in core_status:
                print(f"  Error: {core_status['error']}")
            else:
                print(f"  Available: {core_status.get('available', False)}")
                print(f"  Last Updated: {core_status.get('last_updated', 'unknown')}")
                print(f"  Commit: {core_status.get('commit_sha', 'unknown')[:7]}")
            
            print("\nExternal Services:")
            for service, info in status["external"].items():
                if "error" in info:
                    print(f"  {service}: Error - {info['error']}")
                else:
                    print(f"  {service} ({info.get('name', service)}):")
                    print(f"    Cache Valid: {info.get('cache_valid', False)}")
                    print(f"    Type: {info.get('type', 'unknown')}")
                    success_rate = info.get('success_rate', 'unknown')
                    if isinstance(success_rate, (int, float)):
                        print(f"    Success Rate: {success_rate:.1%}")
                    
                    # Show auto-discovery status if available
                    metadata_path = updater.external_fetcher.get_cache_metadata_path(service)
                    if metadata_path.exists():
                        try:
                            with open(metadata_path, 'r') as f:
                                metadata = json.load(f)
                            if metadata.get('auto_discovery_enabled'):
                                discovery_method = metadata.get('discovery_method', 'unknown')
                                discovered_count = metadata.get('discovered_count', 0)
                                print(f"    Auto-Discovery: ✅ {discovery_method} ({discovered_count} sections)")
                                if metadata.get('manual_fallback'):
                                    print("    Fallback: Used manual configuration (auto-discovery failed)")
                            else:
                                print("    Auto-Discovery: ❌ disabled (using manual configuration)")
                        except Exception:
                            pass
            
            print("\nCommunity Packages:")
            for package, info in status["packages"].items():
                if "error" in info:
                    print(f"  {package}: Error - {info['error']}")
                else:
                    print(f"  {package} ({info.get('name', package)}):")
                    print(f"    Cache Valid: {info.get('cache_valid', False)}")
                    print(f"    Sections: {info.get('sections_count', 0)}")
                    
                    # Show sub-packages for Spatie
                    if package == "spatie" and "sub_packages" in info:
                        print("    Sub-packages:")
                        for sub_pkg, sub_info in info["sub_packages"].items():
                            print(f"      - {sub_info.get('name', sub_pkg)}: {sub_info.get('sections_count', 0)} sections")
            
            return 0
        
        # Validate version if not updating all
        if not args.all_versions and args.version not in SUPPORTED_VERSIONS:
            logger.error(f"Unsupported version: {args.version}. Supported versions: {', '.join(SUPPORTED_VERSIONS)}")
            return 1
        
        # Handle check-only command
        if args.check_only:
            needs_update = updater.needs_update()
            print("Update Status:")
            print(f"Core Laravel ({args.version}): {'needs update' if needs_update['core'] else 'up to date'}")
            print("External Services:")
            for service, needs in needs_update["external"].items():
                print(f"  {service}: {'needs update' if needs else 'up to date'}")
            print("Community Packages:")
            for package, needs in needs_update["packages"].items():
                print(f"  {package}: {'needs update' if needs else 'up to date'}")
            
            # Return 1 if any updates needed, 0 if all up to date
            any_needs_update = needs_update["core"] or any(needs_update["external"].values()) or any(needs_update["packages"].values())
            return 1 if any_needs_update else 0
        
        # Handle new unified --update parameter
        if args.update:
            return handle_update_command(args, updater)
        
        # Handle deprecated parameters - all now just update everything
        if args.external_only or args.core_only or args.packages_only or args.packages or args.services:
            logger.warning("Deprecated parameter used. Please use --update instead.")
            return handle_update_command(args, updater)
        
        # Handle --all-versions: update core documentation for every supported version
        if args.all_versions:
            all_success = True
            
            for version in SUPPORTED_VERSIONS:
                logger.info(f"Processing version {version}...")
                version_updater = MultiSourceDocsUpdater(target_dir, version)
                success = version_updater.update_core_docs(force=args.force)
                
                if not success:
                    all_success = False
            
            return 0 if all_success else 1
        
        else:
            # Default: no explicit mode given, so update everything
            # (identical to the unified --update path)
            return handle_update_command(args, updater)
                
    except KeyboardInterrupt:
        logger.info("Operation cancelled by user")
        return 130
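# Exit codes: 0 = success (or everything up to date with --check-only),
# 1 = failure (or updates pending with --check-only), 130 = interrupted (128 + SIGINT).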
if __name__ == "__main__":
    sys.exit(main())