#!/usr/bin/env python3
"""
Laravel Documentation Updater
This module handles automatic fetching and updating of Laravel documentation
from the official GitHub repository.
"""
import sys
import logging
import argparse
import shutil
import tempfile
import re
from pathlib import Path
from typing import Dict, List, Optional, Union
import urllib.request
import urllib.error
import zipfile
import json
import time
from enum import Enum
import random
import html
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger("laravel-docs-updater")
# GitHub API URLs
GITHUB_API_URL = "https://api.github.com"
LARAVEL_DOCS_REPO = "laravel/docs"
USER_AGENT = "Laravel-MCP-Companion (+https://github.com/brianirish/laravel-mcp-companion)"
def get_supported_versions() -> list[str]:
"""Get supported Laravel versions dynamically from GitHub API.
Returns:
List of supported version branches (e.g., ['6.x', '7.x', '8.x', ...])
"""
logger.debug("Fetching supported Laravel versions from GitHub API")
url = f"{GITHUB_API_URL}/repos/{LARAVEL_DOCS_REPO}/branches"
try:
request = urllib.request.Request(
url,
headers={
"User-Agent": USER_AGENT,
"Accept": "application/vnd.github.v3+json"
}
)
with urllib.request.urlopen(request) as response:
branches = json.loads(response.read().decode())
# Filter for version branches (X.x format) starting from 6.x
version_branches = []
for branch in branches:
name = branch["name"]
if re.match(r'^\d+\.x$', name):
major_version = int(name.split('.')[0])
if major_version >= 6:
version_branches.append(name)
# Sort versions numerically
version_branches.sort(key=lambda v: int(v.split('.')[0]))
if not version_branches:
logger.warning("No version branches found, falling back to hardcoded list")
return ["6.x", "7.x", "8.x", "9.x", "10.x", "11.x", "12.x"]
logger.debug(f"Found {len(version_branches)} supported versions: {', '.join(version_branches)}")
return version_branches
except Exception as e:
logger.warning(f"Error fetching versions from GitHub API: {str(e)}, falling back to hardcoded list")
return ["6.x", "7.x", "8.x", "9.x", "10.x", "11.x", "12.x"]
# Cache supported versions to avoid repeated API calls
_SUPPORTED_VERSIONS_CACHE = None
def get_cached_supported_versions() -> list[str]:
"""Get cached supported versions or fetch them if not cached."""
global _SUPPORTED_VERSIONS_CACHE
if _SUPPORTED_VERSIONS_CACHE is None:
_SUPPORTED_VERSIONS_CACHE = get_supported_versions()
return _SUPPORTED_VERSIONS_CACHE
SUPPORTED_VERSIONS = get_cached_supported_versions()
DEFAULT_VERSION = SUPPORTED_VERSIONS[-1] # Always use the latest version as default
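# Illustrative sketch of what these constants typically evaluate to at import time
# (the actual values depend on the GitHub API response at runtime):
#
#   SUPPORTED_VERSIONS  # e.g. ["6.x", "7.x", "8.x", "9.x", "10.x", "11.x", "12.x"]
#   DEFAULT_VERSION     # e.g. "12.x" (last element after the numeric sort)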
class DocumentationSourceType(Enum):
"""Types of documentation sources supported."""
GITHUB_REPO = "github_repo"
DIRECT_URL = "direct_url"
LARAVEL_SERVICE = "laravel_service"
COMMUNITY_PACKAGE = "community_package"
class DocumentationAutoDiscovery:
"""Handles automatic discovery of documentation sections from Laravel services."""
def __init__(self, max_retries: int = 3, request_delay: float = 1.0):
"""
Initialize the documentation auto-discovery system.
Args:
max_retries: Maximum number of retry attempts for failed requests
request_delay: Base delay in seconds used for exponential backoff between retry attempts
"""
self.max_retries = max_retries
self.request_delay = request_delay
# Common asset file extensions and patterns to exclude
self.asset_extensions = {'.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.woff', '.woff2', '.ttf', '.eot'}
self.asset_patterns = {'/_next/', '/static/', '/assets/', '/images/', '/fonts/', '/favicon'}
def discover_sections(self, service: str, service_config: Dict) -> List[str]:
"""
Discover documentation sections for a given service.
Args:
service: Service name (forge, vapor, envoyer, nova)
service_config: Service configuration dictionary
Returns:
List of discovered section paths
"""
if not service_config.get("auto_discovery", False):
logger.debug(f"Auto-discovery disabled for {service}")
return []
discovery_rules = service_config.get("discovery_rules", {})
discovered_sections = []
try:
logger.info(f"Starting auto-discovery for {service}")
if service == "forge":
discovered_sections = self._discover_forge_sections(service_config, discovery_rules)
elif service == "nova":
discovered_sections = self._discover_nova_sections(service_config, discovery_rules)
elif service == "vapor":
discovered_sections = self._discover_vapor_sections(service_config, discovery_rules)
elif service == "envoyer":
discovered_sections = self._discover_envoyer_sections(service_config, discovery_rules)
else:
logger.warning(f"No discovery method available for service: {service}")
logger.info(f"Auto-discovery completed for {service}: found {len(discovered_sections)} sections")
return discovered_sections
except Exception as e:
logger.error(f"Error during auto-discovery for {service}: {str(e)}")
return []
def _is_asset_file(self, path: str) -> bool:
"""
Check if a path represents an asset file (CSS, JS, images, etc.).
Args:
path: URL path to check
Returns:
True if the path is an asset file, False otherwise
"""
# Remove query parameters for extension check
clean_path = path.split('?')[0].lower()
# Check if path contains common asset directories
for pattern in self.asset_patterns:
if pattern in clean_path:
return True
# Check file extension
for ext in self.asset_extensions:
if clean_path.endswith(ext):
return True
return False
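# Illustrative examples of the asset filter above (hypothetical inputs, not taken
# from a live crawl):
#
#   self._is_asset_file("/docs/app.css?v=123")     # True  (.css extension)
#   self._is_asset_file("/_next/static/chunk.js")  # True  (asset directory)
#   self._is_asset_file("/docs/servers/php")       # False (documentation page)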
def _discover_forge_sections(self, config: Dict, rules: Dict) -> List[str]:
"""Discover Forge documentation sections by parsing the docs index page."""
base_url = config["base_url"]
sections = []
try:
# Fetch the main docs page
content_bytes = self._retry_request(f"{base_url}")
content = content_bytes.decode('utf-8')
# Extract href="/docs/*" links using regex
doc_links = re.findall(r'href="(/docs/[^"]*)"', content, re.IGNORECASE)
for link in doc_links:
# Remove query parameters if present
clean_link = link.split('?')[0]
# Check if this is an asset file (CSS, JS, images, etc.)
if self._is_asset_file(clean_link):
continue
# Remove the /docs/ prefix to get the section name
section = clean_link.replace('/docs/', '')
if section and section not in sections:
sections.append(section)
# Sort sections to maintain consistent ordering
sections.sort()
logger.debug(f"Discovered {len(sections)} Forge sections")
except Exception as e:
logger.warning(f"Error discovering Forge sections: {str(e)}")
return sections
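# Sketch of the Forge link-to-section transformation above, assuming the index page
# contains a link such as href="/docs/servers/php?foo=bar":
#
#   "/docs/servers/php?foo=bar" -> "/docs/servers/php" -> "servers/php"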
def _discover_nova_sections(self, config: Dict, rules: Dict) -> List[str]:
"""Discover Nova documentation sections and auto-detect version."""
base_url = config["base_url"]
sections = []
try:
# First, try to detect the latest version
nova_base = "https://nova.laravel.com/docs"
# Try to find version links or check if current version is still valid
content_bytes = self._retry_request(f"{nova_base}")
content = content_bytes.decode('utf-8')
# Look for version links like /docs/v6, /docs/v5, etc.
version_matches = re.findall(r'/docs/(v\d+)', content)
if version_matches:
latest_version = max(version_matches, key=lambda v: int(v[1:]))
actual_base_url = f"{nova_base}/{latest_version}"
logger.info(f"Auto-detected Nova version: {latest_version}")
else:
actual_base_url = base_url
# Fetch the navigation/index page
nav_content_bytes = self._retry_request(f"{actual_base_url}")
nav_content = nav_content_bytes.decode('utf-8')
# Extract navigation links - Nova typically uses relative links
nav_links = re.findall(r'href="(/docs/[^"]*)"', nav_content, re.IGNORECASE)
for link in nav_links:
# Extract section after version (e.g., /docs/v5/installation -> installation)
section_match = re.search(r'/docs/v\d+/(.+)', link)
if section_match:
section = section_match.group(1)
if section and section not in sections:
sections.append(section)
sections.sort()
logger.debug(f"Discovered {len(sections)} Nova sections")
except Exception as e:
logger.warning(f"Error discovering Nova sections: {str(e)}")
return sections
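# Sketch of the Nova version detection above, assuming the docs index links to both
# /docs/v4 and /docs/v5 (hypothetical input):
#
#   version_matches = ["v4", "v5"]
#   max(version_matches, key=lambda v: int(v[1:]))  # -> "v5"
#   "/docs/v5/installation" is then reduced to the section name "installation"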
def _discover_vapor_sections(self, config: Dict, rules: Dict) -> List[str]:
"""Discover Vapor documentation sections by parsing Mintlify navigation."""
base_url = config["base_url"]
sections = []
try:
# Vapor uses Mintlify, which often has a special navigation structure
content_bytes = self._retry_request(f"{base_url}")
content = content_bytes.decode('utf-8')
# Look for Mintlify navigation patterns
# Try multiple patterns that Mintlify commonly uses
nav_patterns = [
r'href="(/[^"]*)"[^>]*>([^<]+)</a>', # General link pattern
r'"href":"(/[^"]*)"', # JSON-style navigation
r'data-href="(/[^"]*)"', # Data attribute pattern
]
for pattern in nav_patterns:
links = re.findall(pattern, content, re.IGNORECASE)
for link in links:
if isinstance(link, tuple):
path = link[0]
else:
path = link
# Filter for documentation paths (exclude external links, assets, etc.)
if (path.startswith('/') and
not path.startswith('//') and
not self._is_asset_file(path) and
path != '/'):
section = path.lstrip('/')
if section and section not in sections:
sections.append(section)
# Remove duplicates and sort
sections = list(set(sections))
sections.sort()
logger.debug(f"Discovered {len(sections)} Vapor sections")
except Exception as e:
logger.warning(f"Error discovering Vapor sections: {str(e)}")
return sections
def _discover_envoyer_sections(self, config: Dict, rules: Dict) -> List[str]:
"""Discover Envoyer documentation sections, handling category redirects."""
base_url = config["base_url"]
sections = []
try:
# Fetch the main docs page
content_bytes = self._retry_request(f"{base_url}")
content = content_bytes.decode('utf-8')
# Extract documentation links
doc_links = re.findall(r'href="(/docs/[^"]*)"', content, re.IGNORECASE)
for link in doc_links:
# Remove the /docs/ prefix
section = link.replace('/docs/', '')
if section and section not in sections:
# Test if this is a real page (not a redirect)
try:
test_url = f"{base_url}/{section}"
test_content_bytes = self._retry_request(test_url)
test_content = test_content_bytes.decode('utf-8')
# Check if this is actual documentation content
if self._is_valid_envoyer_content(test_content, section):
sections.append(section)
else:
logger.debug(f"Skipping {section} - appears to be redirect or invalid content")
except Exception as test_e:
logger.debug(f"Skipping {section} - error testing content: {str(test_e)}")
continue
sections.sort()
logger.debug(f"Discovered {len(sections)} Envoyer sections")
except Exception as e:
logger.warning(f"Error discovering Envoyer sections: {str(e)}")
return sections
def _is_valid_envoyer_content(self, content: str, section: str) -> bool:
"""Check if Envoyer content is actual documentation (not redirect page)."""
# Look for common documentation indicators
doc_indicators = [
'envoyer', 'deployment', 'zero downtime', 'project',
'server', 'hook', 'notification', 'repository'
]
# Look for redirect indicators (things that suggest this isn't real content)
redirect_indicators = [
'window.location', 'http-equiv="refresh"', 'redirecting',
'please wait', 'loading...', 'not found'
]
content_lower = content.lower()
# Check for documentation indicators
doc_score = sum(1 for indicator in doc_indicators if indicator in content_lower)
# Check for redirect indicators
redirect_score = sum(1 for indicator in redirect_indicators if indicator in content_lower)
# Must have some documentation indicators and minimal redirect indicators
return doc_score >= 1 and redirect_score == 0 and len(content.strip()) > 500
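# Worked example of the heuristic above (hypothetical content): a page of ~2,000
# characters mentioning "envoyer" and "deployment" scores doc_score=2 and
# redirect_score=0, so it is treated as real documentation; a short page containing
# "window.location" fails on the redirect check and the length threshold.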
def _retry_request(self, url: str, headers: Optional[Dict] = None) -> bytes:
"""
Make a request with retry logic and respectful delays.
Args:
url: URL to request
headers: Optional headers to include
Returns:
Response content as bytes
"""
if headers is None:
headers = {"User-Agent": USER_AGENT}
last_exception: Optional[Exception] = None
for attempt in range(self.max_retries + 1):
try:
# Back off before retrying (no delay is applied on the first attempt)
if attempt > 0:
time.sleep(self.request_delay * (2 ** (attempt - 1)))
request = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(request) as response:
return response.read()
except urllib.error.HTTPError as e:
last_exception = e
if e.code == 404:
# Don't retry 404 errors
raise
elif e.code == 429 or (e.code == 403 and "rate limit" in str(e.reason).lower()):
# Rate limiting - wait longer
if attempt < self.max_retries:
wait_time = min(300, (2 ** attempt) * 10 + random.uniform(0, 5))
logger.warning(f"Rate limited on attempt {attempt + 1}, waiting {wait_time:.1f}s")
time.sleep(wait_time)
else:
raise
elif e.code >= 500 and attempt < self.max_retries:
# Server errors are worth retrying
wait_time = min(60, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Server error {e.code} on attempt {attempt + 1}, retrying in {wait_time:.1f}s")
time.sleep(wait_time)
else:
raise
except Exception as e:
last_exception = e
if attempt < self.max_retries:
wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Request error on attempt {attempt + 1}, retrying in {wait_time:.1f}s: {str(e)}")
time.sleep(wait_time)
else:
raise
# This should never be reached, but just in case
if last_exception:
raise last_exception
else:
raise RuntimeError(f"Failed to fetch {url} after {self.max_retries + 1} attempts")
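# Illustrative backoff schedule for the retry loop above with the defaults
# (max_retries=3, request_delay=1.0); this is the base delay applied at the top of
# each retry, and additional waits for rate limits or server errors (with random
# jitter) are added on top:
#
#   attempt 1 retry -> sleep 1.0s   (request_delay * 2**0)
#   attempt 2 retry -> sleep 2.0s   (request_delay * 2**1)
#   attempt 3 retry -> sleep 4.0s   (request_delay * 2**2)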
class ExternalDocsFetcher:
"""Handles fetching documentation from external Laravel services and packages."""
def __init__(self, target_dir: Path, cache_duration: int = 86400, max_retries: int = 3):
"""
Initialize the external documentation fetcher.
Args:
target_dir: Directory where external docs should be stored
cache_duration: Cache duration in seconds (default: 24 hours)
max_retries: Maximum number of retry attempts for failed requests
"""
self.target_dir = target_dir
self.cache_duration = cache_duration
self.max_retries = max_retries
self.external_dir = target_dir / "external"
self.external_dir.mkdir(parents=True, exist_ok=True)
# Initialize auto-discovery system
self.auto_discovery = DocumentationAutoDiscovery(max_retries=max_retries)
# Laravel services documentation sources
self.laravel_services = {
"forge": {
"name": "Laravel Forge",
"type": DocumentationSourceType.LARAVEL_SERVICE,
"base_url": "https://forge.laravel.com/docs",
"auto_discovery": True,
"discovery_rules": {
"index_url": "https://forge.laravel.com/docs",
"link_pattern": r'href="(/docs/[^"]*)"',
"nested_sections": ["accounts", "servers", "sites", "resources"],
"exclude_patterns": ["#", "javascript:", "mailto:"]
},
"sections": [
# Get Started (manual fallback)
"introduction", "cli", "sdk",
# Accounts
"accounts/your-account", "accounts/circles", "accounts/source-control", "accounts/ssh", "accounts/api",
# Servers
"servers/providers", "servers/types", "servers/management", "servers/provisioning-process",
"servers/ssh", "servers/php", "servers/packages", "servers/recipes", "servers/load-balancing",
"servers/nginx-templates", "servers/backups", "servers/monitoring", "servers/cookbook",
# Sites
"sites/the-basics", "sites/applications", "sites/deployments", "sites/commands",
"sites/packages", "sites/queues", "sites/security-rules", "sites/redirects",
"sites/ssl", "sites/user-isolation", "sites/cookbook",
# Resources
"resources/daemons", "resources/databases", "resources/caches", "resources/network",
"resources/scheduler", "resources/integrations", "resources/cookbook"
]
},
"vapor": {
"name": "Laravel Vapor",
"type": DocumentationSourceType.LARAVEL_SERVICE,
"base_url": "https://docs.vapor.build",
"auto_discovery": True,
"discovery_rules": {
"index_url": "https://docs.vapor.build",
"navigation_patterns": [
r'href="(/[^"]*)"[^>]*>([^<]+)</a>',
r'"href":"(/[^"]*)"',
r'data-href="(/[^"]*)"'
],
"exclude_extensions": [".css", ".js", ".png", ".jpg", ".svg"],
"min_content_length": 500
},
"sections": [
# Manual fallback sections
"introduction", "projects/the-basics", "projects/environments",
"projects/deployments", "resources/queues", "resources/storage",
"resources/databases", "resources/caches"
]
},
"envoyer": {
"name": "Laravel Envoyer",
"type": DocumentationSourceType.LARAVEL_SERVICE,
"base_url": "https://docs.envoyer.io",
"auto_discovery": True,
"discovery_rules": {
"index_url": "https://docs.envoyer.io",
"link_pattern": r'href="(/docs/[^"]*)"',
"validate_content": True,
"content_indicators": ["envoyer", "deployment", "zero downtime", "project"],
"redirect_indicators": ["window.location", "redirecting", "loading..."]
},
"sections": [
# Manual fallback sections
"introduction", "quick-start",
"accounts/source-control", "accounts/your-account",
"projects/management", "projects/servers", "projects/deployment-hooks",
"projects/heartbeats", "projects/notifications", "projects/collaborators"
]
},
"nova": {
"name": "Laravel Nova",
"type": DocumentationSourceType.LARAVEL_SERVICE,
"base_url": "https://nova.laravel.com/docs/v5",
"auto_discovery": True,
"discovery_rules": {
"base_url": "https://nova.laravel.com/docs",
"version_detection": True,
"version_pattern": r'/docs/(v\d+)',
"link_pattern": r'href="(/docs/[^"]*)"',
"section_pattern": r'/docs/v\d+/(.+)',
"navigation_sections": ["Get Started", "Resources", "Search", "Filters", "Lenses", "Actions", "Metrics", "Digging Deeper"]
},
"sections": [
# Manual fallback sections
# Get Started
"installation", "releases", "upgrade",
# Resources
"resources/the-basics", "resources/fields", "resources/dependent-fields",
"resources/date-fields", "resources/file-fields", "resources/repeater-fields",
"resources/panels", "resources/relationships", "resources/validation", "resources/authorization",
# Search
"search/the-basics", "search/global-search", "search/scout-integration",
# Filters
"filters/defining-filters", "filters/registering-filters",
# Lenses
"lenses/defining-lenses", "lenses/registering-lenses",
# Actions
"actions/defining-actions", "actions/registering-actions",
# Metrics
"metrics/defining-metrics", "metrics/registering-metrics",
# Digging Deeper (Customization)
"customization/dashboards", "customization/menus", "customization/notifications",
"customization/authentication", "customization/impersonation", "customization/tools",
"customization/resource-tools", "customization/cards", "customization/fields",
"customization/filters", "customization/frontend", "customization/assets",
"customization/localization", "customization/stubs"
]
}
}
def get_service_cache_path(self, service: str) -> Path:
"""Get the cache directory path for a service."""
service_dir = self.external_dir / service
service_dir.mkdir(exist_ok=True)
return service_dir
def get_cache_metadata_path(self, service: str) -> Path:
"""Get the metadata file path for a service."""
return self.get_service_cache_path(service) / ".cache_metadata.json"
def is_cache_valid(self, service: str) -> bool:
"""Check if the cached documentation for a service is still valid."""
metadata_path = self.get_cache_metadata_path(service)
if not metadata_path.exists():
return False
try:
# Use file modification time instead of stored cached_at
cache_time = metadata_path.stat().st_mtime
return (time.time() - cache_time) < self.cache_duration
except Exception as e:
logger.warning(f"Error reading cache metadata for {service}: {str(e)}")
return False
def save_cache_metadata(self, service: str, metadata: Dict) -> None:
"""Save cache metadata for a service."""
metadata_path = self.get_cache_metadata_path(service)
try:
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
except Exception as e:
logger.error(f"Error saving cache metadata for {service}: {str(e)}")
def fetch_laravel_service_docs(self, service: str) -> bool:
"""
Fetch documentation for a Laravel service.
Args:
service: Service name (forge, vapor, envoyer, nova)
Returns:
True if successful, False otherwise
"""
if service not in self.laravel_services:
logger.error(f"Unknown Laravel service: {service}")
return False
# Check if cache is valid
if self.is_cache_valid(service):
logger.debug(f"Using cached documentation for {service}")
return True
service_config = self.laravel_services[service]
service_dir = self.get_service_cache_path(service)
logger.info(f"Fetching documentation for {service_config['name']}")
try:
if service_config["type"] == DocumentationSourceType.LARAVEL_SERVICE:
return self._fetch_service_documentation(service, service_config, service_dir)
elif service_config["type"] == DocumentationSourceType.GITHUB_REPO:
return self._fetch_github_documentation(service, service_config, service_dir)
else:
logger.error(f"Unsupported documentation source type for {service}")
return False
except Exception as e:
logger.error(f"Error fetching documentation for {service}: {str(e)}")
return False
def _fetch_service_documentation(self, service: str, config: Dict, target_dir: Path) -> bool:
"""Fetch documentation from Laravel service websites."""
base_url = config["base_url"]
# Try auto-discovery first, fallback to manual sections
discovered_sections = []
if config.get("auto_discovery", False):
try:
discovered_sections = self.auto_discovery.discover_sections(service, config)
logger.info(f"Auto-discovery found {len(discovered_sections)} sections for {service}")
except Exception as e:
logger.warning(f"Auto-discovery failed for {service}: {str(e)}, falling back to manual sections")
# Use discovered sections if available, otherwise use manual sections
if discovered_sections:
sections = discovered_sections
discovery_method = "auto-discovery"
else:
sections = config.get("sections", [])
discovery_method = "manual configuration"
logger.info(f"Using {discovery_method} for {service}: {len(sections)} sections")
# All configured services are now publicly accessible
# No longer creating placeholder documentation
fetched_sections = []
for section in sections:
# Double-check that this isn't an asset file
if self.auto_discovery._is_asset_file(section):
logger.debug(f"Skipping asset file: {section}")
continue
section_url = f"{base_url}/{section}"
section_file = target_dir / f"{section}.md"
# Create parent directories if needed for nested sections
section_file.parent.mkdir(parents=True, exist_ok=True)
try:
logger.debug(f"Fetching {section} documentation from {section_url}")
content_bytes = self._retry_request(section_url)
content = content_bytes.decode('utf-8')
# Extract the main content from the HTML and convert it to a
# markdown-like format (service-specific parsing happens downstream)
processed_content = self._process_service_html(content, service, section)
with open(section_file, 'w', encoding='utf-8') as f:
f.write(processed_content)
fetched_sections.append(section)
logger.debug(f"Successfully fetched {section} documentation")
except urllib.error.HTTPError as e:
if e.code == 404:
logger.info(f"Section {section} not found (404) - may not be available")
else:
logger.warning(f"Failed to fetch {section} documentation: HTTP {e.code}")
continue
except Exception as e:
logger.warning(f"Failed to fetch {section} documentation: {str(e)}")
continue
if fetched_sections:
# Save metadata about what was fetched
metadata = {
"service": service,
"fetched_sections": fetched_sections,
"total_sections": len(sections),
"success_rate": len(fetched_sections) / len(sections),
"discovery_method": discovery_method,
"auto_discovery_enabled": config.get("auto_discovery", False),
"discovered_count": len(discovered_sections) if discovered_sections else 0,
"manual_fallback": discovery_method == "manual configuration" and config.get("auto_discovery", False)
}
self.save_cache_metadata(service, metadata)
logger.info(f"Successfully fetched {len(fetched_sections)}/{len(sections)} sections for {service} using {discovery_method}")
return True
return False
def _create_placeholder_documentation(self, service: str, config: Dict, target_dir: Path) -> bool:
"""Create placeholder documentation for services that require authentication."""
logger.info(f"Creating placeholder documentation for {service} (authentication required)")
sections = config.get("sections", [])
service_name = config.get("name", service.title())
base_url = config.get("base_url", "")
for section in sections:
section_file = target_dir / f"{section}.md"
content = f"# {service_name} - {section.replace('-', ' ').title()}\n\n"
content += f"*Note: {service_name} documentation requires authentication to access.*\n\n"
content += "## Overview\n\n"
content += f"This section covers {section.replace('-', ' ')} functionality in {service_name}.\n\n"
content += "## Documentation Access\n\n"
content += f"To access the complete {service_name} documentation:\n\n"
content += f"1. Visit [{service_name}]({base_url.replace('/docs/1.0', '')})\n"
content += "2. Sign in to your account\n"
content += "3. Navigate to the documentation section\n\n"
content += "## Common Use Cases\n\n"
if service == "vapor":
if section == "getting-started":
content += "- Setting up serverless Laravel applications\n"
content += "- Configuring AWS Lambda deployment\n"
elif section == "projects":
content += "- Creating and managing Vapor projects\n"
content += "- Environment configuration\n"
elif section == "deployments":
content += "- Deploying Laravel applications to AWS Lambda\n"
content += "- Managing deployment rollbacks\n"
elif service == "envoyer":
if section == "getting-started":
content += "- Setting up zero-downtime deployment\n"
content += "- Connecting your repositories\n"
elif section == "projects":
content += "- Creating deployment projects\n"
content += "- Managing project settings\n"
elif section == "deployments":
content += "- Configuring deployment hooks\n"
content += "- Managing deployment history\n"
content += f"\n*For detailed information, please visit the official {service_name} documentation.*\n"
with open(section_file, 'w', encoding='utf-8') as f:
f.write(content)
# Save metadata
metadata = {
"service": service,
"fetched_sections": sections,
"total_sections": len(sections),
"success_rate": 1.0,
"type": "placeholder"
}
self.save_cache_metadata(service, metadata)
logger.info(f"Created placeholder documentation for {service} with {len(sections)} sections")
return True
def _fetch_github_documentation(self, service: str, config: Dict, target_dir: Path) -> bool:
"""Fetch documentation from GitHub repositories."""
repo = config["repo"]
branch = config.get("branch", "main")
# Use similar logic to the main DocsUpdater but for external repos
archive_url = f"https://github.com/{repo}/archive/refs/heads/{branch}.zip"
try:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
zip_path = temp_path / f"{service}_docs.zip"
logger.debug(f"Downloading {service} documentation from {archive_url}")
content_bytes = self._retry_request(archive_url)
with open(zip_path, 'wb') as out_file:
out_file.write(content_bytes)
# Extract the zip file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(temp_path)
# Find the extracted directory
extracted_dirs = [d for d in temp_path.iterdir() if d.is_dir() and d.name.startswith(repo.split('/')[-1])]
if not extracted_dirs:
raise FileNotFoundError(f"Could not find extracted {service} documentation directory")
extracted_dir = extracted_dirs[0]
# Clear the target directory
if target_dir.exists():
shutil.rmtree(target_dir)
target_dir.mkdir(parents=True)
# Copy documentation files
for item in extracted_dir.iterdir():
if item.is_dir():
shutil.copytree(item, target_dir / item.name)
else:
shutil.copy2(item, target_dir / item.name)
# Save metadata
metadata = {
"service": service,
"repo": repo,
"branch": branch,
"fetch_method": "github_archive"
}
self.save_cache_metadata(service, metadata)
logger.info(f"Successfully fetched GitHub documentation for {service}")
return True
except Exception as e:
logger.error(f"Error fetching GitHub documentation for {service}: {str(e)}")
return False
def _process_service_html(self, html_content: str, service: str, section: str) -> str:
"""
Process HTML content from Laravel services to extract documentation.
Extracts main content from HTML and converts to markdown-like format.
"""
# Basic HTML content extraction
processed_content = f"# {service.title()} - {section.title()}\n\n"
processed_content += f"*Source: {self.laravel_services[service]['base_url']}/{section}*\n\n"
processed_content += "---\n\n"
try:
# Try to extract meaningful content from HTML
content_text = self._extract_html_content(html_content)
if len(content_text.strip()) > 100: # Simplified validation for testing
processed_content += content_text
else:
# Log warning but don't create fake content
logger.warning(f"Content extraction failed for {service}/{section} - content too short or invalid")
processed_content += f"*Content extraction failed for {service.title()} {section}.*\n"
processed_content += "*This may indicate a URL redirect or parsing issue.*\n\n"
processed_content += "*Please visit the official documentation at the source URL above.*\n\n"
processed_content += f"<!-- Content length: {len(content_text.strip()) if content_text else 0} characters -->\n"
except Exception as e:
logger.warning(f"Error processing HTML content for {service}/{section}: {str(e)}")
processed_content += f"*Content processing error: {str(e)}*\n\n"
processed_content += "*Please visit the official documentation at the source URL above.*\n\n"
return processed_content
def _extract_html_content(self, html_content: str) -> str:
"""
Extract readable content from HTML using markdownify.
Simplified approach that leverages markdownify's built-in HTML parsing.
"""
try:
from bs4 import BeautifulSoup
except ImportError:
logger.warning("BeautifulSoup not installed, using simple extraction")
# Fallback to just converting the entire HTML
text_content = self._html_to_text(html_content)
return text_content[:10000] if len(text_content) > 10000 else text_content
# Parse HTML with BeautifulSoup for better content extraction
soup = BeautifulSoup(html_content, 'html.parser')
# Try to find main content areas
content_areas = [
soup.find('main'),
soup.find('article'),
soup.find('div', class_=re.compile(r'prose|content|documentation|markdown', re.I)),
soup.find('div', id=re.compile(r'content|docs|documentation', re.I)),
soup.find('section', class_=re.compile(r'content|docs', re.I)),
]
# Use the first valid content area found
content_html = None
for area in content_areas:
if area and len(str(area)) > 200: # Ensure it has substantial content
content_html = str(area)
break
# If no specific content area found, use the body
if not content_html:
body = soup.find('body')
content_html = str(body) if body else html_content
# Convert to markdown
text_content = self._html_to_text(content_html)
# Limit length to prevent extremely long outputs
if len(text_content) > 10000:
text_content = text_content[:10000] + "\n\n*[Content truncated for length]*"
return text_content
def _is_valid_content(self, content: str, service: str, section: str) -> bool:
"""
Validate that extracted content is actually documentation.
Enhanced version with better quality scoring.
Args:
content: Extracted text content
service: Service name (forge, vapor, etc.)
section: Section name
Returns:
True if content appears to be valid documentation
"""
if not content or len(content.strip()) < 200:
return False
# Check for service-specific keywords that indicate real documentation
service_keywords = {
"forge": ["server", "deployment", "laravel", "forge", "provision", "ssh", "nginx", "database"],
"vapor": ["serverless", "lambda", "aws", "vapor", "deployment", "environment", "queue", "cache"],
"envoyer": ["zero downtime", "deployment", "envoyer", "rollback", "hook", "notification", "repository"],
"nova": ["admin", "resource", "nova", "eloquent", "dashboard", "field", "filter", "lens", "action", "metric"]
}
# Enhanced section-specific keywords
section_keywords = {
"introduction": ["overview", "getting started", "what is", "welcome"],
"installation": ["install", "composer", "requirements", "setup"],
"deployment": ["deploy", "release", "production", "build"],
"backups": ["backup", "database", "restore", "snapshot"],
"ssl": ["certificate", "https", "tls", "ssl", "encryption"],
"authentication": ["auth", "login", "user", "password", "token"],
"configuration": ["config", "settings", "environment", "env"],
"database": ["mysql", "postgresql", "migration", "schema", "query"]
}
content_lower = content.lower()
# Quality scoring system
quality_score = 0
# Service keyword matching (higher weight)
service_matches = 0
if service in service_keywords:
service_matches = sum(1 for keyword in service_keywords[service] if keyword in content_lower)
quality_score += service_matches * 2
# Section keyword matching
section_base = section.split('/')[-1].replace('-', ' ')
if section_base in section_keywords:
section_matches = sum(1 for keyword in section_keywords[section_base] if keyword in content_lower)
quality_score += section_matches
# Generic documentation indicators
doc_indicators = [
"documentation", "guide", "tutorial", "reference", "api", "configuration",
"deploy", "server", "application", "framework", "laravel", "php",
"example", "usage", "method", "class", "function", "parameter"
]
doc_matches = sum(1 for indicator in doc_indicators if indicator in content_lower)
quality_score += doc_matches
# Structural indicators (signs of well-structured documentation)
structure_indicators = ["# ", "## ", "### ", "```", "**", "*", "1.", "2.", "-", "•"]
structure_matches = sum(1 for indicator in structure_indicators if indicator in content)
quality_score += min(structure_matches, 5) # Cap at 5 to avoid over-weighting
# Negative indicators (things that suggest this isn't documentation)
bad_indicators = [
"search...", "⌘k", "dashboard", "login", "sign in", "register",
"404", "not found", "error", "loading...", "please wait",
"window.location", "redirect", "javascript:", "mailto:",
"cookie", "privacy policy", "terms of service"
]
bad_matches = sum(1 for indicator in bad_indicators if indicator in content_lower)
quality_score -= bad_matches * 2
# Length bonus (longer content is generally better documentation)
if len(content) > 1000:
quality_score += 2
elif len(content) > 500:
quality_score += 1
# Content must have a minimum quality score to be considered valid
min_score = 3
is_valid = quality_score >= min_score
if not is_valid:
logger.debug(f"Content validation failed for {service}/{section}: score {quality_score} < {min_score}")
return is_valid
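# Worked example of the scoring above (hypothetical Forge "backups" page of
# ~1,500 characters): 3 service keywords (+6), 2 section keywords (+2),
# 4 generic doc indicators (+4), structural indicators capped at +5, no bad
# indicators, length bonus +2 -> score 19 >= min_score 3, so it is accepted.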
def _html_to_text(self, html_content: str) -> str:
"""
Convert HTML to Markdown using markdownify.
"""
try:
from markdownify import markdownify as md
from bs4 import BeautifulSoup
except ImportError:
logger.error("markdownify not installed. Please install it with: pip install markdownify")
# Fallback to basic conversion
return re.sub(r'<[^>]+>', '', html_content)
# Parse HTML and remove script and style elements completely
soup = BeautifulSoup(html_content, 'html.parser')
# Remove all script and style tags and their contents
for tag in soup(['script', 'style']):
tag.decompose()
# Get the cleaned HTML
cleaned_html = str(soup)
# Convert HTML to Markdown with specific options
markdown = md(
cleaned_html,
strip=['nav', 'header', 'footer', 'aside', 'meta', 'link'],
heading_style='ATX', # Use # style headings
bullets='-', # Use - for unordered lists
code_language='', # Don't assume code language
escape_asterisks=False, # Don't escape asterisks
escape_underscores=False, # Don't escape underscores
escape_misc=False, # Don't escape other special chars
autolinks=True, # Convert URLs to links automatically
)
# Post-process to handle CloudFlare email protection links
# Replace all email protection links with [email protected]
markdown = re.sub(
r'\[\[email protected\]\]\(/cdn-cgi/l/email-protection#[a-f0-9]+\)',
'[email protected]',
markdown
)
# Handle Support links with email protection
markdown = re.sub(
r'\[Support\]\(/cdn-cgi/l/email-protection[^)]+\)',
'Support',
markdown
)
# Remove any remaining inline JavaScript patterns
markdown = re.sub(r'\(self\.__next_s=self\.__next_s\|\|\[\]\)\.push[^\n]+', '', markdown)
markdown = re.sub(r'\(function\s+[a-zA-Z]\([^)]*\)\s*\{[^}]+\}\)[^\n]*', '', markdown)
# Remove CSS blocks that might have been left
markdown = re.sub(r'h1,\s*h2,\s*h3,\s*h4\s*\{[^}]+\}', '', markdown)
markdown = re.sub(r'\.[a-zA-Z0-9-]+\s*\{[^}]+\}', '', markdown)
markdown = re.sub(r'#[a-zA-Z0-9-]+\s*>\s*[^{]+\{[^}]+\}', '', markdown)
# Clean up excessive newlines
markdown = re.sub(r'\n\s*\n\s*\n', '\n\n', markdown)
return markdown.strip()
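# Minimal sketch of the conversion performed above, assuming markdownify and
# BeautifulSoup are installed (hypothetical input, not a real docs page; exact
# whitespace in the output may differ):
#
#   self._html_to_text("<main><h1>SSL</h1><p>Use <b>certbot</b>.</p></main>")
#   # -> roughly "# SSL\n\nUse **certbot**."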
def fetch_all_services(self, force: bool = False) -> Dict[str, bool]:
"""
Fetch documentation for all configured Laravel services.
Args:
force: Force refresh even if cache is valid
Returns:
Dictionary mapping service names to success status
"""
results = {}
for service in self.laravel_services.keys():
if force or not self.is_cache_valid(service):
results[service] = self.fetch_laravel_service_docs(service)
else:
results[service] = True
logger.debug(f"Skipping {service} (cache valid)")
return results
def list_available_services(self) -> List[str]:
"""List all available Laravel services."""
return list(self.laravel_services.keys())
def get_service_info(self, service: str) -> Optional[Dict]:
"""Get information about a specific service."""
return self.laravel_services.get(service)
def _retry_request(self, url: str, headers: Optional[Dict] = None, max_retries: Optional[int] = None) -> bytes:
"""
Make a request with retry logic and exponential backoff.
Args:
url: URL to request
headers: Optional headers to include
max_retries: Override default max_retries
Returns:
Response content as bytes
Raises:
urllib.error.URLError: If all retry attempts fail
"""
if headers is None:
headers = {"User-Agent": USER_AGENT}
retries = max_retries if max_retries is not None else self.max_retries
last_exception: Optional[Union[urllib.error.HTTPError, urllib.error.URLError, Exception]] = None
for attempt in range(retries + 1):
try:
request = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(request) as response:
return response.read()
except urllib.error.HTTPError as e:
last_exception = e
if e.code == 404:
# Don't retry 404 errors
raise
elif e.code == 403 and "rate limit" in str(e.reason).lower():
# For rate limiting, wait longer
wait_time = min(300, (2 ** attempt) * 5 + random.uniform(0, 5))
logger.warning(f"Rate limited on attempt {attempt + 1}/{retries + 1}, waiting {wait_time:.1f}s")
time.sleep(wait_time)
elif e.code >= 500:
# Server errors are worth retrying
if attempt < retries:
wait_time = min(60, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Server error {e.code} on attempt {attempt + 1}/{retries + 1}, retrying in {wait_time:.1f}s")
time.sleep(wait_time)
else:
raise
else:
# Other HTTP errors shouldn't be retried
raise
except urllib.error.URLError as e:
last_exception = e
if attempt < retries:
wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Network error on attempt {attempt + 1}/{retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
time.sleep(wait_time)
else:
raise
except Exception as e:
last_exception = e
if attempt < retries:
wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Unexpected error on attempt {attempt + 1}/{retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
time.sleep(wait_time)
else:
raise
# This should never be reached, but just in case
if last_exception:
raise last_exception
else:
raise RuntimeError(f"Failed to fetch {url} after {retries + 1} attempts")
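# Minimal usage sketch for ExternalDocsFetcher (illustrative; the "./docs" path is
# an assumption for the example, not a convention defined in this module):
#
#   fetcher = ExternalDocsFetcher(Path("./docs"))
#   results = fetcher.fetch_all_services(force=False)
#   # e.g. {"forge": True, "vapor": True, "envoyer": False, "nova": True}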
class DocsUpdater:
"""Handles downloading and updating Laravel documentation from GitHub."""
def __init__(self, target_dir: Path, version: str = DEFAULT_VERSION):
"""
Initialize the documentation updater.
Args:
target_dir: Directory where docs should be stored
version: Laravel version branch to pull documentation from (e.g., "12.x")
"""
self.target_dir = target_dir
self.version = version
self.github_api_url = GITHUB_API_URL
self.repo = LARAVEL_DOCS_REPO
# Create version-specific directory
self.version_dir = target_dir / version
self.version_dir.mkdir(parents=True, exist_ok=True)
# Create metadata directory if it doesn't exist
self.metadata_dir = self.version_dir / ".metadata"
self.metadata_dir.mkdir(exist_ok=True)
self.metadata_file = self.metadata_dir / "sync_info.json"
def get_latest_commit(self, max_retries: int = 3) -> Dict[str, str]:
"""Get information about the latest commit on the specified branch."""
logger.debug(f"Getting latest commit info for {self.repo} on branch {self.version}")
url = f"{self.github_api_url}/repos/{self.repo}/branches/{self.version}"
last_exception: Optional[Exception] = None
for attempt in range(max_retries + 1):
try:
request = urllib.request.Request(
url,
headers={
"User-Agent": USER_AGENT,
"Accept": "application/vnd.github.v3+json"
}
)
with urllib.request.urlopen(request) as response:
data = json.loads(response.read().decode())
return {
"sha": data["commit"]["sha"],
"date": data["commit"]["commit"]["committer"]["date"],
"message": data["commit"]["commit"]["message"],
"url": data["commit"]["html_url"]
}
except urllib.error.HTTPError as e:
last_exception = e
if e.code == 403 and "rate limit" in str(e.reason).lower():
if attempt < max_retries:
wait_time = min(300, (2 ** attempt) * 30)
logger.warning(f"GitHub API rate limit exceeded on attempt {attempt + 1}/{max_retries + 1}, waiting {wait_time}s")
time.sleep(wait_time)
continue
else:
logger.error("GitHub API rate limit exceeded. Try again later.")
raise
elif e.code == 404:
logger.error(f"Branch {self.version} not found in repository {self.repo}")
raise
else:
if attempt < max_retries and e.code >= 500:
wait_time = min(60, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"GitHub API error {e.code} on attempt {attempt + 1}/{max_retries + 1}, retrying in {wait_time:.1f}s")
time.sleep(wait_time)
continue
else:
logger.error(f"HTTP error {e.code}: {e.reason}")
raise
except Exception as e:
last_exception = e
if attempt < max_retries:
wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Error fetching commit info on attempt {attempt + 1}/{max_retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
time.sleep(wait_time)
else:
logger.error(f"Error fetching latest commit info: {str(e)}")
raise
# This should never be reached, but just in case
if last_exception:
raise last_exception
else:
raise RuntimeError(f"Failed to get latest commit after {max_retries + 1} attempts")
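# Illustrative shape of the dictionary returned above (the values are made up):
#
#   {
#       "sha": "0a1b2c3d4e5f...",
#       "date": "2024-01-01T00:00:00Z",
#       "message": "Update queues.md",
#       "url": "https://github.com/laravel/docs/commit/0a1b2c3d"
#   }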
def read_local_metadata(self) -> Dict:
"""Read local metadata about the last sync."""
if not self.metadata_file.exists():
return {}
try:
with open(self.metadata_file, 'r') as f:
return json.load(f)
except Exception as e:
logger.warning(f"Error reading metadata file: {str(e)}")
return {}
def write_local_metadata(self, data: Dict) -> None:
"""Write local metadata about the current sync."""
try:
with open(self.metadata_file, 'w') as f:
json.dump(data, f, indent=2)
except Exception as e:
logger.error(f"Error writing metadata file: {str(e)}")
def download_documentation(self) -> Path:
"""
Download the Laravel documentation as a zip file.
Returns:
Path to the downloaded and extracted documentation directory
"""
logger.info(f"Downloading documentation for Laravel {self.version}")
# GitHub archive URL for the specific branch
archive_url = f"https://github.com/{self.repo}/archive/refs/heads/{self.version}.zip"
try:
# Create a temporary directory; delete=False (Python 3.12+) keeps it alive past the
# context manager so update() can copy from it and remove it manually afterwards
with tempfile.TemporaryDirectory(delete=False) as temp_dir:
temp_path = Path(temp_dir)
zip_path = temp_path / "laravel_docs.zip"
# Download the zip file
logger.debug(f"Downloading from {archive_url}")
# Retry mechanism for downloading
max_retries = 3
for attempt in range(max_retries + 1):
try:
request = urllib.request.Request(
archive_url,
headers={"User-Agent": USER_AGENT}
)
with urllib.request.urlopen(request) as response, open(zip_path, 'wb') as out_file:
shutil.copyfileobj(response, out_file)
break # Success, exit retry loop
except Exception as e:
if attempt < max_retries:
wait_time = min(30, (2 ** attempt) + random.uniform(0, 2))
logger.warning(f"Download failed on attempt {attempt + 1}/{max_retries + 1}, retrying in {wait_time:.1f}s: {str(e)}")
time.sleep(wait_time)
else:
logger.error(f"Failed to download after {max_retries + 1} attempts: {str(e)}")
raise
# Extract the zip file
logger.debug(f"Extracting archive to {temp_path}")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(temp_path)
# Find the extracted directory (should be named like "docs-12.x")
extracted_dirs = [d for d in temp_path.iterdir() if d.is_dir() and (d.name.startswith(f"{self.repo.split('/')[-1]}-"))]
if not extracted_dirs:
raise FileNotFoundError("Could not find extracted documentation directory")
extracted_dir = extracted_dirs[0]
logger.debug(f"Found extracted directory: {extracted_dir}")
# Return the directory containing markdown files
return extracted_dir
except Exception as e:
logger.error(f"Error downloading documentation: {str(e)}")
raise
def needs_update(self) -> bool:
"""Check if documentation needs to be updated based on remote commits."""
try:
# Get the latest commit info
latest_commit = self.get_latest_commit()
# Get local metadata
local_meta = self.read_local_metadata()
# Check if we already have the latest version
if local_meta.get("version") == self.version and local_meta.get("commit_sha") == latest_commit["sha"]:
logger.debug("Documentation is already up to date.")
return False
# If we reach here, an update is needed
return True
except Exception as e:
logger.error(f"Error checking for updates: {str(e)}")
logger.info("Assuming update is needed due to error")
return True
def update(self, force: bool = False) -> bool:
"""
Update the documentation if needed or if forced.
Args:
force: Force update even if already up to date
Returns:
True if update was performed, False otherwise
"""
if not force and not self.needs_update():
return False
try:
# Get the latest commit info for metadata
latest_commit = self.get_latest_commit()
# Download the documentation
source_dir = self.download_documentation()
# Clear the version directory (except .metadata)
for item in self.version_dir.iterdir():
if item.name != ".metadata":
if item.is_dir():
shutil.rmtree(item)
else:
item.unlink()
# Copy files to the version directory
for item in source_dir.iterdir():
if item.is_dir():
shutil.copytree(item, self.version_dir / item.name)
else:
shutil.copy2(item, self.version_dir / item.name)
# Update metadata
metadata = {
"version": self.version,
"commit_sha": latest_commit["sha"],
"commit_date": latest_commit["date"],
"commit_message": latest_commit["message"],
"commit_url": latest_commit["url"],
"sync_time": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
}
self.write_local_metadata(metadata)
shutil.rmtree(source_dir.parent) # Remove the temporary directory
logger.debug(f"Removed temporary directory: {source_dir.parent}")
logger.info(f"Documentation updated successfully to {self.version} ({latest_commit['sha'][:7]})")
return True
except Exception as e:
logger.error(f"Error updating documentation: {str(e)}")
raise
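# Minimal usage sketch for DocsUpdater (illustrative; the directory and version are
# assumptions for the example):
#
#   updater = DocsUpdater(Path("./docs"), version="12.x")
#   if updater.needs_update():
#       updater.update()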
class CommunityPackageFetcher:
"""Handles fetching documentation from community Laravel packages."""
def __init__(self, target_dir: Path, cache_duration: int = 86400, max_retries: int = 3):
"""
Initialize the community package documentation fetcher.
Args:
target_dir: Directory where package docs should be stored
cache_duration: Cache duration in seconds (default: 24 hours)
max_retries: Maximum number of retry attempts for failed requests
"""
self.target_dir = target_dir
self.cache_duration = cache_duration
self.max_retries = max_retries
self.packages_dir = target_dir / "packages"
self.packages_dir.mkdir(parents=True, exist_ok=True)
# Community packages documentation sources
self.community_packages = {
"spatie": {
"name": "Spatie Packages",
"type": DocumentationSourceType.COMMUNITY_PACKAGE,
"base_url": "https://spatie.be/docs",
"packages": {
"laravel-permission": {
"name": "Laravel Permission",
"docs_url": "https://spatie.be/docs/laravel-permission/v6/introduction",
"version_pattern": r'/v(\d+)/',
"sections": [
"introduction", "installation-laravel", "basic-usage/basic-usage",
"basic-usage/role-permissions", "basic-usage/direct-permissions",
"basic-usage/multiple-guards", "basic-usage/teams-permissions",
"basic-usage/blade-directives", "basic-usage/artisan",
"basic-usage/middleware", "basic-usage/wildcard-permissions",
"advanced-usage/cache", "advanced-usage/extending",
"advanced-usage/exceptions", "advanced-usage/seeding",
"advanced-usage/testing", "api/models", "api/traits"
]
},
"laravel-medialibrary": {
"name": "Laravel Media Library",
"docs_url": "https://spatie.be/docs/laravel-medialibrary/v11/introduction",
"version_pattern": r'/v(\d+)/',
"sections": [
"introduction", "installation-setup", "basic-usage/associating-files",
"basic-usage/retrieving-media", "converting-images/defining-conversions",
"converting-images/retrieving-converted-images", "responsive-images",
"downloading-media/downloading-a-single-file", "advanced-usage/using-s3"
]
},
"laravel-backup": {
"name": "Laravel Backup",
"docs_url": "https://spatie.be/docs/laravel-backup/v9/introduction",
"version_pattern": r'/v(\d+)/',
"sections": [
"introduction", "installation-and-setup", "backing-up/overview",
"backing-up/events", "cleaning-up-old-backups/overview",
"sending-notifications/overview", "monitoring-health/overview"
]
}
}
},
"livewire": {
"name": "Livewire",
"type": DocumentationSourceType.COMMUNITY_PACKAGE,
"base_url": "https://livewire.laravel.com/docs",
"sections": [
"quickstart", "installation", "components", "properties", "actions",
"forms", "lifecycle-hooks", "nesting", "events", "security",
"uploads", "downloads", "validation", "pagination",
"redirecting", "wire-model", "wire-click", "wire-submit",
"wire-loading", "wire-transition", "wire-poll", "wire-init",
"wire-dirty", "wire-offline", "alpine", "morphing", "teleport",
"lazy", "locked", "computed-properties", "url", "navigate",
"offline", "testing", "troubleshooting", "javascript"
]
},
"inertia": {
"name": "Inertia.js",
"type": DocumentationSourceType.GITHUB_REPO,
"repo": "inertiajs/inertiajs.com",
"branch": "master",
"docs_path": "resources/js/Pages",
"sections": [
"how-it-works", "who-is-it-for", "the-protocol",
"server-side-setup", "client-side-setup", "pages", "responses",
"redirects", "routing", "title-and-meta", "links", "manual-visits",
"forms", "file-uploads", "validation", "shared-data", "partial-reloads",
"scroll-management", "authentication", "authorization", "csrf-protection",
"error-handling", "asset-versioning", "progress-indicators",
"remembering-state", "server-side-rendering", "testing"
]
},
"filament": {
"name": "Filament",
"type": DocumentationSourceType.COMMUNITY_PACKAGE,
"base_url": "https://filamentphp.com/docs",
"version": "3.x",
"sections": [
"panels/installation", "panels/configuration", "panels/resources/getting-started",
"panels/resources/listing-records", "panels/resources/creating-records",
"panels/resources/editing-records", "panels/resources/viewing-records",
"panels/resources/deleting-records", "panels/resources/custom-pages",
"panels/resources/relation-managers", "panels/resources/widgets",
"panels/pages", "panels/dashboard", "panels/navigation",
"panels/users", "panels/tenancy", "panels/plugins",
"forms/fields/getting-started", "forms/fields/text-input",
"forms/fields/select", "forms/fields/checkbox", "forms/fields/toggle",
"forms/fields/radio", "forms/fields/date-time-picker",
"forms/fields/file-upload", "forms/fields/rich-editor",
"forms/fields/markdown-editor", "forms/fields/repeater",
"forms/fields/builder", "forms/fields/tags-input",
"forms/fields/textarea", "forms/fields/key-value",
"forms/fields/color-picker", "forms/fields/hidden",
"forms/fields/placeholder", "forms/fields/fieldset",
"forms/layout/getting-started", "forms/layout/grid",
"forms/layout/tabs", "forms/layout/wizard",
"forms/validation", "forms/advanced",
"tables/columns/getting-started", "tables/columns/text",
"tables/columns/icon", "tables/columns/image", "tables/columns/badge",
"tables/columns/tags", "tables/columns/toggle",
"tables/filters", "tables/actions", "tables/bulk-actions",
"tables/summaries", "tables/grouping", "tables/advanced",
"actions/overview", "actions/prebuilt-actions", "actions/modals",
"notifications/overview", "notifications/sending-notifications",
"notifications/database-notifications", "widgets/overview"
]
},
"debugbar": {
"name": "Laravel Debugbar",
"type": DocumentationSourceType.COMMUNITY_PACKAGE,
"base_url": "https://laraveldebugbar.com",
"sections": [
"installation", "usage", "features", "collectors"
]
},
"ide-helper": {
"name": "Laravel IDE Helper",
"type": DocumentationSourceType.GITHUB_REPO,
"repo": "barryvdh/laravel-ide-helper",
"branch": "master",
"file": "README.md"
}
}
def get_package_cache_path(self, package: str, subpackage: Optional[str] = None) -> Path:
"""Get the cache directory path for a package."""
if subpackage:
package_dir = self.packages_dir / package / subpackage
else:
package_dir = self.packages_dir / package
package_dir.mkdir(parents=True, exist_ok=True)
return package_dir
def get_cache_metadata_path(self, package: str, subpackage: Optional[str] = None) -> Path:
"""Get the cache metadata file path for a package."""
cache_dir = self.get_package_cache_path(package, subpackage)
return cache_dir / ".metadata" / "cache.json"
def is_cache_valid(self, package: str, subpackage: Optional[str] = None) -> bool:
"""Check if the cache for a package is still valid."""
metadata_path = self.get_cache_metadata_path(package, subpackage)
if not metadata_path.exists():
return False
try:
# Use file modification time instead of stored cache_time
cache_time = metadata_path.stat().st_mtime
current_time = time.time()
if current_time - cache_time > self.cache_duration:
logger.debug(f"Cache expired for {package}/{subpackage or 'all'}")
return False
return True
except Exception as e:
logger.warning(f"Error reading cache metadata for {package}: {str(e)}")
return False
def fetch_package_docs(self, package: str, force: bool = False) -> bool:
"""
Fetch documentation for a community package.
Args:
package: Package name (spatie, livewire, inertia, filament)
force: Force refresh even if cache is valid
Returns:
True if successful, False otherwise
"""
if package not in self.community_packages:
logger.error(f"Unknown package: {package}")
return False
# Check cache validity
if not force and self.is_cache_valid(package):
logger.info(f"Using cached documentation for {package}")
return True
logger.info(f"Fetching documentation for {package}")
package_config = self.community_packages[package]
try:
if package == "spatie":
return self._fetch_spatie_docs(package_config)
elif package == "livewire":
return self._fetch_livewire_docs(package_config)
elif package == "inertia":
return self._fetch_inertia_docs(package_config)
elif package == "filament":
return self._fetch_filament_docs(package_config)
elif package == "debugbar":
return self._fetch_debugbar_docs(package_config)
elif package == "ide-helper":
return self._fetch_ide_helper_docs(package_config)
else:
logger.error(f"No fetch method implemented for package: {package}")
return False
except Exception as e:
logger.error(f"Error fetching {package} documentation: {str(e)}")
return False
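# Illustrative call into the dispatcher above (package names come from
# self.community_packages; the target directory is an assumption for the example):
#
#   fetcher = CommunityPackageFetcher(Path("./docs"))
#   fetcher.fetch_package_docs("livewire", force=False)  # -> True on success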
def _fetch_spatie_docs(self, config: Dict) -> bool:
"""Fetch documentation for Spatie packages."""
success_count = 0
packages = config.get("packages", {})
for package_key, package_info in packages.items():
try:
logger.info(f"Fetching Spatie {package_info['name']} documentation")
package_dir = self.get_package_cache_path("spatie", package_key)
base_url = package_info["docs_url"].rsplit('/', 1)[0]
sections = package_info.get("sections", [])
fetched_sections = 0
for section in sections:
section_url = f"{base_url}/{section}"
content = self._fetch_and_process_content(section_url, "spatie", section)
if content:
# Save the processed content
file_path = package_dir / f"{section.replace('/', '-')}.md"
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
fetched_sections += 1
if fetched_sections > 0:
# Update cache metadata
metadata = {
"package": package_key,
"name": package_info['name'],
"sections_count": fetched_sections,
"base_url": base_url
}
metadata_path = self.get_cache_metadata_path("spatie", package_key)
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
success_count += 1
logger.info(f"Successfully fetched {fetched_sections} sections for Spatie {package_info['name']}")
except Exception as e:
logger.error(f"Error fetching Spatie {package_key} documentation: {str(e)}")
return success_count > 0
def _fetch_livewire_docs(self, config: Dict) -> bool:
"""Fetch Livewire documentation."""
base_url = config["base_url"]
sections = config.get("sections", [])
package_dir = self.get_package_cache_path("livewire")
fetched_sections = 0
for section in sections:
try:
section_url = f"{base_url}/{section}"
content = self._fetch_and_process_content(section_url, "livewire", section)
if content:
file_path = package_dir / f"{section}.md"
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
fetched_sections += 1
except Exception as e:
logger.warning(f"Error fetching Livewire section {section}: {str(e)}")
if fetched_sections > 0:
# Update cache metadata
metadata = {
"package": "livewire",
"name": config['name'],
"sections_count": fetched_sections,
"base_url": base_url
}
metadata_path = self.get_cache_metadata_path("livewire")
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
logger.info(f"Successfully fetched {fetched_sections} sections for Livewire")
return True
return False
def _fetch_inertia_docs(self, config: Dict) -> bool:
"""Fetch Inertia.js documentation from GitHub repository."""
repo = config["repo"]
branch = config["branch"]
docs_path = config["docs_path"]
sections = config.get("sections", [])
package_dir = self.get_package_cache_path("inertia")
fetched_sections = 0
for section in sections:
try:
# Map section names to JSX file names
jsx_filename = f"{section}.jsx"
github_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{docs_path}/{jsx_filename}"
logger.debug(f"Fetching {section} from {github_url}")
request = urllib.request.Request(
github_url,
headers={"User-Agent": "Laravel-MCP-Companion/1.0"}
)
with urllib.request.urlopen(request) as response:
jsx_content = response.read().decode('utf-8')
# Extract content from JSX file and convert to markdown
markdown_content = self._process_jsx_to_markdown(jsx_content, section)
if markdown_content:
file_path = package_dir / f"{section}.md"
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(f"# Inertia - {section.replace('-', ' ').title()}\n\n")
f.write(f"Source: https://inertiajs.com/{section}\n\n")
f.write(markdown_content)
fetched_sections += 1
logger.debug(f"Successfully processed {section}")
else:
logger.warning(f"No content extracted from {section}")
except urllib.error.HTTPError as e:
if e.code == 404:
logger.warning(f"Inertia section {section} not found (404)")
else:
logger.warning(f"HTTP error fetching Inertia section {section}: {e}")
except Exception as e:
logger.warning(f"Error fetching Inertia section {section}: {str(e)}")
if fetched_sections > 0:
# Update cache metadata
metadata = {
"package": "inertia",
"name": config['name'],
"sections_count": fetched_sections,
"source_type": "github_repo",
"repo": repo,
"branch": branch,
"docs_path": docs_path
}
metadata_path = self.get_cache_metadata_path("inertia")
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
logger.info(f"Successfully fetched {fetched_sections} sections for Inertia.js from GitHub")
return True
return False
def _process_jsx_to_markdown(self, jsx_content: str, section: str) -> Optional[str]:
"""
Process JSX content and extract documentation text to markdown.
Args:
jsx_content: Raw JSX file content
section: Section name for context
Returns:
Extracted markdown content or None if no content found
"""
try:
# Remove import statements and React component structure
content = jsx_content
# Remove imports
content = re.sub(r'^import\s+.*?from\s+.*?[;\n]', '', content, flags=re.MULTILINE)
# Remove export statements
content = re.sub(r'^export\s+.*?[;\n]', '', content, flags=re.MULTILINE)
# Extract text content from JSX elements
text_content = []
# Extract headings (both standard and custom components)
heading_patterns = [
r'<[Hh]([1-6])[^>]*>(.*?)</[Hh][1-6]>', # Case-insensitive match covers both standard h1-h6 tags and custom H1-H6 components
]
for pattern in heading_patterns:
headings = re.findall(pattern, content, re.DOTALL)
for level, text in headings:
clean_text = self._clean_jsx_text(text)
if clean_text.strip():
text_content.append(f"{'#' * int(level)} {clean_text}\n")
# Extract paragraphs (both standard and custom components)
paragraph_patterns = [
r'<[Pp][^>]*>(.*?)</[Pp]>', # Case-insensitive match covers both standard p tags and custom P components
]
for pattern in paragraph_patterns:
paragraphs = re.findall(pattern, content, re.DOTALL)
for para in paragraphs:
clean_text = self._clean_jsx_text(para)
if clean_text.strip():
text_content.append(f"{clean_text}\n")
# Extract code blocks (both standard and custom components)
code_patterns = [
r'<pre[^>]*><code[^>]*>(.*?)</code></pre>', # Standard code blocks
r'<Code[^>]*>(.*?)</Code>', # Custom Code components
r'<code[^>]*>(.*?)</code>', # Inline code
]
for pattern in code_patterns:
code_blocks = re.findall(pattern, content, re.DOTALL)
for code in code_blocks:
clean_code = self._clean_jsx_text(code)
if clean_code.strip():
if '\n' in clean_code:
text_content.append(f"```\n{clean_code}\n```\n")
else:
text_content.append(f"`{clean_code}`")
# Extract list items
list_items = re.findall(r'<li[^>]*>(.*?)</li>', content, re.DOTALL)
for item in list_items:
clean_text = self._clean_jsx_text(item)
if clean_text.strip():
text_content.append(f"- {clean_text}")
# Extract strong/bold text
strong_patterns = [
r'<strong[^>]*>(.*?)</strong>',
r'<Strong[^>]*>(.*?)</Strong>',
r'<b[^>]*>(.*?)</b>',
]
for pattern in strong_patterns:
strong_texts = re.findall(pattern, content, re.DOTALL)
for text in strong_texts:
clean_text = self._clean_jsx_text(text)
if clean_text.strip():
text_content.append(f"**{clean_text}**")
# Extract links
link_patterns = [
r'<a[^>]*href=["\']([^"\']*)["\'][^>]*>(.*?)</a>',
r'<A[^>]*href=["\']([^"\']*)["\'][^>]*>(.*?)</A>',
]
for pattern in link_patterns:
links = re.findall(pattern, content, re.DOTALL)
for href, link_text in links:
clean_text = self._clean_jsx_text(link_text)
if clean_text.strip():
text_content.append(f"[{clean_text}]({href})")
# Extract any remaining text content from string literals (but exclude JSX markup)
# Look for strings that appear to be documentation content
string_content = re.findall(r'["`\']([^"`\']{30,})["`\']', content)
for text in string_content:
# Skip if it looks like code, imports, JSX, or other non-documentation content
if not any(pattern in text.lower() for pattern in [
'import', 'export', 'from', 'require', 'function', 'const', 'let', 'var',
'===', '!==', '=>', 'return', 'props', 'component', '</', '/>', 'jsx', 'react'
]):
clean_text = text.strip()
if clean_text and len(clean_text.split()) > 5: # Require more than five words
# Don't add if it's already covered by component extraction
if not any(clean_text in existing for existing in text_content):
text_content.append(f"{clean_text}\n")
if text_content:
# Remove duplicates while preserving order
seen = set()
unique_content = []
for item in text_content:
item_clean = item.strip()
if item_clean and item_clean not in seen:
# Skip if it looks like JSX remnants
if not any(jsx_marker in item_clean for jsx_marker in ['<', '>', '{', '}', 'return (', '=>', 'export default']):
seen.add(item_clean)
unique_content.append(item)
return '\n'.join(unique_content)
else:
logger.debug(f"No extractable content found in {section} JSX file")
return None
except Exception as e:
logger.warning(f"Error processing JSX content for {section}: {str(e)}")
return None
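# Rough sketch of what _process_jsx_to_markdown yields for a trivial snippet
# (illustrative only; real Inertia pages contain far more markup):
#
#     jsx = "<H1>Routing</H1><P>Define routes in your Laravel app.</P>"
#     self._process_jsx_to_markdown(jsx, "routing")
#     # -> "# Routing\n\nDefine routes in your Laravel app.\n"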
def _clean_jsx_text(self, text: str) -> str:
"""Clean JSX text content of React syntax and HTML entities."""
# Remove JSX curly braces and expressions
text = re.sub(r'\{[^}]*\}', '', text)
# Remove HTML tags
text = re.sub(r'<[^>]*>', '', text)
# Decode HTML entities
text = html.unescape(text)
# Clean up whitespace
text = re.sub(r'\s+', ' ', text)
text = text.strip()
return text
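# Example of _clean_jsx_text behaviour: JSX expressions are dropped, tags stripped,
# HTML entities decoded, and whitespace collapsed:
#
#     self._clean_jsx_text("<strong>Server-side {appName}</strong> &amp; routing")
#     # -> "Server-side & routing"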
def _fetch_filament_docs(self, config: Dict) -> bool:
"""Fetch Filament documentation."""
base_url = config["base_url"]
version = config.get("version", "3.x")
sections = config.get("sections", [])
package_dir = self.get_package_cache_path("filament")
fetched_sections = 0
for section in sections:
try:
section_url = f"{base_url}/{version}/{section}"
content = self._fetch_and_process_content(section_url, "filament", section)
if content:
file_path = package_dir / f"{section.replace('/', '-')}.md"
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
fetched_sections += 1
except Exception as e:
logger.warning(f"Error fetching Filament section {section}: {str(e)}")
if fetched_sections > 0:
# Update cache metadata
metadata = {
"package": "filament",
"name": config['name'],
"version": version,
"sections_count": fetched_sections,
"base_url": base_url
}
metadata_path = self.get_cache_metadata_path("filament")
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
logger.info(f"Successfully fetched {fetched_sections} sections for Filament")
return True
return False
def _fetch_debugbar_docs(self, config: Dict) -> bool:
"""Fetch Laravel Debugbar documentation from website."""
base_url = config["base_url"]
sections = config.get("sections", [])
package_dir = self.get_package_cache_path("debugbar")
fetched_sections = 0
for section in sections:
try:
section_url = f"{base_url}/{section}/"
content = self._fetch_and_process_content(section_url, "debugbar", section)
if content:
file_path = package_dir / f"{section}.md"
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(content)
fetched_sections += 1
except Exception as e:
logger.warning(f"Error fetching Debugbar section {section}: {str(e)}")
if fetched_sections > 0:
# Update cache metadata
metadata = {
"package": "debugbar",
"name": config['name'],
"sections_count": fetched_sections,
"base_url": base_url
}
metadata_path = self.get_cache_metadata_path("debugbar")
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
logger.info(f"Successfully fetched {fetched_sections} sections for Laravel Debugbar")
return True
return False
def _fetch_ide_helper_docs(self, config: Dict) -> bool:
"""Fetch Laravel IDE Helper documentation from GitHub README."""
repo = config["repo"]
branch = config.get("branch", "master")
file = config.get("file", "README.md")
package_dir = self.get_package_cache_path("ide-helper")
try:
# Fetch README from GitHub
github_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{file}"
logger.info(f"Fetching IDE Helper documentation from {github_url}")
request = urllib.request.Request(
github_url,
headers={"User-Agent": USER_AGENT}
)
with urllib.request.urlopen(request) as response:
content = response.read().decode('utf-8')
# Process the README content
if content:
# Add header
header = f"# {config['name']}\n\n"
header += f"Source: https://github.com/{repo}\n\n"
header += "---\n\n"
# Save the processed content
file_path = package_dir / "readme.md"
file_path.parent.mkdir(parents=True, exist_ok=True)
with open(file_path, 'w', encoding='utf-8') as f:
f.write(header + content)
# Update cache metadata
metadata = {
"package": "ide-helper",
"name": config['name'],
"source_type": "github_readme",
"repo": repo,
"branch": branch,
"file": file
}
metadata_path = self.get_cache_metadata_path("ide-helper")
metadata_path.parent.mkdir(parents=True, exist_ok=True)
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
logger.info("Successfully fetched documentation for Laravel IDE Helper")
return True
except urllib.error.HTTPError as e:
if e.code == 404:
logger.error(f"IDE Helper README not found at {github_url}")
else:
logger.error(f"HTTP error fetching IDE Helper documentation: {e}")
except Exception as e:
logger.error(f"Error fetching IDE Helper documentation: {str(e)}")
return False
def _fetch_and_process_content(self, url: str, package: str, section: str) -> Optional[str]:
"""Fetch and process content from a URL."""
try:
# Use markdownify for HTML to Markdown conversion
from markdownify import markdownify as md
from bs4 import BeautifulSoup
request = urllib.request.Request(
url,
headers={
"User-Agent": USER_AGENT,
"Accept": "text/html,application/xhtml+xml"
}
)
with urllib.request.urlopen(request, timeout=30) as response:
content_bytes = response.read()
content = content_bytes.decode('utf-8')
# Parse with BeautifulSoup
soup = BeautifulSoup(content, 'html.parser')
# Remove navigation, header, footer elements
for tag in soup.find_all(['nav', 'header', 'footer', 'aside']):
tag.decompose()
# Remove stats/metrics elements that contain dynamic numbers
# For Spatie packages, remove elements that contain download counts and issue numbers
if package == "spatie":
# Find and remove the stats section that appears at the top of Spatie docs
# This typically contains Repository, Open Issues, and download counts
stats_removed = False
# Look for the pattern: Repository -> Open Issues -> large numbers
for element in soup.find_all(string=re.compile(r'^Repository$', re.I)):
# Find the container that holds this stats section
container = element.parent
while container and container.name not in ['body', 'html', 'main', 'article']:
# Check if this container has "Open Issues" and large numbers
text_content = container.get_text()
if 'Open Issues' in text_content and re.search(r'\d{3,}', text_content):
# This looks like the stats container
container.decompose()
stats_removed = True
logger.debug(f"Removed stats container from {url}")
break
# Try parent container
if container.parent and container.parent.name in ['div', 'section', 'aside', 'header']:
container = container.parent
else:
break
if stats_removed:
break
# Also remove any standalone large numbers that might be stats
for tag in soup.find_all(string=re.compile(r'^\s*[\d,]+\s*$')):
if tag.parent:
num_str = str(tag).strip().replace(',', '')
try:
# Remove numbers larger than 1000 (likely stats, not code examples)
if num_str.isdigit() and int(num_str) > 1000:
# Don't remove if it's inside a code block
if not any(p.name in ['code', 'pre'] for p in tag.parents):
tag.parent.extract()
except Exception:
pass
# Remove any divs or sections that look like stats containers
for tag in soup.find_all(['div', 'section'], class_=re.compile(r'stats|metrics|numbers|count', re.I)):
tag.decompose()
# Try to find main content area
main_content = None
# Package-specific selectors
if package == "inertia":
# Inertia uses div with id="top" for main content
main_content = soup.find('div', id='top')
if not main_content:
logger.debug(f"Could not find #top div for Inertia on {url}")
elif package == "filament":
# Filament might use different selectors
main_content = soup.select_one('.docs-content, .prose, main')
elif package == "debugbar":
# Debugbar documentation site selectors
main_content = soup.select_one('.prose, .content, main, article')
# If no package-specific selector worked, try common selectors
if not main_content:
content_selectors = [
'#top', # Try #top first as it seems common
'main', 'article', '[role="main"]', '.content', '.docs-content',
'.documentation', '#content', '.prose', '.markdown-body'
]
for selector in content_selectors:
main_content = soup.select_one(selector)
if main_content:
logger.debug(f"Found content using selector: {selector}")
break
if not main_content:
logger.warning(f"Could not find main content area for {url}, using body")
main_content = soup.find('body') or soup
# Convert to markdown
markdown_content = md(str(main_content), strip=['a'], code_language='php')
# Clean up the content
markdown_content = self._clean_markdown_content(markdown_content)
# Check if we got any actual content
if len(markdown_content.strip()) < 50:
logger.warning(f"Very little content extracted from {url} (len: {len(markdown_content.strip())})")
# Log a short preview of the extracted HTML to aid debugging
html_preview = str(main_content)[:500]
logger.debug(f"HTML preview: {html_preview}")
# Add metadata header
header = f"# {package.title()} - {section.replace('-', ' ').title()}\n\n"
header += f"Source: {url}\n\n"
return header + markdown_content
except Exception as e:
logger.error(f"Error fetching content from {url}: {str(e)}")
return None
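# Note: on success _fetch_and_process_content returns a markdown document of the form
# "# <Package> - <Section>\n\nSource: <url>\n\n<converted body>"; on any error it logs
# and returns None, so callers simply skip sections that could not be fetched.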
def _clean_markdown_content(self, content: str) -> str:
"""Clean up markdown content."""
# Remove excessive blank lines
content = re.sub(r'\n{3,}', '\n\n', content)
# Fix code blocks
content = re.sub(r'```\s*\n', '```\n', content)
# Remove CloudFlare email protection links
content = re.sub(
r'\[\[email protected\]\]\(/cdn-cgi/l/email-protection#[a-f0-9]+\)',
'[email protected]',
content
)
# Remove trailing whitespace
content = '\n'.join(line.rstrip() for line in content.split('\n'))
return content.strip()
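# Illustrative before/after for _clean_markdown_content (a sketch, not executed):
#
#     self._clean_markdown_content("# Title\n\n\n\nBody   \n")
#     # -> "# Title\n\nBody"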
def list_available_packages(self) -> List[str]:
"""List all available community packages."""
return list(self.community_packages.keys())
def fetch_all_packages(self, force: bool = False) -> Dict[str, bool]:
"""
Fetch documentation for all community packages.
Args:
force: Force refresh even if cache is valid
Returns:
Dictionary mapping package names to success status
"""
results = {}
for package in self.list_available_packages():
logger.info(f"Processing community package: {package}")
results[package] = self.fetch_package_docs(package, force=force)
# Log summary
success_count = sum(1 for success in results.values() if success)
total_count = len(results)
logger.info(f"Community package documentation update complete: {success_count}/{total_count} packages")
return results
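# Minimal sketch of fetching every configured community package at once (not executed);
# the result maps package names to a boolean success flag:
#
#     fetcher = CommunityPackageFetcher(Path("./docs"))
#     results = fetcher.fetch_all_packages(force=False)
#     failed = [pkg for pkg, ok in results.items() if not ok]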
class MultiSourceDocsUpdater:
"""Handles updating documentation from multiple sources including core Laravel and external services."""
def __init__(self, target_dir: Path, version: str = DEFAULT_VERSION):
"""
Initialize the multi-source documentation updater.
Args:
target_dir: Directory where all docs should be stored
version: Laravel version for core documentation
"""
self.target_dir = target_dir
self.version = version
# Initialize core Laravel docs updater
self.core_updater = DocsUpdater(target_dir, version)
# Initialize external docs fetcher
self.external_fetcher = ExternalDocsFetcher(target_dir)
# Initialize community package fetcher
self.package_fetcher = CommunityPackageFetcher(target_dir)
def update_core_docs(self, force: bool = False) -> bool:
"""Update core Laravel documentation."""
logger.info("Updating core Laravel documentation")
return self.core_updater.update(force=force)
def update_external_docs(self, services: Optional[List[str]] = None, force: bool = False) -> Dict[str, bool]:
"""
Update external Laravel services documentation.
Args:
services: List of specific services to update. If None, updates all.
force: Force refresh even if cache is valid
Returns:
Dictionary mapping service names to success status
"""
logger.info("Updating external Laravel services documentation")
if services:
results = {}
for service in services:
if service in self.external_fetcher.laravel_services:
results[service] = self.external_fetcher.fetch_laravel_service_docs(service)
else:
logger.error(f"Unknown service: {service}")
results[service] = False
return results
else:
return self.external_fetcher.fetch_all_services(force=force)
def update_package_docs(self, packages: Optional[List[str]] = None, force: bool = False) -> Dict[str, bool]:
"""
Update community package documentation.
Args:
packages: List of specific packages to update. If None, updates all.
force: Force refresh even if cache is valid
Returns:
Dictionary mapping package names to success status
"""
logger.info("Updating community package documentation")
if packages:
results = {}
for package in packages:
if package in self.package_fetcher.community_packages:
results[package] = self.package_fetcher.fetch_package_docs(package, force=force)
else:
logger.error(f"Unknown package: {package}")
results[package] = False
return results
else:
return self.package_fetcher.fetch_all_packages(force=force)
def update_all(self, force_core: bool = False, force_external: bool = False, force_packages: bool = False) -> Dict[str, object]:
"""
Update all documentation sources.
Args:
force_core: Force update of core documentation
force_external: Force update of external documentation
force_packages: Force update of community packages
Returns:
Dictionary with results for core, external, and package updates
"""
logger.info("Starting comprehensive documentation update")
results: Dict[str, object] = {
"core": False,
"external": {},
"packages": {}
}
try:
# Update core Laravel documentation
results["core"] = self.update_core_docs(force=force_core)
# Update external services documentation
results["external"] = self.update_external_docs(force=force_external)
# Update community package documentation
results["packages"] = self.update_package_docs(force=force_packages)
# Log summary
core_status = "updated" if results["core"] else "up-to-date"
external_results = results["external"]
package_results = results["packages"]
if isinstance(external_results, dict):
external_count = sum(1 for success in external_results.values() if success)
total_external = len(external_results)
else:
external_count = 0
total_external = 0
if isinstance(package_results, dict):
package_count = sum(1 for success in package_results.values() if success)
total_packages = len(package_results)
else:
package_count = 0
total_packages = 0
logger.info(f"Documentation update complete: Core {core_status}, External {external_count}/{total_external} services, Packages {package_count}/{total_packages}")
except Exception as e:
logger.error(f"Error during comprehensive documentation update: {str(e)}")
return results
def get_all_documentation_status(self) -> Dict[str, Dict]:
"""Get status information for all documentation sources."""
status: Dict[str, Dict] = {
"core": {},
"external": {},
"packages": {}
}
# Get core documentation status
try:
core_metadata = self.core_updater.read_local_metadata()
status["core"] = {
"version": self.version,
"available": bool(core_metadata),
"last_updated": core_metadata.get("sync_time", "unknown"),
"commit_sha": core_metadata.get("commit_sha", "unknown")
}
except Exception as e:
status["core"] = {"error": str(e)}
# Get external documentation status
for service in self.external_fetcher.list_available_services():
try:
cache_valid = self.external_fetcher.is_cache_valid(service)
service_info = self.external_fetcher.get_service_info(service)
if service_info is None:
continue
# Try to read cache metadata
metadata_path = self.external_fetcher.get_cache_metadata_path(service)
if metadata_path.exists():
try:
with open(metadata_path, 'r') as f:
metadata = json.load(f)
except Exception:
metadata = {}
else:
metadata = {}
status["external"][service] = {
"name": service_info.get("name", service),
"type": service_info.get("type", "unknown").value if hasattr(service_info.get("type"), 'value') else str(service_info.get("type", "unknown")),
"cache_valid": cache_valid,
"last_fetched": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(metadata_path.stat().st_mtime)) if metadata_path.exists() else "never",
"success_rate": metadata.get("success_rate", "unknown")
}
except Exception as e:
status["external"][service] = {"error": str(e)}
# Get community package documentation status
for package in self.package_fetcher.list_available_packages():
try:
cache_valid = self.package_fetcher.is_cache_valid(package)
package_info = self.package_fetcher.community_packages.get(package, {})
# Try to read cache metadata
metadata_path = self.package_fetcher.get_cache_metadata_path(package)
if metadata_path.exists():
try:
with open(metadata_path, 'r') as f:
metadata = json.load(f)
except Exception:
metadata = {}
else:
metadata = {}
# Get the type value safely
package_type = package_info.get("type", DocumentationSourceType.COMMUNITY_PACKAGE)
if hasattr(package_type, 'value'):
type_value = package_type.value
else:
type_value = str(package_type) if package_type else "community_package"
status["packages"][package] = {
"name": package_info.get("name", package),
"type": type_value,
"cache_valid": cache_valid,
"last_fetched": metadata.get("cache_time", "never"),
"sections_count": metadata.get("sections_count", 0)
}
# For Spatie, include sub-packages
if package == "spatie" and "packages" in package_info:
status["packages"][package]["sub_packages"] = {}
packages_dict = package_info.get("packages", {})
if isinstance(packages_dict, dict):
for sub_pkg, sub_info in packages_dict.items():
sub_metadata_path = self.package_fetcher.get_cache_metadata_path("spatie", sub_pkg)
if sub_metadata_path.exists():
try:
with open(sub_metadata_path, 'r') as f:
sub_metadata = json.load(f)
status["packages"][package]["sub_packages"][sub_pkg] = {
"name": sub_info.get("name", sub_pkg),
"sections_count": sub_metadata.get("sections_count", 0)
}
except Exception:
pass
except Exception as e:
status["packages"][package] = {"error": str(e)}
return status
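# Shape of the mapping returned by get_all_documentation_status (keys and values are
# illustrative; actual service and package names come from the configured fetchers):
#
#     {
#         "core": {"version": "12.x", "available": True, "last_updated": "...", "commit_sha": "..."},
#         "external": {"<service>": {"name": "...", "type": "...", "cache_valid": True,
#                                    "last_fetched": "...", "success_rate": "unknown"}},
#         "packages": {"livewire": {"name": "...", "type": "community_package", "cache_valid": True,
#                                   "last_fetched": "...", "sections_count": 12}},
#     }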
def needs_update(self, check_external: bool = True, check_packages: bool = True) -> Dict[str, Union[bool, Dict[str, bool]]]:
"""
Check which documentation sources need updating.
Args:
check_external: Whether to check external services
check_packages: Whether to check community packages
Returns:
Dictionary indicating which sources need updates
"""
needs_update: Dict[str, Union[bool, Dict[str, bool]]] = {
"core": False,
"external": {},
"packages": {}
}
# Check core documentation
try:
needs_update["core"] = self.core_updater.needs_update()
except Exception as e:
logger.warning(f"Error checking core documentation update status: {str(e)}")
needs_update["core"] = True
# Check external documentation
if check_external:
external_dict = needs_update["external"]
if isinstance(external_dict, dict):
for service in self.external_fetcher.list_available_services():
try:
external_dict[service] = not self.external_fetcher.is_cache_valid(service)
except Exception as e:
logger.warning(f"Error checking {service} documentation status: {str(e)}")
external_dict[service] = True
# Check community package documentation
if check_packages:
packages_dict = needs_update["packages"]
if isinstance(packages_dict, dict):
for package in self.package_fetcher.list_available_packages():
try:
packages_dict[package] = not self.package_fetcher.is_cache_valid(package)
except Exception as e:
logger.warning(f"Error checking {package} documentation status: {str(e)}")
packages_dict[package] = True
return needs_update
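# Minimal sketch of driving MultiSourceDocsUpdater programmatically (not executed):
#
#     updater = MultiSourceDocsUpdater(Path("./docs"), DEFAULT_VERSION)
#     if updater.needs_update()["core"]:
#         updater.update_all(force_core=False, force_external=False, force_packages=False)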
def parse_arguments():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Laravel Documentation Updater"
)
parser.add_argument(
"--target-dir",
type=str,
default="./docs",
help="Path to store documentation (default: ./docs)"
)
parser.add_argument(
"--version",
type=str,
default=DEFAULT_VERSION,
help=f"Laravel version branch to use (default: {DEFAULT_VERSION}). Supported: {', '.join(SUPPORTED_VERSIONS)}"
)
parser.add_argument(
"--all-versions",
action="store_true",
help="Update documentation for all supported versions"
)
parser.add_argument(
"--force",
action="store_true",
help="Force update even if already up to date"
)
parser.add_argument(
"--check-only",
action="store_true",
help="Only check if update is needed, don't perform update"
)
parser.add_argument(
"--update",
action="store_true",
help="Update all documentation (Laravel core, services, and community packages)"
)
parser.add_argument(
"--external-only",
action="store_true",
help="Only update external Laravel services documentation (deprecated: use --update)"
)
parser.add_argument(
"--core-only",
action="store_true",
help="Only update core Laravel documentation (deprecated: use --update)"
)
parser.add_argument(
"--packages-only",
action="store_true",
help="Only update community package documentation (deprecated: use --update)"
)
parser.add_argument(
"--services",
type=str,
nargs="+",
help="Specific Laravel services to update (deprecated: use --update)"
)
parser.add_argument(
"--packages",
type=str,
nargs="+",
help="Specific community packages to update (deprecated: use --update)"
)
parser.add_argument(
"--list-services",
action="store_true",
help="List all available Laravel services"
)
parser.add_argument(
"--list-packages",
action="store_true",
help="List all available community packages"
)
parser.add_argument(
"--status",
action="store_true",
help="Show status of all documentation sources"
)
parser.add_argument(
"--log-level",
type=str,
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
default="INFO",
help="Logging level (default: INFO)"
)
return parser.parse_args()
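# Typical invocations (illustrative; the flags match the parser defined above):
#
#     python docs_updater.py --update --target-dir ./docs
#     python docs_updater.py --check-only --version 12.x
#     python docs_updater.py --status
#     python docs_updater.py --all-versions --force --log-level DEBUG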
def update_version(target_dir: Path, version: str, force: bool, check_only: bool) -> tuple[bool, bool]:
"""Update documentation for a single version.
Returns:
(success, updated): success indicates if operation completed without error,
updated indicates if files were actually updated
"""
try:
updater = DocsUpdater(target_dir, version)
if check_only:
needs_update = updater.needs_update()
logger.info(f"Version {version}: {'needs' if needs_update else 'does not need'} updating.")
return True, needs_update
else:
updated = updater.update(force=force)
if updated:
logger.info(f"Version {version}: Updated successfully")
else:
logger.info(f"Version {version}: Already up to date")
return True, updated
except Exception as e:
logger.error(f"Version {version}: Update failed - {str(e)}")
return False, False
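# Illustrative call (not executed): check a single version without modifying the docs tree:
#
#     ok, needs = update_version(Path("./docs"), "11.x", force=False, check_only=True)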
def handle_update_command(args, updater):
"""Handle the unified --update command - updates everything."""
# Just update all documentation
results = updater.update_all(force_core=args.force, force_external=args.force, force_packages=args.force)
core_success = results["core"]
external_results = results["external"]
package_results = results.get("packages", {})
external_success_count = sum(1 for success in external_results.values() if success)
external_total = len(external_results)
package_success_count = sum(1 for success in package_results.values() if success)
package_total = len(package_results)
logger.info(f"Complete documentation update: Core {'successful' if core_success else 'failed'}, External {external_success_count}/{external_total}, Packages {package_success_count}/{package_total}")
# Overall success: core succeeded, and at least one external service or community package succeeded (a group with nothing to update counts as success)
overall_success = core_success and ((external_success_count > 0 or external_total == 0) or (package_success_count > 0 or package_total == 0))
return 0 if overall_success else 1
def main():
"""Main entry point for the Laravel Docs Updater."""
args = parse_arguments()
# Set logging level
logger.setLevel(getattr(logging, args.log_level))
# Create target directory if it doesn't exist
target_dir = Path(args.target_dir).resolve()
target_dir.mkdir(parents=True, exist_ok=True)
# Initialize multi-source updater
updater = MultiSourceDocsUpdater(target_dir, args.version)
try:
# Handle list services command
if args.list_services:
services = updater.external_fetcher.list_available_services()
print("Available Laravel Services:")
for service in services:
info = updater.external_fetcher.get_service_info(service) or {} # Guard against a service with no registered info
print(f" {service}: {info.get('name', service)}")
return 0
# Handle list packages command
if args.list_packages:
packages = updater.package_fetcher.list_available_packages()
print("Available Community Packages:")
for package in packages:
info = updater.package_fetcher.community_packages.get(package, {})
print(f" {package}: {info.get('name', package)}")
# Show sub-packages for Spatie
if package == "spatie" and "packages" in info:
for sub_pkg, sub_info in info["packages"].items():
print(f" - {sub_pkg}: {sub_info.get('name', sub_pkg)}")
return 0
# Handle status command
if args.status:
status = updater.get_all_documentation_status()
print("Documentation Status:")
print(f"\nCore Laravel Documentation ({args.version}):")
core_status = status["core"]
if "error" in core_status:
print(f" Error: {core_status['error']}")
else:
print(f" Available: {core_status.get('available', False)}")
print(f" Last Updated: {core_status.get('last_updated', 'unknown')}")
print(f" Commit: {core_status.get('commit_sha', 'unknown')[:7]}")
print("\nExternal Services:")
for service, info in status["external"].items():
if "error" in info:
print(f" {service}: Error - {info['error']}")
else:
print(f" {service} ({info.get('name', service)}):")
print(f" Cache Valid: {info.get('cache_valid', False)}")
print(f" Type: {info.get('type', 'unknown')}")
if info.get('success_rate') != 'unknown':
print(f" Success Rate: {info.get('success_rate', 'unknown'):.1%}")
# Show auto-discovery status if available
metadata_path = updater.external_fetcher.get_cache_metadata_path(service)
if metadata_path.exists():
try:
with open(metadata_path, 'r') as f:
metadata = json.load(f)
if metadata.get('auto_discovery_enabled'):
discovery_method = metadata.get('discovery_method', 'unknown')
discovered_count = metadata.get('discovered_count', 0)
print(f" Auto-Discovery: ✅ {discovery_method} ({discovered_count} sections)")
if metadata.get('manual_fallback'):
print(" Fallback: Used manual configuration (auto-discovery failed)")
else:
print(" Auto-Discovery: ❌ disabled (using manual configuration)")
except Exception:
pass
print("\nCommunity Packages:")
for package, info in status["packages"].items():
if "error" in info:
print(f" {package}: Error - {info['error']}")
else:
print(f" {package} ({info.get('name', package)}):")
print(f" Cache Valid: {info.get('cache_valid', False)}")
print(f" Sections: {info.get('sections_count', 0)}")
# Show sub-packages for Spatie
if package == "spatie" and "sub_packages" in info:
print(" Sub-packages:")
for sub_pkg, sub_info in info["sub_packages"].items():
print(f" - {sub_info.get('name', sub_pkg)}: {sub_info.get('sections_count', 0)} sections")
return 0
# Validate version if not updating all
if not args.all_versions and args.version not in SUPPORTED_VERSIONS:
logger.error(f"Unsupported version: {args.version}. Supported versions: {', '.join(SUPPORTED_VERSIONS)}")
return 1
# Handle check-only command
if args.check_only:
needs_update = updater.needs_update()
print("Update Status:")
print(f"Core Laravel ({args.version}): {'needs update' if needs_update['core'] else 'up to date'}")
print("External Services:")
for service, needs in needs_update["external"].items():
print(f" {service}: {'needs update' if needs else 'up to date'}")
print("Community Packages:")
for package, needs in needs_update["packages"].items():
print(f" {package}: {'needs update' if needs else 'up to date'}")
# Return 1 if any updates needed, 0 if all up to date
any_needs_update = needs_update["core"] or any(needs_update["external"].values()) or any(needs_update["packages"].values())
return 1 if any_needs_update else 0
# Handle new unified --update parameter
if args.update:
return handle_update_command(args, updater)
# Handle deprecated parameters - all now just update everything
if args.external_only or args.core_only or args.packages_only or args.packages or args.services:
logger.warning("Deprecated parameter used. Please use --update instead.")
return handle_update_command(args, updater)
# Handle --all-versions: update core documentation for every supported version
if args.all_versions:
# Update all supported versions (core only)
all_success = True
for version in SUPPORTED_VERSIONS:
logger.info(f"Processing version {version}...")
version_updater = MultiSourceDocsUpdater(target_dir, version)
success = version_updater.update_core_docs(force=args.force)
if not success:
all_success = False
return 0 if all_success else 1
else:
# Default: update all (core, external services, and packages)
results = updater.update_all(force_core=args.force, force_external=args.force, force_packages=args.force)
core_success = results["core"]
external_results = results["external"]
package_results = results.get("packages", {})
external_success_count = sum(1 for success in external_results.values() if success)
external_total = len(external_results)
package_success_count = sum(1 for success in package_results.values() if success)
package_total = len(package_results)
logger.info(f"Complete documentation update: Core {'successful' if core_success else 'failed'}, External {external_success_count}/{external_total}, Packages {package_success_count}/{package_total}")
# Overall success: core succeeded, and at least one external service or community package succeeded (a group with nothing to update counts as success)
overall_success = core_success and ((external_success_count > 0 or external_total == 0) or (package_success_count > 0 or package_total == 0))
return 0 if overall_success else 1
except KeyboardInterrupt:
logger.info("Operation cancelled by user")
return 130
if __name__ == "__main__":
sys.exit(main())