"""Parser for PrestaShop component documentation files."""
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from .base_parser import BaseParser
from ..config import DOCS_PATH, DOC_TYPES
class ComponentParser(BaseParser):
"""Parser for PrestaShop component reference documentation.

Handles files located under one of these reference directories:
- Form type references (form/types-reference/)
- Grid column references (grid/columns-reference/)
- Grid action references (grid/actions-reference/)
- Grid bulk action references (grid/bulk-actions-reference/)
- Grid filter type references (grid/filter-types-reference/)
"""
# Path fragments that identify a component reference directory. can_parse()
# accepts a file when its path string contains one of these fragments.
COMPONENT_PATHS = [
"components/form/types-reference",
"components/grid/columns-reference",
"components/grid/actions-reference",
"components/grid/bulk-actions-reference",
"components/grid/filter-types-reference",
]
def can_parse(self, file_path: Path, frontmatter: Dict) -> bool:
    """Check whether a file is a component reference page.

    A file qualifies when its path contains one of the fragments listed in
    ``COMPONENT_PATHS`` and it is not a directory index (``_index.md``).

    Args:
        file_path: Path to the file
        frontmatter: Extracted YAML frontmatter (not consulted by this check)

    Returns:
        True if this is a component documentation file
    """
    # Directory index pages are never component pages themselves.
    if file_path.name == "_index.md":
        return False

    # The fragments use forward slashes; as_posix() converts Windows
    # backslash separators so the substring test works on every platform.
    normalized_path = file_path.as_posix()
    return any(fragment in normalized_path for fragment in self.COMPONENT_PATHS)
def parse(self, file_path: Path) -> Optional[Dict]:
    """Turn one component reference file into a structured record.

    Args:
        file_path: Path to the component file

    Returns:
        A dictionary describing the component, or None when the file is
        skipped by ``should_skip_file``, cannot be read, or has no markdown
        content once the frontmatter has been extracted.
    """
    if self.should_skip_file(file_path):
        return None

    try:
        raw_text = file_path.read_text(encoding="utf-8")
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None

    frontmatter, markdown_content = self.extract_frontmatter(raw_text)
    if not markdown_content:
        return None

    # Title preference: explicit title, then menu title, then a title-cased
    # version of the file name.
    title = (
        frontmatter.get("title", "")
        or frontmatter.get("menuTitle", "")
        or file_path.stem.replace("-", " ").title()
    )

    # Component-specific details extracted from the markdown body.
    metadata = {
        "menu_title": frontmatter.get("menuTitle", ""),
        "weight": frontmatter.get("weight"),
        "component_type": self._determine_component_type(file_path),
        "namespace": self._extract_namespace(markdown_content),
        "description": self._extract_description(markdown_content),
        "options": self._extract_options_table(markdown_content),
        "code_examples": self.extract_code_examples(markdown_content),
        "github_ref": self._extract_github_reference(markdown_content),
    }

    category = self.get_category_from_path(file_path, DOCS_PATH)
    subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH)

    # Fall back to the bare file name when the file is not under DOCS_PATH.
    try:
        relative_path = str(file_path.relative_to(DOCS_PATH))
    except ValueError:
        relative_path = file_path.name

    return {
        "name": file_path.stem,
        "title": self.clean_title(title),
        "category": category,
        "subcategory": subcategory,
        "doc_type": DOC_TYPES["component"],
        "path": relative_path,
        "origin": "",
        "location": "",
        "content": markdown_content,
        "version": self.extract_version(frontmatter, markdown_content),
        "metadata": metadata,
    }
def _determine_component_type(self, file_path: Path) -> str:
"""Determine the type of component from the file path.
Args:
file_path: Path to the file
Returns:
Component type string
"""
path_str = str(file_path)
if "form/types-reference" in path_str:
return "form_type"
elif "grid/columns-reference" in path_str:
return "grid_column"
elif "grid/actions-reference" in path_str:
return "grid_action"
elif "grid/bulk-actions-reference" in path_str:
return "grid_bulk_action"
elif "grid/filter-types-reference" in path_str:
return "grid_filter"
else:
return "component"
def _extract_namespace(self, markdown: str) -> str:
"""Extract namespace/class from markdown content.
Args:
markdown: Markdown content
Returns:
Namespace string
"""
# Look for "Namespace:" line
namespace_match = re.search(
r"^[-*\s]*Namespace:\s*(.+)$", markdown, re.MULTILINE
)
if namespace_match:
return namespace_match.group(1).strip()
# Look for "use" statements in code
use_match = re.search(r"use\s+([\w\\]+);", markdown)
if use_match:
return use_match.group(1)
return ""
def _extract_description(self, markdown: str) -> str:
"""Extract description from markdown content.
Args:
markdown: Markdown content
Returns:
Description string
"""
lines = markdown.split("\n")
description_lines = []
# Skip frontmatter and title
in_content = False
for line in lines:
stripped = line.strip()
# Start collecting after first # header
if stripped.startswith("#"):
in_content = True
continue
if in_content:
# Stop at next header, table, or code block
if stripped.startswith("#") or stripped.startswith("|") or stripped.startswith("```"):
break
# Stop at lists or special markers
if stripped.startswith("-") or stripped.startswith("*") or stripped.startswith("##"):
if description_lines: # Only break if we have content
break
# Collect non-empty lines
if stripped and not stripped.startswith("{{"):
description_lines.append(stripped)
description = " ".join(description_lines)
if len(description) > 500:
description = description[:497] + "..."
return description
def _extract_options_table(self, markdown: str) -> List[Dict]:
"""Extract options/properties table from markdown.
Args:
markdown: Markdown content
Returns:
List of option dictionaries
"""
options = []
# Look for options table sections
# Common patterns: "Type options", "Available options", "Properties"
lines = markdown.split("\n")
in_table = False
table_headers = []
for i, line in enumerate(lines):
stripped = line.strip()
# Detect start of options section
if re.search(r"(type\s+options|available\s+options|properties)", stripped, re.IGNORECASE):
in_table = True
continue
if in_table:
# Stop at next header or empty lines after table
if stripped.startswith("#") and not stripped.startswith("#"):
break
# Parse table header
if "|" in stripped and not table_headers:
# Extract column names
cols = [c.strip() for c in stripped.split("|")]
table_headers = [c for c in cols if c and c != ":" and "---" not in c]
continue
# Skip separator line
if "---" in stripped:
continue
# Parse table row
if "|" in stripped and table_headers:
cols = [c.strip() for c in stripped.split("|")]
cols = [c for c in cols if c] # Remove empty
if len(cols) >= 2:
option = {
"name": cols[0] if len(cols) > 0 else "",
"type": cols[1] if len(cols) > 1 else "",
"default": cols[2] if len(cols) > 2 else "",
"description": cols[3] if len(cols) > 3 else cols[2] if len(cols) == 3 else "",
}
# Clean up the option data
option = {k: v.replace("**", "").strip() for k, v in option.items()}
if option["name"]: # Only add if has a name
options.append(option)
# Stop if we hit empty lines after starting table
if not stripped and options:
break
return options
def _extract_github_reference(self, markdown: str) -> str:
"""Extract GitHub reference URL from markdown.
Args:
markdown: Markdown content
Returns:
GitHub URL or empty string
"""
# Look for "Reference:" line with GitHub link
ref_match = re.search(
r"[-*\s]*Reference:\s*\[.+?\]\((https://github\.com/[^)]+)\)",
markdown
)
if ref_match:
return ref_match.group(1)
# Look for any GitHub link in markdown
github_match = re.search(r"(https://github\.com/PrestaShop/PrestaShop/[^\s\)]+)", markdown)
if github_match:
return github_match.group(1)
return ""