Skip to main content
Glama
component_parser.py (10.4 kB)
"""Parser for PrestaShop component documentation files.""" import re from pathlib import Path from typing import Dict, List, Optional, Tuple from .base_parser import BaseParser from ..config import DOCS_PATH, DOC_TYPES class ComponentParser(BaseParser): """Parser for PrestaShop component documentation. Handles: - Form type references (form/types-reference/) - Grid column references (grid/columns-reference/) - Grid action references (grid/actions-reference/) - Grid bulk action references (grid/bulk-actions-reference/) - Grid filter type references (grid/filter-types-reference/) """ # Component directories to detect COMPONENT_PATHS = [ "components/form/types-reference", "components/grid/columns-reference", "components/grid/actions-reference", "components/grid/bulk-actions-reference", "components/grid/filter-types-reference", ] def can_parse(self, file_path: Path, frontmatter: Dict) -> bool: """Check if this is a component reference file. Args: file_path: Path to the file frontmatter: Extracted YAML frontmatter Returns: True if this is a component documentation file """ path_str = str(file_path) # Check if file is in any component directory for comp_path in self.COMPONENT_PATHS: if comp_path in path_str: # Skip _index.md files if file_path.name == "_index.md": return False return True return False def parse(self, file_path: Path) -> Optional[Dict]: """Parse a PrestaShop component documentation file. 
Args: file_path: Path to the component file Returns: Parsed component data or None """ if self.should_skip_file(file_path): return None try: content = file_path.read_text(encoding="utf-8") except Exception as e: print(f"Error reading {file_path}: {e}") return None # Extract frontmatter frontmatter, markdown_content = self.extract_frontmatter(content) if not markdown_content: return None # Extract title title = frontmatter.get("title", "") if not title: title = frontmatter.get("menuTitle", "") if not title: title = file_path.stem.replace("-", " ").title() menu_title = frontmatter.get("menuTitle", "") weight = frontmatter.get("weight") # Determine component type component_type = self._determine_component_type(file_path) # Extract namespace/class information namespace = self._extract_namespace(markdown_content) # Extract description description = self._extract_description(markdown_content) # Extract options/properties table options = self._extract_options_table(markdown_content) # Extract code examples code_examples = self.extract_code_examples(markdown_content) # Extract GitHub reference link github_ref = self._extract_github_reference(markdown_content) # Get category and subcategory category = self.get_category_from_path(file_path, DOCS_PATH) subcategory = self.get_subcategory_from_path(file_path, DOCS_PATH) # Build relative path try: relative_path = str(file_path.relative_to(DOCS_PATH)) except ValueError: relative_path = file_path.name # Clean title clean_title = self.clean_title(title) return { "name": file_path.stem, "title": clean_title, "category": category, "subcategory": subcategory, "doc_type": DOC_TYPES["component"], "path": relative_path, "origin": "", "location": "", "content": markdown_content, "version": self.extract_version(frontmatter, markdown_content), "metadata": { "menu_title": menu_title, "weight": weight, "component_type": component_type, "namespace": namespace, "description": description, "options": options, "code_examples": code_examples, 
"github_ref": github_ref, }, } def _determine_component_type(self, file_path: Path) -> str: """Determine the type of component from the file path. Args: file_path: Path to the file Returns: Component type string """ path_str = str(file_path) if "form/types-reference" in path_str: return "form_type" elif "grid/columns-reference" in path_str: return "grid_column" elif "grid/actions-reference" in path_str: return "grid_action" elif "grid/bulk-actions-reference" in path_str: return "grid_bulk_action" elif "grid/filter-types-reference" in path_str: return "grid_filter" else: return "component" def _extract_namespace(self, markdown: str) -> str: """Extract namespace/class from markdown content. Args: markdown: Markdown content Returns: Namespace string """ # Look for "Namespace:" line namespace_match = re.search( r"^[-*\s]*Namespace:\s*(.+)$", markdown, re.MULTILINE ) if namespace_match: return namespace_match.group(1).strip() # Look for "use" statements in code use_match = re.search(r"use\s+([\w\\]+);", markdown) if use_match: return use_match.group(1) return "" def _extract_description(self, markdown: str) -> str: """Extract description from markdown content. 
Args: markdown: Markdown content Returns: Description string """ lines = markdown.split("\n") description_lines = [] # Skip frontmatter and title in_content = False for line in lines: stripped = line.strip() # Start collecting after first # header if stripped.startswith("#"): in_content = True continue if in_content: # Stop at next header, table, or code block if stripped.startswith("#") or stripped.startswith("|") or stripped.startswith("```"): break # Stop at lists or special markers if stripped.startswith("-") or stripped.startswith("*") or stripped.startswith("##"): if description_lines: # Only break if we have content break # Collect non-empty lines if stripped and not stripped.startswith("{{"): description_lines.append(stripped) description = " ".join(description_lines) if len(description) > 500: description = description[:497] + "..." return description def _extract_options_table(self, markdown: str) -> List[Dict]: """Extract options/properties table from markdown. Args: markdown: Markdown content Returns: List of option dictionaries """ options = [] # Look for options table sections # Common patterns: "Type options", "Available options", "Properties" lines = markdown.split("\n") in_table = False table_headers = [] for i, line in enumerate(lines): stripped = line.strip() # Detect start of options section if re.search(r"(type\s+options|available\s+options|properties)", stripped, re.IGNORECASE): in_table = True continue if in_table: # Stop at next header or empty lines after table if stripped.startswith("#") and not stripped.startswith("#"): break # Parse table header if "|" in stripped and not table_headers: # Extract column names cols = [c.strip() for c in stripped.split("|")] table_headers = [c for c in cols if c and c != ":" and "---" not in c] continue # Skip separator line if "---" in stripped: continue # Parse table row if "|" in stripped and table_headers: cols = [c.strip() for c in stripped.split("|")] cols = [c for c in cols if c] # Remove empty if 
len(cols) >= 2: option = { "name": cols[0] if len(cols) > 0 else "", "type": cols[1] if len(cols) > 1 else "", "default": cols[2] if len(cols) > 2 else "", "description": cols[3] if len(cols) > 3 else cols[2] if len(cols) == 3 else "", } # Clean up the option data option = {k: v.replace("**", "").strip() for k, v in option.items()} if option["name"]: # Only add if has a name options.append(option) # Stop if we hit empty lines after starting table if not stripped and options: break return options def _extract_github_reference(self, markdown: str) -> str: """Extract GitHub reference URL from markdown. Args: markdown: Markdown content Returns: GitHub URL or empty string """ # Look for "Reference:" line with GitHub link ref_match = re.search( r"[-*\s]*Reference:\s*\[.+?\]\((https://github\.com/[^)]+)\)", markdown ) if ref_match: return ref_match.group(1) # Look for any GitHub link in markdown github_match = re.search(r"(https://github\.com/PrestaShop/PrestaShop/[^\s\)]+)", markdown) if github_match: return github_match.group(1) return ""

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/florinel-chis/prestashop-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.