PrestaShop MCP Server

base_parser.py•7.75 KiB

"""Base parser interface for PrestaShop documentation."""

import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import yaml


class BaseParser(ABC):
    """Base class for all documentation parsers.

    Each parser is responsible for:
    1. Identifying if it can parse a specific file
    2. Extracting structured data from the file
    3. Providing metadata for indexing

    Subclasses must implement ``can_parse`` and ``parse``; the remaining
    methods are shared helpers that do not depend on instance state.
    """

    def __init__(self):
        """Initialize the parser and record the concrete class name."""
        self.name = self.__class__.__name__

    @abstractmethod
    def can_parse(self, file_path: Path, frontmatter: Dict) -> bool:
        """Check if this parser can handle the given file.

        Args:
            file_path: Path to the markdown file
            frontmatter: Extracted YAML frontmatter (empty dict if none)

        Returns:
            True if this parser can handle the file, False otherwise
        """
        pass

    @abstractmethod
    def parse(self, file_path: Path) -> Optional[Dict]:
        """Parse the file and return structured data.

        Args:
            file_path: Path to the markdown file

        Returns:
            Dictionary with parsed data, or None if parsing fails

        Expected structure:
        {
            "name": str,              # Document name/identifier
            "title": str,             # Display title
            "category": str,          # Main category (admin-api, basics, etc.)
            "subcategory": str,       # Subdirectory path
            "doc_type": str,          # Type: hook, reference, tutorial, guide, etc.
            "path": str,              # Relative path from docs root
            "content": str,           # Full markdown content
            "metadata": dict,         # Parser-specific metadata
            "version": str,           # PrestaShop version (if specified)
        }
        """
        pass

    def extract_frontmatter(self, content: str) -> Tuple[Dict, str]:
        """Extract YAML frontmatter and markdown content.

        The frontmatter is the text between a leading ``---`` and the next
        ``---`` in the file.

        Args:
            content: Raw file content

        Returns:
            Tuple of (frontmatter dict, markdown content). When there is no
            frontmatter, the YAML is invalid, or the YAML is not a mapping,
            the result is ``({}, content)`` with the content untouched.
        """
        if not content.startswith("---"):
            return {}, content

        parts = content.split("---", 2)
        if len(parts) < 3:
            # Opening delimiter without a closing one: not frontmatter.
            return {}, content

        try:
            frontmatter = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError as e:
            print(f"YAML parsing error: {e}")
            return {}, content

        # A YAML document may also be a list or a scalar (for instance when
        # the file merely opens with a "---" horizontal rule). Callers expect
        # a mapping, so treat anything else as "no frontmatter".
        if not isinstance(frontmatter, dict):
            return {}, content

        markdown = parts[2].strip()
        return frontmatter, markdown

    def extract_code_examples(self, markdown: str) -> List[str]:
        """Extract fenced code blocks from markdown content.

        Args:
            markdown: Markdown content

        Returns:
            List of code examples with surrounding whitespace stripped;
            empty blocks are omitted.
        """
        # The opening fence may carry any info string ("php", "c++",
        # "shell-session", a trailing "\r", ...). Accepting only \w+ made
        # such fences fail to open, so their closing fence was then taken
        # as an opening one and the prose between blocks was returned.
        pattern = r"```[^\n`]*\n(.*?)```"
        matches = re.findall(pattern, markdown, re.DOTALL)
        return [match.strip() for match in matches if match.strip()]

    def get_category_from_path(self, file_path: Path, docs_root: Path) -> str:
        """Extract category from file path.

        Args:
            file_path: Path to the file
            docs_root: Root documentation directory

        Returns:
            First path component below ``docs_root`` (the top-level folder),
            or "unknown" when ``file_path`` is not under ``docs_root`` or is
            ``docs_root`` itself. For a file placed directly in ``docs_root``
            the first component is the file name.
        """
        try:
            relative_path = file_path.relative_to(docs_root)
            return relative_path.parts[0] if relative_path.parts else "unknown"
        except ValueError:
            # relative_to() raises when file_path is outside docs_root.
            return "unknown"

    def get_subcategory_from_path(self, file_path: Path, docs_root: Path) -> str:
        """Extract subcategory (subdirectory path) from file path.

        Args:
            file_path: Path to the file
            docs_root: Root documentation directory

        Returns:
            Subcategory path (e.g., "installation/advanced"), or an empty
            string when there is no directory between the category and the
            file name or when ``file_path`` is not under ``docs_root``.
        """
        try:
            relative_path = file_path.relative_to(docs_root)
            if len(relative_path.parts) > 2:
                # Join all parts except first (category) and last (filename)
                return "/".join(relative_path.parts[1:-1])
            return ""
        except ValueError:
            return ""

    def extract_version(self, frontmatter: Dict, content: str) -> Optional[str]:
        """Extract PrestaShop version from frontmatter or content.

        Args:
            frontmatter: YAML frontmatter
            content: Markdown content

        Returns:
            Version string (e.g., "9.0", "8.1") or None
        """
        # Prefer the frontmatter, but ignore an empty "version:" key: YAML
        # loads it as None, which must not be reported as the string "None".
        version = frontmatter.get("version")
        if version is not None:
            version_text = str(version).strip()
            if version_text:
                return version_text

        # Fall back to a mention in the content (e.g., "PrestaShop 9.0")
        version_pattern = r"PrestaShop\s+(\d+\.\d+)"
        match = re.search(version_pattern, content)
        if match:
            return match.group(1)

        return None

    def clean_title(self, title: str) -> str:
        """Clean and normalize title.

        Args:
            title: Raw title. Non-string values (YAML may yield numbers or
                dates) are converted with ``str()``.

        Returns:
            Title without ``*``, ``_`` and backtick characters and with runs
            of whitespace collapsed, or "Untitled" when nothing remains.
        """
        if not title:
            return "Untitled"

        # Remove markdown formatting
        cleaned = re.sub(r"[*_`]", "", str(title))
        # Remove extra whitespace
        cleaned = " ".join(cleaned.split())
        # A title made only of markup/whitespace would otherwise become "".
        return cleaned or "Untitled"

    def should_skip_file(self, file_path: Path) -> bool:
        """Check if file should be skipped based on patterns.

        Args:
            file_path: Path to check

        Returns:
            True if the file lives inside an excluded directory or does not
            have the ``.md`` suffix
        """
        skipped_dirs = {"img", "images", "_partials", ".github", "node_modules"}

        # Compare directory components rather than substrings of str(path):
        # this also catches relative paths such as "img/x.md" and paths that
        # use the platform's own separator.
        if any(part in skipped_dirs for part in file_path.parts[:-1]):
            return True

        # Skip non-markdown files
        return file_path.suffix != ".md"


class ParserRegistry:
    """Registry to manage multiple parsers and select the appropriate one.

    Parsers are consulted in registration order; the first one whose
    ``can_parse`` returns a truthy value is used.
    """

    def __init__(self):
        """Initialize the parser registry."""
        self.parsers: List[BaseParser] = []

    def register(self, parser: BaseParser):
        """Register a parser.

        Args:
            parser: Parser instance to register
        """
        self.parsers.append(parser)

    @staticmethod
    def _read_frontmatter(content: str) -> Dict:
        """Return the YAML frontmatter mapping of ``content``.

        Args:
            content: Raw file content

        Returns:
            The parsed mapping, or an empty dict when there is no
            ``---``-delimited frontmatter, the YAML is invalid, or the YAML
            document is not a mapping.
        """
        if not content.startswith("---"):
            return {}

        parts = content.split("---", 2)
        if len(parts) < 3:
            # Opening delimiter without a closing one.
            return {}

        try:
            frontmatter = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError:
            return {}

        # safe_load may yield a list or a scalar; parsers are promised a
        # dict, so never hand them anything else.
        return frontmatter if isinstance(frontmatter, dict) else {}

    def get_parser(self, file_path: Path) -> Optional[BaseParser]:
        """Get the appropriate parser for a file.

        Args:
            file_path: Path to the file

        Returns:
            Parser instance or None if the file is missing, cannot be read
            as UTF-8 text, or no parser can handle it
        """
        if not file_path.exists():
            return None

        try:
            content = file_path.read_text(encoding="utf-8")
        except Exception as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"Error reading {file_path}: {e}")
            return None

        # Extract frontmatter for parser selection
        frontmatter = self._read_frontmatter(content)

        # Try each parser in order
        for parser in self.parsers:
            if parser.can_parse(file_path, frontmatter):
                return parser

        return None

    def parse_file(self, file_path: Path) -> Optional[Dict]:
        """Parse a file using the appropriate parser.

        Args:
            file_path: Path to the file

        Returns:
            Parsed data dict or None
        """
        parser = self.get_parser(file_path)
        if parser:
            return parser.parse(file_path)
        return None

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/florinel-chis/prestashop-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

base_parser.py•7.75 KiB

"""Base parser interface for PrestaShop documentation."""

import re
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import yaml


class BaseParser(ABC):
    """Base class for all documentation parsers.

    Each parser is responsible for:
    1. Identifying if it can parse a specific file
    2. Extracting structured data from the file
    3. Providing metadata for indexing

    Subclasses must implement ``can_parse`` and ``parse``; the remaining
    methods are shared helpers that do not depend on instance state.
    """

    def __init__(self):
        """Initialize the parser and record the concrete class name."""
        self.name = self.__class__.__name__

    @abstractmethod
    def can_parse(self, file_path: Path, frontmatter: Dict) -> bool:
        """Check if this parser can handle the given file.

        Args:
            file_path: Path to the markdown file
            frontmatter: Extracted YAML frontmatter (empty dict if none)

        Returns:
            True if this parser can handle the file, False otherwise
        """
        pass

    @abstractmethod
    def parse(self, file_path: Path) -> Optional[Dict]:
        """Parse the file and return structured data.

        Args:
            file_path: Path to the markdown file

        Returns:
            Dictionary with parsed data, or None if parsing fails

        Expected structure:
        {
            "name": str,              # Document name/identifier
            "title": str,             # Display title
            "category": str,          # Main category (admin-api, basics, etc.)
            "subcategory": str,       # Subdirectory path
            "doc_type": str,          # Type: hook, reference, tutorial, guide, etc.
            "path": str,              # Relative path from docs root
            "content": str,           # Full markdown content
            "metadata": dict,         # Parser-specific metadata
            "version": str,           # PrestaShop version (if specified)
        }
        """
        pass

    def extract_frontmatter(self, content: str) -> Tuple[Dict, str]:
        """Extract YAML frontmatter and markdown content.

        The frontmatter is the text between a leading ``---`` and the next
        ``---`` in the file.

        Args:
            content: Raw file content

        Returns:
            Tuple of (frontmatter dict, markdown content). When there is no
            frontmatter, the YAML is invalid, or the YAML is not a mapping,
            the result is ``({}, content)`` with the content untouched.
        """
        if not content.startswith("---"):
            return {}, content

        parts = content.split("---", 2)
        if len(parts) < 3:
            # Opening delimiter without a closing one: not frontmatter.
            return {}, content

        try:
            frontmatter = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError as e:
            print(f"YAML parsing error: {e}")
            return {}, content

        # A YAML document may also be a list or a scalar (for instance when
        # the file merely opens with a "---" horizontal rule). Callers expect
        # a mapping, so treat anything else as "no frontmatter".
        if not isinstance(frontmatter, dict):
            return {}, content

        markdown = parts[2].strip()
        return frontmatter, markdown

    def extract_code_examples(self, markdown: str) -> List[str]:
        """Extract fenced code blocks from markdown content.

        Args:
            markdown: Markdown content

        Returns:
            List of code examples with surrounding whitespace stripped;
            empty blocks are omitted.
        """
        # The opening fence may carry any info string ("php", "c++",
        # "shell-session", a trailing "\r", ...). Accepting only \w+ made
        # such fences fail to open, so their closing fence was then taken
        # as an opening one and the prose between blocks was returned.
        pattern = r"```[^\n`]*\n(.*?)```"
        matches = re.findall(pattern, markdown, re.DOTALL)
        return [match.strip() for match in matches if match.strip()]

    def get_category_from_path(self, file_path: Path, docs_root: Path) -> str:
        """Extract category from file path.

        Args:
            file_path: Path to the file
            docs_root: Root documentation directory

        Returns:
            First path component below ``docs_root`` (the top-level folder),
            or "unknown" when ``file_path`` is not under ``docs_root`` or is
            ``docs_root`` itself. For a file placed directly in ``docs_root``
            the first component is the file name.
        """
        try:
            relative_path = file_path.relative_to(docs_root)
            return relative_path.parts[0] if relative_path.parts else "unknown"
        except ValueError:
            # relative_to() raises when file_path is outside docs_root.
            return "unknown"

    def get_subcategory_from_path(self, file_path: Path, docs_root: Path) -> str:
        """Extract subcategory (subdirectory path) from file path.

        Args:
            file_path: Path to the file
            docs_root: Root documentation directory

        Returns:
            Subcategory path (e.g., "installation/advanced"), or an empty
            string when there is no directory between the category and the
            file name or when ``file_path`` is not under ``docs_root``.
        """
        try:
            relative_path = file_path.relative_to(docs_root)
            if len(relative_path.parts) > 2:
                # Join all parts except first (category) and last (filename)
                return "/".join(relative_path.parts[1:-1])
            return ""
        except ValueError:
            return ""

    def extract_version(self, frontmatter: Dict, content: str) -> Optional[str]:
        """Extract PrestaShop version from frontmatter or content.

        Args:
            frontmatter: YAML frontmatter
            content: Markdown content

        Returns:
            Version string (e.g., "9.0", "8.1") or None
        """
        # Prefer the frontmatter, but ignore an empty "version:" key: YAML
        # loads it as None, which must not be reported as the string "None".
        version = frontmatter.get("version")
        if version is not None:
            version_text = str(version).strip()
            if version_text:
                return version_text

        # Fall back to a mention in the content (e.g., "PrestaShop 9.0")
        version_pattern = r"PrestaShop\s+(\d+\.\d+)"
        match = re.search(version_pattern, content)
        if match:
            return match.group(1)

        return None

    def clean_title(self, title: str) -> str:
        """Clean and normalize title.

        Args:
            title: Raw title. Non-string values (YAML may yield numbers or
                dates) are converted with ``str()``.

        Returns:
            Title without ``*``, ``_`` and backtick characters and with runs
            of whitespace collapsed, or "Untitled" when nothing remains.
        """
        if not title:
            return "Untitled"

        # Remove markdown formatting
        cleaned = re.sub(r"[*_`]", "", str(title))
        # Remove extra whitespace
        cleaned = " ".join(cleaned.split())
        # A title made only of markup/whitespace would otherwise become "".
        return cleaned or "Untitled"

    def should_skip_file(self, file_path: Path) -> bool:
        """Check if file should be skipped based on patterns.

        Args:
            file_path: Path to check

        Returns:
            True if the file lives inside an excluded directory or does not
            have the ``.md`` suffix
        """
        skipped_dirs = {"img", "images", "_partials", ".github", "node_modules"}

        # Compare directory components rather than substrings of str(path):
        # this also catches relative paths such as "img/x.md" and paths that
        # use the platform's own separator.
        if any(part in skipped_dirs for part in file_path.parts[:-1]):
            return True

        # Skip non-markdown files
        return file_path.suffix != ".md"


class ParserRegistry:
    """Registry to manage multiple parsers and select the appropriate one.

    Parsers are consulted in registration order; the first one whose
    ``can_parse`` returns a truthy value is used.
    """

    def __init__(self):
        """Initialize the parser registry."""
        self.parsers: List[BaseParser] = []

    def register(self, parser: BaseParser):
        """Register a parser.

        Args:
            parser: Parser instance to register
        """
        self.parsers.append(parser)

    @staticmethod
    def _read_frontmatter(content: str) -> Dict:
        """Return the YAML frontmatter mapping of ``content``.

        Args:
            content: Raw file content

        Returns:
            The parsed mapping, or an empty dict when there is no
            ``---``-delimited frontmatter, the YAML is invalid, or the YAML
            document is not a mapping.
        """
        if not content.startswith("---"):
            return {}

        parts = content.split("---", 2)
        if len(parts) < 3:
            # Opening delimiter without a closing one.
            return {}

        try:
            frontmatter = yaml.safe_load(parts[1]) or {}
        except yaml.YAMLError:
            return {}

        # safe_load may yield a list or a scalar; parsers are promised a
        # dict, so never hand them anything else.
        return frontmatter if isinstance(frontmatter, dict) else {}

    def get_parser(self, file_path: Path) -> Optional[BaseParser]:
        """Get the appropriate parser for a file.

        Args:
            file_path: Path to the file

        Returns:
            Parser instance or None if the file is missing, cannot be read
            as UTF-8 text, or no parser can handle it
        """
        if not file_path.exists():
            return None

        try:
            content = file_path.read_text(encoding="utf-8")
        except Exception as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"Error reading {file_path}: {e}")
            return None

        # Extract frontmatter for parser selection
        frontmatter = self._read_frontmatter(content)

        # Try each parser in order
        for parser in self.parsers:
            if parser.can_parse(file_path, frontmatter):
                return parser

        return None

    def parse_file(self, file_path: Path) -> Optional[Dict]:
        """Parse a file using the appropriate parser.

        Args:
            file_path: Path to the file

        Returns:
            Parsed data dict or None
        """
        parser = self.get_parser(file_path)
        if parser:
            return parser.parse(file_path)
        return None