Skip to main content
Glama

RAG Document Server

by jaimeferj
frontmatter_parser.py3.09 kB
"""YAML frontmatter parser for extracting metadata from markdown files.""" import re from typing import Optional class FrontmatterParser: """Parse YAML frontmatter from markdown documents.""" # Regex to match YAML frontmatter at the start of a file FRONTMATTER_PATTERN = re.compile( r'^---\s*\n(.*?)\n---\s*\n', re.DOTALL | re.MULTILINE ) @staticmethod def parse(markdown_text: str) -> tuple[dict, str]: """ Parse frontmatter from markdown text. Args: markdown_text: The markdown document text Returns: Tuple of (metadata_dict, content_without_frontmatter) """ match = FrontmatterParser.FRONTMATTER_PATTERN.match(markdown_text) if not match: # No frontmatter found return {}, markdown_text frontmatter_text = match.group(1) content = markdown_text[match.end():] # Parse YAML frontmatter into dict metadata = FrontmatterParser._parse_yaml(frontmatter_text) return metadata, content @staticmethod def _parse_yaml(yaml_text: str) -> dict: """ Simple YAML parser for frontmatter. Handles basic key-value pairs. For complex YAML, consider using PyYAML library. Args: yaml_text: YAML frontmatter text Returns: Dictionary of metadata """ metadata = {} # Split into lines and parse key-value pairs for line in yaml_text.split('\n'): line = line.strip() if not line or line.startswith('#'): continue # Match key: value pattern if ':' in line: key, value = line.split(':', 1) key = key.strip() value = value.strip() # Remove quotes if present if value.startswith('"') and value.endswith('"'): value = value[1:-1] elif value.startswith("'") and value.endswith("'"): value = value[1:-1] # Handle boolean values if value.lower() == 'true': value = True elif value.lower() == 'false': value = False # Handle numeric values elif value.isdigit(): value = int(value) metadata[key] = value return metadata @staticmethod def get_title(metadata: dict) -> Optional[str]: """ Extract the title from frontmatter metadata. Tries multiple common title fields in order of preference. Args: metadata: Parsed frontmatter metadata Returns: Title string or None if not found """ # Try different title fields in order of preference title_fields = ['title', 'sidebar_label', 'name', 'heading'] for field in title_fields: if field in metadata and metadata[field]: return str(metadata[field]) return None

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jaimeferj/mcp-rag-docs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server