Skip to main content
Glama

Poetry MCP Server

frontmatter_parser.py (10.2 kB)
"""Frontmatter parser for markdown files.

Extracts YAML frontmatter from poem markdown files and converts to Poem objects.
This is the core of the v2.0 architecture: data comes from frontmatter, not BASE files.
"""

import re
from collections import Counter
from pathlib import Path
from datetime import datetime
from typing import Optional

import yaml

from ..models.poem import Poem
from ..errors import FrontmatterParseError


# Frontmatter block at the very start of the file: an opening ``---`` line,
# the YAML text (possibly empty), and a closing ``---`` line.  The closing
# delimiter may be the last thing in the file (no trailing newline, no body).
_FRONTMATTER_RE = re.compile(
    r'^---\s*\n(.*?)(?<=\n)---\s*(?:\n(.*))?$',
    re.DOTALL,
)

# A whole markdown heading line.  ``[^\S\n]`` is "whitespace except newline":
# it keeps the match on a single line so a bare ``#`` line cannot swallow the
# line that follows it.
_HEADING_LINE_RE = re.compile(r'^#+(?:[^\S\n]+.*)?$', re.MULTILINE)

# First-level heading with non-empty text on the same line.
_TITLE_RE = re.compile(r'^#[^\S\n]+(.+)$', re.MULTILINE)

# Leading "<number> - " prefix in filenames, e.g. "11 - Toeses".
_NUMERIC_PREFIX_RE = re.compile(r'^\d+\s*-\s*')

# (state, path substrings) checked in order; the first hit wins.
_STATE_MARKERS = (
    ('completed', ('completed',)),
    ('fledgeling', ('fledgeling',)),
    ('still_cooking', ('still cooking', 'still-cooking')),
    ('needs_research', ('needs research', 'needs-research')),
    ('risk', ('risk',)),
)


def parse_poem_file(file_path: Path, vault_root: Path) -> Poem:
    """
    Parse a poem markdown file into a Poem object.

    Missing ``state`` is inferred from the file path and missing ``form`` is
    detected heuristically from the poem body, so neither is an error.

    Args:
        file_path: Absolute path to the markdown file
        vault_root: Absolute path to vault root (for relative paths)

    Returns:
        Poem object with frontmatter metadata and computed fields

    Raises:
        FileNotFoundError: If file doesn't exist
        FrontmatterParseError: If the file cannot be read, its frontmatter is
            malformed, or the Poem object cannot be constructed
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Poem file not found: {file_path}")

    try:
        content = file_path.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        raise FrontmatterParseError(f"Failed to read {file_path}: {e}") from e

    # Extract frontmatter and content
    frontmatter, poem_content = extract_frontmatter(content, file_path)

    # Generate ID from filename
    poem_id = generate_poem_id(file_path)

    # Extract title (from first # heading or fallback to filename)
    title = extract_title(poem_content, file_path)

    # Get file timestamps from a single stat() call.  Prefer the real creation
    # time where the platform exposes it; st_ctime is only the last metadata
    # change on most Unix systems.
    file_stat = file_path.stat()
    try:
        created_ts = file_stat.st_birthtime
    except AttributeError:
        created_ts = file_stat.st_ctime
    created_at = datetime.fromtimestamp(created_ts)
    updated_at = datetime.fromtimestamp(file_stat.st_mtime)

    # Compute metrics
    word_count = count_words(poem_content)
    line_count = count_lines(poem_content)
    stanza_count = count_stanzas(poem_content)

    # Build relative file path
    try:
        relative_path = str(file_path.relative_to(vault_root))
    except ValueError:
        # File is not under vault_root
        relative_path = str(file_path)

    # Required fields: fall back to inference when absent or empty
    state = frontmatter.get('state') or infer_state_from_path(file_path)
    form = frontmatter.get('form') or detect_form(poem_content)

    # Optional fields.  ``tags:`` with no value parses to None, so normalise
    # every falsy value to an empty list.
    tags = frontmatter.get('tags') or []
    if isinstance(tags, str):
        # Handle legacy comma-separated tags
        tags = [t.strip() for t in tags.split(',') if t.strip()]

    keywords = frontmatter.get('keywords')
    notes = frontmatter.get('notes')

    # Build Poem object
    try:
        poem = Poem(
            id=poem_id,
            title=title,
            file_path=relative_path,
            state=state,
            form=form,
            tags=tags,
            keywords=keywords,
            notes=notes,
            word_count=word_count,
            line_count=line_count,
            stanza_count=stanza_count,
            created_at=created_at,
            updated_at=updated_at,
            content=poem_content,  # Include full content
        )
    except Exception as e:
        raise FrontmatterParseError(
            f"Failed to create Poem object for {file_path}: {e}"
        ) from e

    return poem


def extract_frontmatter(content: str, file_path: Path) -> tuple[dict, str]:
    """
    Extract YAML frontmatter from markdown content.

    An empty frontmatter block and a closing ``---`` at the very end of the
    file are both accepted.

    Args:
        content: Full markdown file content
        file_path: Path to file (for error messages)

    Returns:
        Tuple of (frontmatter_dict, content_without_frontmatter). When no
        frontmatter block is found, returns an empty dict and the full content.

    Raises:
        FrontmatterParseError: If frontmatter is malformed
    """
    match = _FRONTMATTER_RE.match(content)
    if match is None:
        # No frontmatter found - return empty dict and full content
        return {}, content

    yaml_content = match.group(1)
    # Group 2 is None when the closing delimiter ends the file.
    body_content = match.group(2) or ''

    # Parse YAML
    try:
        frontmatter = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        raise FrontmatterParseError(
            f"Invalid YAML in {file_path} frontmatter: {e}"
        ) from e

    if frontmatter is None:
        # Empty block (or comments only)
        return {}, body_content

    if not isinstance(frontmatter, dict):
        raise FrontmatterParseError(
            f"Frontmatter in {file_path} is not a valid YAML object"
        )

    return frontmatter, body_content


def generate_poem_id(file_path: Path) -> str:
    """
    Generate poem ID from filename.

    Removes .md extension and normalizes to lowercase-with-dashes.

    Args:
        file_path: Path to poem file

    Returns:
        Normalized poem ID
    """
    # Remove leading numbers and dashes (e.g., "11 - Toeses" -> "Toeses")
    name = _NUMERIC_PREFIX_RE.sub('', file_path.stem)

    # Convert to lowercase and replace spaces with dashes
    poem_id = name.lower().replace(' ', '-')

    # Remove any other special characters except dashes
    return re.sub(r'[^a-z0-9-]', '', poem_id)


def extract_title(content: str, file_path: Path) -> str:
    """
    Extract poem title from first # heading or filename.

    Only a heading with text on the same line as the ``#`` counts; a bare
    ``#`` line does not borrow the following line as its title.

    Args:
        content: Poem content (without frontmatter)
        file_path: Path to file (fallback for title)

    Returns:
        Poem title
    """
    match = _TITLE_RE.search(content)
    if match:
        title = match.group(1).strip()
        # Remove duplicate # symbols (e.g., "# # Toeses" -> "Toeses")
        return re.sub(r'^#+\s*', '', title)

    # Fallback to filename without extension, minus any leading number
    # (e.g., "11 - Toeses" -> "Toeses")
    return _NUMERIC_PREFIX_RE.sub('', file_path.stem)


def infer_state_from_path(file_path: Path) -> str:
    """
    Infer poem state from directory structure.

    The whole lower-cased path string is searched for known state names, in
    a fixed order; the first match wins.

    Args:
        file_path: Path to poem file

    Returns:
        Inferred state value
    """
    path_str = str(file_path).lower()

    for state, markers in _STATE_MARKERS:
        if any(marker in path_str for marker in markers):
            return state

    # Default to fledgeling for unknown directories (including personal workflow dirs)
    return 'fledgeling'


def detect_form(content: str) -> str:
    """
    Detect poem form using heuristics.

    Args:
        content: Poem content (without frontmatter)

    Returns:
        Detected form value: 'american_sentence', 'prose_poem',
        'catalog_poem' or 'free_verse'
    """
    # Keep only non-blank lines
    lines = [line for line in content.strip().split('\n') if line.strip()]

    # Skip title if present
    if lines and lines[0].strip().startswith('#'):
        lines = lines[1:]

    if not lines:
        return 'free_verse'

    # American sentence: single line, ~17 syllables
    if len(lines) == 1 and 15 <= estimate_syllables(lines[0]) <= 19:
        return 'american_sentence'

    # Prose poem: paragraph format (very few line breaks, long lines)
    if len(lines) <= 3:
        avg_line_length = sum(len(line) for line in lines) / len(lines)
        if avg_line_length > 100:
            return 'prose_poem'

    # Catalog poem: anaphora patterns (same word starts 3+ lines)
    if len(lines) >= 3:
        # Every remaining line is non-blank, so split() is never empty.
        starting_words = Counter(line.split()[0].lower() for line in lines)
        _, repeats = starting_words.most_common(1)[0]
        if repeats >= 3:
            return 'catalog_poem'

    # Default to free verse
    return 'free_verse'


def estimate_syllables(text: str) -> int:
    """
    Rough syllable estimation for form detection.

    Not linguistically perfect, but good enough for heuristics.

    Args:
        text: Text to analyze

    Returns:
        Estimated syllable count
    """
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text.lower())

    syllables = 0
    for word in text.split():
        # Count vowel groups as syllables
        count = len(re.findall(r'[aeiouy]+', word))

        # Adjust for silent 'e'
        if word.endswith('e') and count > 1:
            count -= 1

        # Minimum 1 syllable per word
        syllables += max(1, count)

    return syllables


def count_words(content: str) -> int:
    """Count words in poem content, ignoring markdown heading lines."""
    return len(_HEADING_LINE_RE.sub('', content).split())


def count_lines(content: str) -> int:
    """Count non-empty lines in poem content, ignoring markdown heading lines."""
    body = _HEADING_LINE_RE.sub('', content)
    return sum(1 for line in body.split('\n') if line.strip())


def count_stanzas(content: str) -> Optional[int]:
    """
    Count stanzas (blank-line separated groups) in poem content.

    Markdown heading lines are removed before counting.

    Returns None if poem has no stanzas (prose poem or single-line).
    """
    body = _HEADING_LINE_RE.sub('', content)

    # Split on blank lines (2+ newlines)
    stanzas = [s for s in re.split(r'\n\s*\n', body.strip()) if s.strip()]

    # Return None for prose poems or single-line poems
    if len(stanzas) <= 1:
        return None

    return len(stanzas)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/james-livefront/poetry-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.