# test_front_matter_parser.py (18.8 kB)
"""Tests for YAML front matter parsing in agent definition files.
This test suite validates the hybrid YAML parsing approach used for agent files.
The parser must handle both valid YAML and malformed YAML that Claude Code's
built-in /agents feature sometimes generates.
Real-world observation: Claude Code can generate agent files with YAML front matter
that contains unquoted multi-line values with embedded colons and special characters.
For example:
description: Line 1\nLine 2\nExample: "quoted text"
This is invalid YAML syntax but appears in production agent files. Our parser
gracefully falls back to simple regex-based key-value extraction when standard
YAML parsing fails, ensuring these files still work correctly.
"""
from __future__ import annotations
import pytest
from polyagent_mcp.server import _extract_front_matter
class TestValidYAMLParsing:
    """Test cases for valid YAML front matter that should parse with PyYAML.

    Every fixture in this class is meant to be syntactically valid YAML.
    Block scalar bodies (``|`` and ``>``) are therefore indented beneath
    their key: YAML ends a block scalar at the first line that is not
    indented deeper than the parent mapping, so an unindented body would be
    malformed and would never exercise the block scalar path.
    """

    def test_simple_key_value_pairs(self):
        """Test basic key-value pairs parse correctly."""
        text = """---
name: test-agent
description: A simple test agent
model: sonnet
---
Agent instructions here."""
        metadata, body = _extract_front_matter(text)

        assert metadata["name"] == "test-agent"
        assert metadata["description"] == "A simple test agent"
        assert metadata["model"] == "sonnet"
        assert body.strip() == "Agent instructions here."

    def test_multiline_description_with_pipe(self):
        """Test YAML pipe operator for literal multi-line strings."""
        # The three description lines are indented so they belong to the
        # literal block scalar introduced by "|".
        text = """---
name: test-agent
description: |
  First line
  Second line
  Third line
model: sonnet
---
Body content"""
        metadata, _ = _extract_front_matter(text)

        assert metadata["name"] == "test-agent"
        assert "First line" in metadata["description"]
        assert "Second line" in metadata["description"]
        # A literal block keeps its line breaks.
        assert "\n" in metadata["description"]
        assert metadata["model"] == "sonnet"

    def test_multiline_description_with_greater_than(self):
        """Test YAML > operator for folded multi-line strings."""
        # The description lines are indented so they belong to the folded
        # block scalar introduced by ">".
        text = """---
name: code-reviewer
description: >
  Expert code review subagent. Reviews recent code changes
  for quality, security, and maintainability issues.
  Use after code modifications.
tools: Read, Grep, Glob, Bash
model: sonnet
---
You are a senior engineer."""
        metadata, body = _extract_front_matter(text)

        assert metadata["name"] == "code-reviewer"
        # The > operator folds lines with spaces
        assert "Expert code review subagent" in metadata["description"]
        assert metadata["tools"] == "Read, Grep, Glob, Bash"
        assert metadata["model"] == "sonnet"
        assert body.strip().startswith("You are a senior engineer")

    def test_quoted_values_with_special_characters(self):
        """Test properly quoted values containing colons and other special chars."""
        text = """---
name: test-agent
description: "This is a description: with colons and quotes"
command: "echo 'Hello: World'"
---
Body"""
        metadata, _ = _extract_front_matter(text)

        assert metadata["name"] == "test-agent"
        assert ":" in metadata["description"]
        assert metadata["description"] == "This is a description: with colons and quotes"
        assert metadata["command"] == "echo 'Hello: World'"

    def test_list_values(self):
        """Test YAML lists are properly parsed."""
        text = """---
name: test-agent
tools:
- Read
- Write
- Bash
model: sonnet
---
Body"""
        metadata, _ = _extract_front_matter(text)

        assert metadata["name"] == "test-agent"
        assert isinstance(metadata["tools"], list)
        assert metadata["tools"] == ["Read", "Write", "Bash"]
        assert metadata["model"] == "sonnet"

    def test_no_front_matter(self):
        """Test files without front matter return empty metadata."""
        text = """# Agent Instructions
This file has no front matter."""
        metadata, body = _extract_front_matter(text)

        assert metadata == {}
        # With no front matter the input is returned untouched as the body.
        assert body == text
class TestMalformedYAMLFallback:
    """Test cases for malformed YAML that requires regex fallback.

    The fixtures mirror front matter that Claude Code has been observed to
    generate with invalid YAML syntax. Each test requires that the fields
    are still extracted from such input.
    """

    def test_unquoted_value_with_colon(self):
        """Test an unquoted value containing a colon is extracted whole.

        This is the most common malformed pattern seen in generated agent
        files. YAML wants such a value quoted, e.g.:
            description: "Use this agent: it does things"
        but the generated file contains:
            description: Use this agent: it does things
        """
        document = """---
name: bad-agent
description: This has a colon: which breaks YAML
model: sonnet
---
Body"""
        meta, remainder = _extract_front_matter(document)

        # All three fields must come back, including the one with the colon.
        expected = {
            "name": "bad-agent",
            "description": "This has a colon: which breaks YAML",
            "model": "sonnet",
        }
        for key, value in expected.items():
            assert meta[key] == value
        assert remainder.strip() == "Body"

    def test_escaped_newlines_single_line(self):
        r"""Test literal \n escape sequences on a single line.

        Generated descriptions can look like:
            description: Line 1\nLine 2\nLine 3
        where \n is the two-character sequence backslash-n rather than a
        real newline. The fixture also contains an unquoted colon so that
        YAML parsing fails; the extracted value is then expected to hold
        real newlines.
        """
        # Raw string: the \n sequences must reach the parser as two characters.
        document = r"""---
name: escaped-agent
description: First paragraph\n\nSecond paragraph\nWith details: note
model: sonnet
---
Body"""
        meta, _ = _extract_front_matter(document)
        description = meta["description"]

        assert meta["name"] == "escaped-agent"
        # The literal \n sequences should have become actual newlines.
        assert "\n" in description
        for fragment in ("First paragraph", "Second paragraph", "With details: note"):
            assert fragment in description
        assert meta["model"] == "sonnet"

    def test_long_single_line_with_multiple_issues(self):
        r"""Test complex single-line description with multiple YAML violations.

        Simulates the web-research.md case, a long single-line description
        containing:
        - Unquoted colons
        - Literal \n sequences
        - Example blocks with nested structure
        - Special characters
        """
        document = r"""---
name: web-research
description: Use this agent when you need info\n\nExamples:\n\n<example>\nContext: Flask app\nuser: "How to do X?"\nassistant: "Use this agent"\n</example>\n\nMore: details here
model: sonnet
---
You are an elite researcher."""
        meta, _ = _extract_front_matter(document)
        description = meta["description"]

        assert meta["name"] == "web-research"
        # The whole description must be captured despite the YAML issues.
        for fragment in ("Use this agent when you need info", "Examples:", "<example>"):
            assert fragment in description
        # Escape sequences should be converted to actual newlines.
        assert "\n" in description
        assert "More: details here" in description
        assert meta["model"] == "sonnet"

    def test_multiple_colons_in_value(self):
        """Test a value with several embedded colons is returned intact.

        NOTE(review): none of the colons in the command is followed by a
        space, so strict YAML may accept this line as-is; confirm whether
        the fallback path is actually reached for this fixture.
        """
        document = """---
name: test-agent
command: docker run --env KEY:VALUE --port 8080:8080
description: Runs containers
---
Body"""
        meta, _ = _extract_front_matter(document)
        command = meta["command"]

        assert meta["name"] == "test-agent"
        assert "docker run" in command
        assert "8080:8080" in command
        assert meta["description"] == "Runs containers"

    def test_unquoted_special_yaml_chars(self):
        """Test various YAML special characters in unquoted values."""
        document = """---
name: test-agent
description: Contains {braces} and [brackets] and @symbols
note: Also has # hash and * star
---
Body"""
        meta, _ = _extract_front_matter(document)

        assert meta["name"] == "test-agent"
        for fragment in ("{braces}", "[brackets]", "@symbols"):
            assert fragment in meta["description"]
        # "#" starts a comment in YAML, so the value of "note" may be cut
        # short; only the presence of the key is required here.
        assert "note" in meta
class TestEscapeSequenceHandling:
    r"""Test proper handling of escape sequences in values.

    NOTE: Unescaping only happens when the regex fallback parser is used
    (i.e., when YAML parsing fails). Every fixture therefore also contains
    YAML-breaking syntax (an unquoted colon) to force the fallback, matching
    real-world files where escape sequences and invalid YAML appear together.
    """

    def test_newline_escape_conversion(self):
        r"""Test \n literal sequences convert to actual newlines in fallback parser."""
        # The trailing ": with colon" breaks YAML and triggers the fallback.
        document = r"""---
name: test
description: Line 1\nLine 2\nLine 3: with colon
---
Body"""
        meta, _ = _extract_front_matter(document)
        description = meta["description"]

        # Two converted newlines yield at least three pieces.
        assert len(description.split("\n")) >= 3
        for fragment in ("Line 1", "Line 2", "Line 3: with colon"):
            assert fragment in description

    def test_tab_escape_conversion(self):
        r"""Test \t literal sequences convert to actual tabs in fallback parser."""
        # The trailing ": note" breaks YAML and triggers the fallback.
        document = r"""---
name: test
description: Column1\tColumn2\tColumn3: note
---
Body"""
        meta, _ = _extract_front_matter(document)
        description = meta["description"]

        assert "\t" in description
        assert "Column1" in description
        assert "Column2" in description

    def test_quote_escape_conversion(self):
        r"""Test \" literal sequences convert to actual quotes in fallback parser."""
        # The trailing ": greeting" breaks YAML and triggers the fallback.
        document = r"""---
name: test
description: She said \"Hello\" to me: greeting
---
Body"""
        meta, _ = _extract_front_matter(document)

        assert '"Hello"' in meta["description"]

    def test_multiple_escape_types(self):
        r"""Test multiple escape sequences in the same value with fallback parser."""
        # The trailing ": note" breaks YAML and triggers the fallback.
        document = r"""---
name: test
description: First line\nSecond line with \"quotes\"\nThird line with\ttabs: note
---
Body"""
        meta, _ = _extract_front_matter(document)
        description = meta["description"]

        # One check per escape type: newline, tab, double quote.
        for unescaped in ("\n", "\t", '"quotes"'):
            assert unescaped in description
class TestEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_empty_front_matter(self):
        """Test empty front matter (just the delimiters)."""
        document = """---
---
Body content"""
        meta, remainder = _extract_front_matter(document)

        assert meta == {}
        assert remainder.strip() == "Body content"

    def test_missing_closing_delimiter(self):
        """Test front matter without closing --- delimiter."""
        document = """---
name: test
description: No closing delimiter
Body content"""
        meta, remainder = _extract_front_matter(document)

        # Without a closing delimiter everything is treated as body.
        assert meta == {}
        assert "name: test" in remainder

    def test_field_name_with_hyphens(self):
        """Test field names containing hyphens are supported."""
        document = """---
agent-name: test-agent
output-format: json
---
Body"""
        meta, _ = _extract_front_matter(document)

        # Either the hyphenated key or an underscore variant is accepted.
        assert any(key in meta for key in ("agent-name", "agent_name"))
        assert any(key in meta for key in ("output-format", "output_format"))

    def test_field_name_with_underscores(self):
        """Test field names containing underscores are supported."""
        document = """---
agent_name: test-agent
output_format: json
---
Body"""
        meta, _ = _extract_front_matter(document)

        assert meta.get("agent_name") == "test-agent"
        assert meta.get("output_format") == "json"

    def test_comments_in_front_matter(self):
        """Test YAML comments are handled correctly."""
        document = """---
# This is a comment
name: test-agent # inline comment
description: Test description
# Another comment
model: sonnet
---
Body"""
        meta, _ = _extract_front_matter(document)

        # Full-line and inline comments must not leak into the values.
        expected = {
            "name": "test-agent",
            "description": "Test description",
            "model": "sonnet",
        }
        for key, value in expected.items():
            assert meta.get(key) == value

    def test_whitespace_handling(self):
        """Test various whitespace patterns around colons."""
        document = """---
name:test-agent
description : Test with spaces
model :sonnet
extra : lots-of-spaces
---
Body"""
        meta, _ = _extract_front_matter(document)

        # Substring checks on str(...) tolerate leftover padding or a
        # missing key regardless of which parser handled the input.
        checks = (
            ("name", "test-agent"),
            ("description", "Test with spaces"),
            ("model", "sonnet"),
        )
        for key, fragment in checks:
            assert fragment in str(meta.get(key, ""))

    def test_very_long_description(self):
        r"""Test handling of very long single-line descriptions (>1000 chars).

        Simulates generated output where a description runs to several
        thousand characters on a single line.
        """
        # 2000 "A"s, a literal backslash-n, then 2000 "B"s on one line.
        payload = "".join(("A" * 2000, r"\n", "B" * 2000))
        document = f"""---
name: test-agent
description: {payload}
model: sonnet
---
Body"""
        meta, _ = _extract_front_matter(document)
        description = meta["description"]

        assert "name" in meta
        assert len(description) > 1000
        assert "A" * 100 in description  # Check content is preserved
class TestIntegrationWithAgentParsing:
    """Integration tests showing front matter parsing in context of full agent files."""

    def test_code_reviewer_agent_format(self):
        """Test the format used by code-reviewer.md (valid YAML)."""
        # The description lines are indented so they form the body of the
        # folded (">") block scalar; left unindented, the front matter would
        # not be valid YAML and this test would not cover the valid path.
        text = """---
name: code-reviewer
description: >
  Expert code review subagent. Reviews recent code changes for quality, security, and maintainability issues.
  Use after code modifications, before finalizing changes, to get an in-depth review.
tools: Read, Grep, Glob, Bash
model: sonnet
---
You are a senior engineer acting as a code reviewer. Your task is to scrutinize the provided code changes and point out any issues or improvements."""
        metadata, body = _extract_front_matter(text)

        assert metadata["name"] == "code-reviewer"
        assert "Expert code review" in metadata["description"]
        assert metadata["tools"] == "Read, Grep, Glob, Bash"
        assert metadata["model"] == "sonnet"
        assert "You are a senior engineer" in body

    def test_web_research_agent_format(self):
        r"""Test the format observed in web-research.md (malformed YAML).

        This is a real-world example of Claude Code generating invalid YAML.
        The description is a single very long line with:
        - Literal \n escape sequences
        - Unquoted colons in text
        - XML-like <example> tags
        - Nested quoted strings
        """
        # Raw string: the \n sequences must stay as literal backslash-n.
        text = r"""---
name: web-research
description: Use this agent when you need to gather information, knowledge, or examples from the internet that aren't available in the codebase or your training data. This agent excels at conducting thorough, multi-stage research using various search tools and AI models to find specific information while maintaining context efficiency.\n\nExamples of when to use this agent:\n\n<example>\nContext: User is implementing security features in a Flask application and needs to understand current best practices.\nuser: "I'm adding authentication to our Flask app. What are the current security best practices I should follow?"\nassistant: "I'll use the web-research agent to gather comprehensive security best practices for Flask authentication."\n<Task tool invocation with web-research agent>\nPrompt: "Research current security best practices for implementing authentication in Flask applications. Context: We're building a production Flask application that needs robust authentication. Background: Flask is a Python web framework, and we need to understand both general web security principles and Flask-specific security considerations. Question: What are the essential security measures, recommended libraries/extensions, and common pitfalls to avoid when implementing authentication in Flask? Output specification: Provide a structured summary with: 1) Top 3-5 critical security measures with brief explanations, 2) Recommended Flask security extensions with their specific use cases, 3) Common authentication vulnerabilities in Flask apps with mitigation strategies. Keep the response concise and actionable."\n</Task>\n</example>
model: sonnet
---
You are an elite Web Research Specialist with expertise in conducting thorough, context-efficient internet research."""
        metadata, body = _extract_front_matter(text)

        # Should fall back to regex parser
        assert metadata["name"] == "web-research"
        assert metadata["model"] == "sonnet"

        # Check description was extracted and unescaped properly
        description = metadata["description"]
        assert len(description) > 1000  # Should be the full long description
        assert "\n" in description  # \n should be converted to actual newlines
        assert "Examples of when to use this agent:" in description
        assert "<example>" in description
        assert "Flask application" in description
        assert "Context:" in description  # Contains colons that broke YAML

        # Check body was separated correctly
        assert "You are an elite Web Research Specialist" in body