# test_parser_edge_cases.py • 5.14 kB
"""Tests for markdown parser edge cases."""
from pathlib import Path
from textwrap import dedent
import pytest
from basic_memory.markdown.entity_parser import EntityParser
@pytest.mark.asyncio
async def test_unicode_content(tmp_path):
    """Parse a note containing emoji, Chinese and Cyrillic text.

    The note is written as UTF-8 and the parsed entity is checked for the
    non-Latin tag, the emoji in the body, the Unicode observation
    categories/content, and the Unicode relation targets.
    """
    markdown = dedent("""
        ---
        type: test
        id: test/unicode
        created: 2024-12-21T14:00:00Z
        modified: 2024-12-21T14:00:00Z
        tags: [unicode, 测试]
        ---
        # Unicode Test 🧪
        ## Observations
        - [test] Emoji test 👍 #emoji #test (Testing emoji)
        - [中文] Chinese text 测试 #language (Script test)
        - [русский] Russian привет #language (More scripts)
        - [note] Emoji in text 😀 #meta (Category test)
        ## Relations
        - tested_by [[测试组件]] (Unicode test)
        - depends_on [[компонент]] (Another test)
        """)
    note_path = tmp_path / "unicode.md"
    note_path.write_text(markdown, encoding="utf-8")

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    # Frontmatter: the Chinese tag is present and the literal "chinese" is not.
    assert "测试" in parsed.frontmatter.metadata["tags"]
    assert "chinese" not in parsed.frontmatter.metadata["tags"]

    # Body: the emoji from the heading is present in the parsed content.
    assert "🧪" in parsed.content

    # Observations: content keeps emoji and hashtags; categories keep their script.
    observation_contents = [obs.content for obs in parsed.observations]
    observation_categories = [obs.category for obs in parsed.observations]
    assert "Emoji test 👍 #emoji #test" in observation_contents
    assert "中文" in observation_categories
    assert "русский" in observation_categories

    # Relations: targets written in non-Latin scripts are found as-is.
    relation_targets = [rel.target for rel in parsed.relations]
    assert "测试组件" in relation_targets
    assert "компонент" in relation_targets
@pytest.mark.asyncio
async def test_empty_file(tmp_path):
    """Parsing a zero-length markdown file gives empty observations and relations."""
    blank_path = tmp_path / "empty.md"
    blank_path.write_text("")

    parsed = await EntityParser(tmp_path).parse_file(blank_path)

    # Both collections must compare equal to an empty list.
    for collection in (parsed.observations, parsed.relations):
        assert collection == []
@pytest.mark.asyncio
async def test_missing_sections(tmp_path):
    """Parse a note that has frontmatter and body text but no section headings.

    The body contains one inline ``[[links]]`` reference; the test expects it
    to be reported as the only relation, with type ``"links to"``.
    """
    markdown = dedent("""
        ---
        type: test
        id: test/missing
        created: 2024-01-09
        modified: 2024-01-09
        tags: []
        ---
        Just some content
        with [[links]] but no sections
        """)
    note_path = tmp_path / "missing.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    # Exactly one relation, produced by the inline wiki-style link.
    assert len(parsed.relations) == 1
    inline_link = parsed.relations[0]
    assert inline_link.target == "links"
    assert inline_link.type == "links to"
@pytest.mark.asyncio
async def test_tasks_are_not_observations(tmp_path):
    """Checkbox-style list items must not be parsed as observations.

    The body holds only task-list lines (``[ ]``, ``[x]``, ``[-]``, including
    one without a space after the dash); the parsed entity is expected to
    have zero observations.
    """
    markdown = dedent("""
        ---
        type: test
        id: test/missing
        created: 2024-01-09
        modified: 2024-01-09
        tags: []
        ---
        - [ ] one
        -[ ] two
        - [x] done
        - [-] not done
        """)
    note_path = tmp_path / "missing.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    observation_count = len(parsed.observations)
    assert observation_count == 0
@pytest.mark.asyncio
async def test_nested_content(tmp_path):
    """Parse a note whose observations and relations sit under several heading levels.

    Each of the three heading levels below the title holds one observation
    and one relation; all of them are expected in the parsed entity.
    """
    markdown = dedent("""
        ---
        type: test
        id: test/nested
        created: 2024-01-09
        modified: 2024-01-09
        tags: []
        ---
        # Test
        ## Level 1
        - [test] Level 1 #test (First level)
        - implements [[One]]
        ### Level 2
        - [test] Level 2 #test (Second level)
        - uses [[Two]]
        #### Level 3
        - [test] Level 3 #test (Third level)
        - needs [[Three]]
        """)
    note_path = tmp_path / "nested.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    # Every level contributes one observation and one relation.
    assert len(parsed.observations) == 3
    assert len(parsed.relations) == 3

    found_targets = {relation.target for relation in parsed.relations}
    assert found_targets == {"One", "Two", "Three"}
@pytest.mark.asyncio
async def test_malformed_frontmatter(tmp_path):
    """Parse a note whose frontmatter declares only ``type``.

    With no other frontmatter fields present, the parsed frontmatter's
    ``permalink`` is expected to be ``None``.
    """
    # Frontmatter deliberately omits everything except `type`.
    markdown = dedent("""
        ---
        type: test
        ---
        # Test
        """)
    note_path = tmp_path / "malformed.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    frontmatter = parsed.frontmatter
    assert frontmatter.permalink is None
@pytest.mark.asyncio
async def test_file_not_found(tmp_path):
    """parse_file raises FileNotFoundError for a file that does not exist.

    The parser is rooted at pytest's per-test ``tmp_path`` instead of a
    hard-coded ``/tmp``, matching the other tests in this module and avoiding
    a dependency on a shared, platform-specific directory.
    """
    parser = EntityParser(tmp_path)

    # Same relative path as before; only the parser's base directory changed.
    # NOTE(review): presumably relative paths are resolved against the parser's
    # base directory, which makes the lookup land in the empty tmp_path - confirm.
    with pytest.raises(FileNotFoundError):
        await parser.parse_file(Path("nonexistent.md"))