test_ltm_index.py • 24.5 kB
"""Tests for LTM index parsing and search.""" from __future__ import annotations import sys import time from pathlib import Path import pytest from mnemex.storage.ltm_index import LTMDocument, LTMIndex def write_md(p: Path, text: str) -> None: p.parent.mkdir(parents=True, exist_ok=True) p.write_text(text, encoding="utf-8") def test_ltm_index_parses_frontmatter_wikilinks_and_tags(tmp_path: Path) -> None: vault = tmp_path / "vault" write_md( vault / "Note A.md", """--- title: Note A tags: [project, alpha] --- This links to [[Note B]] and mentions #alpha. """, ) write_md( vault / "Note B.md", """--- title: Note B tags: - docs --- Backlink to [[Note A]] and #docs. """, ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) # Stats stats = index.get_stats() assert stats["total_documents"] == 2 # Documents loaded doc_a = index.get_document(str((vault / "Note A.md").relative_to(vault))) assert doc_a is not None assert doc_a.title == "Note A" assert "project" in doc_a.tags and "alpha" in doc_a.tags assert "Note B" in doc_a.wikilinks # Hashtags extraction # Tags already include hashtags merged from content assert "alpha" in doc_a.tags # Search by query results = index.search(query="backlink", tags=None, limit=10) assert any(r.title == "Note B" for r in results) # Backlinks backlinks = index.get_backlinks("Note B") assert any(d.title == "Note A" for d in backlinks) # Forward links forward = index.get_forward_links(str((vault / "Note A.md").relative_to(vault))) assert any(d.title == "Note B" for d in forward) def test_ltm_document_to_dict_and_from_dict() -> None: """Test LTMDocument serialization and deserialization.""" doc = LTMDocument( path="test/note.md", title="Test Note", content="Test content", frontmatter={"author": "Test Author", "date": "2024-01-01"}, wikilinks=["Link1", "Link2"], tags=["tag1", "tag2"], mtime=1234567890.0, size=1024, ) # Test to_dict doc_dict = doc.to_dict() assert doc_dict["path"] == "test/note.md" assert doc_dict["title"] == "Test Note" assert doc_dict["content"] == "Test content" assert doc_dict["frontmatter"] == {"author": "Test Author", "date": "2024-01-01"} assert doc_dict["wikilinks"] == ["Link1", "Link2"] assert doc_dict["tags"] == ["tag1", "tag2"] assert doc_dict["mtime"] == 1234567890.0 assert doc_dict["size"] == 1024 # Test from_dict restored_doc = LTMDocument.from_dict(doc_dict) assert restored_doc.path == doc.path assert restored_doc.title == doc.title assert restored_doc.content == doc.content assert restored_doc.frontmatter == doc.frontmatter assert restored_doc.wikilinks == doc.wikilinks assert restored_doc.tags == doc.tags assert restored_doc.mtime == doc.mtime assert restored_doc.size == doc.size def test_ltm_document_from_dict_with_missing_optional_fields() -> None: """Test LTMDocument.from_dict with missing optional fields.""" minimal_dict = { "path": "test.md", "title": "Test", "content": "Content", "mtime": 1234567890.0, "size": 100, } doc = LTMDocument.from_dict(minimal_dict) assert doc.path == "test.md" assert doc.title == "Test" assert doc.content == "Content" assert doc.frontmatter == {} assert doc.wikilinks == [] assert doc.tags == [] assert doc.mtime == 1234567890.0 assert doc.size == 100 def test_ltm_index_with_explicit_index_path(tmp_path: Path) -> None: """Test LTMIndex initialization with explicit index path.""" vault = tmp_path / "vault" vault.mkdir() custom_index = tmp_path / "custom-index.jsonl" index = LTMIndex(vault_path=vault, index_path=custom_index) assert index.index_path == custom_index def 
test_ltm_index_with_default_path(tmp_path: Path) -> None: """Test LTMIndex initialization with default path.""" vault = tmp_path / "vault" vault.mkdir() index = LTMIndex(vault_path=vault) assert index.index_path == vault / ".mnemex-index.jsonl" def test_ltm_index_legacy_path_fallback(tmp_path: Path) -> None: """Test LTMIndex falls back to legacy .stm-index.jsonl when it exists.""" vault = tmp_path / "vault" vault.mkdir() # Create legacy index file legacy_index = vault / ".stm-index.jsonl" legacy_index.write_text('{"_stats": {"total_documents": 0}}\n') # New path doesn't exist, so should use legacy index = LTMIndex(vault_path=vault) assert index.index_path == legacy_index def test_ltm_index_prefers_new_path_over_legacy(tmp_path: Path) -> None: """Test LTMIndex prefers new path even when legacy exists.""" vault = tmp_path / "vault" vault.mkdir() # Create both index files new_index = vault / ".mnemex-index.jsonl" legacy_index = vault / ".stm-index.jsonl" new_index.write_text('{"_stats": {"total_documents": 0}}\n') legacy_index.write_text('{"_stats": {"total_documents": 0}}\n') # Should prefer new path index = LTMIndex(vault_path=vault) assert index.index_path == new_index def test_build_index_with_files_without_frontmatter(tmp_path: Path) -> None: """Test building index with markdown files without frontmatter.""" vault = tmp_path / "vault" write_md( vault / "simple.md", "Just some content without frontmatter.\n\nLinks to [[OtherNote]] and #simple", ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) doc = index.get_document("simple.md") assert doc is not None assert doc.title == "simple" # Should use filename as title assert "OtherNote" in doc.wikilinks assert "simple" in doc.tags def test_build_index_with_string_tag_in_frontmatter(tmp_path: Path) -> None: """Test parsing frontmatter with single string tag instead of list.""" vault = tmp_path / "vault" write_md( vault / "note.md", """--- title: Note with String Tag tags: single-tag --- Content here. 
""", ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) doc = index.get_document("note.md") assert doc is not None assert "single-tag" in doc.tags def test_build_index_force_rebuild(tmp_path: Path) -> None: """Test force rebuild ignores existing index.""" vault = tmp_path / "vault" write_md(vault / "note1.md", "First note") # Build initial index index = LTMIndex(vault_path=vault) index.build_index(verbose=False) assert index.get_stats()["total_documents"] == 1 # Add another note write_md(vault / "note2.md", "Second note") # Build with force=True should rebuild everything index2 = LTMIndex(vault_path=vault) index2.build_index(force=True, verbose=False) assert index2.get_stats()["total_documents"] == 2 def test_build_index_incremental_skips_unchanged(tmp_path: Path) -> None: """Test incremental build skips unchanged files.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Content") # Build initial index index = LTMIndex(vault_path=vault) index.build_index(verbose=False) # Get the document's mtime doc = index.get_document("note.md") original_mtime = doc.mtime # Build again without force (incremental) # File hasn't changed, should be skipped index2 = LTMIndex(vault_path=vault) index2.build_index(force=False, verbose=False) doc2 = index2.get_document("note.md") assert doc2.mtime == original_mtime def test_build_index_detects_deleted_files(tmp_path: Path) -> None: """Test index removes entries for deleted files.""" vault = tmp_path / "vault" write_md(vault / "note1.md", "Note 1") write_md(vault / "note2.md", "Note 2") # Build index with both files index = LTMIndex(vault_path=vault) index.build_index(verbose=False) assert index.get_stats()["total_documents"] == 2 # Delete one file (vault / "note2.md").unlink() # Rebuild - should detect deletion index2 = LTMIndex(vault_path=vault) index2.build_index(force=False, verbose=False) assert index2.get_stats()["total_documents"] == 1 assert index2.get_document("note1.md") is not None assert index2.get_document("note2.md") is None def test_build_index_verbose_output(tmp_path: Path, capsys) -> None: """Test verbose output during index building.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Content") index = LTMIndex(vault_path=vault) index.build_index(verbose=True) captured = capsys.readouterr() assert "Found 1 markdown files in vault" in captured.out assert "Index built:" in captured.out assert "Updated:" in captured.out assert "Total:" in captured.out def test_build_index_vault_not_found(tmp_path: Path) -> None: """Test build_index raises error when vault doesn't exist.""" vault = tmp_path / "nonexistent" index = LTMIndex(vault_path=vault) with pytest.raises(FileNotFoundError, match="Vault path not found"): index.build_index() def test_build_index_handles_parse_errors(tmp_path: Path, capsys) -> None: """Test index handles files that fail to parse.""" vault = tmp_path / "vault" vault.mkdir() # Create a file that will cause parse issues (e.g., permission denied simulation) # We'll use a different approach - create valid markdown but test the error path # by creating a file with invalid encoding issues bad_file = vault / "bad.md" bad_file.write_bytes(b"\x80\x81\x82") # Invalid UTF-8 index = LTMIndex(vault_path=vault) index.build_index(verbose=False) # Should print warning but not crash captured = capsys.readouterr() assert "Warning: Failed to parse" in captured.out def test_save_and_load_index(tmp_path: Path) -> None: """Test saving and loading index to/from JSONL.""" vault = tmp_path / "vault" write_md( vault / 
"note.md", """--- title: Test Note tags: [test] --- Content with [[link]] and #hashtag """, ) # Build and save index = LTMIndex(vault_path=vault) index.build_index(verbose=False) # Load in new instance index2 = LTMIndex(vault_path=vault) index2.load_index() # Verify loaded data assert index2.get_stats()["total_documents"] == 1 doc = index2.get_document("note.md") assert doc is not None assert doc.title == "Test Note" assert "test" in doc.tags assert "link" in doc.wikilinks assert "hashtag" in doc.tags def test_load_index_nonexistent_file(tmp_path: Path) -> None: """Test load_index handles nonexistent file gracefully.""" vault = tmp_path / "vault" vault.mkdir() index = LTMIndex(vault_path=vault) index.load_index() # Should not crash assert len(index._documents) == 0 def test_load_index_with_empty_lines(tmp_path: Path) -> None: """Test load_index handles empty lines in JSONL.""" vault = tmp_path / "vault" vault.mkdir() # Create index file with empty lines index_file = vault / ".mnemex-index.jsonl" index_file.write_text( '{"_stats": {"total_documents": 1, "total_wikilinks": 0, "last_indexed": 0, "index_time_ms": 0}}\n' "\n" # Empty line '{"path": "test.md", "title": "Test", "content": "Content", "frontmatter": {}, "wikilinks": [], "tags": [], "mtime": 123.0, "size": 10}\n' "\n" # Another empty line ) index = LTMIndex(vault_path=vault) index.load_index() assert len(index._documents) == 1 assert index.get_document("test.md") is not None def test_search_with_query_only(tmp_path: Path) -> None: """Test search with query text only.""" vault = tmp_path / "vault" write_md(vault / "python.md", "Python programming language") write_md(vault / "java.md", "Java programming language") write_md(vault / "cooking.md", "How to cook pasta") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) results = index.search(query="programming", limit=10) assert len(results) == 2 titles = [r.title for r in results] assert "python" in titles assert "java" in titles def test_search_with_tags_only(tmp_path: Path) -> None: """Test search with tags only.""" vault = tmp_path / "vault" write_md( vault / "note1.md", """--- tags: [python, tutorial] --- Content """, ) write_md( vault / "note2.md", """--- tags: [java, tutorial] --- Content """, ) write_md( vault / "note3.md", """--- tags: [python, advanced] --- Content """, ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) results = index.search(tags=["python"], limit=10) assert len(results) == 2 def test_search_with_query_and_tags(tmp_path: Path) -> None: """Test search with both query and tags.""" vault = tmp_path / "vault" write_md( vault / "note1.md", """--- tags: [python] --- Advanced tutorial """, ) write_md( vault / "note2.md", """--- tags: [python] --- Basic guide """, ) write_md( vault / "note3.md", """--- tags: [java] --- Advanced tutorial """, ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) results = index.search(query="tutorial", tags=["python"], limit=10) assert len(results) == 1 assert results[0].title == "note1" def test_search_empty_results(tmp_path: Path) -> None: """Test search returns empty list when no matches.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Some content") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) results = index.search(query="nonexistent", limit=10) assert len(results) == 0 def test_search_limit_parameter(tmp_path: Path) -> None: """Test search respects limit parameter.""" vault = tmp_path / "vault" for i in range(10): write_md(vault / f"note{i}.md", 
"programming content") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) results = index.search(query="programming", limit=3) assert len(results) == 3 def test_search_prioritizes_title_matches(tmp_path: Path) -> None: """Test search ranks title matches higher than content matches.""" vault = tmp_path / "vault" write_md( vault / "programming.md", "This is about coding", ) write_md( vault / "cooking.md", "This mentions programming in the content but title doesn't match", ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) results = index.search(query="programming", limit=10) # Title match should come first assert results[0].title == "programming" def test_get_document_nonexistent(tmp_path: Path) -> None: """Test get_document returns None for nonexistent path.""" vault = tmp_path / "vault" vault.mkdir() index = LTMIndex(vault_path=vault) doc = index.get_document("nonexistent.md") assert doc is None def test_get_documents_by_tag(tmp_path: Path) -> None: """Test get_documents_by_tag method.""" vault = tmp_path / "vault" write_md( vault / "note1.md", """--- tags: [python, tutorial] --- Content """, ) write_md( vault / "note2.md", """--- tags: [java] --- Content """, ) index = LTMIndex(vault_path=vault) index.build_index(verbose=False) python_docs = index.get_documents_by_tag("python") assert len(python_docs) == 1 assert python_docs[0].title == "note1" def test_get_backlinks_empty(tmp_path: Path) -> None: """Test get_backlinks returns empty list when no backlinks.""" vault = tmp_path / "vault" write_md(vault / "note.md", "No links here") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) backlinks = index.get_backlinks("nonexistent") assert len(backlinks) == 0 def test_get_forward_links_nonexistent_path(tmp_path: Path) -> None: """Test get_forward_links returns empty list for nonexistent path.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Content") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) forward_links = index.get_forward_links("nonexistent.md") assert len(forward_links) == 0 def test_get_forward_links_no_matches(tmp_path: Path) -> None: """Test get_forward_links when wikilinks don't match any documents.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Links to [[NonexistentNote]]") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) forward_links = index.get_forward_links("note.md") assert len(forward_links) == 0 def test_extract_wikilinks_with_aliases(tmp_path: Path) -> None: """Test wikilink extraction handles [[link|alias]] format.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Link with alias: [[ActualNote|Display Name]]") index = LTMIndex(vault_path=vault) doc = index.parse_markdown_file(vault / "note.md") assert doc is not None assert "ActualNote" in doc.wikilinks def test_extract_hashtags_complex(tmp_path: Path) -> None: """Test hashtag extraction with complex tags.""" vault = tmp_path / "vault" write_md( vault / "note.md", "Tags: #python/django #web-dev #2024 #project_alpha #nested/tag/structure", ) index = LTMIndex(vault_path=vault) doc = index.parse_markdown_file(vault / "note.md") assert doc is not None assert "python/django" in doc.tags assert "web-dev" in doc.tags assert "2024" in doc.tags assert "project_alpha" in doc.tags assert "nested/tag/structure" in doc.tags def test_stats_tracking(tmp_path: Path) -> None: """Test index statistics are tracked correctly.""" vault = tmp_path / "vault" write_md(vault / "note1.md", "Links: [[Note2]] [[Note3]]") 
write_md(vault / "note2.md", "Links: [[Note1]]") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) stats = index.get_stats() assert stats["total_documents"] == 2 assert stats["total_wikilinks"] == 3 assert stats["last_indexed"] > 0 assert stats["index_time_ms"] >= 0 def test_cli_main_basic(tmp_path: Path, monkeypatch, capsys) -> None: """Test CLI main function with basic arguments.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Test content") from mnemex.storage.ltm_index import main monkeypatch.setattr(sys, "argv", ["ltm_index", str(vault)]) result = main() assert result == 0 captured = capsys.readouterr() assert "Index built:" in captured.out def test_cli_main_with_force(tmp_path: Path, monkeypatch) -> None: """Test CLI main with --force flag.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Test content") from mnemex.storage.ltm_index import main monkeypatch.setattr(sys, "argv", ["ltm_index", str(vault), "--force"]) result = main() assert result == 0 def test_cli_main_with_custom_index_path(tmp_path: Path, monkeypatch) -> None: """Test CLI main with --index-path argument.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Test content") custom_index = tmp_path / "custom.jsonl" from mnemex.storage.ltm_index import main monkeypatch.setattr(sys, "argv", ["ltm_index", str(vault), "--index-path", str(custom_index)]) result = main() assert result == 0 assert custom_index.exists() def test_cli_main_with_search(tmp_path: Path, monkeypatch, capsys) -> None: """Test CLI main with --search argument.""" vault = tmp_path / "vault" write_md(vault / "python.md", "Python programming") write_md(vault / "java.md", "Java programming") from mnemex.storage.ltm_index import main monkeypatch.setattr(sys, "argv", ["ltm_index", str(vault), "--search", "python"]) result = main() assert result == 0 captured = capsys.readouterr() assert "Search results" in captured.out assert "python" in captured.out def test_cli_main_with_tag(tmp_path: Path, monkeypatch, capsys) -> None: """Test CLI main with --tag argument.""" vault = tmp_path / "vault" write_md( vault / "note.md", """--- tags: [tutorial] --- Content """, ) from mnemex.storage.ltm_index import main monkeypatch.setattr(sys, "argv", ["ltm_index", str(vault), "--tag", "tutorial"]) result = main() assert result == 0 captured = capsys.readouterr() assert "Search results" in captured.out def test_cli_main_with_search_and_tag(tmp_path: Path, monkeypatch, capsys) -> None: """Test CLI main with both --search and --tag.""" vault = tmp_path / "vault" write_md( vault / "note.md", """--- tags: [python] --- Tutorial content """, ) from mnemex.storage.ltm_index import main monkeypatch.setattr( sys, "argv", ["ltm_index", str(vault), "--search", "tutorial", "--tag", "python"] ) result = main() assert result == 0 captured = capsys.readouterr() assert "Search results" in captured.out def test_cli_main_error_handling(tmp_path: Path, monkeypatch, capsys) -> None: """Test CLI main handles errors gracefully.""" vault = tmp_path / "nonexistent_vault" from mnemex.storage.ltm_index import main monkeypatch.setattr(sys, "argv", ["ltm_index", str(vault)]) result = main() assert result == 1 captured = capsys.readouterr() assert "Error:" in captured.err def test_multiple_markdown_files_in_subdirectories(tmp_path: Path) -> None: """Test indexing multiple markdown files in subdirectories.""" vault = tmp_path / "vault" write_md(vault / "root.md", "Root level note") write_md(vault / "folder1/note1.md", "Note in folder 1") write_md(vault / 
"folder1/subfolder/note2.md", "Note in subfolder") write_md(vault / "folder2/note3.md", "Note in folder 2") index = LTMIndex(vault_path=vault) index.build_index(verbose=False) assert index.get_stats()["total_documents"] == 4 assert index.get_document("root.md") is not None assert index.get_document("folder1/note1.md") is not None assert index.get_document("folder1/subfolder/note2.md") is not None assert index.get_document("folder2/note3.md") is not None def test_index_persistence_across_instances(tmp_path: Path) -> None: """Test index persists correctly across different LTMIndex instances.""" vault = tmp_path / "vault" write_md(vault / "note.md", "Content with [[link]] and #tag") # Build index in first instance index1 = LTMIndex(vault_path=vault) index1.build_index(verbose=False) stats1 = index1.get_stats() # Load in second instance index2 = LTMIndex(vault_path=vault) index2.load_index() # Verify data matches assert index2.get_stats()["total_documents"] == stats1["total_documents"] doc = index2.get_document("note.md") assert doc is not None assert "link" in doc.wikilinks assert "tag" in doc.tags def test_verbose_output_for_large_batch(tmp_path: Path, capsys) -> None: """Test verbose output prints progress for large batches.""" vault = tmp_path / "vault" # Create 101 files to trigger batch output (every 100 files) for i in range(101): write_md(vault / f"note{i:03d}.md", f"Content {i}") index = LTMIndex(vault_path=vault) index.build_index(verbose=True) captured = capsys.readouterr() assert "... indexed 100 files" in captured.out def test_incremental_build_with_existing_index(tmp_path: Path, capsys) -> None: """Test incremental build loads and updates existing index.""" vault = tmp_path / "vault" write_md(vault / "note1.md", "First note") # Initial build index1 = LTMIndex(vault_path=vault) index1.build_index(verbose=False) # Add new file time.sleep(0.01) # Ensure different mtime write_md(vault / "note2.md", "Second note") # Incremental build should load existing and add new index2 = LTMIndex(vault_path=vault) index2.build_index(force=False, verbose=True) captured = capsys.readouterr() assert "Loaded existing index" in captured.out stats = index2.get_stats() assert stats["total_documents"] == 2
