Skip to main content
Glama
kzmshx
by kzmshx
test_indexer.py (8.37 kB)
"""Tests for semantic indexer module."""

import os
import time
from pathlib import Path
from typing import Generator
from unittest.mock import MagicMock

import numpy as np
import pytest

from frontmatter_mcp.semantic import EmbeddingCache, EmbeddingIndexer, IndexerState


class TestEmbeddingIndexer:
    """Tests for EmbeddingIndexer class.

    Every test builds an ``EmbeddingIndexer`` from a real ``EmbeddingCache``
    (stored under pytest's ``tmp_path``), a mocked embedding model, a callable
    returning the files to index, and the base directory of those files.
    """

    @pytest.fixture
    def cache_dir(self, tmp_path: Path) -> Path:
        """Return the path of a temporary cache directory (not created here)."""
        return tmp_path / ".frontmatter-mcp"

    @pytest.fixture
    def base_dir(self, tmp_path: Path) -> Path:
        """Create and return an empty temporary base directory for test files."""
        base = tmp_path / "vault"
        base.mkdir()
        return base

    @pytest.fixture
    def mock_model(self) -> MagicMock:
        """Create a mock embedding model.

        The mock reports the name ``"test-model"``, a dimension of 256, and
        returns one fixed random float32 vector of length 256 from ``encode``.
        """
        model = MagicMock()
        model.name = "test-model"
        model.encode.return_value = np.random.rand(256).astype(np.float32)
        model.get_dimension.return_value = 256
        return model

    @pytest.fixture
    def cache(
        self, cache_dir: Path, mock_model: MagicMock
    ) -> Generator[EmbeddingCache, None, None]:
        """Create a cache instance and close it when the test finishes."""
        cache = EmbeddingCache(cache_dir, model=mock_model)
        yield cache
        cache.close()

    def _create_md_file(self, base_dir: Path, name: str, content: str) -> Path:
        """Create a markdown file with frontmatter.

        Args:
            base_dir: Directory the file is created under.
            name: File path relative to ``base_dir``; missing parent
                directories are created. Also used as the frontmatter title.
            content: Body text written after the frontmatter block.

        Returns:
            Path of the file that was written.
        """
        file_path = base_dir / name
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(f"---\ntitle: {name}\n---\n{content}")
        return file_path

    def test_initial_state(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """EmbeddingIndexer starts in IDLE state."""
        indexer = EmbeddingIndexer(cache, mock_model, lambda: [], base_dir)
        assert indexer.state == IndexerState.IDLE

    def test_start_indexing(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Start begins background indexing and transitions to READY on completion."""
        self._create_md_file(base_dir, "a.md", "Content A")
        self._create_md_file(base_dir, "b.md", "Content B")

        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        # Before start
        assert indexer.state == IndexerState.IDLE

        result = indexer.start()
        assert result["state"] == "indexing"
        assert result["message"] == "Indexing started"
        assert result["target_count"] == 2

        # Wait for completion; fail here rather than on a later assertion
        # if the background work does not finish in time.
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"
        assert indexer.state == IndexerState.READY
        assert cache.count() == 2

    def test_duplicate_start(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Starting while already indexing returns appropriate message."""
        self._create_md_file(base_dir, "a.md", "Content A")

        # Make encode slow to keep indexing state
        def slow_encode(text):
            time.sleep(0.5)
            return np.random.rand(256).astype(np.float32)

        mock_model.encode.side_effect = slow_encode

        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        result1 = indexer.start()
        assert result1["state"] == "indexing"
        assert result1["message"] == "Indexing started"

        # Try to start again while indexing
        result2 = indexer.start()
        assert result2["state"] == "indexing"
        assert result2["message"] == "Indexing already in progress"

        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"

    def test_differential_update(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Only stale files are re-indexed."""
        file_a = self._create_md_file(base_dir, "a.md", "Content A")
        file_b = self._create_md_file(base_dir, "b.md", "Content B")

        files = [file_a, file_b]
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        # First indexing
        indexer.start()
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"
        assert mock_model.encode.call_count == 2

        mock_model.encode.reset_mock()

        # Modify one file, then push its mtime well past the original value.
        # Sleeping briefly before the write is not reliable on filesystems
        # with coarse timestamp resolution, where both writes can end up
        # with the same mtime.
        # NOTE(review): assumes staleness is detected from the file's mtime,
        # as the sleep in the previous version of this test implied.
        file_a.write_text("---\ntitle: a.md\n---\nUpdated Content A")
        st = file_a.stat()
        os.utime(file_a, ns=(st.st_atime_ns, st.st_mtime_ns + 10_000_000_000))

        # Second indexing
        indexer.start()
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"

        # Only modified file should be re-indexed
        assert mock_model.encode.call_count == 1

    def test_deleted_file_removed_from_cache(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Deleted files are removed from cache."""
        file_a = self._create_md_file(base_dir, "a.md", "Content A")
        file_b = self._create_md_file(base_dir, "b.md", "Content B")

        files = [file_a, file_b]
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        # First indexing
        indexer.start()
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"
        assert cache.count() == 2

        # Delete one file
        file_b.unlink()

        # Re-index with only file_a
        indexer2 = EmbeddingIndexer(cache, mock_model, lambda: [file_a], base_dir)
        indexer2.start()
        assert indexer2.wait(timeout=5.0), "Indexing did not finish within timeout"

        assert cache.count() == 1
        assert cache.get("b.md") is None

    def test_empty_content_skipped(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Files with empty content are skipped."""
        self._create_md_file(base_dir, "empty.md", "")
        self._create_md_file(base_dir, "has_content.md", "Some content")

        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"

        # Only file with content should be indexed
        assert cache.count() == 1

    def test_wait_returns_true_on_completion(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Wait returns True when indexing completes."""
        self._create_md_file(base_dir, "a.md", "Content")

        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        result = indexer.wait(timeout=5.0)

        assert result is True
        assert indexer.state == IndexerState.READY

    def test_wait_returns_true_when_not_started(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Wait returns True when indexing was never started."""
        indexer = EmbeddingIndexer(cache, mock_model, lambda: [], base_dir)
        result = indexer.wait(timeout=1.0)
        assert result is True

    def test_subdirectory_files(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Files in subdirectories are indexed with relative paths."""
        self._create_md_file(base_dir, "root.md", "Root content")
        self._create_md_file(base_dir, "sub/nested.md", "Nested content")

        files = list(base_dir.rglob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"

        assert cache.count() == 2
        assert cache.get("root.md") is not None
        assert cache.get("sub/nested.md") is not None

    def test_cache_connection_closed_after_indexing(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Cache connection is closed after indexing completes.

        This prevents DuckDB file lock conflicts when query tries to access
        the cache after indexing is done.
        """
        self._create_md_file(base_dir, "a.md", "Content A")

        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        assert indexer.wait(timeout=5.0), "Indexing did not finish within timeout"

        assert indexer.state == IndexerState.READY
        # Connection should be closed after indexing
        assert cache._conn is None

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kzmshx/frontmatter-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.