We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/kzmshx/frontmatter-mcp'
If you have feedback or need assistance with the MCP directory API, please join our Discord server.
"""Tests for semantic indexer module."""
import os
import time
from pathlib import Path
from typing import Generator
from unittest.mock import MagicMock

import numpy as np
import pytest

from frontmatter_mcp.semantic import EmbeddingCache, EmbeddingIndexer, IndexerState
class TestEmbeddingIndexer:
    """Tests for EmbeddingIndexer class."""

    # Upper bound, in seconds, on how long a test waits for background indexing.
    WAIT_TIMEOUT = 5.0

    @pytest.fixture
    def cache_dir(self, tmp_path: Path) -> Path:
        """Return the cache directory path under pytest's ``tmp_path``.

        The directory itself is not created here.
        """
        return tmp_path / ".frontmatter-mcp"

    @pytest.fixture
    def base_dir(self, tmp_path: Path) -> Path:
        """Create and return an empty ``vault`` directory for test files."""
        base = tmp_path / "vault"
        base.mkdir()
        return base

    @pytest.fixture
    def mock_model(self) -> MagicMock:
        """Create a mock embedding model.

        ``encode`` returns one fixed random float32 vector of length 256 and
        ``get_dimension`` reports the matching size.
        """
        model = MagicMock()
        model.name = "test-model"
        model.encode.return_value = np.random.rand(256).astype(np.float32)
        model.get_dimension.return_value = 256
        return model

    @pytest.fixture
    def cache(
        self, cache_dir: Path, mock_model: MagicMock
    ) -> Generator[EmbeddingCache, None, None]:
        """Yield a cache instance and close it at teardown."""
        cache = EmbeddingCache(cache_dir, model=mock_model)
        yield cache
        cache.close()

    def _create_md_file(self, base_dir: Path, name: str, content: str) -> Path:
        """Create a markdown file with frontmatter.

        Parent directories are created as needed, so ``name`` may contain
        subdirectories (e.g. ``"sub/nested.md"``).
        """
        file_path = base_dir / name
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(f"---\ntitle: {name}\n---\n{content}")
        return file_path

    def _wait_for_indexing(self, indexer: EmbeddingIndexer) -> None:
        """Block until indexing finishes, failing the test on timeout.

        Asserting the result of ``wait`` turns a timeout into an explicit
        failure instead of a confusing assertion error further down.
        """
        assert indexer.wait(
            timeout=self.WAIT_TIMEOUT
        ), "Indexing did not finish within the timeout"

    def test_initial_state(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """EmbeddingIndexer starts in IDLE state."""
        indexer = EmbeddingIndexer(cache, mock_model, lambda: [], base_dir)

        assert indexer.state == IndexerState.IDLE

    def test_start_indexing(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Start begins background indexing and transitions to READY on completion."""
        self._create_md_file(base_dir, "a.md", "Content A")
        self._create_md_file(base_dir, "b.md", "Content B")
        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        # Before start
        assert indexer.state == IndexerState.IDLE

        result = indexer.start()
        assert result["state"] == "indexing"
        assert result["message"] == "Indexing started"
        assert result["target_count"] == 2

        # Wait for completion
        self._wait_for_indexing(indexer)
        assert indexer.state == IndexerState.READY
        assert cache.count() == 2

    def test_duplicate_start(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Starting while already indexing returns appropriate message."""
        self._create_md_file(base_dir, "a.md", "Content A")

        # Make encode slow so the indexer is still busy at the second start().
        def slow_encode(text) -> np.ndarray:
            time.sleep(0.5)
            return np.random.rand(256).astype(np.float32)

        mock_model.encode.side_effect = slow_encode
        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        result1 = indexer.start()
        try:
            assert result1["state"] == "indexing"
            assert result1["message"] == "Indexing started"

            # Try to start again while indexing
            result2 = indexer.start()
            assert result2["state"] == "indexing"
            assert result2["message"] == "Indexing already in progress"
        finally:
            # Always let the run finish, even when an assertion above fails,
            # so it does not overlap with the cache fixture's teardown.
            indexer.wait(timeout=self.WAIT_TIMEOUT)

    def test_differential_update(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Only stale files are re-indexed."""
        file_a = self._create_md_file(base_dir, "a.md", "Content A")
        file_b = self._create_md_file(base_dir, "b.md", "Content B")
        files = [file_a, file_b]
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        # First indexing
        indexer.start()
        self._wait_for_indexing(indexer)
        assert mock_model.encode.call_count == 2
        mock_model.encode.reset_mock()

        # Modify one file and push its mtime forward explicitly. A short sleep
        # is not enough on filesystems with coarse timestamp resolution, where
        # the rewrite can land on the same mtime tick as the original write.
        indexed_mtime_ns = file_a.stat().st_mtime_ns
        file_a.write_text("---\ntitle: a.md\n---\nUpdated Content A")
        bumped_ns = indexed_mtime_ns + 2_000_000_000
        os.utime(file_a, ns=(bumped_ns, bumped_ns))

        # Second indexing
        indexer.start()
        self._wait_for_indexing(indexer)

        # Only modified file should be re-indexed
        assert mock_model.encode.call_count == 1

    def test_deleted_file_removed_from_cache(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Deleted files are removed from cache."""
        file_a = self._create_md_file(base_dir, "a.md", "Content A")
        file_b = self._create_md_file(base_dir, "b.md", "Content B")
        files = [file_a, file_b]
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        # First indexing
        indexer.start()
        self._wait_for_indexing(indexer)
        assert cache.count() == 2

        # Delete one file
        file_b.unlink()

        # Re-index with only file_a
        indexer2 = EmbeddingIndexer(cache, mock_model, lambda: [file_a], base_dir)
        indexer2.start()
        self._wait_for_indexing(indexer2)

        assert cache.count() == 1
        assert cache.get("b.md") is None

    def test_empty_content_skipped(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Files with empty content are skipped."""
        self._create_md_file(base_dir, "empty.md", "")
        self._create_md_file(base_dir, "has_content.md", "Some content")
        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        self._wait_for_indexing(indexer)

        # Only file with content should be indexed
        assert cache.count() == 1

    def test_wait_returns_true_on_completion(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Wait returns True when indexing completes."""
        self._create_md_file(base_dir, "a.md", "Content")
        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        result = indexer.wait(timeout=self.WAIT_TIMEOUT)

        assert result is True
        assert indexer.state == IndexerState.READY

    def test_wait_returns_true_when_not_started(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Wait returns True when indexing was never started."""
        indexer = EmbeddingIndexer(cache, mock_model, lambda: [], base_dir)

        result = indexer.wait(timeout=1.0)

        assert result is True

    def test_subdirectory_files(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Files in subdirectories are indexed with relative paths."""
        self._create_md_file(base_dir, "root.md", "Root content")
        self._create_md_file(base_dir, "sub/nested.md", "Nested content")
        files = list(base_dir.rglob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        self._wait_for_indexing(indexer)

        assert cache.count() == 2
        assert cache.get("root.md") is not None
        assert cache.get("sub/nested.md") is not None

    def test_cache_connection_closed_after_indexing(
        self, cache: EmbeddingCache, mock_model: MagicMock, base_dir: Path
    ) -> None:
        """Cache connection is closed after indexing completes.

        This prevents DuckDB file lock conflicts when query tries to access
        the cache after indexing is done.
        """
        self._create_md_file(base_dir, "a.md", "Content A")
        files = list(base_dir.glob("*.md"))
        indexer = EmbeddingIndexer(cache, mock_model, lambda: files, base_dir)

        indexer.start()
        self._wait_for_indexing(indexer)
        assert indexer.state == IndexerState.READY

        # Connection should be closed after indexing.
        # NOTE(review): this inspects the private ``_conn`` attribute; a public
        # accessor on EmbeddingCache, if one exists, would be less brittle.
        assert cache._conn is None