Markdown RAG Documentation

Overview Schema Related Servers Score Discussions

ragdocs-mcp
tests

conftest.py•14.9 KiB

""" Shared pytest fixtures for integration and e2e tests. Provides both ephemeral (tmp_path) and persistent fixtures for different testing scenarios: - Ephemeral fixtures (tmp_path): Fast, isolated, used by default in unit tests - Persistent fixtures: Realistic storage, shared across tests in a session/module Use persistent fixtures when: - Testing index persistence/loading behavior - Testing manifest checking across test runs - Simulating realistic production scenarios - Testing index size/performance with larger datasets Use ephemeral fixtures (tmp_path) when: - Testing core logic in isolation - Fast test iteration is priority - Each test needs complete isolation """ # MUST be set before any HuggingFace/sentence-transformers imports to suppress # progress bars that would pollute JSON output in E2E tests. import os os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" os.environ["TQDM_DISABLE"] = "1" from pathlib import Path from typing import Any, Generator import pytest from llama_index.embeddings.huggingface import HuggingFaceEmbedding from src.config import ChunkingConfig, Config, IndexingConfig, LLMConfig, SearchConfig, ServerConfig from src.indexing.manager import IndexManager from src.indices.graph import GraphStore from src.indices.keyword import KeywordIndex from src.indices.vector import VectorIndex @pytest.fixture(autouse=True) def isolate_xdg_data_home(tmp_path_factory, monkeypatch): """Isolate application data while preserving HuggingFace model cache. Creates temp directories for XDG_DATA_HOME and HOME to isolate test data, but preserves HF_HOME to avoid re-downloading large embedding models and hitting rate limits during parallel test execution. 
""" # Preserve original HuggingFace cache location BEFORE modifying HOME # Default to ~/.cache/huggingface if not set original_home = os.environ.get("HOME", "") original_hf_home = os.environ.get("HF_HOME", os.path.join(original_home, ".cache", "huggingface")) # Create isolated temp directories for application data data_home = tmp_path_factory.mktemp("xdg-data-home") home_dir = tmp_path_factory.mktemp("home") monkeypatch.setenv("XDG_DATA_HOME", str(data_home)) monkeypatch.setenv("HOME", str(home_dir)) # Restore HuggingFace cache to original location (shared across workers) monkeypatch.setenv("HF_HOME", original_hf_home) # ============================================================================ # Test Fixture Factories # ============================================================================ def make_test_config(tmp_path: Path, **overrides): docs_path = tmp_path / "docs" docs_path.mkdir(exist_ok=True) index_path = tmp_path / "index" index_path.mkdir(exist_ok=True) defaults: dict[str, Any] = { "server": ServerConfig(host="localhost", port=8080), "indexing": IndexingConfig( documents_path=str(docs_path), index_path=str(index_path), ), "search": SearchConfig(), "chunking": ChunkingConfig(), } defaults.update(overrides) return Config(**defaults) def create_test_document(docs_dir: Path | str, doc_id: str, content: str): doc_path = Path(docs_dir) / f"{doc_id}.md" doc_path.write_text(content) return str(doc_path) # ============================================================================ # Shared Embedding Model Fixture # ============================================================================ @pytest.fixture(scope="session") def shared_embedding_model(): """Session-scoped embedding model shared across all tests. Uses filelock to ensure only one pytest worker downloads the model at a time, preventing race conditions and rate limit issues. Pre-warms the model with a dummy embedding call to avoid first-call overhead (~1-2s) during actual tests. 
""" import filelock # Lock file in the HF cache directory hf_home = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface")) lock_path = os.path.join(hf_home, ".model_download.lock") os.makedirs(os.path.dirname(lock_path), exist_ok=True) with filelock.FileLock(lock_path, timeout=300): # 5 min timeout model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") # Pre-warm: trigger model initialization and cache warmup _ = model.get_text_embedding("warmup") return model @pytest.fixture(scope="module") def module_vector_index(shared_embedding_model): """ Module-scoped VectorIndex with shared embedding model. Use this instead of creating VectorIndex() in function-scoped fixtures to avoid redundant model loading (2-4s overhead per load). Note: Module scope means tests share index state. Only use when tests don't mutate the index or when using tmp_path for document isolation. """ return VectorIndex(embedding_model=shared_embedding_model) @pytest.fixture(scope="module") def module_indices(shared_embedding_model): """ Module-scoped indices for integration tests. Returns (vector, keyword, graph) tuple with shared embedding model. Avoids redundant model loading across tests in the same module. Note: Module scope means tests share index state. Ensure tests either: 1. Use separate tmp_path directories for document isolation, OR 2. Don't mutate index state, OR 3. Explicitly clear indices between tests """ vector = VectorIndex(embedding_model=shared_embedding_model) keyword = KeywordIndex() graph = GraphStore() return vector, keyword, graph # ============================================================================ # Persistent Storage Fixtures # ============================================================================ @pytest.fixture(scope="session") def persistent_storage_root(tmp_path_factory) -> Path: """ Create session-scoped persistent storage directory. 
This directory persists for the entire test session, allowing tests to share data and verify persistence behavior. Returns path to persistent storage root directory. """ return tmp_path_factory.mktemp("persistent_test_storage") @pytest.fixture(scope="session") def persistent_docs_path(persistent_storage_root: Path) -> Path: """ Create session-scoped documents directory. Documents stored here persist across tests in the session. Returns path to persistent documents directory. """ docs_path = persistent_storage_root / "documents" docs_path.mkdir(parents=True, exist_ok=True) return docs_path @pytest.fixture(scope="session") def persistent_index_path(persistent_storage_root: Path) -> Path: """ Create session-scoped index directory. Indices stored here persist across tests in the session. Returns path to persistent index directory. """ index_path = persistent_storage_root / "indices" index_path.mkdir(parents=True, exist_ok=True) return index_path # ============================================================================ # Persistent Configuration Fixtures # ============================================================================ @pytest.fixture(scope="session") def persistent_config( persistent_docs_path: Path, persistent_index_path: Path, ) -> Config: """ Create session-scoped configuration with persistent paths. Uses real persistent storage locations that survive across tests in the session. Returns Config object configured for persistent storage. 
""" return Config( server=ServerConfig(), indexing=IndexingConfig( documents_path=str(persistent_docs_path), index_path=str(persistent_index_path), recursive=True, ), parsers={"**/*.md": "MarkdownParser"}, search=SearchConfig( semantic_weight=1.0, keyword_weight=1.0, recency_bias=0.5, rrf_k_constant=60, ), llm=LLMConfig(embedding_model="BAAI/bge-small-en-v1.5"), ) # ============================================================================ # Module-Scoped Persistent Fixtures # ============================================================================ @pytest.fixture(scope="module") def persistent_indices_module(shared_embedding_model) -> Generator[tuple[VectorIndex, KeywordIndex, GraphStore], None, None]: """ Create module-scoped indices that persist across tests in a module. These indices are shared across all tests in a module for performance. They start fresh but can accumulate data within a module's test suite. Yields tuple of (vector, keyword, graph) indices. """ vector = VectorIndex(embedding_model=shared_embedding_model) keyword = KeywordIndex() graph = GraphStore() yield vector, keyword, graph @pytest.fixture(scope="module") def persistent_manager_module( persistent_config: Config, persistent_indices_module: tuple[VectorIndex, KeywordIndex, GraphStore], ) -> IndexManager: """ Create module-scoped IndexManager with persistent storage. This manager uses persistent paths and shared indices within a module. Useful for testing persistence behavior and manifest checking. Returns IndexManager configured with persistent storage. 
""" vector, keyword, graph = persistent_indices_module return IndexManager(persistent_config, vector, keyword, graph) # ============================================================================ # Function-Scoped Persistent Fixtures with Cleanup # ============================================================================ @pytest.fixture def persistent_indices_isolated(shared_embedding_model) -> Generator[tuple[VectorIndex, KeywordIndex, GraphStore], None, None]: """ Create function-scoped indices that can use persistent storage. Fresh indices for each test but can persist to/load from disk. Provides isolation while allowing persistence testing. Yields tuple of (vector, keyword, graph) indices. """ vector = VectorIndex(embedding_model=shared_embedding_model) keyword = KeywordIndex() graph = GraphStore() yield vector, keyword, graph @pytest.fixture def persistent_manager_isolated( persistent_config: Config, persistent_indices_isolated: tuple[VectorIndex, KeywordIndex, GraphStore], ) -> IndexManager: """ Create function-scoped IndexManager with persistent storage. Fresh manager for each test that uses persistent paths. Allows testing persistence across manager instances. Returns IndexManager configured with persistent storage. """ vector, keyword, graph = persistent_indices_isolated return IndexManager(persistent_config, vector, keyword, graph) # ============================================================================ # Hybrid Fixtures (Module-Scoped Config + Function-Scoped Indices) # ============================================================================ @pytest.fixture(scope="module") def persistent_config_module(tmp_path_factory) -> Config: """ Create module-scoped configuration with dedicated module storage. Each test module gets its own persistent storage directory that survives across tests in that module. Returns Config object with module-specific persistent paths. 
""" base_path = tmp_path_factory.mktemp("module_persistent") docs_path = base_path / "documents" index_path = base_path / "indices" docs_path.mkdir(parents=True, exist_ok=True) index_path.mkdir(parents=True, exist_ok=True) return Config( server=ServerConfig(), indexing=IndexingConfig( documents_path=str(docs_path), index_path=str(index_path), recursive=True, ), parsers={"**/*.md": "MarkdownParser"}, search=SearchConfig( semantic_weight=1.0, keyword_weight=1.0, recency_bias=0.5, rrf_k_constant=60, ), llm=LLMConfig(embedding_model="BAAI/bge-small-en-v1.5"), ) @pytest.fixture def persistent_manager_with_module_config( persistent_config_module: Config, persistent_indices_isolated: tuple[VectorIndex, KeywordIndex, GraphStore], ) -> IndexManager: """ Create function-scoped manager with module-persistent paths. Fresh manager for each test but shares module-level storage paths. Balances isolation with realistic persistence testing. Returns IndexManager with module-scoped persistent storage. """ vector, keyword, graph = persistent_indices_isolated return IndexManager(persistent_config_module, vector, keyword, graph) # ============================================================================ # Cleanup Utilities # ============================================================================ @pytest.fixture def cleanup_persistent_indices(persistent_index_path: Path) -> Generator[None, None, None]: """ Clean up persistent indices after test execution. Use this fixture when you need guaranteed cleanup of persistent storage after a test, even if using session-scoped paths. 
Example: def test_with_cleanup( persistent_manager_isolated, cleanup_persistent_indices ): # Test code here # Indices will be cleaned up after test pass """ yield # Cleanup after test if persistent_index_path.exists(): import shutil for item in persistent_index_path.iterdir(): if item.is_dir(): shutil.rmtree(item) else: item.unlink() @pytest.fixture def cleanup_persistent_docs(persistent_docs_path: Path) -> Generator[None, None, None]: """ Clean up persistent documents after test execution. Use this fixture when you need guaranteed cleanup of persistent documents after a test. Example: def test_with_doc_cleanup( persistent_docs_path, cleanup_persistent_docs ): # Test code here # Documents will be cleaned up after test pass """ yield # Cleanup after test if persistent_docs_path.exists(): for item in persistent_docs_path.iterdir(): if item.is_dir(): import shutil shutil.rmtree(item) else: item.unlink() # ============================================================================ # pytest-xdist hook to handle serial tests # ============================================================================ def pytest_xdist_auto_num_workers(config): """Hook to configure pytest-xdist behavior for serial tests.""" # Let pytest-xdist determine worker count automatically return None def pytest_collection_modifyitems(config, items): """Mark serial tests to run in the main process.""" for item in items: if "serial" in item.keywords: # Force serial tests to run in dist group 'serial' # This ensures they don't run in parallel with other tests item.add_marker(pytest.mark.xdist_group(name="serial"))

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/andnp/ragdocs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

conftest.py•14.9 KiB