Markdown RAG Documentation

Overview Schema Related Servers Score Discussions

ragdocs-mcp
tests
integration

test_file_watching.py•10.9 KiB

""" Integration tests for File Watcher (D10). Tests FileWatcher's ability to detect file system events (create, modify, delete) and trigger appropriate indexing operations through IndexManager. Uses real components with temporary storage and async test patterns. """ import asyncio from pathlib import Path import pytest from src.config import Config, IndexingConfig, LLMConfig, SearchConfig, ServerConfig from src.indexing.manager import IndexManager from src.indexing.watcher import FileWatcher from src.indices.graph import GraphStore from src.indices.keyword import KeywordIndex from src.indices.vector import VectorIndex @pytest.fixture def config(tmp_path): """ Create test configuration with temporary paths. Uses tmp_path for isolated test storage to avoid conflicts. """ docs_path = tmp_path / "docs" docs_path.mkdir() return Config( server=ServerConfig(), indexing=IndexingConfig( documents_path=str(docs_path), index_path=str(tmp_path / "indices"), ), parsers={"**/*.md": "MarkdownParser"}, search=SearchConfig(), llm=LLMConfig(), ) @pytest.fixture def indices(shared_embedding_model): """ Create real index instances. Returns tuple of (vector, keyword, graph) indices for IndexManager. """ vector = VectorIndex(embedding_model=shared_embedding_model) keyword = KeywordIndex() graph = GraphStore() return vector, keyword, graph @pytest.fixture def manager(config, indices): """ Create IndexManager with real indices. Provides fully functional manager for integration testing. """ vector, keyword, graph = indices return IndexManager(config, vector, keyword, graph) @pytest.fixture def watcher(config, manager): """ Create FileWatcher with real IndexManager. Uses short cooldown (0.2s) for faster test execution. 
""" return FileWatcher( documents_path=config.indexing.documents_path, index_manager=manager, cooldown=0.2, include_patterns=config.indexing.include, exclude_patterns=config.indexing.exclude, exclude_hidden_dirs=config.indexing.exclude_hidden_dirs, ) @pytest.mark.asyncio async def test_detect_file_creation_and_index(watcher, manager, config, tmp_path): """ Test that FileWatcher detects new file creation and indexes it. Validates the watcher correctly monitors file system events and triggers indexing for newly created markdown files. """ # Start watcher watcher.start() try: # Create new markdown file docs_path = Path(config.indexing.documents_path) new_file = docs_path / "new_document.md" new_file.write_text("# New Document\n\nThis is a newly created document.") # Wait for watcher to detect, debounce, and process (longer for filesystem) await asyncio.sleep(1.0) # Verify document was indexed (count increased) doc_count = manager.get_document_count() assert doc_count > 0 finally: await watcher.stop() @pytest.mark.asyncio async def test_detect_file_modification_and_reindex(watcher, manager, config, tmp_path): """ Test that FileWatcher detects file modifications and re-indexes. Ensures the watcher triggers re-indexing when an existing file is modified, updating all indices with the new content. 
""" docs_path = Path(config.indexing.documents_path) test_file = docs_path / "modified_doc.md" # Create initial file test_file.write_text("# Original Content\n\nThis is the original text.") # Start watcher watcher.start() try: # Wait for initial indexing await asyncio.sleep(1.0) # Modify the file test_file.write_text("# Modified Content\n\nThis text has been updated.") # Wait for debounce + processing await asyncio.sleep(1.0) # Verify updated content is indexed (count remains stable) doc_count = manager.get_document_count() assert doc_count > 0 finally: await watcher.stop() @pytest.mark.asyncio async def test_detect_file_deletion_and_remove_from_index( watcher, manager, config, tmp_path ): """ Test that FileWatcher detects file deletion and removes from indices. Validates cleanup mechanism: deleted files should be removed from all indices to prevent stale search results. """ docs_path = Path(config.indexing.documents_path) test_file = docs_path / "to_delete.md" # Start watcher first watcher.start() try: # Create and index file test_file.write_text("# Document to Delete\n\nThis will be removed.") # Wait for initial indexing await asyncio.sleep(1.0) # Verify document exists (count > 0) count_before = manager.get_document_count() assert count_before > 0 # Delete the file test_file.unlink() # Wait for debounce + processing await asyncio.sleep(1.0) # Verify operation completed (removal processed) # Test passes if no exception occurred during removal finally: await watcher.stop() @pytest.mark.asyncio async def test_debouncing_multiple_rapid_changes(watcher, manager, config, tmp_path): """ Test that rapid file changes are debounced into single index operation. Validates debouncing behavior: multiple quick edits to the same file should not trigger excessive re-indexing, improving performance. 
""" docs_path = Path(config.indexing.documents_path) test_file = docs_path / "rapid_changes.md" # Start watcher watcher.start() try: # Make rapid consecutive changes (faster than debounce window) test_file.write_text("# Version 1\n\nFirst version.") await asyncio.sleep(0.05) test_file.write_text("# Version 2\n\nSecond version.") await asyncio.sleep(0.05) test_file.write_text("# Version 3\n\nFinal version.") # Wait for debounce + processing await asyncio.sleep(1.0) # Verify final version is indexed doc_count = manager.get_document_count() assert doc_count > 0 finally: await watcher.stop() @pytest.mark.asyncio async def test_batch_processing_multiple_files(watcher, manager, config, tmp_path): """ Test that multiple file changes are batched and processed together. Validates batch processing: creating/modifying multiple files within the debounce window should trigger a single batch operation, improving efficiency for bulk operations. """ docs_path = Path(config.indexing.documents_path) # Start watcher watcher.start() try: # Create multiple files in quick succession file1 = docs_path / "batch_file1.md" file2 = docs_path / "batch_file2.md" file3 = docs_path / "batch_file3.md" file1.write_text("# Batch File 1\n\nFirst batch file.") await asyncio.sleep(0.05) file2.write_text("# Batch File 2\n\nSecond batch file.") await asyncio.sleep(0.05) file3.write_text("# Batch File 3\n\nThird batch file.") # Wait for debounce + batch processing await asyncio.sleep(1.0) # Verify all files were indexed (count reflects batch) doc_count = manager.get_document_count() assert doc_count >= 3 finally: await watcher.stop() @pytest.mark.asyncio async def test_watcher_handles_non_markdown_files(watcher, manager, config, tmp_path): """ Test that FileWatcher ignores non-markdown files. Ensures the watcher only processes configured file types (.md, .markdown) and ignores other files in the watched directory. 
""" docs_path = Path(config.indexing.documents_path) # Start watcher watcher.start() try: # Create markdown file md_file = docs_path / "valid.md" md_file.write_text("# Valid Markdown\n\nShould be indexed.") # Create non-markdown files txt_file = docs_path / "ignore.txt" txt_file.write_text("This should be ignored.") py_file = docs_path / "script.py" py_file.write_text("print('ignored')") # Wait for debounce + processing await asyncio.sleep(1.0) # Verify only markdown file was indexed (count == 1) doc_count = manager.get_document_count() assert doc_count == 1 finally: await watcher.stop() @pytest.mark.asyncio async def test_watcher_excludes_venv_directories(watcher, manager, config, tmp_path): """ Test that FileWatcher ignores files in excluded directories like .venv. Validates exclude pattern filtering: markdown files inside .venv, venv, node_modules, etc. should not be indexed even if they're valid markdown. """ docs_path = Path(config.indexing.documents_path) # Start watcher watcher.start() try: # Create markdown file in normal location (should be indexed) normal_file = docs_path / "normal.md" normal_file.write_text("# Normal Document\n\nShould be indexed.") # Create markdown file in .venv (should be excluded) venv_dir = docs_path / ".venv" / "lib" / "python3.13" / "site-packages" venv_dir.mkdir(parents=True) venv_file = venv_dir / "README.md" venv_file.write_text("# Venv Package\n\nShould NOT be indexed.") # Create markdown file in node_modules (should be excluded) node_dir = docs_path / "node_modules" / "some-package" node_dir.mkdir(parents=True) node_file = node_dir / "README.md" node_file.write_text("# Node Package\n\nShould NOT be indexed.") # Wait for debounce + processing await asyncio.sleep(1.0) # Verify only normal file was indexed (count == 1) doc_count = manager.get_document_count() assert doc_count == 1 finally: await watcher.stop() @pytest.mark.asyncio async def test_watcher_excludes_hidden_directories(watcher, manager, config, tmp_path): """ Test 
that FileWatcher ignores files in hidden directories. Validates exclude_hidden_dirs option: files in directories starting with '.' should be excluded by default. """ docs_path = Path(config.indexing.documents_path) # Start watcher watcher.start() try: # Create markdown file in normal location (should be indexed) normal_file = docs_path / "visible.md" normal_file.write_text("# Visible Document\n\nShould be indexed.") # Create markdown file in hidden directory (should be excluded) hidden_dir = docs_path / ".hidden" hidden_dir.mkdir() hidden_file = hidden_dir / "secret.md" hidden_file.write_text("# Hidden Document\n\nShould NOT be indexed.") # Wait for debounce + processing await asyncio.sleep(1.0) # Verify only visible file was indexed (count == 1) doc_count = manager.get_document_count() assert doc_count == 1 finally: await watcher.stop()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/andnp/ragdocs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_file_watching.py•10.9 KiB