"""
Unit tests for incremental indexing functionality
"""
import json
import os
from unittest.mock import MagicMock, patch, mock_open

import pytest

from src.file_processor import FileProcessor


@pytest.fixture
def mock_makedirs():
    """Mock os.makedirs to prevent file system errors"""
    with patch("os.makedirs") as mock:
        yield mock
@patch("src.file_processor.hashlib.sha256")
@patch("src.file_processor.os.stat")
@patch("builtins.open", new_callable=mock_open, read_data=b"test file content")
def test_compute_file_hash(mock_file_open, mock_stat, mock_sha256, mock_makedirs):
"""Test file hash computation"""
# Set up mocks
mock_hasher = MagicMock()
mock_sha256.return_value = mock_hasher
mock_hasher.hexdigest.return_value = "test_hash_digest"
with patch.object(FileProcessor, 'load_state') as mock_load_state:
# Create file processor with mocked load_state
vector_search = MagicMock()
processor = FileProcessor(
vector_search=vector_search,
project_path="/test/project",
ignore_patterns=[],
data_dir="/test/data",
)
# Reset the mock_file_open to clear any calls from load_state
mock_file_open.reset_mock()
# Test hash computation
result = processor.compute_file_hash("/test/file.txt")
# Verify
mock_file_open.assert_called_once_with("/test/file.txt", "rb")
assert mock_hasher.update.called
assert result == "test_hash_digest"
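

# For reference, a minimal sketch of the chunked-hashing contract the test
# above exercises. This is an assumption about src.file_processor, not its
# actual code; the helper name and chunk size are hypothetical.
def _reference_compute_file_hash(path):
    import hashlib

    hasher = hashlib.sha256()
    with open(path, "rb") as f:
        # Read in chunks so large files never need to fit in memory
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
    return hasher.hexdigest()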
@patch("src.file_processor.os.stat")
@patch("src.file_processor.open", new_callable=mock_open, read_data=b"test file content")
def test_get_file_stats(mock_file_open, mock_stat, mock_makedirs):
"""Test file stats retrieval"""
# Set up mocks
mock_stat_result = MagicMock()
mock_stat_result.st_mtime = 12345.6789
mock_stat_result.st_size = 1024
mock_stat.return_value = mock_stat_result
with patch.object(FileProcessor, 'load_state'):
# Create file processor
vector_search = MagicMock()
processor = FileProcessor(
vector_search=vector_search,
project_path="/test/project",
ignore_patterns=[],
data_dir="/test/data",
)
# Mock hash computation
processor.compute_file_hash = MagicMock(return_value="test_hash_digest")
# Test getting file stats
mtime, size, file_hash = processor.get_file_stats("/test/file.txt")
# Verify
mock_stat.assert_called_once_with("/test/file.txt")
processor.compute_file_hash.assert_called_once_with("/test/file.txt")
assert mtime == 12345.6789
assert size == 1024
assert file_hash == "test_hash_digest"
# Test large file (> 10MB)
mock_stat_result.st_size = 11 * 1024 * 1024
mtime, size, file_hash = processor.get_file_stats("/test/large_file.bin")
# For large files, we should not compute hash
assert size == 11 * 1024 * 1024
assert file_hash == f"size:{size}_mtime:{mtime}"
@patch("os.path.isfile")
@patch("os.path.join")
def test_file_needs_update(mock_join, mock_isfile, mock_makedirs):
"""Test file change detection logic"""
# Set up mocks
mock_isfile.return_value = True
mock_join.side_effect = lambda *args: "/".join(str(arg) for arg in args)
with patch.object(FileProcessor, 'load_state'):
# Create file processor
vector_search = MagicMock()
processor = FileProcessor(
vector_search=vector_search,
project_path="/test/project",
ignore_patterns=[],
data_dir="/test/data",
)
# Mock file stats
processor.get_file_stats = MagicMock(return_value=(12345.6789, 1024, "test_hash_digest"))
# Test with file not in metadata (new file)
assert processor.file_needs_update("src/new_file.py") is True
# Test with file in metadata but modified
processor.file_metadata = {
"src/modified_file.py": {
"mtime": 12345.0,
"size": 1024,
"hash": "old_hash_digest"
}
}
processor.last_indexed_files = {"src/modified_file.py"}
assert processor.file_needs_update("src/modified_file.py") is True
# Test with file in metadata and unchanged
processor.file_metadata = {
"src/unchanged_file.py": {
"mtime": 12345.6789,
"size": 1024,
"hash": "test_hash_digest"
}
}
processor.last_indexed_files = {"src/unchanged_file.py"}
assert processor.file_needs_update("src/unchanged_file.py") is False
@patch("src.file_processor.os.walk")
@patch("src.file_processor.os.path.relpath")
def test_get_modified_files(mock_relpath, mock_walk, mock_makedirs):
"""Test modified files detection"""
# Set up mocks
mock_walk.return_value = [
("/test/project", ["src"], ["README.md"]),
("/test/project/src", [], ["main.py", "utils.py", "config.py"]),
]
mock_relpath.side_effect = lambda path, start: path.replace(str(start) + "/", "")
with patch.object(FileProcessor, 'load_state'):
# Create file processor
vector_search = MagicMock()
processor = FileProcessor(
vector_search=vector_search,
project_path="/test/project",
ignore_patterns=[],
data_dir="/test/data",
)
# Set up initial state with some previously indexed files
processor.last_indexed_files = {
"README.md",
"src/main.py",
"src/utils.py",
"src/old_file.py" # This file no longer exists
}
processor.file_metadata = {
"README.md": {"mtime": 12345.0, "size": 512, "hash": "readme_hash"},
"src/main.py": {"mtime": 12345.0, "size": 1024, "hash": "main_hash"},
"src/utils.py": {"mtime": 12345.0, "size": 768, "hash": "utils_hash"},
"src/old_file.py": {"mtime": 12345.0, "size": 256, "hash": "old_hash"},
}
# Mock file_needs_update to control which files appear modified
def mock_needs_update(rel_path):
# README.md and utils.py are modified, main.py is unchanged
return rel_path in ["README.md", "src/utils.py", "src/config.py"]
processor.file_needs_update = MagicMock(side_effect=mock_needs_update)
# Test getting modified files
files_to_update, files_to_remove, total_files = processor.get_modified_files()
# Verify results
assert sorted(files_to_update) == sorted(["README.md", "src/utils.py", "src/config.py"])
assert sorted(files_to_remove) == ["src/old_file.py"]
assert total_files == 4 # README.md, main.py, utils.py, config.py
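

# Note: get_modified_files is assumed to return the three-tuple
# (files_to_update, files_to_remove, total_files), where files_to_remove
# holds previously indexed paths that no longer exist on disk.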
@patch("json.dump")
@patch("builtins.open", new_callable=mock_open)
def test_save_state(mock_file_open, mock_json_dump, mock_makedirs):
"""Test state saving"""
with patch.object(FileProcessor, 'load_state'):
# Create file processor
vector_search = MagicMock()
processor = FileProcessor(
vector_search=vector_search,
project_path="/test/project",
ignore_patterns=[],
data_dir="/test/data",
)
# Set up state
processor.last_indexed_files = {"file1.py", "file2.py"}
processor.file_metadata = {
"file1.py": {"mtime": 123.456, "size": 100, "hash": "hash1"},
"file2.py": {"mtime": 789.012, "size": 200, "hash": "hash2"},
}
# Save state
processor.save_state()
# Verify - use any() to check for the file path since it might be a PosixPath object
assert any(
call.args[0] == "/test/data/file_processor_state.json" or str(call.args[0]) == "/test/data/file_processor_state.json"
for call in mock_file_open.call_args_list
)
mock_json_dump.assert_called_once()
# Check that we're saving the right data
args, kwargs = mock_json_dump.call_args
saved_data = args[0]
assert "indexed_files" in saved_data
assert "file_metadata" in saved_data
assert "last_updated" in saved_data
assert set(saved_data["indexed_files"]) == {"file1.py", "file2.py"}
assert saved_data["file_metadata"] == processor.file_metadata


def test_load_state(mock_makedirs):
    """Test state loading"""
    state_json = json.dumps({
        "indexed_files": ["file1.py", "file2.py"],
        "file_metadata": {
            "file1.py": {"mtime": 123.456, "size": 100, "hash": "hash1"},
            "file2.py": {"mtime": 789.012, "size": 200, "hash": "hash2"},
        },
        "last_updated": 1234567890,
    })

    # Mock Path.exists() to return True so the state file appears to exist
    with patch("pathlib.Path.exists", return_value=True), \
         patch("builtins.open", mock_open(read_data=state_json)):
        # First patch load_state to avoid loading during init
        with patch.object(FileProcessor, 'load_state'):
            vector_search = MagicMock()
            processor = FileProcessor(
                vector_search=vector_search,
                project_path="/test/project",
                ignore_patterns=[],
                data_dir="/test/data",
            )

        # Then manually call load_state (the real one)
        processor.load_state()

        # Check that we loaded the right data
        assert processor.last_indexed_files == {"file1.py", "file2.py"}
        assert processor.file_metadata == {
            "file1.py": {"mtime": 123.456, "size": 100, "hash": "hash1"},
            "file2.py": {"mtime": 789.012, "size": 200, "hash": "hash2"},
        }
@patch("src.file_processor.ThreadPoolExecutor")
def test_incremental_indexing(mock_thread_pool, mock_makedirs):
"""Test incremental indexing functionality"""
# Create mocks
vector_search = MagicMock()
executor = MagicMock()
mock_thread_pool.return_value.__enter__.return_value = executor
executor.map.return_value = [True, True, False] # 2 successful, 1 failed
with patch.object(FileProcessor, 'load_state'):
# Create file processor
processor = FileProcessor(
vector_search=vector_search,
project_path="/test/project",
ignore_patterns=[],
data_dir="/test/data",
)
# Create a replacement set of test files that exists
processor.last_indexed_files = {"file1.py", "file2.py", "file3.py", "old_file.py"}
# Mock methods - do this AFTER initializing the processor
processor.get_modified_files = MagicMock(return_value=(
["file1.py", "file2.py", "file3.py"], # files to update
["old_file.py"], # files to remove
4 # total files
))
processor.get_file_list = MagicMock(return_value=["file1.py", "file2.py", "file3.py", "file4.py"])
processor.save_state = MagicMock()
# Test incremental indexing (default)
processor.index_files(incremental=True)
# Verify that we used the get_modified_files results
processor.get_modified_files.assert_called_once()
assert processor.total_files == 4
# Check if vector_search.delete_file was called for removed files
vector_search.delete_file.assert_called_once_with("old_file.py")
# Verify that we processed the right files
executor.map.assert_called_once_with(processor.process_file, ["file1.py", "file2.py", "file3.py"])
# Check if save_state was called
processor.save_state.assert_called_once()
# Check that files_indexed was updated correctly
assert processor.files_indexed == 2 # Only the successful ones
# Reset mocks for testing full indexing
processor.get_modified_files.reset_mock()
vector_search.delete_file.reset_mock()
executor.map.reset_mock()
processor.save_state.reset_mock()
# Test full indexing
processor.index_files(incremental=False)
# Verify that for full indexing, we didn't use get_modified_files
processor.get_modified_files.assert_not_called()
# But we did call get_file_list
processor.get_file_list.assert_called()
# And we processed all files
executor.map.assert_called_once_with(processor.process_file, ["file1.py", "file2.py", "file3.py", "file4.py"])
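

# Flow assumed by test_incremental_indexing (a sketch, not the actual
# implementation): with incremental=True, get_modified_files() drives the
# work - stale paths are dropped via vector_search.delete_file(), changed
# files are processed in parallel with ThreadPoolExecutor.map(process_file,
# ...), and save_state() persists the new snapshot. With incremental=False,
# get_file_list() returns every file and all of them are reprocessed.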