Skip to main content
Glama
test_incremental_indexing.py (12.1 kB)
"""
Unit tests for incremental indexing functionality.

Every test builds a FileProcessor against fixed fake paths while
``FileProcessor.load_state`` and ``os.makedirs`` are patched, then exercises
one method with its collaborators replaced by mocks.
"""

import os
import json
from unittest.mock import MagicMock, patch, mock_open

import pytest

from src.file_processor import FileProcessor


def _build_processor(vector_search=None):
    """Construct a FileProcessor with the fixed arguments shared by all tests.

    A fresh MagicMock is used as the vector search backend unless one is
    supplied. Callers that do not want state loaded during construction must
    patch ``FileProcessor.load_state`` before calling this helper.
    """
    if vector_search is None:
        vector_search = MagicMock()
    return FileProcessor(
        vector_search=vector_search,
        project_path="/test/project",
        ignore_patterns=[],
        data_dir="/test/data",
    )


@pytest.fixture
def mock_makedirs():
    """Patch os.makedirs for the duration of a test and yield the mock."""
    with patch("os.makedirs") as makedirs_mock:
        yield makedirs_mock


@patch("src.file_processor.hashlib.sha256")
@patch("src.file_processor.os.stat")
@patch("builtins.open", new_callable=mock_open, read_data=b"test file content")
def test_compute_file_hash(mock_file_open, mock_stat, mock_sha256, mock_makedirs):
    """compute_file_hash opens the file in binary mode and returns the hex digest."""
    # The patched sha256 factory hands back a hasher with a canned digest.
    hasher = MagicMock()
    hasher.hexdigest.return_value = "test_hash_digest"
    mock_sha256.return_value = hasher

    with patch.object(FileProcessor, "load_state"):
        proc = _build_processor()

        # Discard any open() calls recorded while the processor was built.
        mock_file_open.reset_mock()

        digest = proc.compute_file_hash("/test/file.txt")

        mock_file_open.assert_called_once_with("/test/file.txt", "rb")
        assert hasher.update.called
        assert digest == "test_hash_digest"


@patch("src.file_processor.os.stat")
@patch("src.file_processor.open", new_callable=mock_open, read_data=b"test file content")
def test_get_file_stats(mock_file_open, mock_stat, mock_makedirs):
    """get_file_stats returns (mtime, size, hash) and uses a synthetic hash for large files."""
    stat_result = MagicMock(st_mtime=12345.6789, st_size=1024)
    mock_stat.return_value = stat_result

    with patch.object(FileProcessor, "load_state"):
        proc = _build_processor()

        # Replace the real hashing with a mock so only get_file_stats is exercised.
        proc.compute_file_hash = MagicMock(return_value="test_hash_digest")

        mtime, size, file_hash = proc.get_file_stats("/test/file.txt")

        mock_stat.assert_called_once_with("/test/file.txt")
        proc.compute_file_hash.assert_called_once_with("/test/file.txt")
        assert mtime == 12345.6789
        assert size == 1024
        assert file_hash == "test_hash_digest"

        # Large file (> 10MB): the expected hash is derived from size and mtime.
        large_size = 11 * 1024 * 1024
        stat_result.st_size = large_size
        mtime, size, file_hash = proc.get_file_stats("/test/large_file.bin")

        assert size == large_size
        assert file_hash == f"size:{size}_mtime:{mtime}"


@patch("os.path.isfile")
@patch("os.path.join")
def test_file_needs_update(mock_join, mock_isfile, mock_makedirs):
    """file_needs_update is True for new or modified files and False for unchanged ones."""
    mock_isfile.return_value = True
    # Stand-in for os.path.join: stringify every part and join with "/".
    mock_join.side_effect = lambda *parts: "/".join(map(str, parts))

    with patch.object(FileProcessor, "load_state"):
        proc = _build_processor()

        # Every file reports these current stats.
        proc.get_file_stats = MagicMock(
            return_value=(12345.6789, 1024, "test_hash_digest")
        )

        # New file: no metadata recorded for it.
        assert proc.file_needs_update("src/new_file.py") is True

        # Known file whose stored mtime and hash differ from the current stats.
        proc.file_metadata = {
            "src/modified_file.py": {
                "mtime": 12345.0,
                "size": 1024,
                "hash": "old_hash_digest",
            }
        }
        proc.last_indexed_files = {"src/modified_file.py"}
        assert proc.file_needs_update("src/modified_file.py") is True

        # Known file whose stored metadata matches the current stats exactly.
        proc.file_metadata = {
            "src/unchanged_file.py": {
                "mtime": 12345.6789,
                "size": 1024,
                "hash": "test_hash_digest",
            }
        }
        proc.last_indexed_files = {"src/unchanged_file.py"}
        assert proc.file_needs_update("src/unchanged_file.py") is False


@patch("src.file_processor.os.walk")
@patch("src.file_processor.os.path.relpath")
def test_get_modified_files(mock_relpath, mock_walk, mock_makedirs):
    """get_modified_files reports files to update, files to remove and the total count."""
    # Fake project tree: README.md at the root and three modules under src/.
    mock_walk.return_value = [
        ("/test/project", ["src"], ["README.md"]),
        ("/test/project/src", [], ["main.py", "utils.py", "config.py"]),
    ]
    mock_relpath.side_effect = lambda path, start: path.replace(str(start) + "/", "")

    with patch.object(FileProcessor, "load_state"):
        proc = _build_processor()

        # Previously indexed state; src/old_file.py is absent from the fake tree.
        proc.last_indexed_files = {
            "README.md",
            "src/main.py",
            "src/utils.py",
            "src/old_file.py",
        }
        proc.file_metadata = {
            "README.md": {"mtime": 12345.0, "size": 512, "hash": "readme_hash"},
            "src/main.py": {"mtime": 12345.0, "size": 1024, "hash": "main_hash"},
            "src/utils.py": {"mtime": 12345.0, "size": 768, "hash": "utils_hash"},
            "src/old_file.py": {"mtime": 12345.0, "size": 256, "hash": "old_hash"},
        }

        # Control which paths look modified: README.md, utils.py and config.py
        # need an update, main.py does not.
        needing_update = ("README.md", "src/utils.py", "src/config.py")

        def mock_needs_update(rel_path):
            return rel_path in needing_update

        proc.file_needs_update = MagicMock(side_effect=mock_needs_update)

        files_to_update, files_to_remove, total_files = proc.get_modified_files()

        assert sorted(files_to_update) == sorted(
            ["README.md", "src/utils.py", "src/config.py"]
        )
        assert sorted(files_to_remove) == ["src/old_file.py"]
        assert total_files == 4  # README.md, main.py, utils.py, config.py


@patch("json.dump")
@patch("builtins.open", new_callable=mock_open)
def test_save_state(mock_file_open, mock_json_dump, mock_makedirs):
    """save_state opens the state file and dumps indexed files, metadata and a timestamp."""
    with patch.object(FileProcessor, "load_state"):
        proc = _build_processor()

        proc.last_indexed_files = {"file1.py", "file2.py"}
        proc.file_metadata = {
            "file1.py": {"mtime": 123.456, "size": 100, "hash": "hash1"},
            "file2.py": {"mtime": 789.012, "size": 200, "hash": "hash2"},
        }

        proc.save_state()

        # The path may be passed as a str or as a path object, so compare both
        # the raw argument and its string form.
        state_path = "/test/data/file_processor_state.json"
        assert any(
            call.args[0] == state_path or str(call.args[0]) == state_path
            for call in mock_file_open.call_args_list
        )
        mock_json_dump.assert_called_once()

        # The first positional argument to json.dump is the payload being saved.
        saved_data = mock_json_dump.call_args.args[0]
        for required_key in ("indexed_files", "file_metadata", "last_updated"):
            assert required_key in saved_data
        assert set(saved_data["indexed_files"]) == {"file1.py", "file2.py"}
        assert saved_data["file_metadata"] == proc.file_metadata


def test_load_state(mock_makedirs):
    """load_state restores the indexed file set and metadata from the state file."""
    state_json = (
        '{"indexed_files": ["file1.py", "file2.py"], '
        '"file_metadata": {'
        '"file1.py": {"mtime": 123.456, "size": 100, "hash": "hash1"}, '
        '"file2.py": {"mtime": 789.012, "size": 200, "hash": "hash2"}}, '
        '"last_updated": 1234567890}'
    )

    # Pretend the state file exists and serve its contents through a mocked open().
    with patch("pathlib.Path.exists", return_value=True):
        with patch("builtins.open", mock_open(read_data=state_json)):
            # Keep load_state patched only while constructing the processor.
            with patch.object(FileProcessor, "load_state"):
                proc = _build_processor()

            # The patch above has been undone, so this is the real load_state.
            proc.load_state()

            assert proc.last_indexed_files == {"file1.py", "file2.py"}
            assert proc.file_metadata == {
                "file1.py": {"mtime": 123.456, "size": 100, "hash": "hash1"},
                "file2.py": {"mtime": 789.012, "size": 200, "hash": "hash2"},
            }


@patch("src.file_processor.ThreadPoolExecutor")
def test_incremental_indexing(mock_thread_pool, mock_makedirs):
    """index_files processes only modified files when incremental, all files otherwise."""
    vector_search = MagicMock()
    executor = MagicMock()
    mock_thread_pool.return_value.__enter__.return_value = executor
    executor.map.return_value = [True, True, False]  # 2 successful, 1 failed

    changed = ("file1.py", "file2.py", "file3.py")
    everything = changed + ("file4.py",)

    with patch.object(FileProcessor, "load_state"):
        proc = _build_processor(vector_search)

        proc.last_indexed_files = {"file1.py", "file2.py", "file3.py", "old_file.py"}

        # Instance-level mocks are installed only after the processor exists.
        # Fresh lists are handed to each mock so later assertions never compare
        # a list against itself.
        proc.get_modified_files = MagicMock(
            return_value=(
                list(changed),    # files to update
                ["old_file.py"],  # files to remove
                4,                # total files
            )
        )
        proc.get_file_list = MagicMock(return_value=list(everything))
        proc.save_state = MagicMock()

        # --- Incremental indexing ---
        proc.index_files(incremental=True)

        proc.get_modified_files.assert_called_once()
        assert proc.total_files == 4

        # Removed files are deleted from the vector store.
        vector_search.delete_file.assert_called_once_with("old_file.py")

        # Only the files reported as modified are processed.
        executor.map.assert_called_once_with(proc.process_file, list(changed))

        proc.save_state.assert_called_once()

        # Only the successful results count towards files_indexed.
        assert proc.files_indexed == 2

        # Clear the recorded calls before the full-indexing run.
        for recorded in (
            proc.get_modified_files,
            vector_search.delete_file,
            executor.map,
            proc.save_state,
        ):
            recorded.reset_mock()

        # --- Full indexing ---
        proc.index_files(incremental=False)

        # Full indexing bypasses get_modified_files and uses get_file_list.
        proc.get_modified_files.assert_not_called()
        proc.get_file_list.assert_called()

        # Every listed file is processed.
        executor.map.assert_called_once_with(proc.process_file, list(everything))

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/randomm/files-db-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.