import asyncio

import pytest
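
# NOTE: `tmp_path` is pytest's built-in temporary-directory fixture; `lancedb_provider`
# is assumed to be supplied by this suite's conftest.py (not shown here).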
def test_lancedb_checksums_saved_on_first_index(lancedb_provider, tmp_path):
    """Verify checksums are computed and saved during the first indexing pass with LanceDB."""
    from chunkhound.services.indexing_coordinator import IndexingCoordinator
    from chunkhound.utils.hashing import compute_file_hash

    # Create test files
    test_files = []
    for i in range(3):
        p = tmp_path / f"test{i}.py"
        p.write_text(f"# Test file {i}\nprint('hello')\n")
        test_files.append(p)

    coord = IndexingCoordinator(database_provider=lancedb_provider, base_directory=tmp_path)

    # First indexing pass - all files are new
    result1 = asyncio.run(
        coord.process_directory(tmp_path, patterns=["**/*.py"], exclude_patterns=[])
    )
    assert result1["files_processed"] == 3, "Should process 3 new files"
    assert result1.get("skipped_unchanged", 0) == 0, "No files should be skipped on first pass"

    # Verify checksums were saved in the database
    for p in test_files:
        rel_path = p.relative_to(tmp_path).as_posix()
        db_file = lancedb_provider.get_file_by_path(rel_path, as_model=False)
        assert db_file is not None, f"File {rel_path} should exist in DB"

        # Verify a non-empty checksum was saved
        db_hash = db_file.get("content_hash")
        assert db_hash is not None, f"File {rel_path} should have a checksum saved"
        assert db_hash != "", f"File {rel_path} checksum should not be empty"

        # Verify the checksum matches the actual file content
        expected_hash = compute_file_hash(p)
        assert db_hash == expected_hash, f"Checksum for {rel_path} should match actual content"

    # Second indexing pass - files unchanged
    result2 = asyncio.run(
        coord.process_directory(tmp_path, patterns=["**/*.py"], exclude_patterns=[])
    )

    # All files should be skipped on the second pass
    assert result2["files_processed"] == 0, "No files should be processed on second pass"
    assert result2.get("skipped_unchanged", 0) == 3, "All 3 files should be skipped on second pass"
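

# A minimal sketch of the complementary case, assuming the coordinator re-processes a
# file whose content hash no longer matches the stored value. It reuses only the APIs
# exercised in the tests above; the test name and file contents are illustrative.
def test_lancedb_checksum_reindexes_changed_file(lancedb_provider, tmp_path):
    """Sketch: a file modified between passes should be re-processed, not skipped."""
    from chunkhound.services.indexing_coordinator import IndexingCoordinator

    p = tmp_path / "changed.py"
    p.write_text("print('v1')\n")

    coord = IndexingCoordinator(database_provider=lancedb_provider, base_directory=tmp_path)
    asyncio.run(coord.process_directory(tmp_path, patterns=["**/*.py"], exclude_patterns=[]))

    # Rewrite the file so its checksum differs from the one stored during the first pass.
    p.write_text("print('v2')\n")
    result = asyncio.run(
        coord.process_directory(tmp_path, patterns=["**/*.py"], exclude_patterns=[])
    )
    assert result["files_processed"] == 1, "Changed file should be re-processed"
    assert result.get("skipped_unchanged", 0) == 0, "Changed file should not be skipped"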


def test_lancedb_checksum_skip_unchanged(lancedb_provider, tmp_path, monkeypatch: pytest.MonkeyPatch):
    """Verify the coordinator skips files whose stored checksums match the on-disk content."""
    from chunkhound.core.models.file import File
    from chunkhound.core.types.common import Language
    from chunkhound.services.indexing_coordinator import IndexingCoordinator
    from chunkhound.utils.hashing import compute_file_hash

    # Create files and insert prior state with content hashes
    for i in range(2):
        p = tmp_path / f"f{i}.txt"
        p.write_text("hello")
        st = p.stat()
        # Compute the content hash to simulate a previous index that saved checksums
        content_hash = compute_file_hash(p)
        f = File(
            path=(p.relative_to(tmp_path)).as_posix(),
            mtime=float(st.st_mtime),
            language=Language.TEXT,
            size_bytes=int(st.st_size),
            content_hash=content_hash,
        )
        lancedb_provider.insert_file(f)

    coord = IndexingCoordinator(database_provider=lancedb_provider, base_directory=tmp_path)

    # Avoid parsing: record which files would have been parsed
    called = []

    async def _fake(files, config_file_size_threshold_kb=20, parse_task=None, on_batch=None):
        called.append(list(files))
        return []

    monkeypatch.setattr(coord, "_process_files_in_batches", _fake)

    result = asyncio.run(
        coord.process_directory(
            tmp_path, patterns=["**/*.txt"], exclude_patterns=[], config_file_size_threshold_kb=20
        )
    )

    # Both files match their stored checksums, so nothing is processed or parsed
    assert result["files_processed"] == 0
    assert result.get("skipped_unchanged", 0) == 2
    assert called and len(called[0]) == 0, "Batch processor should be called with an empty file list"
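
# For reference, a minimal stand-in for compute_file_hash, assuming it returns a hex
# digest over the raw file bytes (the real implementation lives in
# chunkhound.utils.hashing and may differ, e.g. in algorithm choice):
#
#     import hashlib
#
#     def compute_file_hash(path):
#         return hashlib.sha256(path.read_bytes()).hexdigest()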