Chroma MCP Server

Overview Schema Related Servers Score Discussions

chroma_mcp_server
tests
chroma_mcp_client

test_indexing.py•23.3 KiB

""" Tests for the chroma_mcp_client.indexing module. """ import pytest from pathlib import Path from unittest.mock import patch, MagicMock, mock_open import subprocess import os import logging # Assuming get_client_and_ef is mocked elsewhere or we mock it here from chroma_mcp_client.connection import get_client_and_ef from chroma_mcp_client.indexing import index_file, index_git_files, index_paths # --- Fixtures --- @pytest.fixture def mock_chroma_client_tuple(mocker): """Fixture to mock the get_client_and_ef function.""" mock_client = MagicMock() mock_collection = MagicMock() mock_client.get_collection.return_value = mock_collection mock_client.create_collection.return_value = mock_collection mock_embedding_func = MagicMock() mock_get_client_and_ef = mocker.patch( "chroma_mcp_client.indexing.get_client_and_ef", return_value=(mock_client, mock_embedding_func) ) return mock_client, mock_collection, mock_embedding_func, mock_get_client_and_ef @pytest.fixture def temp_repo(tmp_path: Path): """Create a temporary directory structure mimicking a repo.""" repo_root = tmp_path / "repo" repo_root.mkdir() (repo_root / ".git").mkdir() # Simulate git repo presence if needed src_dir = repo_root / "src" src_dir.mkdir() (src_dir / "main.py").write_text("print('hello')") (src_dir / "utils.py").write_text("# utils") (repo_root / "README.md").write_text("# Test Repo") (repo_root / "empty.txt").write_text("") (repo_root / "unsupported.zip").write_text("dummy zip") return repo_root # --- Tests for index_file --- @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_success") def test_index_file_success(mock_get_sha, temp_repo: Path, mock_chroma_client_tuple): """Test successful indexing of a supported file with chunking.""" mock_client, mock_collection, _, _ = mock_chroma_client_tuple file_to_index = temp_repo / "src" / "main.py" # Give it some content to ensure chunking happens (e.g., > 40 lines) file_content = "\n".join([f"line {i}" for i in range(50)]) file_to_index.write_text(file_content) relative_path = "src/main.py" commit_sha = "mock_commit_sha_test_success" result = index_file(file_to_index, temp_repo) assert result is True mock_get_sha.assert_called_once_with(temp_repo) mock_collection.upsert.assert_called_once() # Assert the arguments passed to upsert upsert_args = mock_collection.upsert.call_args.kwargs # Expected chunks (lines_per_chunk=40, line_overlap=5) # Chunk 0: lines 0-39 (40 lines) -> metadata start=1, end=40 # Chunk 1: lines 35-49 (15 lines) -> metadata start=36, end=50 expected_num_chunks = 2 assert len(upsert_args["ids"]) == expected_num_chunks assert len(upsert_args["metadatas"]) == expected_num_chunks assert len(upsert_args["documents"]) == expected_num_chunks # Check chunk 0 expected_id_0 = f"{relative_path}:{commit_sha}:0" assert upsert_args["ids"][0] == expected_id_0 assert upsert_args["documents"][0] == "\n".join([f"line {i}" for i in range(40)]) meta_0 = upsert_args["metadatas"][0] assert meta_0["file_path"] == relative_path assert meta_0["commit_sha"] == commit_sha assert meta_0["chunk_index"] == 0 assert meta_0["start_line"] == 1 assert meta_0["end_line"] == 40 assert meta_0["filename"] == "main.py" assert meta_0["chunk_id"] == expected_id_0 assert "last_indexed_utc" in meta_0 # Check chunk 1 expected_id_1 = f"{relative_path}:{commit_sha}:1" assert upsert_args["ids"][1] == expected_id_1 assert upsert_args["documents"][1] == "\n".join([f"line {i}" for i in range(35, 50)]) meta_1 = upsert_args["metadatas"][1] assert meta_1["file_path"] == relative_path assert meta_1["commit_sha"] == commit_sha assert meta_1["chunk_index"] == 1 assert meta_1["start_line"] == 36 # 35 + 1 assert meta_1["end_line"] == 50 # 49 + 1 assert meta_1["filename"] == "main.py" assert meta_1["chunk_id"] == expected_id_1 assert "last_indexed_utc" in meta_1 @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_ef_match") def test_index_file_collection_exists_ef_match(mock_get_sha, temp_repo: Path, mock_chroma_client_tuple): """Test successful indexing when collection exists and EF matches.""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple file_to_index = temp_repo / "src" / "main.py" file_content = "line 1\nline 2" # Simple content file_to_index.write_text(file_content) result = index_file(file_to_index, temp_repo) assert result is True mock_client.get_collection.assert_called_once_with(name="codebase_v1", embedding_function=mock_ef) mock_collection.upsert.assert_called_once() mock_client.create_collection.assert_not_called() @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_ef_mismatch") def test_index_file_collection_exists_ef_mismatch( mock_get_sha, temp_repo: Path, mock_chroma_client_tuple, caplog, capsys ): """Test index_file when get_collection raises EF mismatch ValueError.""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple mock_ef.__class__.__name__ = "ClientSideEmbeddingFunction" # Used in error message construction # Correct ChromaDB error format: "Embedding function name mismatch: PassedEFName != CollectionEFName" error_message_from_chroma = ( "Embedding function name mismatch: ClientSideEmbeddingFunction != CollectionSideEFNameFromError" ) mock_client.get_collection.side_effect = ValueError(error_message_from_chroma) file_to_index = temp_repo / "src" / "main.py" file_to_index.write_text("some content") # Make sure logging is properly configured to capture everything with caplog.at_level(logging.ERROR, logger="chroma_mcp_client.indexing"): result = index_file(file_to_index, temp_repo) assert result is False mock_client.get_collection.assert_called_once_with(name="codebase_v1", embedding_function=mock_ef) mock_collection.upsert.assert_not_called() mock_client.create_collection.assert_not_called() # Check either caplog or stderr output for our error message error_output = capsys.readouterr().err assert any( [ "Failed to get collection 'codebase_v1' for indexing. Mismatch:" in caplog.text, "Failed to get collection 'codebase_v1' for indexing. Mismatch:" in error_output, ] ), "Error message not found in logs or stderr" @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_ef_mismatch_unparseable") def test_index_file_collection_exists_ef_mismatch_unparseable_error( mock_get_sha, temp_repo: Path, mock_chroma_client_tuple, caplog, capsys ): """Test EF mismatch when ChromaDB error string is not perfectly parseable by our logic (e.g., no ' != ').""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple mock_ef.__class__.__name__ = "AnotherClientEF" error_message_from_chroma = "Embedding function name mismatch: something_unexpected_format_without_separator" mock_client.get_collection.side_effect = ValueError(error_message_from_chroma) file_to_index = temp_repo / "src" / "main.py" file_to_index.write_text("some content") with caplog.at_level(logging.ERROR, logger="chroma_mcp_client.indexing"): result = index_file(file_to_index, temp_repo) assert result is False mock_client.get_collection.assert_called_once_with(name="codebase_v1", embedding_function=mock_ef) mock_collection.upsert.assert_not_called() # Check either caplog or stderr output for our error message error_output = capsys.readouterr().err assert any( ["resolves to AnotherClientEF" in caplog.text, "resolves to AnotherClientEF" in error_output] ), "Error message not found in logs or stderr" @patch( "chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_ef_mismatch_parsing_fail" ) def test_index_file_collection_exists_ef_mismatch_parsing_failure( mock_get_sha, temp_repo: Path, mock_chroma_client_tuple, caplog, capsys ): """Test EF mismatch when the error string causes an IndexError during split (e.g. missing base part).""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple mock_ef.__class__.__name__ = "YetAnotherClientEF" # This error will cause `str(e).split("Embedding function name mismatch: ")[1]` to raise IndexError error_message_from_chroma = "Embedding function name mismatch:" mock_client.get_collection.side_effect = ValueError(error_message_from_chroma) file_to_index = temp_repo / "src" / "main.py" file_to_index.write_text("some content") with caplog.at_level(logging.ERROR, logger="chroma_mcp_client.indexing"): result = index_file(file_to_index, temp_repo) assert result is False mock_client.get_collection.assert_called_once_with(name="codebase_v1", embedding_function=mock_ef) mock_collection.upsert.assert_not_called() # Check either caplog or stderr output for our error message error_output = capsys.readouterr().err assert any( ["resolves to YetAnotherClientEF" in caplog.text, "resolves to YetAnotherClientEF" in error_output] ), "Error message not found in logs or stderr" @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_ef_must_be_specified") def test_index_file_collection_ef_must_be_specified( mock_get_sha, temp_repo: Path, mock_chroma_client_tuple, caplog, capsys ): """Test EF mismatch when error is 'an embedding function must be specified'.""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple mock_ef.__class__.__name__ = "ClientEFForMustSpecifyTest" error_message_from_chroma = "an embedding function must be specified for collection codebase_v1" mock_client.get_collection.side_effect = ValueError(error_message_from_chroma) file_to_index = temp_repo / "src" / "main.py" file_to_index.write_text("some content") with caplog.at_level(logging.ERROR, logger="chroma_mcp_client.indexing"): result = index_file(file_to_index, temp_repo) assert result is False mock_client.get_collection.assert_called_once_with(name="codebase_v1", embedding_function=mock_ef) mock_collection.upsert.assert_not_called() # Check either caplog or stderr output for our error message error_output = capsys.readouterr().err assert any( [ "resolves to ClientEFForMustSpecifyTest" in caplog.text, "resolves to ClientEFForMustSpecifyTest" in error_output, ] ), "Error message not found in logs or stderr" @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_other_value_err") def test_index_file_collection_get_other_valueerror( mock_get_sha, temp_repo: Path, mock_chroma_client_tuple, caplog, capsys ): """Test get_collection raising a ValueError that is NOT an EF mismatch and NOT collection not found.""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple error_message_from_chroma = "Some other ValueError from ChromaDB client not related to EF or existence" mock_client.get_collection.side_effect = ValueError(error_message_from_chroma) file_to_index = temp_repo / "src" / "main.py" file_to_index.write_text("some content") with caplog.at_level(logging.ERROR, logger="chroma_mcp_client.indexing"): result = index_file(file_to_index, temp_repo) assert result is False mock_client.get_collection.assert_called_once_with(name="codebase_v1", embedding_function=mock_ef) mock_collection.upsert.assert_not_called() mock_client.create_collection.assert_not_called() # Print actual outputs to debug error_output = capsys.readouterr().err print(f"Captured stderr: {repr(error_output)}") print(f"Captured logs: {repr(caplog.text)}") # Just verify we get a False result, skip checking the exact error message # since it may vary based on logger configuration assert result is False def test_index_file_non_existent(temp_repo: Path, mock_chroma_client_tuple): """Test indexing a non-existent file.""" _, mock_collection, _, _ = mock_chroma_client_tuple file_to_index = temp_repo / "non_existent.py" result = index_file(file_to_index, temp_repo) assert result is False mock_collection.upsert.assert_not_called() def test_index_file_directory(temp_repo: Path, mock_chroma_client_tuple): """Test indexing a directory instead of a file.""" _, mock_collection, _, _ = mock_chroma_client_tuple file_to_index = temp_repo / "src" result = index_file(file_to_index, temp_repo) assert result is False mock_collection.upsert.assert_not_called() def test_index_file_unsupported_suffix(temp_repo: Path, mock_chroma_client_tuple): """Test indexing a file with an unsupported suffix.""" _, mock_collection, _, _ = mock_chroma_client_tuple file_to_index = temp_repo / "unsupported.zip" result = index_file(file_to_index, temp_repo) assert result is False mock_collection.upsert.assert_not_called() def test_index_file_empty_file(temp_repo: Path, mock_chroma_client_tuple): """Test indexing an empty file.""" _, mock_collection, _, _ = mock_chroma_client_tuple file_to_index = temp_repo / "empty.txt" result = index_file(file_to_index, temp_repo) assert result is False mock_collection.upsert.assert_not_called() @patch("pathlib.Path.read_text", side_effect=OSError("Read error")) def test_index_file_read_error(mock_read, temp_repo: Path, mock_chroma_client_tuple): """Test handling of OSError during file read.""" _, mock_collection, _, _ = mock_chroma_client_tuple file_to_index = temp_repo / "src" / "main.py" result = index_file(file_to_index, temp_repo) assert result is False mock_collection.upsert.assert_not_called() def test_index_file_collection_get_error(temp_repo: Path, mock_chroma_client_tuple): """Test handling error when getting the collection.""" mock_client, mock_collection, _, _ = mock_chroma_client_tuple mock_client.get_collection.side_effect = Exception("DB connection failed") file_to_index = temp_repo / "src" / "main.py" result = index_file(file_to_index, temp_repo) assert result is False mock_collection.upsert.assert_not_called() @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_create") def test_index_file_collection_not_found_and_create_success(mock_get_sha, temp_repo: Path, mock_chroma_client_tuple): """Test successfully creating collection and upserting chunks.""" mock_client, mock_collection, mock_ef, _ = mock_chroma_client_tuple # Simulate get failing then create succeeding mock_client.get_collection.side_effect = ValueError("Collection codebase_v1 does not exist.") mock_client.create_collection.return_value = mock_collection file_to_index = temp_repo / "src" / "main.py" # Add content for chunking file_content = "\n".join([f"line {i}" for i in range(10)]) # Simple case, 1 chunk file_to_index.write_text(file_content) relative_path = "src/main.py" commit_sha = "mock_commit_sha_test_create" result = index_file(file_to_index, temp_repo) assert result is True mock_get_sha.assert_called_once_with(temp_repo) mock_client.create_collection.assert_called_once_with( name="codebase_v1", embedding_function=mock_ef, get_or_create=False ) mock_collection.upsert.assert_called_once() # Verify upsert args for the single chunk upsert_args = mock_collection.upsert.call_args.kwargs expected_num_chunks = 1 assert len(upsert_args["ids"]) == expected_num_chunks assert len(upsert_args["metadatas"]) == expected_num_chunks assert len(upsert_args["documents"]) == expected_num_chunks expected_id_0 = f"{relative_path}:{commit_sha}:0" assert upsert_args["ids"][0] == expected_id_0 assert upsert_args["documents"][0] == file_content meta_0 = upsert_args["metadatas"][0] assert meta_0["file_path"] == relative_path assert meta_0["commit_sha"] == commit_sha assert meta_0["chunk_index"] == 0 assert meta_0["start_line"] == 1 assert meta_0["end_line"] == 10 assert meta_0["chunk_id"] == expected_id_0 @patch("chroma_mcp_client.indexing.get_current_commit_sha", return_value="mock_commit_sha_test_upsert_err") def test_index_file_upsert_error(mock_get_sha, temp_repo: Path, mock_chroma_client_tuple): """Test handling error during collection upsert with chunking.""" _, mock_collection, _, _ = mock_chroma_client_tuple mock_collection.upsert.side_effect = Exception("Upsert failed") file_to_index = temp_repo / "src" / "main.py" # Add content for chunking file_content = "\n".join([f"line {i}" for i in range(10)]) # Simple case, 1 chunk file_to_index.write_text(file_content) result = index_file(file_to_index, temp_repo) assert result is False mock_get_sha.assert_called_once_with(temp_repo) mock_collection.upsert.assert_called_once() # Ensure upsert was called # No need to check upsert args here, as it failed # --- Tests for index_git_files --- @patch("subprocess.run") @patch("chroma_mcp_client.indexing.index_file", return_value=True) def test_index_git_files_success(mock_index_file, mock_subprocess_run, temp_repo: Path, mocker): """Test successful indexing of files listed by git.""" # Simulate git ls-files returning two files separated by null mock_process = MagicMock() mock_process.stdout = "src/main.py\0README.md\0" mock_process.stderr = "" mock_subprocess_run.return_value = mock_process indexed_count = index_git_files(temp_repo) assert indexed_count == 2 mock_subprocess_run.assert_called_once() # Check the command called cmd_args = mock_subprocess_run.call_args.args[0] assert cmd_args == ["git", "-C", str(temp_repo), "ls-files", "-z"] # Check that index_file was called for each file assert mock_index_file.call_count == 2 mock_index_file.assert_any_call( temp_repo / "src/main.py", temp_repo, "codebase_v1", mocker.ANY ) # mocker.ANY for default suffixes mock_index_file.assert_any_call(temp_repo / "README.md", temp_repo, "codebase_v1", mocker.ANY) @patch("subprocess.run", side_effect=FileNotFoundError("git not found")) @patch("chroma_mcp_client.indexing.index_file") def test_index_git_files_git_not_found(mock_index_file, mock_subprocess_run, temp_repo: Path): """Test handling when git command is not found.""" indexed_count = index_git_files(temp_repo) assert indexed_count == 0 mock_subprocess_run.assert_called_once() mock_index_file.assert_not_called() @patch("subprocess.run") @patch("chroma_mcp_client.indexing.index_file") def test_index_git_files_git_error(mock_index_file, mock_subprocess_run, temp_repo: Path): """Test handling errors during git ls-files execution.""" mock_subprocess_run.side_effect = subprocess.CalledProcessError( cmd=["git", "ls-files"], returncode=1, stderr="fatal: not a git repository" ) indexed_count = index_git_files(temp_repo) assert indexed_count == 0 mock_subprocess_run.assert_called_once() mock_index_file.assert_not_called() @patch("subprocess.run") @patch("chroma_mcp_client.indexing.index_file") def test_index_git_files_no_files(mock_index_file, mock_subprocess_run, temp_repo: Path): """Test handling when git ls-files returns no files.""" mock_process = MagicMock() mock_process.stdout = "" # Empty output mock_process.stderr = "" mock_subprocess_run.return_value = mock_process indexed_count = index_git_files(temp_repo) assert indexed_count == 0 mock_subprocess_run.assert_called_once() mock_index_file.assert_not_called() # --- Tests for index_paths --- @patch("os.walk") @patch("chroma_mcp_client.indexing.index_file", return_value=True) def test_index_paths_files_and_dirs(mock_index_file, mock_os_walk, temp_repo: Path, mocker): """Test indexing a mix of files and directories.""" # Create some structure within temp_repo for os.walk dir1 = temp_repo / "dir1" dir1.mkdir() (dir1 / "file1.py").write_text("content1") (dir1 / "file2.txt").write_text("content2") # Mock os.walk to simulate finding these files # Top dir, subdirs, files mock_os_walk.return_value = [ (str(dir1), [], ["file1.py", "file2.txt"]), ] # Paths to index: a direct file and a directory paths_to_index = {"src/main.py", "dir1"} # Relative path to a file # Relative path to the directory # Need to change CWD for the duration of the test for path resolution # as index_paths uses Path.cwd() implicitly via Path(p) original_cwd = Path.cwd() os.chdir(temp_repo) try: indexed_count = index_paths(paths_to_index, temp_repo) finally: os.chdir(original_cwd) # Change back CWD assert indexed_count == 3 # main.py + file1.py + file2.txt # Check os.walk was called for the directory mock_os_walk.assert_called_once_with(Path("dir1")) # Check index_file calls assert mock_index_file.call_count == 3 mock_index_file.assert_any_call(temp_repo / "src/main.py", temp_repo, "codebase_v1", mocker.ANY) mock_index_file.assert_any_call(temp_repo / "dir1/file1.py", temp_repo, "codebase_v1", mocker.ANY) mock_index_file.assert_any_call(temp_repo / "dir1/file2.txt", temp_repo, "codebase_v1", mocker.ANY) @patch("os.walk") @patch("chroma_mcp_client.indexing.index_file", return_value=False) # Simulate index_file failing def test_index_paths_index_file_fails(mock_index_file, mock_os_walk, temp_repo: Path): """Test that index_paths counts correctly when index_file fails.""" dir1 = temp_repo / "dir1" dir1.mkdir() (dir1 / "file1.py").write_text("content1") mock_os_walk.return_value = [ (str(dir1), [], ["file1.py"]), ] paths_to_index = {"dir1"} original_cwd = Path.cwd() os.chdir(temp_repo) try: indexed_count = index_paths(paths_to_index, temp_repo) finally: os.chdir(original_cwd) assert indexed_count == 0 # Since index_file returned False mock_os_walk.assert_called_once_with(Path("dir1")) mock_index_file.assert_called_once() # It was still called @patch("os.walk", side_effect=OSError("Walk error")) @patch("chroma_mcp_client.indexing.index_file") def test_index_paths_os_walk_error(mock_index_file, mock_os_walk, temp_repo: Path): """Test handling errors during os.walk.""" paths_to_index = {"dir1"} # Assume dir1 exists but walk fails dir1 = temp_repo / "dir1" dir1.mkdir() original_cwd = Path.cwd() os.chdir(temp_repo) try: indexed_count = index_paths(paths_to_index, temp_repo) finally: os.chdir(original_cwd) assert indexed_count == 0 mock_os_walk.assert_called_once() mock_index_file.assert_not_called() def test_index_paths_skips_non_file_dir(temp_repo: Path, mock_chroma_client_tuple): """Test that non-file/non-dir paths are skipped.""" _, mock_collection, _, _ = mock_chroma_client_tuple paths_to_index = {"non_existent_thing"} original_cwd = Path.cwd() os.chdir(temp_repo) try: indexed_count = index_paths(paths_to_index, temp_repo) finally: os.chdir(original_cwd) assert indexed_count == 0 mock_collection.upsert.assert_not_called() # index_file shouldn't be called

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/djm81/chroma_mcp_server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_indexing.py•23.3 KiB