test_server_yellhornignore.py•27 kB
"""Tests for .yellhornignore and .yellhorncontext functionality."""
import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from yellhorn_mcp.formatters import get_codebase_snapshot
@pytest.mark.asyncio
async def test_yellhornignore_file_reading():
"""Test reading .yellhornignore file."""
# Create a temporary directory with a .yellhornignore file
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
# Create a .yellhornignore file with patterns
yellhornignore_file = tmp_path / ".yellhornignore"
yellhornignore_file.write_text(
"# Comment line\n"
"*.log\n"
"node_modules/\n"
"\n" # Empty line should be skipped
"dist/\n"
)
# Create a mock git command function
call_count = 0
async def mock_git_func(repo_path, command, git_func=None):
nonlocal call_count
if call_count == 0:
# First call: tracked files (ls-files)
call_count += 1
return "\n".join(
[
"file1.py",
"file2.js",
"src/components/Button.js",
]
)
else:
# Second call: untracked files
return "\n".join(
[
"file3.log",
"node_modules/package.json",
"dist/bundle.js",
]
)
# Create a test file that can be read
(tmp_path / "file1.py").write_text("# Test file 1")
(tmp_path / "file2.js").write_text("// Test file 2")
# Create directory structure for testing
os.makedirs(tmp_path / "node_modules")
os.makedirs(tmp_path / "dist")
os.makedirs(tmp_path / "src/components")
(tmp_path / "node_modules/package.json").write_text("{}")
(tmp_path / "dist/bundle.js").write_text("/* bundle */")
(tmp_path / "src/components/Button.js").write_text("// Button component")
(tmp_path / "file3.log").write_text("log data")
# Call get_codebase_snapshot with the mock function
file_paths, file_contents = await get_codebase_snapshot(
tmp_path, git_command_func=mock_git_func
)
# Verify that ignored files are not in results
assert "file1.py" in file_paths
assert "file2.js" in file_paths
assert "src/components/Button.js" in file_paths
assert "file3.log" not in file_paths # Ignored by *.log
assert "node_modules/package.json" not in file_paths # Ignored by node_modules/
assert "dist/bundle.js" not in file_paths # Ignored by dist/
# Verify contents
assert "file1.py" in file_contents
assert "file2.js" in file_contents
assert "file3.log" not in file_contents
assert "node_modules/package.json" not in file_contents
assert "dist/bundle.js" not in file_contents
@pytest.mark.asyncio
async def test_yellhornignore_file_error_handling():
"""Test error handling when reading .yellhornignore file."""
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
# Create a .yellhornignore file
yellhornignore_path = tmp_path / ".yellhornignore"
yellhornignore_path.write_text("*.log\nnode_modules/")
# Create a mock git command function
call_count = 0
async def mock_git_func(repo_path, command, git_func=None):
nonlocal call_count
if call_count == 0:
call_count += 1
return "file1.py\nfile2.js" # tracked files
else:
return "file3.log" # untracked files
# Mock Path.read_text to raise an exception when reading .yellhornignore
original_read_text = Path.read_text
def mock_read_text(self, *args, **kwargs):
if str(self).endswith(".yellhornignore"):
raise PermissionError("Permission denied")
# For other files, use the real read_text
return original_read_text(self, *args, **kwargs)
with patch.object(Path, "read_text", mock_read_text):
# Create test files
(tmp_path / "file1.py").write_text("# Test file 1")
(tmp_path / "file2.js").write_text("// Test file 2")
(tmp_path / "file3.log").write_text("log data")
# Call get_codebase_snapshot and expect it to raise an exception
with pytest.raises(PermissionError, match="Permission denied"):
file_paths, file_contents = await get_codebase_snapshot(
tmp_path, git_command_func=mock_git_func
)
@pytest.mark.asyncio
async def test_get_codebase_snapshot_directory_handling():
"""Test handling of directories in get_codebase_snapshot."""
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
# Create directory structure
os.makedirs(tmp_path / "src")
# Create a mock git command function
call_count = 0
async def mock_git_func(repo_path, command, git_func=None):
nonlocal call_count
if call_count == 0:
call_count += 1
return "file1.py" # tracked files
else:
return "src" # untracked files (directory)
# Create test file
(tmp_path / "file1.py").write_text("# Test file 1")
# Create a mock implementation for Path.is_dir
original_is_dir = Path.is_dir
def mock_is_dir(self):
# Check if the path ends with 'src'
if str(self).endswith("/src") or str(self).endswith("src"):
return True
# Otherwise call the original
return original_is_dir(self)
# Apply the patch
with patch.object(Path, "is_dir", mock_is_dir):
# Make sure .yellhornignore doesn't exist
with patch.object(Path, "exists", return_value=False):
# Call get_codebase_snapshot
file_paths, file_contents = await get_codebase_snapshot(
tmp_path, git_command_func=mock_git_func
)
# Verify directory handling
assert len(file_paths) == 2
assert "file1.py" in file_paths
assert "src" in file_paths
# Only the file should be in contents, directories are skipped
assert len(file_contents) == 1
assert "file1.py" in file_contents
assert "src" not in file_contents
@pytest.mark.asyncio
async def test_get_codebase_snapshot_binary_file_handling():
"""Test handling of binary files in get_codebase_snapshot."""
# Setup a temporary directory for testing
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
# Create a text file and a binary file
(tmp_path / "file1.py").write_text("# Test file 1")
# Create binary-like content for file2.jpg
with open(tmp_path / "file2.jpg", "wb") as f:
f.write(b"\x89PNG\r\n\x1a\n") # PNG file header
# Mock run_git_command to return our test files
with patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git:
# First call is for tracked files, second is for untracked files
mock_git.side_effect = [
"file1.py", # tracked files
"file2.jpg", # untracked files
]
# Make sure Path.is_dir returns False for our paths
with patch.object(Path, "is_dir", return_value=False):
# Make sure .yellhornignore doesn't exist
with patch.object(Path, "exists", return_value=False):
# Mock open to raise UnicodeDecodeError for binary file
original_open = open
def mock_open(filename, *args, **kwargs):
if str(filename).endswith("file2.jpg") and "r" in args[0]:
raise UnicodeDecodeError("utf-8", b"\x80", 0, 1, "invalid start byte")
return original_open(filename, *args, **kwargs)
# Apply the patch to builtins.open
with patch("builtins.open", mock_open):
# Call get_codebase_snapshot
file_paths, file_contents = await get_codebase_snapshot(
tmp_path, git_command_func=mock_git
)
# Verify binary file handling - binary files are filtered out
assert len(file_paths) == 1
assert "file1.py" in file_paths
assert "file2.jpg" not in file_paths # Binary files are filtered out
# Only text files should be in contents
assert len(file_contents) == 1
assert "file1.py" in file_contents
assert "file2.jpg" not in file_contents # Binary files are filtered out
# The text file content should be readable
assert "# Test file 1" in file_contents["file1.py"]
@pytest.mark.skip(reason="Whitelist functionality with ! prefix is not implemented")
@pytest.mark.asyncio
async def test_yellhornignore_whitelist_functionality():
"""Test whitelisting files with ! prefix in .yellhornignore file."""
# Create a temporary directory with a .yellhornignore file
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
# Create a .yellhornignore file with patterns and whitelist
yellhornignore_file = tmp_path / ".yellhornignore"
yellhornignore_file.write_text(
"# Comment line\n"
"*.log\n"
"node_modules/\n"
"dist/\n"
"# Whitelist specific files\n"
"!important.log\n"
"!node_modules/important-package.json\n"
)
# Mock run_git_command to return a list of files
with patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git:
# First call is for tracked files, second is for untracked files
mock_git.side_effect = [
# First call: tracked files
"\n".join(
[
"file1.py",
"file2.js",
"src/components/Button.js",
]
),
# Second call: untracked files
"\n".join(
[
"regular.log",
"important.log",
"node_modules/package.json",
"node_modules/important-package.json",
"dist/bundle.js",
]
),
]
# Create files for testing
(tmp_path / "file1.py").write_text("# Test file 1")
(tmp_path / "file2.js").write_text("// Test file 2")
os.makedirs(tmp_path / "node_modules")
os.makedirs(tmp_path / "dist")
os.makedirs(tmp_path / "src/components")
(tmp_path / "regular.log").write_text("regular log data")
(tmp_path / "important.log").write_text("important log data")
(tmp_path / "node_modules/package.json").write_text("{}")
(tmp_path / "node_modules/important-package.json").write_text('{"name": "important"}')
(tmp_path / "dist/bundle.js").write_text("/* bundle */")
(tmp_path / "src/components/Button.js").write_text("// Button component")
# Call get_codebase_snapshot
file_paths, file_contents = await get_codebase_snapshot(tmp_path)
# Verify that ignored files are not in results
assert "file1.py" in file_paths
assert "file2.js" in file_paths
assert "src/components/Button.js" in file_paths
# Verify that regular ignored files are not included
assert "regular.log" not in file_paths # Ignored by *.log
assert "node_modules/package.json" not in file_paths # Ignored by node_modules/
assert "dist/bundle.js" not in file_paths # Ignored by dist/
# Verify whitelisted files are included despite matching ignore patterns
assert "important.log" in file_paths # Whitelisted despite *.log
assert (
"node_modules/important-package.json" in file_paths
) # Whitelisted despite node_modules/
# Verify contents
assert "file1.py" in file_contents
assert "file2.js" in file_contents
assert "regular.log" not in file_contents
assert "important.log" in file_contents
assert "node_modules/package.json" not in file_contents
assert "node_modules/important-package.json" in file_contents
assert "dist/bundle.js" not in file_contents
# Helper class for creating async mocks
class AsyncMock(MagicMock):
"""MagicMock subclass that supports async with syntax and awaitable returns."""
async def __call__(self, *args, **kwargs):
return super().__call__(*args, **kwargs)
def __await__(self):
yield from []
return self().__await__()
@pytest.mark.skip(reason="Complex test that needs refactoring for proper mocking")
@pytest.mark.asyncio
async def test_curate_context():
"""Test the curate_context tool functionality with .yellhornignore integration."""
from yellhorn_mcp.server import curate_context
from yellhorn_mcp.utils.git_utils import YellhornMCPError
# Create a mock context with async log method
mock_ctx = MagicMock()
mock_ctx.log = AsyncMock()
mock_ctx.request_context.lifespan_context = {
"repo_path": Path("/fake/repo/path"),
"model": "gemini-2.5-pro",
"gemini_client": MagicMock(),
}
# Sample user task
user_task = "Implementing a new feature for data processing"
# Setup mock for get_codebase_snapshot - patch it where it's used
with patch("yellhorn_mcp.server.get_codebase_snapshot") as mock_snapshot:
# First test: No files found
mock_snapshot.return_value = ([], {})
# Test error handling when no files are found
with pytest.raises(YellhornMCPError, match="No files found in repository to analyze"):
await curate_context(mock_ctx, user_task)
# Second test: Without .yellhornignore file
# Create a list of files to analyze
mock_sample_files = [
"src/main.py",
"src/utils.py",
"src/data/processor.py",
"src/data/models.py",
"tests/test_main.py",
"tests/test_data/test_processor.py",
"docs/README.md",
"build/output.js",
"node_modules/package1/index.js",
]
mock_snapshot.return_value = (mock_sample_files, {})
# Mock Path.exists to return False for .yellhornignore
with patch("pathlib.Path.exists", return_value=False):
# Mock Path.write_text to avoid writing to the filesystem
with patch("pathlib.Path.write_text", MagicMock()):
# Mock the Gemini client response for directory selection
gemini_client_mock = mock_ctx.request_context.lifespan_context["gemini_client"]
gemini_client_mock.aio = MagicMock()
gemini_client_mock.aio.models = MagicMock()
# Configure the Gemini response mock
mock_response = MagicMock()
mock_response.text = """```context
src
src/data
tests
tests/test_data
```"""
# Set up both API patterns for backward compatibility
gemini_client_mock.aio.models.generate_content = AsyncMock(
return_value=mock_response
)
gemini_client_mock.aio.generate_content = AsyncMock(return_value=mock_response)
# Call curate_context
result = await curate_context(mock_ctx, user_task)
# Verify the result
assert "Successfully created .yellhorncontext file" in result
# We now match different directories, so just check for "important directories"
assert "important directories" in result
assert "recommended blacklist patterns" in result
# Verify that correct log messages were created
log_calls = [
call[1]["message"]
for call in mock_ctx.log.call_args_list
if isinstance(call[1].get("message"), str)
]
assert any("No .yellhornignore file found" in msg for msg in log_calls)
assert any(
"Processing complete, identified 4 important directories" in msg
for msg in log_calls
)
assert any(
"Using Git's tracking information - respecting .gitignore patterns" in msg
for msg in log_calls
)
# Test with .yellhornignore file
mock_ctx.reset_mock()
with patch("yellhorn_mcp.server.get_codebase_snapshot") as mock_snapshot:
# Create a list of files to analyze
mock_sample_files = [
"src/main.py",
"src/utils.py",
"src/data/processor.py",
"src/data/models.py",
"tests/test_main.py",
"tests/test_data/test_processor.py",
"docs/README.md",
"build/output.js",
"node_modules/package1/index.js",
]
mock_snapshot.return_value = (mock_sample_files, {})
# Setup mock Path.exists and Path.is_file for .yellhornignore
with (
patch("pathlib.Path.exists", return_value=True),
patch("pathlib.Path.is_file", return_value=True),
):
# Mock reading .yellhornignore file
with patch("builtins.open") as mock_open:
# Create a mock file-like object for .yellhornignore
mock_file = MagicMock()
# The file contains patterns to ignore node_modules and build directories
mock_file.__enter__.return_value.readlines.return_value = [
"# Ignore patterns\n",
"node_modules/\n",
"build/\n",
"*.log\n",
]
# Make the mock open return the mock file for .yellhornignore
# but use the normal open for other files
def side_effect(*args, **kwargs):
if str(args[0]).endswith(".yellhornignore"):
return mock_file
# For our output file (.yellhorncontext), create a mock
elif str(args[0]).endswith(".yellhorncontext"):
return MagicMock()
# For other files, use a mock as well
return MagicMock()
mock_open.side_effect = side_effect
mock_file.__enter__.return_value.__iter__.return_value = [
"# Ignore patterns\n",
"node_modules/\n",
"build/\n",
"*.log\n",
]
# Mock the Gemini client response for directory selection
gemini_client_mock = mock_ctx.request_context.lifespan_context["gemini_client"]
gemini_client_mock.aio = MagicMock()
gemini_client_mock.aio.models = MagicMock()
# Configure the Gemini response mock
mock_response = MagicMock()
mock_response.text = """```context
src
src/data
tests
tests/test_data
docs
```"""
gemini_client_mock.aio.models.generate_content = AsyncMock(
return_value=mock_response
)
# Call curate_context with .yellhornignore
result = await curate_context(mock_ctx, user_task)
# Verify the result
assert "Successfully created .yellhorncontext file" in result
assert "5 important directories" in result
assert "existing ignore patterns from .yellhornignore" in result
# Verify that correct log messages were created
log_calls = [
call[1]["message"]
for call in mock_ctx.log.call_args_list
if isinstance(call[1].get("message"), str)
]
assert any("Found .yellhornignore file" in msg for msg in log_calls)
assert any("Applied .yellhornignore filtering" in msg for msg in log_calls)
assert any("identified 5 important directories" in msg for msg in log_calls)
# Test with depth_limit parameter
mock_ctx.reset_mock()
with patch("yellhorn_mcp.server.get_codebase_snapshot") as mock_snapshot:
# Create a list of files with various depths
mock_sample_files = [
"root_file.py", # depth 1
"first_level/file.py", # depth 2
"first_level/second_level/file.py", # depth 3
"deep/path/to/file.py", # depth 4
]
mock_snapshot.return_value = (mock_sample_files, {})
# Mock Path.exists and Path.is_file for no .yellhornignore
with patch("pathlib.Path.exists", return_value=False):
# Mock Path.write_text to avoid writing to the filesystem
with patch("pathlib.Path.write_text", MagicMock()):
# Mock the Gemini client response for directory selection
gemini_client_mock = mock_ctx.request_context.lifespan_context["gemini_client"]
gemini_client_mock.aio = MagicMock()
gemini_client_mock.aio.models = MagicMock()
# Configure the Gemini response mock
mock_response = MagicMock()
mock_response.text = """```context
first_level
```"""
gemini_client_mock.aio.models.generate_content = AsyncMock(
return_value=mock_response
)
# Call curate_context with depth_limit=2
result = await curate_context(mock_ctx, user_task, depth_limit=2)
# Verify that depth filtering was applied
log_calls = [
call[1]["message"]
for call in mock_ctx.log.call_args_list
if isinstance(call[1].get("message"), str)
]
assert any("Applied depth limit 2" in msg for msg in log_calls)
assert any("filtered from" in msg for msg in log_calls)
# Test error handling during LLM call
mock_ctx.reset_mock()
with patch("yellhorn_mcp.server.get_codebase_snapshot") as mock_snapshot:
# Create a simple list of files
mock_snapshot.return_value = (["file1.py", "file2.py"], {})
# Mock Path.exists for no .yellhornignore
with patch("pathlib.Path.exists", return_value=False):
# Mock Path.write_text to avoid writing to the filesystem
with patch("pathlib.Path.write_text", MagicMock()):
# Mock the Gemini client to raise an exception
gemini_client_mock = mock_ctx.request_context.lifespan_context["gemini_client"]
gemini_client_mock.aio = MagicMock()
gemini_client_mock.aio.models = MagicMock()
gemini_client_mock.aio.models.generate_content = AsyncMock(
side_effect=Exception("API Error")
)
# Test we handle errors and use all directories as fallback
result = await curate_context(mock_ctx, user_task)
# Verify the result shows we included all directories as fallback
assert "Successfully created .yellhorncontext file" in result
# Verify that we logged the error and fallback behavior
log_calls = [
call[1]["message"]
for call in mock_ctx.log.call_args_list
if isinstance(call[1].get("message"), str)
]
assert any("Error processing chunk" in msg for msg in log_calls)
assert any(
"No important directories identified, including all directories" in msg
for msg in log_calls
)
# Test with OpenAI model
mock_ctx.reset_mock()
mock_ctx.request_context.lifespan_context = {
"repo_path": Path("/fake/repo/path"),
"model": "gpt-4o", # Use an OpenAI model
"openai_client": MagicMock(),
}
with patch("yellhorn_mcp.server.get_codebase_snapshot") as mock_snapshot:
# Create a simple list of files
mock_snapshot.return_value = (["src/file1.py", "src/file2.py"], {})
# Mock Path.exists for no .yellhornignore
with patch("pathlib.Path.exists", return_value=False):
# Mock Path.write_text to avoid writing to the filesystem
with patch("pathlib.Path.write_text", MagicMock()):
# Mock the OpenAI client response
openai_client_mock = mock_ctx.request_context.lifespan_context["openai_client"]
openai_client_mock.chat = MagicMock()
openai_client_mock.chat.completions = MagicMock()
# Create response object mock
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message = MagicMock()
mock_response.choices[
0
].message.content = """```context
src
```"""
# Mock the create function
openai_client_mock.chat.completions.create = AsyncMock(return_value=mock_response)
# Call curate_context with OpenAI model
result = await curate_context(mock_ctx, user_task)
# Verify the result shows successful creation
assert "Successfully created .yellhorncontext file" in result
# Verify that we made a call to OpenAI
log_calls = [
call[1]["message"]
for call in mock_ctx.log.call_args_list
if isinstance(call[1].get("message"), str)
]
assert any("gpt-4o" in msg for msg in log_calls)