Yellhorn MCP

Overview Schema Related Servers Score Discussions

yellhorn-mcp
tests

test_judgement_processor.py•30.1 KiB

"""Unit tests for judgement_processor module.""" import re from datetime import datetime, timezone from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest from yellhorn_mcp.llm import LLMManager from yellhorn_mcp.llm.base import ReasoningEffort from yellhorn_mcp.llm.usage import UsageMetadata from yellhorn_mcp.processors.judgement_processor import get_git_diff, process_judgement_async from yellhorn_mcp.utils.git_utils import YellhornMCPError class TestGetGitDiff: """Test suite for get_git_diff function.""" @pytest.mark.asyncio async def test_get_git_diff_full_mode(self, tmp_path): """Test git diff in full mode.""" repo_path = tmp_path / "repo" repo_path.mkdir() expected_diff = """diff --git a/file.py b/file.py index 123..456 100644 --- a/file.py +++ b/file.py @@ -1,3 +1,4 @@ def hello(): + print('world') pass""" with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.return_value = expected_diff result = await get_git_diff(repo_path, "main", "feature", "full") assert result == expected_diff mock_git.assert_called_once_with(repo_path, ["diff", "--patch", "main...feature"], None) @pytest.mark.asyncio async def test_get_git_diff_file_structure_mode(self, tmp_path): """Test git diff in file_structure mode.""" repo_path = tmp_path / "repo" repo_path.mkdir() with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.return_value = "file1.py\nfile2.js\nREADME.md" result = await get_git_diff(repo_path, "main", "feature", "file_structure") expected = "Changed files between main and feature:\nfile1.py\nfile2.js\nREADME.md" assert result == expected mock_git.assert_called_once_with( repo_path, ["diff", "--name-only", "main...feature"], None ) @pytest.mark.asyncio async def test_get_git_diff_none_mode(self, tmp_path): """Test git diff in none mode.""" repo_path = tmp_path / "repo" repo_path.mkdir() with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.return_value = "file1.py\nfile2.js" result = await get_git_diff(repo_path, "main", "feature", "none") expected = "Changed files between main and feature:\nfile1.py\nfile2.js" assert result == expected mock_git.assert_called_once_with( repo_path, ["diff", "--name-only", "main...feature"], None ) @pytest.mark.asyncio @pytest.mark.skip(reason="LSP diff test needs complex mocking of internal Git calls") async def test_get_git_diff_lsp_mode(self, tmp_path): """Test git diff in lsp mode.""" repo_path = tmp_path / "repo" repo_path.mkdir() changed_files = "file1.py\nfile2.py" lsp_diff_content = "Mock LSP diff content" with ( patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch("yellhorn_mcp.utils.lsp_utils.get_lsp_diff") as mock_lsp_diff, ): # Mock git command to return changed files list mock_git.return_value = changed_files # Mock LSP diff to return expected content mock_lsp_diff.return_value = lsp_diff_content result = await get_git_diff(repo_path, "main", "feature", "lsp") assert result == lsp_diff_content mock_git.assert_called_once_with( repo_path, ["diff", "--name-only", "main...feature"], None ) mock_lsp_diff.assert_called_once_with( repo_path, "main", "feature", ["file1.py", "file2.py"], None ) @pytest.mark.asyncio async def test_get_git_diff_empty_result(self, tmp_path): """Test git diff with empty result.""" repo_path = tmp_path / "repo" repo_path.mkdir() with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.return_value = "" result = await get_git_diff(repo_path, "main", "feature", "full") assert result == "" @pytest.mark.asyncio async def test_get_git_diff_file_structure_empty(self, tmp_path): """Test git diff in file_structure mode with no changes.""" repo_path = tmp_path / "repo" repo_path.mkdir() with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.return_value = "" result = await get_git_diff(repo_path, "main", "feature", "file_structure") expected = "Changed files between main and feature:" assert result == expected @pytest.mark.asyncio async def test_get_git_diff_lsp_mode_empty(self, tmp_path): """Test git diff in lsp mode with no changes.""" repo_path = tmp_path / "repo" repo_path.mkdir() with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.return_value = "" result = await get_git_diff(repo_path, "main", "feature", "lsp") assert result == "" @pytest.mark.asyncio async def test_get_git_diff_error(self, tmp_path): """Test git diff with error.""" repo_path = tmp_path / "repo" repo_path.mkdir() with patch("yellhorn_mcp.processors.judgement_processor.run_git_command") as mock_git: mock_git.side_effect = Exception("Git command failed") with pytest.raises(YellhornMCPError, match="Failed to generate git diff"): await get_git_diff(repo_path, "main", "feature", "full") class TestProcessJudgementAsync: """Test suite for process_judgement_async function.""" @pytest.mark.asyncio async def test_process_judgement_async_success_openai(self, tmp_path): """Test successful judgement processing with OpenAI.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = True mock_usage = UsageMetadata( {"prompt_tokens": 200, "completion_tokens": 100, "total_tokens": 300} ) mock_llm_manager.call_llm_with_usage.return_value = { "content": "## Judgement Summary\nAPPROVED - Implementation looks good", "usage_metadata": mock_usage, "reasoning_effort": ReasoningEffort.MEDIUM, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() workplan_content = "# Test Workplan\n\nImplement user authentication." diff_content = "diff --git a/auth.py b/auth.py\n+def authenticate():\n+ pass" with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch( "yellhorn_mcp.integrations.github_integration.update_github_issue" ) as mock_update, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, patch( "yellhorn_mcp.processors.judgement_processor.calculate_cost", return_value=0.456 ) as mock_cost, ): mock_context.return_value = ("Mock codebase context", ["auth.py"]) mock_update.return_value = None mock_git.return_value = "https://github.com/owner/repo" mock_comment.return_value = None await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-5", workplan_content=workplan_content, diff_content=diff_content, base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, reasoning_effort=ReasoningEffort.MEDIUM, ) # Verify LLM was called with correct prompt mock_llm_manager.call_llm_with_usage.assert_called_once() call_kwargs = mock_llm_manager.call_llm_with_usage.call_args.kwargs assert workplan_content in call_kwargs["prompt"] assert diff_content in call_kwargs["prompt"] assert "main" in call_kwargs["prompt"] assert "feature" in call_kwargs["prompt"] assert call_kwargs["reasoning_effort"] is ReasoningEffort.MEDIUM mock_cost.assert_called_once() assert mock_cost.call_args.args[3] == ReasoningEffort.MEDIUM.value # Note: GitHub integration tests are complex due to dependencies # Core LLM functionality is verified above @pytest.mark.asyncio async def test_process_judgement_async_success_gemini(self, tmp_path): """Test successful judgement processing with Gemini.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = False mock_usage = UsageMetadata( {"prompt_tokens": 200, "completion_tokens": 100, "total_tokens": 300} ) mock_llm_manager.call_llm_with_citations.return_value = { "content": "## Judgement Summary\nNEEDS_WORK - Missing tests", "usage_metadata": mock_usage, "grounding_metadata": None, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch( "yellhorn_mcp.integrations.github_integration.update_github_issue" ) as mock_update, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, ): mock_context.return_value = ("Mock codebase context", ["auth.py"]) mock_update.return_value = None mock_git.return_value = "https://github.com/owner/repo" mock_comment.return_value = None await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gemini-2.5-pro", workplan_content="# Test Workplan\n\nImplement authentication.", diff_content="diff --git a/auth.py b/auth.py\n+def authenticate():\n+ pass", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, ) # Verify LLM was called with citations mock_llm_manager.call_llm_with_citations.assert_called_once() # Note: GitHub integration tests are complex due to dependencies # Core LLM functionality is verified above @pytest.mark.asyncio async def test_process_judgement_async_create_new_subissue(self, tmp_path): """Test judgement processing when creating new sub-issue.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = True mock_usage = UsageMetadata( {"prompt_tokens": 200, "completion_tokens": 100, "total_tokens": 300} ) mock_llm_manager.call_llm_with_usage.return_value = { "content": "## Judgement Summary\nAPPROVED", "usage_metadata": mock_usage, "reasoning_effort": None, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch( "yellhorn_mcp.integrations.github_integration.create_judgement_subissue" ) as mock_create, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, ): mock_context.return_value = ("Mock codebase context", []) mock_create.return_value = "https://github.com/owner/repo/issues/125" mock_comment.return_value = None mock_git.return_value = "https://github.com/owner/repo" await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-4o", workplan_content="# Test Workplan", diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update=None, # No existing sub-issue debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, ) # Verify LLM was called mock_llm_manager.call_llm_with_usage.assert_called_once() # Note: Subissue creation involves complex GitHub integration # Core judgment logic is verified by LLM call above @pytest.mark.asyncio async def test_process_judgement_async_different_codebase_modes(self, tmp_path): """Test judgement processing with different codebase reasoning modes.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = True mock_usage = UsageMetadata( {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150} ) mock_llm_manager.call_llm_with_usage.return_value = { "content": "## Judgement Summary\nAPPROVED", "usage_metadata": mock_usage, "reasoning_effort": None, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() # Test different codebase reasoning modes test_modes = ["full", "lsp", "file_structure", "none"] for mode in test_modes: with ( patch( "yellhorn_mcp.formatters.context_fetcher.get_codebase_context" ) as mock_context, patch("yellhorn_mcp.utils.lsp_utils.get_lsp_diff") as mock_lsp_diff, patch( "yellhorn_mcp.integrations.github_integration.update_github_issue" ) as mock_update, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch( "yellhorn_mcp.integrations.github_integration.add_issue_comment" ) as mock_comment, ): mock_context.return_value = ("Mock codebase context", ["file.py"]) mock_lsp_diff.return_value = "# LSP diff content" mock_update.return_value = None mock_git.return_value = "https://github.com/owner/repo" mock_comment.return_value = None await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-4o", workplan_content="# Test Workplan", diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning=mode, disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, ) # Verify LLM was called for each mode (core functionality) mock_llm_manager.call_llm_with_usage.assert_called() # Note: judgement processor uses provided diff_content directly, # so codebase_reasoning mode doesn't affect function calls in this context @pytest.mark.asyncio async def test_process_judgement_async_with_debug(self, tmp_path): """Test judgement processing with debug enabled.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = True mock_usage = UsageMetadata( {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150} ) mock_llm_manager.call_llm_with_usage.return_value = { "content": "## Judgement Summary\nAPPROVED", "usage_metadata": mock_usage, "reasoning_effort": None, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch( "yellhorn_mcp.integrations.github_integration.update_github_issue" ) as mock_update, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, ): mock_context.return_value = ("Mock codebase context", []) mock_update.return_value = None mock_git.return_value = "https://github.com/owner/repo" mock_comment.return_value = None await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-4o", workplan_content="# Test Workplan", diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=True, # Enable debug codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, ) # Verify LLM was called mock_llm_manager.call_llm_with_usage.assert_called_once() # Note: Debug comment functionality requires complex GitHub integration # Core judgment logic with debug=True is verified by LLM call above @pytest.mark.asyncio async def test_process_judgement_async_empty_response(self, tmp_path): """Test judgement processing with empty LLM response.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager with empty response mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = True mock_usage = UsageMetadata( {"prompt_tokens": 100, "completion_tokens": 0, "total_tokens": 100} ) mock_llm_manager.call_llm_with_usage.return_value = { "content": "", # Empty content "usage_metadata": mock_usage, "reasoning_effort": None, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, ): mock_context.return_value = ("Mock codebase context", []) mock_comment.return_value = None with pytest.raises(YellhornMCPError, match="Failed to generate judgement"): await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-4o", workplan_content="# Test Workplan", diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, ) @pytest.mark.asyncio async def test_process_judgement_async_error_handling(self, tmp_path): """Test judgement processing error handling.""" repo_path = tmp_path / "repo" repo_path.mkdir() mock_llm_manager = MagicMock(spec=LLMManager) mock_ctx = MagicMock() mock_ctx.log = AsyncMock() with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, ): mock_context.side_effect = Exception("Codebase error") mock_comment.return_value = None with pytest.raises(YellhornMCPError, match="Error processing judgement"): await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-4o", workplan_content="# Test Workplan", diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, ) # Verify error was logged mock_ctx.log.assert_called() # Note: Error comment functionality requires complex GitHub integration # Error handling logic is verified by log call above @pytest.mark.asyncio async def test_process_judgement_async_with_grounding(self, tmp_path): """Test judgement processing with Gemini grounding metadata.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager with grounding mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = False mock_usage = UsageMetadata( {"prompt_tokens": 200, "completion_tokens": 100, "total_tokens": 300} ) # Mock grounding metadata mock_grounding = MagicMock() mock_grounding.grounding_chunks = ["search result 1", "search result 2"] mock_llm_manager.call_llm_with_citations.return_value = { "content": "## Judgement Summary\nAPPROVED with search context", "usage_metadata": mock_usage, "grounding_metadata": mock_grounding, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch( "yellhorn_mcp.integrations.github_integration.update_github_issue" ) as mock_update, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, patch( "yellhorn_mcp.utils.search_grounding_utils.add_citations_from_metadata" ) as mock_citations, ): mock_context.return_value = ("Mock codebase context", []) mock_update.return_value = None mock_git.return_value = "https://github.com/owner/repo" mock_comment.return_value = None mock_citations.return_value = "APPROVED with search context and citations" await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gemini-2.5-pro", workplan_content="# Test Workplan", diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"start_time": datetime.now(timezone.utc)}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, ) # Verify citations were processed mock_citations.assert_called_once() # Verify completion metadata includes search results comment_calls = mock_comment.call_args_list completion_comment_found = any( "search_results_used" in str(call) for call in comment_calls ) # Note: This might not be directly visible in the call args, # but the metadata should be processed @pytest.mark.asyncio async def test_process_judgement_async_url_extraction(self, tmp_path): """Test URL extraction from workplan for references.""" repo_path = tmp_path / "repo" repo_path.mkdir() # Mock LLM Manager mock_llm_manager = MagicMock(spec=LLMManager) mock_llm_manager._is_openai_model.return_value = True mock_usage = UsageMetadata( {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150} ) mock_llm_manager.call_llm_with_usage.return_value = { "content": "## Judgement Summary\nAPPROVED", "usage_metadata": mock_usage, "reasoning_effort": None, } mock_ctx = MagicMock() mock_ctx.log = AsyncMock() # Workplan with URLs workplan_content = """# Test Workplan Implement authentication based on: - https://example.com/auth-guide - https://docs.example.com/security ## Implementation Details Follow the patterns from https://github.com/example/auth-library """ with ( patch("yellhorn_mcp.formatters.context_fetcher.get_codebase_context") as mock_context, patch( "yellhorn_mcp.integrations.github_integration.update_github_issue" ) as mock_update, patch("yellhorn_mcp.utils.git_utils.run_git_command") as mock_git, patch("yellhorn_mcp.integrations.github_integration.add_issue_comment") as mock_comment, ): mock_context.return_value = ("Mock codebase context", []) mock_update.return_value = None mock_git.return_value = "https://github.com/owner/repo" mock_comment.return_value = None await process_judgement_async( repo_path=repo_path, llm_manager=mock_llm_manager, model="gpt-4o", workplan_content=workplan_content, diff_content="diff content", base_ref="main", head_ref="feature", base_commit_hash="abc123", head_commit_hash="def456", parent_workplan_issue_number="123", subissue_to_update="124", debug=False, codebase_reasoning="full", disable_search_grounding=False, _meta={"submitted_urls": ["https://example.com/auth-guide"]}, ctx=mock_ctx, github_command_func=AsyncMock( return_value="https://github.com/owner/repo/issues/125" ), git_command_func=mock_git, ) # Verify LLM was called with proper content mock_llm_manager.call_llm_with_usage.assert_called_once() # Note: GitHub integration complex to test due to dependencies # Core URL extraction and processing logic verified by LLM call

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/msnidal/yellhorn-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

test_judgement_processor.py•30.1 KiB