DM20 Protocol

Overview Schema Related Servers Score Discussions

dm20-protocol
tests
claudmaster

test_dual_agent_flow.py•21.3 KiB

""" Tests for the dual-agent response architecture (Narrator + Arbiter). These tests verify: 1. Orchestrator routes to both Narrator and Arbiter for combat/action intents 2. Agents execute in parallel (not sequentially) 3. Response aggregation merges narrative + mechanical results 4. Narrative hooks from Arbiter are appended to the narrative 5. State changes from Arbiter appear in the aggregated response 6. Partial failures (one agent fails) are handled gracefully """ import asyncio import json import time from unittest.mock import AsyncMock, MagicMock, patch import pytest from dm20_protocol.claudmaster.base import Agent, AgentResponse, AgentRole from dm20_protocol.claudmaster.config import ClaudmasterConfig from dm20_protocol.claudmaster.orchestrator import ( IntentType, Orchestrator, OrchestratorResponse, AgentTimeoutError, AgentExecutionError, ) from dm20_protocol.claudmaster.agents.narrator import NarratorAgent, NarrativeStyle from dm20_protocol.claudmaster.agents.arbiter import ArbiterAgent, MechanicalResolution from dm20_protocol.claudmaster.llm_client import MockLLMClient from dm20_protocol.models import Campaign, GameState # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture def campaign(): """Create a minimal campaign for testing.""" return Campaign( name="Test Campaign", description="A test campaign", game_state=GameState(campaign_name="Test Campaign"), ) @pytest.fixture def config(): """Create a ClaudmasterConfig with fast timeout for tests.""" return ClaudmasterConfig(agent_timeout=5.0) @pytest.fixture def narrator_llm(): """Mock LLM for Narrator (returns narrative text).""" return MockLLMClient( default_response="The goblin snarls as your blade arcs through the air." 
) @pytest.fixture def arbiter_llm(): """Mock LLM for Arbiter (returns JSON mechanical resolution).""" resolution = { "success": True, "dice_rolls": [ { "description": "Attack roll", "notation": "1d20+5", "result": 18, "success": True, "dc": 13, } ], "state_changes": [ { "target": "Goblin", "change_type": "hp", "description": "Goblin takes 8 slashing damage", "value": -8, } ], "rules_applied": ["PHB p.194: Attack action"], "narrative_hooks": ["Your blade strikes true, cutting deep into the goblin's shoulder."], "reasoning": "Melee attack roll 18 vs AC 13 hits. Damage: 1d8+3 = 8.", } return MockLLMClient(default_response=json.dumps(resolution)) @pytest.fixture def orchestrator_with_dual_agents(campaign, config, narrator_llm, arbiter_llm): """Create an orchestrator with both Narrator and Arbiter registered.""" orch = Orchestrator(campaign=campaign, config=config) narrator = NarratorAgent(llm=narrator_llm, style=NarrativeStyle.DESCRIPTIVE) arbiter = ArbiterAgent(llm=arbiter_llm, campaign=campaign) orch.register_agent("narrator", narrator) orch.register_agent("arbiter", arbiter) return orch # --------------------------------------------------------------------------- # Test: Agent Routing # --------------------------------------------------------------------------- class TestDualAgentRouting: """Test that the orchestrator routes to both agents for appropriate intents.""" def test_combat_routes_to_narrator_and_arbiter(self, orchestrator_with_dual_agents): """Combat intent should route to both Narrator and Arbiter.""" orch = orchestrator_with_dual_agents intent = orch.classify_intent("I attack the goblin with my sword") agents = orch._get_agents_for_intent(intent) roles = {a.role for a in agents} assert AgentRole.NARRATOR in roles assert AgentRole.ARBITER in roles def test_action_routes_to_narrator_and_arbiter(self, orchestrator_with_dual_agents): """General action intent should route to Narrator and Arbiter.""" orch = orchestrator_with_dual_agents intent = 
orch.classify_intent("I try to climb the wall") agents = orch._get_agents_for_intent(intent) roles = {a.role for a in agents} assert AgentRole.NARRATOR in roles assert AgentRole.ARBITER in roles def test_exploration_routes_to_narrator_and_arbiter(self, orchestrator_with_dual_agents): """Exploration intent should include Arbiter for skill checks.""" orch = orchestrator_with_dual_agents intent = orch.classify_intent("I search the room for traps") agents = orch._get_agents_for_intent(intent) roles = {a.role for a in agents} assert AgentRole.NARRATOR in roles assert AgentRole.ARBITER in roles def test_question_routes_to_narrator_only(self, orchestrator_with_dual_agents): """Question intent should not include Arbiter (no mechanics needed).""" orch = orchestrator_with_dual_agents intent = orch.classify_intent("what is the history of this castle?") agents = orch._get_agents_for_intent(intent) roles = {a.role for a in agents} assert AgentRole.NARRATOR in roles assert AgentRole.ARBITER not in roles def test_system_routes_to_narrator_only(self, orchestrator_with_dual_agents): """System commands should not include Arbiter.""" orch = orchestrator_with_dual_agents intent = orch.classify_intent("show my inventory") agents = orch._get_agents_for_intent(intent) roles = {a.role for a in agents} assert AgentRole.NARRATOR in roles # System routes to Archivist, not Arbiter assert AgentRole.ARBITER not in roles def test_roleplay_routes_to_narrator_and_arbiter(self, orchestrator_with_dual_agents): """Roleplay may need Arbiter for persuasion/deception checks.""" orch = orchestrator_with_dual_agents intent = orch.classify_intent("I try to persuade the guard to let us through") agents = orch._get_agents_for_intent(intent) roles = {a.role for a in agents} assert AgentRole.NARRATOR in roles assert AgentRole.ARBITER in roles # --------------------------------------------------------------------------- # Test: Parallel Execution # 
# ---------------------------------------------------------------------------


class TestParallelExecution:
    """Test that agents run in parallel, not sequentially."""

    @pytest.mark.anyio
    async def test_agents_run_concurrently(self, campaign, config):
        """Both agents should start ~simultaneously, not wait for each other."""
        # Shared (event_name, monotonic_timestamp) log written by both stub agents.
        execution_log = []

        class SlowNarrator(Agent):
            """Narrator stub that logs when ``act`` starts and ends around a 0.1s sleep."""

            def __init__(self):
                super().__init__(name="narrator", role=AgentRole.NARRATOR)

            async def reason(self, context):
                return "narrate"

            async def act(self, reasoning):
                execution_log.append(("narrator_start", time.monotonic()))
                await asyncio.sleep(0.1)
                execution_log.append(("narrator_end", time.monotonic()))
                return "The scene unfolds..."

            async def observe(self, result):
                return {"word_count": 3}

        class SlowArbiter(Agent):
            """Arbiter stub that logs when ``act`` starts and ends around a 0.1s sleep."""

            def __init__(self):
                super().__init__(name="arbiter", role=AgentRole.ARBITER)

            async def reason(self, context):
                return "resolve"

            async def act(self, reasoning):
                execution_log.append(("arbiter_start", time.monotonic()))
                await asyncio.sleep(0.1)
                execution_log.append(("arbiter_end", time.monotonic()))
                return MechanicalResolution(
                    success=True, reasoning="mock", narrative_hooks=["Hit!"]
                )

            async def observe(self, result):
                return {"success": True, "narrative_hooks": ["Hit!"]}

        orch = Orchestrator(campaign=campaign, config=config)
        orch.register_agent("narrator", SlowNarrator())
        orch.register_agent("arbiter", SlowArbiter())
        orch.start_session()

        await orch.process_player_input("I attack the goblin")

        starts = [t for name, t in execution_log if name.endswith("_start")]
        ends = [t for name, t in execution_log if name.endswith("_end")]

        assert len(starts) == 2
        assert len(ends) == 2

        # The gap between the two start times should be small (< 50ms)
        # If sequential, it would be ~100ms apart
        start_gap = abs(starts[1] - starts[0])
        assert start_gap < 0.05, f"Agents started {start_gap:.3f}s apart — not parallel!"

        # Both agents should have started before either finished. This makes
        # the overlap explicit instead of leaving `ends` only length-checked.
        assert max(starts) < min(ends), "An agent finished before the other started"

    @pytest.mark.anyio
    async def test_total_time_is_max_not_sum(self, campaign, config):
        """Total execution time should be ~max(agent_times), not sum."""

        class TimedNarrator(Agent):
            """Narrator stub whose ``act`` takes about 0.1s."""

            def __init__(self):
                super().__init__(name="narrator", role=AgentRole.NARRATOR)

            async def reason(self, context):
                return "narrate"

            async def act(self, reasoning):
                await asyncio.sleep(0.1)
                return "Narrative text"

            async def observe(self, result):
                return {}

        class TimedArbiter(Agent):
            """Arbiter stub whose ``act`` takes about 0.15s."""

            def __init__(self):
                super().__init__(name="arbiter", role=AgentRole.ARBITER)

            async def reason(self, context):
                return "resolve"

            async def act(self, reasoning):
                await asyncio.sleep(0.15)
                return MechanicalResolution(
                    success=True, reasoning="mock", narrative_hooks=[]
                )

            async def observe(self, result):
                return {"success": True}

        orch = Orchestrator(campaign=campaign, config=config)
        orch.register_agent("narrator", TimedNarrator())
        orch.register_agent("arbiter", TimedArbiter())
        orch.start_session()

        start = time.monotonic()
        await orch.process_player_input("I attack")
        elapsed = time.monotonic() - start

        # If parallel: elapsed ~ 0.15s. If sequential: elapsed ~ 0.25s
        assert elapsed < 0.22, f"Took {elapsed:.3f}s — agents likely ran sequentially"


# ---------------------------------------------------------------------------
# Test: Response Aggregation
# ---------------------------------------------------------------------------


class TestResponseAggregation:
    """Test that dual-agent responses are merged correctly."""

    @pytest.mark.anyio
    async def test_narrative_hooks_appended(self, orchestrator_with_dual_agents):
        """Arbiter narrative hooks should appear in the final narrative."""
        orch = orchestrator_with_dual_agents
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin with my sword")

        # Narrator text (from the narrator_llm fixture) must be present...
        assert "goblin snarls" in response.narrative.lower() or "blade arcs" in response.narrative.lower()
        # ...and so must the hook supplied by the arbiter_llm fixture.
        assert "strikes true" in response.narrative.lower()

    @pytest.mark.anyio
    async def test_state_changes_from_arbiter(self, orchestrator_with_dual_agents):
        """Arbiter state changes should appear in the aggregated response."""
        orch = orchestrator_with_dual_agents
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin with my sword")

        assert len(response.state_changes) > 0
        goblin_change = response.state_changes[0]
        assert goblin_change["target"] == "Goblin"
        assert goblin_change["type"] == "hp"
        assert goblin_change["value"] == -8

    @pytest.mark.anyio
    async def test_metadata_includes_arbiter_info(self, orchestrator_with_dual_agents):
        """Response metadata should include Arbiter mechanical summary."""
        orch = orchestrator_with_dual_agents
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin")

        assert response.metadata["has_mechanical_resolution"] is True
        assert response.metadata["arbiter_success"] is True
        assert response.metadata["dice_roll_count"] == 1

    @pytest.mark.anyio
    async def test_both_agent_responses_preserved(self, orchestrator_with_dual_agents):
        """Both raw agent responses should be preserved in the response."""
        orch = orchestrator_with_dual_agents
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin")

        roles = {r.agent_role for r in response.agent_responses}
        assert AgentRole.NARRATOR in roles
        assert AgentRole.ARBITER in roles

    @pytest.mark.anyio
    async def test_no_arbiter_means_no_mechanical_metadata(self, campaign, config, narrator_llm):
        """When no Arbiter is registered, metadata should reflect that."""
        orch = Orchestrator(campaign=campaign, config=config)
        narrator = NarratorAgent(llm=narrator_llm, style=NarrativeStyle.DESCRIPTIVE)
        orch.register_agent("narrator", narrator)
        orch.start_session()

        response = await orch.process_player_input("what is that sound?")

        assert response.metadata["has_mechanical_resolution"] is False


# ---------------------------------------------------------------------------
# Test: Partial Failures
# ---------------------------------------------------------------------------


class TestPartialFailures:
    """Test graceful handling when one agent fails but the other succeeds."""

    @pytest.mark.anyio
    async def test_arbiter_failure_still_returns_narrative(self, campaign, config, narrator_llm):
        """If Arbiter fails, Narrator response should still be returned."""

        class FailingArbiter(Agent):
            """Arbiter stub whose ``act`` always raises RuntimeError."""

            def __init__(self):
                super().__init__(name="arbiter", role=AgentRole.ARBITER)

            async def reason(self, context):
                return "resolve"

            async def act(self, reasoning):
                raise RuntimeError("LLM API error")

            async def observe(self, result):
                return {}

        orch = Orchestrator(campaign=campaign, config=config)
        narrator = NarratorAgent(llm=narrator_llm, style=NarrativeStyle.DESCRIPTIVE)
        orch.register_agent("narrator", narrator)
        orch.register_agent("arbiter", FailingArbiter())
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin")

        # Narrator should still succeed
        assert len(response.narrative) > 0
        assert "goblin snarls" in response.narrative.lower() or "blade arcs" in response.narrative.lower()

    @pytest.mark.anyio
    async def test_narrator_failure_still_returns_arbiter_fallback(self, campaign, config, arbiter_llm):
        """If Narrator fails, Arbiter results should still be accessible."""

        class FailingNarrator(Agent):
            """Narrator stub whose ``act`` always raises RuntimeError."""

            def __init__(self):
                super().__init__(name="narrator", role=AgentRole.NARRATOR)

            async def reason(self, context):
                return "narrate"

            async def act(self, reasoning):
                raise RuntimeError("LLM connection timeout")

            async def observe(self, result):
                return {}

        orch = Orchestrator(campaign=campaign, config=config)
        arbiter = ArbiterAgent(llm=arbiter_llm, campaign=campaign)
        orch.register_agent("narrator", FailingNarrator())
        orch.register_agent("arbiter", arbiter)
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin")

        # Arbiter should have succeeded — its response should be in agent_responses
        assert len(response.agent_responses) >= 1
        arbiter_resp = next(
            (r for r in response.agent_responses if r.agent_role == AgentRole.ARBITER),
            None,
        )
        assert arbiter_resp is not None

    @pytest.mark.anyio
    async def test_all_agents_fail_raises_error(self, campaign, config):
        """If ALL agents fail, an error should be raised."""

        class FailingAgent(Agent):
            """Agent stub with a configurable name/role whose ``act`` always raises."""

            def __init__(self, name, role):
                super().__init__(name=name, role=role)

            async def reason(self, context):
                return "fail"

            async def act(self, reasoning):
                raise RuntimeError("Everything is broken")

            async def observe(self, result):
                return {}

        orch = Orchestrator(campaign=campaign, config=config)
        orch.register_agent("narrator", FailingAgent("narrator", AgentRole.NARRATOR))
        orch.register_agent("arbiter", FailingAgent("arbiter", AgentRole.ARBITER))
        orch.start_session()

        with pytest.raises(AgentExecutionError):
            await orch.process_player_input("I attack the goblin")

    @pytest.mark.anyio
    async def test_timeout_one_agent_other_succeeds(self, campaign, narrator_llm):
        """If one agent times out but other succeeds, response is still returned."""
        # A local config replaces the shared fixture so the timeout (0.2s) is
        # shorter than the stub Arbiter's 1.0s sleep.
        config = ClaudmasterConfig(agent_timeout=0.2)

        class SlowArbiter(Agent):
            """Arbiter stub whose ``act`` sleeps longer than the configured timeout."""

            def __init__(self):
                super().__init__(name="arbiter", role=AgentRole.ARBITER)

            async def reason(self, context):
                return "resolve"

            async def act(self, reasoning):
                await asyncio.sleep(1.0)  # Will timeout
                return "Never reached"

            async def observe(self, result):
                return {}

        orch = Orchestrator(campaign=campaign, config=config)
        narrator = NarratorAgent(llm=narrator_llm, style=NarrativeStyle.DESCRIPTIVE)
        orch.register_agent("narrator", narrator)
        orch.register_agent("arbiter", SlowArbiter())
        orch.start_session()

        response = await orch.process_player_input("I attack the goblin")

        # Narrator should succeed despite Arbiter timeout
        assert len(response.narrative) > 0


# ---------------------------------------------------------------------------
# Test: Session Tools LLM Client Creation
# ---------------------------------------------------------------------------


class TestSessionToolsIntegration:
    """Test that SessionManager creates and wires LLM clients correctly."""

    @staticmethod
    def _create_clients_without_anthropic():
        """Return ``(narrator_llm, arbiter_llm)`` built while AnthropicLLMClient raises.

        ``AnthropicLLMClient`` is patched inside the ``session_tools`` module so
        that calling it raises ``Exception("No API key")``; the clients are
        then obtained from ``SessionManager._create_llm_clients`` with a
        default ``ClaudmasterConfig``.
        """
        # Imported locally, as in the original tests, rather than at module level.
        from dm20_protocol.claudmaster.tools.session_tools import SessionManager

        config = ClaudmasterConfig()
        manager = SessionManager()

        # Force mock by patching the import check
        with patch(
            "dm20_protocol.claudmaster.tools.session_tools.AnthropicLLMClient",
            side_effect=Exception("No API key"),
        ):
            return manager._create_llm_clients(config)

    def test_create_llm_clients_returns_mock_fallback(self):
        """When Anthropic SDK is unavailable, should return MockLLMClient."""
        narrator_llm, arbiter_llm = self._create_clients_without_anthropic()

        assert isinstance(narrator_llm, MockLLMClient)
        assert isinstance(arbiter_llm, MockLLMClient)

    def test_mock_arbiter_llm_returns_valid_json(self):
        """The mock Arbiter LLM should return parseable JSON."""
        _, arbiter_llm = self._create_clients_without_anthropic()

        # Verify the default response is valid JSON
        data = json.loads(arbiter_llm.default_response)
        assert "success" in data
        assert "reasoning" in data
        assert isinstance(data["narrative_hooks"], list)


# ---------------------------------------------------------------------------
# Test: End-to-End Action Processing
# ---------------------------------------------------------------------------


class TestEndToEndActionProcessing:
    """Full pipeline test: player_action -> dual response -> structured output."""

    @pytest.mark.anyio
    async def test_combat_action_full_pipeline(self, orchestrator_with_dual_agents):
        """A combat action should produce narrative + mechanics."""
        orch = orchestrator_with_dual_agents
        orch.start_session()

        response = await orch.process_player_input("I swing my sword at the goblin")

        # Narrative should exist
        assert len(response.narrative) > 10

        # Metadata should show both agents participated
        assert "narrator" in response.metadata["agents_used"]
        assert "arbiter" in response.metadata["agents_used"]

        # Mechanical resolution should be present
        assert response.metadata["has_mechanical_resolution"] is True

    @pytest.mark.anyio
    async def test_session_turn_tracking(self, orchestrator_with_dual_agents):
        """Each processed input should add two messages to the conversation history.

        A fresh session starts with ``turn_count == 0``; the turn counter itself
        is not re-checked afterwards, only the history length.
        """
        orch = orchestrator_with_dual_agents
        session = orch.start_session()

        assert session.turn_count == 0

        await orch.process_player_input("I attack the goblin")
        # process_player_input doesn't increment turn, but adds to history
        assert len(session.conversation_history) == 2  # user + assistant

        await orch.process_player_input("I cast fireball")
        assert len(session.conversation_history) == 4  # 2 more messages

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_dual_agent_flow.py•21.3 KiB