DM20 Protocol

Overview Schema Related Servers Score Discussions

dm20-protocol
tests
claudmaster

test_stress_session.py•21.1 KiB

"""
Stress test for session stability over 20+ sequential turns.

This test validates that the Claudmaster system maintains consistency and
performance across extended gameplay sessions by:
1. Simulating 20+ sequential player actions with varied intent types
2. Validating game state consistency after every turn
3. Verifying conversation_history grows correctly
4. Checking that response generation doesn't degrade over time
5. Validating that get_context() bounds the number of messages it returns
6. Testing session save/load after extended play

All tests use MockLLMClient for deterministic, API-free execution.
"""

import asyncio
import json
import time
from pathlib import Path

import pytest

from dm20_protocol.claudmaster.base import AgentRole
from dm20_protocol.claudmaster.config import ClaudmasterConfig
from dm20_protocol.claudmaster.llm_client import MockLLMClient
from dm20_protocol.claudmaster.orchestrator import IntentType, Orchestrator
from dm20_protocol.claudmaster.agents.arbiter import ArbiterAgent
from dm20_protocol.claudmaster.agents.narrator import NarratorAgent, NarrativeStyle
from dm20_protocol.claudmaster.persistence.session_serializer import SessionSerializer
from dm20_protocol.models import Campaign, GameState


# Ratios between near-zero timings are dominated by scheduler/GC noise, so the
# relative latency check is only enforced above this absolute floor (seconds).
_LATENCY_NOISE_FLOOR_S = 0.05


# ============================================================================
# Fixtures
# ============================================================================


@pytest.fixture
def campaign():
    """Minimal campaign for stress testing."""
    return Campaign(
        name="Stress Test Campaign",
        description="A campaign designed for long-duration stress testing",
        game_state=GameState(campaign_name="Stress Test Campaign"),
    )


@pytest.fixture
def config():
    """ClaudmasterConfig with a 5 second agent timeout for stress testing."""
    return ClaudmasterConfig(agent_timeout=5.0)


@pytest.fixture
def narrator_llm():
    """Mock LLM for the Narrator, configured with a single default narrative."""
    return MockLLMClient(
        default_response=(
            "The dungeon stretches before you, shadows dancing in the torchlight. "
            "Your footsteps echo through the ancient corridors."
        )
    )


@pytest.fixture
def arbiter_llm():
    """Mock LLM for the Arbiter with a generic successful resolution (JSON)."""
    resolution = {
        "success": True,
        "dice_rolls": [
            {
                "description": "Action resolution",
                "notation": "1d20+5",
                "result": 18,
                "success": True,
                "dc": 15,
            }
        ],
        "state_changes": [],
        "rules_applied": ["PHB p.178: Standard action resolution"],
        "narrative_hooks": ["Your action succeeds without complication."],
        "reasoning": "Standard action check 18 vs DC 15 succeeds.",
    }
    return MockLLMClient(default_response=json.dumps(resolution))


def _make_orchestrator(campaign, config, narrator_llm, arbiter_llm):
    """Helper: create an orchestrator with Narrator + Arbiter registered.

    Args:
        campaign: Campaign handed to the Orchestrator and the ArbiterAgent.
        config: ClaudmasterConfig handed to the Orchestrator.
        narrator_llm: LLM client used by the NarratorAgent.
        arbiter_llm: LLM client used by the ArbiterAgent.

    Returns:
        The Orchestrator with agents registered as "narrator" and "arbiter".
    """
    orch = Orchestrator(campaign=campaign, config=config)
    narrator = NarratorAgent(llm=narrator_llm, style=NarrativeStyle.DESCRIPTIVE)
    arbiter = ArbiterAgent(llm=arbiter_llm, campaign=campaign)
    orch.register_agent("narrator", narrator)
    orch.register_agent("arbiter", arbiter)
    return orch


# ============================================================================
# Test Player Action Sequences
# ============================================================================


class TestStressSession:
    """20+ turn stress test for session stability."""

    # Realistic 20+ turn gameplay sequence covering all major intent types.
    # Each entry is (player input, intent the input is meant to represent).
    PLAYER_ACTIONS = [
        # Exploration phase (turns 1-5)
        ("I carefully enter the dark cave, looking for any signs of danger", IntentType.EXPLORATION),
        ("I search the walls for hidden doors or passages", IntentType.EXPLORATION),
        ("What do I know about this region's history?", IntentType.QUESTION),
        ("I examine the ancient runes carved into the stone", IntentType.EXPLORATION),
        ("I listen carefully for any sounds deeper in the cave", IntentType.EXPLORATION),
        # NPC encounter and roleplay (turns 6-9)
        ("I approach the figure cautiously and introduce myself", IntentType.ROLEPLAY),
        ("I try to persuade the merchant to share information about the area", IntentType.ROLEPLAY),
        ("What does this NPC look like?", IntentType.QUESTION),
        ("I ask about recent troubles in the region", IntentType.ROLEPLAY),
        # Combat sequence (turns 10-15)
        ("I draw my sword and attack the goblin", IntentType.COMBAT),
        ("I cast fireball at the group of goblins", IntentType.COMBAT),
        ("I dodge the incoming attack and reposition", IntentType.COMBAT),
        ("I cast healing word on myself", IntentType.COMBAT),
        ("I make an opportunity attack as the enemy retreats", IntentType.COMBAT),
        ("I strike the wounded goblin with my sword", IntentType.COMBAT),
        # Post-combat exploration (turns 16-20)
        ("I search the bodies for loot and clues", IntentType.EXPLORATION),
        ("I examine the room where the goblins were camping", IntentType.EXPLORATION),
        ("What exits are available from this chamber?", IntentType.QUESTION),
        ("I take a short rest to recover", IntentType.ACTION),
        ("I proceed deeper into the dungeon, staying alert", IntentType.EXPLORATION),
        # Extended play (turns 21-25)
        ("I investigate the mysterious glowing orb", IntentType.EXPLORATION),
        ("I attempt to decipher the magical symbols", IntentType.ACTION),
        ("Can I recall any lore about artifacts like this?", IntentType.QUESTION),
        ("I carefully touch the orb with my staff", IntentType.ACTION),
        ("I back away slowly and prepare for whatever happens next", IntentType.ACTION),
    ]

    @pytest.mark.anyio
    async def test_20_turn_session_stability(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Full 20+ turn session without errors or state corruption."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Execute every scripted turn, validating the session after each one
        for turn_idx, (player_input, _) in enumerate(self.PLAYER_ACTIONS, 1):
            response = await orch.process_player_input(player_input)

            # Verify conversation history grows correctly (2 messages per turn)
            expected_history_len = turn_idx * 2
            assert len(session.conversation_history) == expected_history_len, (
                f"Turn {turn_idx}: expected {expected_history_len} messages, "
                f"got {len(session.conversation_history)}"
            )

            # Verify last user message matches input
            assert session.conversation_history[-2]["role"] == "user"
            assert session.conversation_history[-2]["content"] == player_input

            # Verify last assistant message is present and non-empty
            assert session.conversation_history[-1]["role"] == "assistant"
            assert len(session.conversation_history[-1]["content"]) > 0

            # Verify response structure is valid
            assert response.narrative, f"Turn {turn_idx}: narrative is empty"
            assert isinstance(response.metadata, dict)
            assert isinstance(response.state_changes, list)
            assert isinstance(response.agent_responses, list)

            # Verify agents are tracked correctly
            assert "narrator" in session.active_agents
            assert "arbiter" in session.active_agents

        # Final validation: one user + one assistant message per scripted turn
        assert len(session.conversation_history) == len(self.PLAYER_ACTIONS) * 2

    @pytest.mark.anyio
    async def test_conversation_history_bounded(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Context stays manageable as turns accumulate."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Run every scripted turn
        for player_input, _ in self.PLAYER_ACTIONS:
            await orch.process_player_input(player_input)

        # Full conversation history keeps every message (2 per turn)
        assert len(session.conversation_history) == len(self.PLAYER_ACTIONS) * 2

        # get_context() should respect max_messages limit and return the tail
        context_10 = session.get_context(max_messages=10)
        assert len(context_10["recent_messages"]) == 10
        assert context_10["recent_messages"][0] == session.conversation_history[-10]
        assert context_10["recent_messages"][-1] == session.conversation_history[-1]

        # get_context() with max_messages=20 (default)
        context_20 = session.get_context(max_messages=20)
        assert len(context_20["recent_messages"]) == 20

        # Verify context includes other session metadata
        assert context_20["session_id"] == session.session_id
        assert context_20["campaign_id"] == session.campaign_id
        assert "agent_statuses" in context_20

    @pytest.mark.anyio
    async def test_no_latency_degradation(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Response times don't grow with turn count (mock environment)."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        orch.start_session()

        # Track wall-clock timing for each turn
        turn_times = []
        for player_input, _ in self.PLAYER_ACTIONS:
            start_time = time.perf_counter()
            await orch.process_player_input(player_input)
            turn_times.append(time.perf_counter() - start_time)

        # In mock environment, response time should be consistently fast.
        # Check that later turns aren't significantly slower than early turns.
        first_5_avg = sum(turn_times[:5]) / 5
        last_5_avg = sum(turn_times[-5:]) / 5

        # Allow for 3x degradation maximum. The absolute floor keeps the check
        # from failing on pure timer noise when both averages are near zero.
        allowed_avg = max(first_5_avg * 3, _LATENCY_NOISE_FLOOR_S)
        assert last_5_avg < allowed_avg, (
            f"Performance degradation detected: "
            f"first 5 turns avg={first_5_avg:.4f}s, "
            f"last 5 turns avg={last_5_avg:.4f}s"
        )

        # No single turn should take unreasonably long (timeout is 5s per agent)
        max_acceptable_time = 15.0  # generous for 2 agents @ 5s timeout each
        for turn_idx, elapsed in enumerate(turn_times, 1):
            assert elapsed < max_acceptable_time, (
                f"Turn {turn_idx} took {elapsed:.2f}s (max {max_acceptable_time}s)"
            )

    @pytest.mark.anyio
    async def test_session_save_load_after_stress(
        self, campaign, config, narrator_llm, arbiter_llm, tmp_path
    ):
        """Session can be saved and loaded after 20+ turns."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()
        session_id = session.session_id
        expected_messages = len(self.PLAYER_ACTIONS) * 2

        # Play every scripted turn
        for player_input, _ in self.PLAYER_ACTIONS:
            await orch.process_player_input(player_input)

        # Verify final state before save (turn_count stays 0 with process_player_input)
        assert len(session.conversation_history) == expected_messages

        # Save session
        serializer = SessionSerializer(tmp_path)
        session_data = {
            "session_id": session_id,
            "campaign_id": session.campaign_id,
            "started_at": session.started_at.isoformat(),
            "turn_count": session.turn_count,
            "conversation_history": session.conversation_history,
            "config": session.config.model_dump(),
            "active_agents": dict(session.active_agents),
            "metadata": session.metadata,
        }
        save_path = serializer.save_session(session_data, mode="pause")
        assert save_path.exists()

        # Load session
        loaded = serializer.load_session(session_id)
        assert loaded is not None
        assert loaded["session_id"] == session_id
        assert len(loaded["conversation_history"]) == expected_messages

        # Verify loaded conversation history matches exactly
        assert loaded["conversation_history"] == session.conversation_history

        # Verify agent status persisted
        assert loaded["active_agents"] == dict(session.active_agents)

    @pytest.mark.anyio
    async def test_agent_status_tracking_across_turns(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Agent statuses remain consistent across all turns."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Initially all idle
        assert session.active_agents["narrator"] == "idle"
        assert session.active_agents["arbiter"] == "idle"

        # Process 10 turns and check status after each
        for turn_idx, (player_input, _) in enumerate(self.PLAYER_ACTIONS[:10], 1):
            await orch.process_player_input(player_input)

            # After each turn, agents should be "completed"
            assert session.active_agents["narrator"] == "completed", (
                f"Narrator status incorrect after processing {turn_idx} inputs"
            )
            assert session.active_agents["arbiter"] == "completed", (
                f"Arbiter status incorrect after processing {turn_idx} inputs"
            )

    @pytest.mark.anyio
    async def test_mixed_intent_routing(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Different intent types route correctly across all turns."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        orch.start_session()

        intent_counts = {
            IntentType.EXPLORATION: 0,
            IntentType.COMBAT: 0,
            IntentType.ROLEPLAY: 0,
            IntentType.QUESTION: 0,
            IntentType.ACTION: 0,
        }

        # Execute all turns and track intent classification
        for player_input, _ in self.PLAYER_ACTIONS:
            intent = orch.classify_intent(player_input)

            # Track actual classified intents (may differ from the scripted
            # intent in edge cases, so the scripted value is not asserted)
            if intent.intent_type in intent_counts:
                intent_counts[intent.intent_type] += 1

            response = await orch.process_player_input(player_input)

            # All intents should produce valid responses
            assert response.narrative
            assert len(response.narrative) > 0

        # Verify we exercised multiple intent types
        executed_intents = sum(1 for count in intent_counts.values() if count > 0)
        assert executed_intents >= 3, (
            f"Only {executed_intents} intent types were classified, expected >= 3"
        )

    @pytest.mark.anyio
    async def test_state_changes_accumulation(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """State changes from multiple turns don't corrupt session state."""
        # Create arbiter that produces state changes
        state_change_resolution = {
            "success": True,
            "dice_rolls": [
                {
                    "description": "Attack roll",
                    "notation": "1d20+5",
                    "result": 18,
                    "success": True,
                    "dc": 13,
                }
            ],
            "state_changes": [
                {
                    "target": "Enemy",
                    "change_type": "hp",
                    "description": "Enemy takes damage",
                    "value": -15,
                }
            ],
            "rules_applied": ["PHB p.194: Melee Attack"],
            "narrative_hooks": ["Your blade strikes true."],
            "reasoning": "Attack roll succeeds.",
        }
        arbiter_with_changes = MockLLMClient(
            default_response=json.dumps(state_change_resolution)
        )

        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_with_changes)
        session = orch.start_session()

        total_state_changes = 0

        # Run 15 combat-heavy turns
        combat_actions = [
            ("I attack with my sword", IntentType.COMBAT),
            ("I strike again", IntentType.COMBAT),
            ("I cast magic missile", IntentType.COMBAT),
            ("I make another attack", IntentType.COMBAT),
            ("I finish the enemy", IntentType.COMBAT),
        ] * 3  # 15 turns total

        for player_input, _ in combat_actions:
            response = await orch.process_player_input(player_input)

            # Track state changes
            if response.state_changes:
                total_state_changes += len(response.state_changes)

        # Verify state changes were collected across all turns
        assert total_state_changes > 0, "No state changes were recorded"

        # Session should still be valid after all state changes
        assert len(session.conversation_history) == len(combat_actions) * 2

    @pytest.mark.anyio
    async def test_context_metadata_preservation(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Session metadata persists correctly across all turns."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Add custom metadata
        session.metadata["player_name"] = "Thorin"
        session.metadata["difficulty"] = "hard"
        session.metadata["test_flag"] = True

        # Process the first 10 turns
        for player_input, _ in self.PLAYER_ACTIONS[:10]:
            await orch.process_player_input(player_input)

        # Verify metadata persists mid-session
        context = session.get_context(max_messages=5)
        assert context["player_name"] == "Thorin"
        assert context["difficulty"] == "hard"
        assert context["test_flag"] is True

        # Play out the remaining turns so the final check covers the whole script
        for player_input, _ in self.PLAYER_ACTIONS[10:]:
            await orch.process_player_input(player_input)

        # Verify metadata still present after all turns
        final_context = session.get_context()
        assert final_context["player_name"] == "Thorin"
        assert final_context["difficulty"] == "hard"
        assert final_context["test_flag"] is True


# ============================================================================
# Edge Case Stress Tests
# ============================================================================


class TestStressEdgeCases:
    """Edge cases under stress conditions."""

    @pytest.mark.anyio
    async def test_rapid_sequential_turns(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Rapid-fire turns (no delay) don't cause race conditions."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Fire 30 turns as fast as possible
        rapid_actions = ["I move forward"] * 30

        for idx, action in enumerate(rapid_actions, 1):
            response = await orch.process_player_input(action)

            # Verify state consistency despite rapid execution
            assert len(session.conversation_history) == idx * 2
            assert response.narrative

    @pytest.mark.anyio
    async def test_very_long_player_input(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """System handles unusually long player inputs across multiple turns."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Create very long input (simulate verbose player). Adjacent literals
        # are joined before "* 3" applies, so the whole paragraph is tripled.
        long_input = (
            "I carefully and methodically examine every single inch of the ancient "
            "stone wall, running my fingers along the mortar between the massive "
            "granite blocks, looking for any irregularities, hidden switches, or "
            "concealed mechanisms that might reveal a secret passage or hidden "
            "compartment, while also keeping one eye on the shadows and my other "
            "hand ready on my weapon in case of an ambush." * 3  # Triple for stress
        )

        # Process several turns with the same long input
        long_turns = 5
        for _ in range(long_turns):
            response = await orch.process_player_input(long_input)

            # Verify long input is stored correctly
            assert session.conversation_history[-2]["content"] == long_input
            assert response.narrative

        # Verify all long inputs are preserved
        assert len(session.conversation_history) == long_turns * 2

    @pytest.mark.anyio
    async def test_alternating_simple_complex_actions(
        self, campaign, config, narrator_llm, arbiter_llm
    ):
        """Alternating between simple and complex actions maintains stability."""
        orch = _make_orchestrator(campaign, config, narrator_llm, arbiter_llm)
        session = orch.start_session()

        # Alternate between minimal and verbose inputs
        alternating_actions = [
            "I look",
            "I carefully examine the intricate carvings on the door, trying to understand their meaning",
            "I wait",
            "I search the entire room methodically, checking every corner and surface",
            "I go north",
            "I cautiously approach the mysterious altar, ready to react if anything happens",
        ] * 4  # 24 turns

        for idx, action in enumerate(alternating_actions, 1):
            response = await orch.process_player_input(action)
            assert len(session.conversation_history) == idx * 2
            assert response.narrative

        assert len(session.conversation_history) == len(alternating_actions) * 2

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Polloinfilzato/dm20-protocol'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

test_stress_session.py•21.1 KiB