Skip to main content
Glama

Gemini MCP Server

test_planner_validation.py (30.3 kB)
#!/usr/bin/env python3
"""
PlannerWorkflow Tool Validation Test

Tests the planner tool's capabilities using the new workflow architecture.
This validates that the new workflow-based implementation maintains all the
functionality of the original planner tool while using the workflow pattern
like the debug tool.
"""

import json
from typing import Optional

from .conversation_base_test import ConversationBaseTest


class PlannerValidationTest(ConversationBaseTest):
    """Test planner tool with new workflow architecture"""

    @property
    def test_name(self) -> str:
        return "planner_validation"

    @property
    def test_description(self) -> str:
        return "PlannerWorkflow tool validation with new workflow architecture"

    def run_test(self) -> bool:
        """Test planner tool capabilities.

        Runs six sub-tests in sequence; returns False on the first failure.
        """
        # Set up the test environment
        self.setUp()

        try:
            self.logger.info("Test: PlannerWorkflow tool validation (new architecture)")

            # Test 1: Single planning session with workflow architecture
            if not self._test_single_planning_session():
                return False

            # Test 2: Planning with continuation using workflow
            if not self._test_planning_with_continuation():
                return False

            # Test 3: Complex plan with deep thinking pauses
            if not self._test_complex_plan_deep_thinking():
                return False

            # Test 4: Self-contained completion (no expert analysis)
            if not self._test_self_contained_completion():
                return False

            # Test 5: Branching and revision with workflow
            if not self._test_branching_and_revision():
                return False

            # Test 6: Workflow file context behavior
            if not self._test_workflow_file_context():
                return False

            self.logger.info(" ✅ All planner validation tests passed")
            return True

        except Exception as e:
            self.logger.error(f"PlannerWorkflow validation test failed: {e}")
            return False

    def _test_single_planning_session(self) -> bool:
        """Test a complete planning session with workflow architecture"""
        try:
            self.logger.info(" 1.1: Testing single planning session with workflow")

            # Step 1: Start planning
            self.logger.info(" 1.1.1: Step 1 - Initial planning step")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "I need to plan a comprehensive API redesign for our legacy system. Let me start by analyzing the current state and identifying key requirements for the new API architecture.",
                    "step_number": 1,
                    "total_steps": 4,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to get initial planning response")
                return False

            # Parse and validate JSON response
            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Validate step 1 response structure - expect pause_for_planner for next_step_required=True
            if not self._validate_step_response(response1_data, 1, 4, True, "pause_for_planner"):
                return False

            # Debug: Log the actual response structure to see what we're getting
            self.logger.debug(f"Response structure: {list(response1_data.keys())}")

            # Check workflow-specific response structure (more flexible)
            status_key = self._find_status_key(response1_data)
            if not status_key:
                self.logger.error(f"Missing workflow status field in response: {list(response1_data.keys())}")
                return False

            self.logger.debug(f"Found status field: {status_key}")

            # Check required_actions for workflow guidance
            if not response1_data.get("required_actions"):
                self.logger.error("Missing required_actions in workflow response")
                return False

            self.logger.info(f" ✅ Step 1 successful with workflow, continuation_id: {continuation_id}")

            # Step 2: Continue planning
            self.logger.info(" 1.1.2: Step 2 - API domain analysis")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "After analyzing the current API, I can identify three main domains: User Management, Content Management, and Analytics. Let me design the new API structure with RESTful endpoints and proper versioning.",
                    "step_number": 2,
                    "total_steps": 4,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to continue planning to step 2")
                return False

            response2_data = self._parse_planner_response(response2)
            if not self._validate_step_response(response2_data, 2, 4, True, "pause_for_planner"):
                return False

            # Check step history tracking in workflow (more flexible)
            status_key = self._find_status_key(response2_data)
            if status_key:
                workflow_status = response2_data.get(status_key, {})
                step_history_length = workflow_status.get("step_history_length", 0)
                if step_history_length < 2:
                    self.logger.error(f"Step history not properly tracked in workflow: {step_history_length}")
                    return False
                self.logger.debug(f"Step history length: {step_history_length}")
            else:
                self.logger.warning("No workflow status found, skipping step history check")

            self.logger.info(" ✅ Step 2 successful with workflow tracking")

            # Step 3: Final step - should trigger completion
            self.logger.info(" 1.1.3: Step 3 - Final planning step")
            response3, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "API redesign plan complete: Phase 1 - User Management API, Phase 2 - Content Management API, Phase 3 - Analytics API. Each phase includes proper authentication, rate limiting, and comprehensive documentation.",
                    "step_number": 3,
                    "total_steps": 3,  # Adjusted total
                    "next_step_required": False,  # Final step - should complete without expert analysis
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response3:
                self.logger.error("Failed to complete planning session")
                return False

            response3_data = self._parse_planner_response(response3)
            if not response3_data:
                return False

            # Validate final response structure - should be self-contained completion
            if response3_data.get("status") != "planner_complete":
                self.logger.error(f"Expected status 'planner_complete', got '{response3_data.get('status')}'")
                return False

            if not response3_data.get("planning_complete"):
                self.logger.error("Expected planning_complete=true for final step")
                return False

            # Should NOT have expert_analysis (self-contained)
            if "expert_analysis" in response3_data:
                self.logger.error("PlannerWorkflow should be self-contained without expert analysis")
                return False

            # Check plan_summary exists
            if not response3_data.get("plan_summary"):
                self.logger.error("Missing plan_summary in final step")
                return False

            self.logger.info(" ✅ Planning session completed successfully with workflow architecture")

            # Store continuation_id for next test
            self.api_continuation_id = continuation_id
            return True

        except Exception as e:
            self.logger.error(f"Single planning session test failed: {e}")
            return False

    def _test_planning_with_continuation(self) -> bool:
        """Test planning continuation with workflow architecture"""
        try:
            self.logger.info(" 1.2: Testing planning continuation with workflow")

            # Use continuation from previous test if available
            continuation_id = getattr(self, "api_continuation_id", None)

            if not continuation_id:
                # Start fresh if no continuation available
                self.logger.info(" 1.2.0: Starting fresh planning session")
                response0, continuation_id = self.call_mcp_tool(
                    "planner",
                    {
                        "step": "Planning API security strategy",
                        "step_number": 1,
                        "total_steps": 2,
                        "next_step_required": True,
                        "model": "flash",
                    },
                )
                if not response0 or not continuation_id:
                    self.logger.error("Failed to start fresh planning session")
                    return False

            # Test continuation step
            self.logger.info(" 1.2.1: Continue planning session")
            response1, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Building on the API redesign, let me now plan the security implementation with OAuth 2.0, API keys, and rate limiting strategies.",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response1:
                self.logger.error("Failed to continue planning")
                return False

            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Validate continuation behavior
            if not self._validate_step_response(response1_data, 2, 2, True, "pause_for_planner"):
                return False

            # Check that continuation_id is preserved
            if response1_data.get("continuation_id") != continuation_id:
                self.logger.error("Continuation ID not preserved in workflow")
                return False

            self.logger.info(" ✅ Planning continuation working with workflow")
            return True

        except Exception as e:
            self.logger.error(f"Planning continuation test failed: {e}")
            return False

    def _test_complex_plan_deep_thinking(self) -> bool:
        """Test complex plan with deep thinking pauses"""
        try:
            self.logger.info(" 1.3: Testing complex plan with deep thinking pauses")

            # Start complex plan (≥5 steps) - should trigger deep thinking
            self.logger.info(" 1.3.1: Step 1 of complex plan (should trigger deep thinking)")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "I need to plan a complete digital transformation for our enterprise organization, including cloud migration, process automation, and cultural change management.",
                    "step_number": 1,
                    "total_steps": 8,  # Complex plan ≥5 steps
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start complex planning")
                return False

            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Should trigger deep thinking pause for complex plan
            if response1_data.get("status") != "pause_for_deep_thinking":
                self.logger.error("Expected deep thinking pause for complex plan step 1")
                return False

            if not response1_data.get("thinking_required"):
                self.logger.error("Expected thinking_required=true for complex plan")
                return False

            # Check required thinking actions
            required_thinking = response1_data.get("required_thinking", [])
            if len(required_thinking) < 4:
                self.logger.error("Expected comprehensive thinking requirements for complex plan")
                return False

            # Check for deep thinking guidance in next_steps
            next_steps = response1_data.get("next_steps", "")
            if "MANDATORY" not in next_steps or "deep thinking" not in next_steps.lower():
                self.logger.error("Expected mandatory deep thinking guidance")
                return False

            self.logger.info(" ✅ Complex plan step 1 correctly triggered deep thinking pause")

            # Step 2 of complex plan - should also trigger deep thinking
            self.logger.info(" 1.3.2: Step 2 of complex plan (should trigger deep thinking)")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "After deep analysis, I can see this transformation requires three parallel tracks: Technical Infrastructure, Business Process, and Human Capital. Let me design the coordination strategy.",
                    "step_number": 2,
                    "total_steps": 8,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to continue complex planning")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Step 2 should also trigger deep thinking for complex plans
            if response2_data.get("status") != "pause_for_deep_thinking":
                self.logger.error("Expected deep thinking pause for complex plan step 2")
                return False

            self.logger.info(" ✅ Complex plan step 2 correctly triggered deep thinking pause")

            # Step 4 of complex plan - should use normal flow (after step 3)
            self.logger.info(" 1.3.3: Step 4 of complex plan (should use normal flow)")
            response4, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Now moving to tactical planning: Phase 1 execution details with specific timelines and resource allocation for the technical infrastructure track.",
                    "step_number": 4,
                    "total_steps": 8,
                    "next_step_required": True,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response4:
                self.logger.error("Failed to continue to step 4")
                return False

            response4_data = self._parse_planner_response(response4)
            if not response4_data:
                return False

            # Step 4 should use normal flow (no more deep thinking pauses)
            if response4_data.get("status") != "pause_for_planner":
                self.logger.error("Expected normal planning flow for step 4")
                return False

            if response4_data.get("thinking_required"):
                self.logger.error("Step 4 should not require special thinking pause")
                return False

            self.logger.info(" ✅ Complex plan transitions to normal flow after step 3")
            return True

        except Exception as e:
            self.logger.error(f"Complex plan deep thinking test failed: {e}")
            return False

    def _test_self_contained_completion(self) -> bool:
        """Test self-contained completion without expert analysis"""
        try:
            self.logger.info(" 1.4: Testing self-contained completion")

            # Simple planning session that should complete without expert analysis
            self.logger.info(" 1.4.1: Simple planning session")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "Planning a simple website redesign with new color scheme and improved navigation.",
                    "step_number": 1,
                    "total_steps": 2,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start simple planning")
                return False

            # Final step - should complete without expert analysis
            self.logger.info(" 1.4.2: Final step - self-contained completion")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Website redesign plan complete: Phase 1 - Update color palette and typography, Phase 2 - Redesign navigation structure and user flows.",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": False,  # Final step
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to complete simple planning")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Validate self-contained completion
            if response2_data.get("status") != "planner_complete":
                self.logger.error("Expected self-contained completion status")
                return False

            # Should NOT call expert analysis
            if "expert_analysis" in response2_data:
                self.logger.error("PlannerWorkflow should not call expert analysis")
                return False

            # Should have planning_complete flag
            if not response2_data.get("planning_complete"):
                self.logger.error("Expected planning_complete=true")
                return False

            # Should have plan_summary
            if not response2_data.get("plan_summary"):
                self.logger.error("Expected plan_summary in completion")
                return False

            # Check completion instructions
            output = response2_data.get("output", {})
            if not output.get("instructions"):
                self.logger.error("Missing output instructions for plan presentation")
                return False

            self.logger.info(" ✅ Self-contained completion working correctly")
            return True

        except Exception as e:
            self.logger.error(f"Self-contained completion test failed: {e}")
            return False

    def _test_branching_and_revision(self) -> bool:
        """Test branching and revision with workflow architecture"""
        try:
            self.logger.info(" 1.5: Testing branching and revision with workflow")

            # Start planning session for branching test
            self.logger.info(" 1.5.1: Start planning for branching test")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "Planning mobile app development strategy with different technology options to evaluate.",
                    "step_number": 1,
                    "total_steps": 4,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start branching test")
                return False

            # Create branch
            self.logger.info(" 1.5.2: Create branch for React Native approach")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Branch A: React Native approach - cross-platform development with shared codebase, faster development cycle, and consistent UI across platforms.",
                    "step_number": 2,
                    "total_steps": 4,
                    "next_step_required": True,
                    "is_branch_point": True,
                    "branch_from_step": 1,
                    "branch_id": "react-native",
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to create branch")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Validate branching in workflow
            metadata = response2_data.get("metadata", {})
            if not metadata.get("is_branch_point"):
                self.logger.error("Branch point not recorded in workflow")
                return False

            if metadata.get("branch_id") != "react-native":
                self.logger.error("Branch ID not properly recorded")
                return False

            if "react-native" not in metadata.get("branches", []):
                self.logger.error("Branch not added to branches list")
                return False

            self.logger.info(" ✅ Branching working with workflow architecture")

            # Test revision
            self.logger.info(" 1.5.3: Test revision capability")
            response3, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Revision of step 2: After consideration, let me revise the React Native approach to include performance optimizations and native module integration for critical features.",
                    "step_number": 3,
                    "total_steps": 4,
                    "next_step_required": True,
                    "is_step_revision": True,
                    "revises_step_number": 2,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response3:
                self.logger.error("Failed to create revision")
                return False

            response3_data = self._parse_planner_response(response3)
            if not response3_data:
                return False

            # Validate revision in workflow
            metadata = response3_data.get("metadata", {})
            if not metadata.get("is_step_revision"):
                self.logger.error("Step revision not recorded in workflow")
                return False

            if metadata.get("revises_step_number") != 2:
                self.logger.error("Revised step number not properly recorded")
                return False

            self.logger.info(" ✅ Revision working with workflow architecture")
            return True

        except Exception as e:
            self.logger.error(f"Branching and revision test failed: {e}")
            return False

    def _test_workflow_file_context(self) -> bool:
        """Test workflow file context behavior (should be minimal for planner)"""
        try:
            self.logger.info(" 1.6: Testing workflow file context behavior")

            # Planner typically doesn't use files, but test the workflow handles this correctly
            self.logger.info(" 1.6.1: Planning step with no files (normal case)")
            response1, continuation_id = self.call_mcp_tool(
                "planner",
                {
                    "step": "Planning data architecture for analytics platform.",
                    "step_number": 1,
                    "total_steps": 2,
                    "next_step_required": True,
                    "model": "flash",
                },
            )

            if not response1 or not continuation_id:
                self.logger.error("Failed to start workflow file context test")
                return False

            response1_data = self._parse_planner_response(response1)
            if not response1_data:
                return False

            # Planner workflow should not have file_context since it doesn't use files
            if "file_context" in response1_data:
                self.logger.info(" ℹ️ Workflow file context present but should be minimal for planner")

            # Final step
            self.logger.info(" 1.6.2: Final step (should complete without file embedding)")
            response2, _ = self.call_mcp_tool(
                "planner",
                {
                    "step": "Data architecture plan complete with data lakes, processing pipelines, and analytics layers.",
                    "step_number": 2,
                    "total_steps": 2,
                    "next_step_required": False,
                    "continuation_id": continuation_id,
                    "model": "flash",
                },
            )

            if not response2:
                self.logger.error("Failed to complete workflow file context test")
                return False

            response2_data = self._parse_planner_response(response2)
            if not response2_data:
                return False

            # Final step should complete self-contained
            if response2_data.get("status") != "planner_complete":
                self.logger.error("Expected self-contained completion for planner workflow")
                return False

            self.logger.info(" ✅ Workflow file context behavior appropriate for planner")
            return True

        except Exception as e:
            self.logger.error(f"Workflow file context test failed: {e}")
            return False

    def call_mcp_tool(self, tool_name: str, params: dict) -> tuple[Optional[str], Optional[str]]:
        """Call an MCP tool in-process - override for planner-specific response handling"""
        # Use in-process implementation to maintain conversation memory
        response_text, _ = self.call_mcp_tool_direct(tool_name, params)

        if not response_text:
            return None, None

        # Extract continuation_id from planner response specifically
        continuation_id = self._extract_planner_continuation_id(response_text)

        return response_text, continuation_id

    def _extract_planner_continuation_id(self, response_text: str) -> Optional[str]:
        """Extract continuation_id from planner response"""
        try:
            # Parse the response
            response_data = json.loads(response_text)
            return response_data.get("continuation_id")

        except json.JSONDecodeError as e:
            self.logger.debug(f"Failed to parse response for planner continuation_id: {e}")
            return None

    def _parse_planner_response(self, response_text: str) -> dict:
        """Parse planner tool JSON response"""
        try:
            # Parse the response - it should be direct JSON
            return json.loads(response_text)
        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse planner response as JSON: {e}")
            self.logger.error(f"Response text: {response_text[:500]}...")
            return {}

    @staticmethod
    def _find_status_key(response_data: dict) -> Optional[str]:
        """Return the first key of response_data ending in '_status', or None.

        The workflow status field is named per-tool (e.g. 'planner_status'),
        so validators scan for any '*_status' key instead of hard-coding it.
        """
        return next((key for key in response_data if key.endswith("_status")), None)

    def _validate_step_response(
        self,
        response_data: dict,
        expected_step: int,
        expected_total: int,
        expected_next_required: bool,
        expected_status: str,
    ) -> bool:
        """Validate a planner step response structure"""
        try:
            # Check status
            if response_data.get("status") != expected_status:
                self.logger.error(f"Expected status '{expected_status}', got '{response_data.get('status')}'")
                return False

            # Check step number
            if response_data.get("step_number") != expected_step:
                self.logger.error(f"Expected step_number {expected_step}, got {response_data.get('step_number')}")
                return False

            # Check total steps
            if response_data.get("total_steps") != expected_total:
                self.logger.error(f"Expected total_steps {expected_total}, got {response_data.get('total_steps')}")
                return False

            # Check next_step_required
            if response_data.get("next_step_required") != expected_next_required:
                self.logger.error(
                    f"Expected next_step_required {expected_next_required}, got {response_data.get('next_step_required')}"
                )
                return False

            # Check step_content exists
            if not response_data.get("step_content"):
                self.logger.error("Missing step_content in response")
                return False

            # Check next_steps guidance
            if not response_data.get("next_steps"):
                self.logger.error("Missing next_steps guidance in response")
                return False

            return True

        except Exception as e:
            self.logger.error(f"Error validating step response: {e}")
            return False

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BeehiveInnovations/gemini-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.