Documentation Generator MCP Server

by srwlli
test_validate_plan_handler.py (22.5 kB)
#!/usr/bin/env python3
"""
Handler Tests for validate_implementation_plan Tool (Phase 5.3).

Tests the validate_implementation_plan MCP tool handler directly to ensure:
1. Valid plans are scored correctly (high score, PASS result)
2. Invalid plans are scored correctly (low score, FAIL result with issues)
3. Error cases are handled properly (missing files, invalid paths, etc.)

These tests focus on handler-level functionality (not full workflow).
"""

import sys
from pathlib import Path
import asyncio
import json
import tempfile
import shutil
from typing import Dict, Any

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent))

import tool_handlers
from type_defs import ValidationResultDict

# Module-level constants
DOCS_MCP_PATH = Path(__file__).parent  # Use docs-mcp itself as test project

# Test Fixtures: Minimal Plan Structures (TEST-010)

# Valid plan with good quality (should score high)
VALID_PLAN: Dict[str, Any] = {
    "META_DOCUMENTATION": {
        "plan_id": "TEST-VALID-001",
        "plan_name": "Valid Test Plan",
        "status": "draft",
        "estimated_effort": "3-4 hours",
        "description": "A well-formed test plan with all required sections and good quality content"
    },
    "UNIVERSAL_PLANNING_STRUCTURE": {
        "0_preparation": {
            "foundation_docs": ["README.md", "ARCHITECTURE.md", "API.md"],
            "standards": ["BEHAVIOR-STANDARDS.md", "COMPONENT-PATTERN.md"],
            "patterns": ["error-handling-pattern.md", "validation-pattern.md"],
            "reference_components": ["similar_tool_1.py", "similar_tool_2.py"],
            "technology_stack": {
                "language": "Python 3.11+",
                "framework": "MCP SDK",
                "testing": "pytest",
                "dependencies": ["pathlib", "json", "asyncio"]
            }
        },
        "1_executive_summary": {
            "feature_overview": "Comprehensive test plan demonstrating proper planning structure with detailed requirements and implementation strategy",
            "value_proposition": "Validates that planning tools can accurately assess plan quality through structured validation checks",
            "real_world_analogy": "Like a building inspector checking blueprints against construction codes before breaking ground",
            "primary_use_cases": [
                "Validate implementation plans before execution",
                "Identify quality issues early in planning phase",
                "Ensure autonomous AI agents have complete guidance"
            ],
            "success_metrics": [
                "Validation score >= 90 on all production plans",
                "Zero critical issues in executed plans",
                "Planning time reduced by 60% vs manual approach",
                "Plan revision cycles reduced from 5+ to 2-3 iterations"
            ]
        },
        "2_risk_assessment": {
            "overall_risk": "Low",
            "complexity": "Medium",
            "scope": "Focused - single tool validation",
            "risk_factors": {
                "technical": "Moderate - requires comprehensive validation rules",
                "dependencies": "Low - uses only standard libraries",
                "performance": "Low - validation is fast JSON processing",
                "security": "Low - read-only operations on project files"
            },
            "mitigation_strategies": [
                "Start with strict validation rules, adjust based on feedback",
                "Comprehensive test coverage for all validation scenarios",
                "Clear documentation of scoring algorithm",
                "User approval gate prevents flawed plans from execution"
            ]
        },
        "3_current_state_analysis": {
            "current_state": "Planning tools exist but need comprehensive validation to ensure quality. Without validation, AI agents may create incomplete or flawed plans that fail during execution.",
            "affected_files": [
                "tool_handlers.py - Add validate_implementation_plan handler",
                "generators/plan_validator.py - Validation logic and scoring",
                "type_defs.py - ValidationResultDict TypedDict",
                "validation.py - Input validation for plan paths"
            ],
            "dependencies": [
                "Existing: feature-implementation-planning-standard.json template",
                "Existing: path validation utilities",
                "New: PlanValidator class with 25+ validation checks"
            ]
        },
        "4_key_features": {
            "features": [
                "Comprehensive validation against 25+ quality checklist items",
                "Scoring algorithm: 100 - (10*critical + 5*major + 1*minor)",
                "Issue categorization by severity (critical/major/minor)",
                "Section-specific validation (structure, completeness, quality, autonomy)",
                "Detailed suggestions for fixing each issue",
                "Approval threshold enforcement (score >= 90 for production)"
            ]
        },
        "5_task_id_system": {
            "prefix": "VALID",
            "format": "VALID-NNN",
            "example": "VALID-001"
        },
        "6_implementation_phases": {
            "phase_1": {
                "title": "Validation Infrastructure",
                "tasks": [
                    {
                        "id": "VALID-001",
                        "description": "Create PlanValidator class with initialization logic to load plan and template files for comparison",
                        "location": "generators/plan_validator.py",
                        "effort": "45 minutes",
                        "depends_on": []
                    },
                    {
                        "id": "VALID-002",
                        "description": "Implement structural validation checking all required sections (0-9) exist and have correct types",
                        "location": "generators/plan_validator.py",
                        "effort": "1 hour",
                        "depends_on": ["VALID-001"]
                    },
                    {
                        "id": "VALID-003",
                        "description": "Implement completeness validation checking no placeholders, all fields filled, task IDs formatted correctly",
                        "location": "generators/plan_validator.py",
                        "effort": "1 hour",
                        "depends_on": ["VALID-001"]
                    },
                    {
                        "id": "VALID-004",
                        "description": "Implement quality validation checking task descriptions >= 20 words, success criteria measurable, edge cases listed",
                        "location": "generators/plan_validator.py",
                        "effort": "1.5 hours",
                        "depends_on": ["VALID-001"]
                    },
                    {
                        "id": "VALID-005",
                        "description": "Implement scoring algorithm (100 - deductions) and result classification (PASS/NEEDS_REVISION/FAIL)",
                        "location": "generators/plan_validator.py",
                        "effort": "45 minutes",
                        "depends_on": ["VALID-002", "VALID-003", "VALID-004"]
                    }
                ]
            },
            "phase_2": {
                "title": "Handler Integration",
                "tasks": [
                    {
                        "id": "VALID-006",
                        "description": "Create handle_validate_implementation_plan handler with input validation and error handling",
                        "location": "tool_handlers.py",
                        "effort": "30 minutes",
                        "depends_on": ["VALID-005"]
                    },
                    {
                        "id": "VALID-007",
                        "description": "Add validation result TypedDict defining score, result, issues, checklist fields",
                        "location": "type_defs.py",
                        "effort": "15 minutes",
                        "depends_on": []
                    },
                    {
                        "id": "VALID-008",
                        "description": "Register handler in TOOL_HANDLERS registry and add tool definition to server.py",
                        "location": "server.py + tool_handlers.py",
                        "effort": "15 minutes",
                        "depends_on": ["VALID-006", "VALID-007"]
                    }
                ]
            }
        },
        "7_testing_strategy": {
            "unit_tests": [
                "test_validate_structure - Verify structural validation catches missing sections",
                "test_validate_completeness - Verify completeness checks catch placeholders and empty fields",
                "test_validate_quality - Verify quality checks catch short descriptions and vague criteria",
                "test_scoring_algorithm - Verify score calculation is correct for various issue combinations",
                "test_result_classification - Verify PASS/NEEDS_REVISION/FAIL thresholds work correctly"
            ],
            "integration_tests": [
                "test_validate_valid_plan - Complete workflow with high-quality plan returns score >= 90",
                "test_validate_flawed_plan - Complete workflow with flawed plan returns appropriate score and issues",
                "test_validate_missing_file - Handler returns proper error when plan file doesn't exist"
            ],
            "edge_cases": [
                "Empty plan file (invalid JSON)",
                "Plan missing META_DOCUMENTATION section",
                "Plan with circular task dependencies",
                "Plan file outside project directory (path traversal)",
                "Very large plan with 100+ tasks",
                "Plan with unicode characters in descriptions",
                "Plan with all sections but all content is placeholders",
                "Plan with perfect structure but zero task details"
            ]
        },
        "8_success_criteria": {
            "criteria": [
                "Validation handler returns ValidationResultDict with score 0-100",
                "Perfect plan (no issues) scores exactly 100",
                "Critical issues reduce score by 10 points each",
                "Major issues reduce score by 5 points each",
                "Minor issues reduce score by 1 point each",
                "Result classification: PASS (>=90), PASS_WITH_WARNINGS (>=85), NEEDS_REVISION (>=70), FAIL (<70)",
                "All issues include severity, section, issue description, and fix suggestion",
                "Handler properly validates project_path and plan_file_path inputs",
                "Error responses use ErrorResponse factory pattern",
                "All operations logged with structured logging"
            ]
        },
        "9_implementation_checklist": {
            "items": [
                "Create PlanValidator class",
                "Implement structural validation (sections 0-9 present)",
                "Implement completeness validation (no placeholders, all fields filled)",
                "Implement quality validation (description length, measurable criteria)",
                "Implement autonomy validation (no ambiguity, implementable without clarification)",
                "Implement scoring algorithm (100 - deductions)",
                "Create ValidationResultDict TypedDict",
                "Create handle_validate_implementation_plan handler",
                "Add input validation for paths",
                "Register handler and add tool definition",
                "Write unit tests for validator",
                "Write integration tests for handler",
                "Test error cases (missing file, invalid path)",
                "Update documentation (CLAUDE.md, API.md)"
            ]
        }
    }
}

# Invalid plan with multiple issues (should score low)
INVALID_PLAN: Dict[str, Any] = {
    "META_DOCUMENTATION": {
        "plan_id": "TEST-INVALID-001",
        "plan_name": "Invalid Test Plan",
        "status": "TBD",  # Placeholder - major issue
        "estimated_effort": "TBD"  # Placeholder - same major issue
    },
    "UNIVERSAL_PLANNING_STRUCTURE": {
        # Missing 0_preparation - critical issue
        "1_executive_summary": {
            "feature_overview": "Short overview",  # Too short - minor issue
            "value_proposition": "Value",  # Way too short - major issue
            "real_world_analogy": "Like thing",  # Too short - major issue
            "primary_use_cases": ["Use case"],  # Too short - minor issue
            "success_metrics": ["Success"]  # Not measurable - major issue
        },
        # Missing 2_risk_assessment - critical issue
        # Missing 3_current_state_analysis - critical issue
        "4_key_features": {
            "features": []  # Empty - critical issue
        },
        "5_task_id_system": {
            "prefix": "INV"
        },
        "6_implementation_phases": {
            "phase_1": {
                "tasks": [
                    {
                        "id": "INV-001",
                        "description": "Do thing",  # 2 words - major issue
                        "depends_on": ["INV-002"]  # Depends on next task
                    },
                    {
                        "id": "INV-002",
                        "description": "What?",  # 1 word, question - major issue
                        "depends_on": ["INV-001"]  # Circular dependency - critical issue
                    }
                ]
            }
        },
        # Missing 7_testing_strategy - critical issue
        "8_success_criteria": {
            "criteria": ["Maybe works", "Possibly good"]  # Ambiguous - major issues
        }
        # Missing 9_implementation_checklist - critical issue
    }
}

# Expected issues: 7 critical, 8+ major, 2+ minor
# Expected score: ~20 (very low, multiple critical issues)
def test_validate_valid_plan():
    """
    Test validating a high-quality plan (TEST-011).

    Validates that a well-formed plan with all sections and good content
    receives a high score (>= 85) and PASS or PASS_WITH_WARNINGS result.
    """
    print("\n" + "="*70)
    print("TEST: Validate Valid Plan")
    print("="*70)

    project_path = DOCS_MCP_PATH

    # Create temp directory WITHIN project
    temp_path = project_path / '.test_output_valid'
    temp_path.mkdir(exist_ok=True)

    try:
        # Create valid plan file
        print("\n[Step 1] Creating valid plan file...")
        plan_file = temp_path / "valid-plan.json"
        with open(plan_file, 'w', encoding='utf-8') as f:
            json.dump(VALID_PLAN, f, indent=2)
        print(f" [OK] Plan file created: {plan_file}")

        # Validate the plan via handler
        print("\n[Step 2] Validating plan via handler...")
        result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
            'project_path': str(project_path),
            'plan_file_path': str(plan_file)
        }))

        # Parse response
        validation_text = result[0].text
        validation_data = json.loads(validation_text)

        # Assertions
        assert 'score' in validation_data, "Response should contain score"
        assert 'validation_result' in validation_data, "Response should contain validation_result"
        assert 'issues' in validation_data, "Response should contain issues"
        assert 'approved' in validation_data, "Response should contain approved"

        score = validation_data['score']
        validation_result = validation_data['validation_result']
        issues = validation_data['issues']
        approved = validation_data['approved']

        print(f"\n[Results]")
        print(f" Score: {score}/100")
        print(f" Result: {validation_result}")
        print(f" Issues: {len(issues)}")
        print(f" Approved: {approved}")

        # Valid plan should score >= 85
        assert score >= 85, f"Valid plan should score >= 85, got {score}"
        assert validation_result in ['PASS', 'PASS_WITH_WARNINGS'], \
            f"Valid plan should PASS or PASS_WITH_WARNINGS, got {validation_result}"

        # Check issue structure
        for issue in issues:
            assert 'severity' in issue, "Issue should have severity"
            assert 'section' in issue, "Issue should have section"
            assert 'issue' in issue, "Issue should have issue description"
            assert 'suggestion' in issue, "Issue should have fix suggestion"

        print("\n[PASS] Valid plan handler test completed successfully")
        print(f" [OK] Score {score} >= 85")
        print(f" [OK] Result: {validation_result}")
        print(f" [OK] Issues properly structured")

    finally:
        # Cleanup
        if temp_path.exists():
            shutil.rmtree(temp_path)


def test_validate_invalid_plan():
    """
    Test validating a low-quality plan (TEST-012).

    Validates that a flawed plan with missing sections and poor content
    receives a low score and FAIL result with appropriate issues identified.
    """
    print("\n" + "="*70)
    print("TEST: Validate Invalid Plan")
    print("="*70)

    project_path = DOCS_MCP_PATH

    # Create temp directory WITHIN project
    temp_path = project_path / '.test_output_invalid'
    temp_path.mkdir(exist_ok=True)

    try:
        # Create invalid plan file
        print("\n[Step 1] Creating invalid plan file...")
        plan_file = temp_path / "invalid-plan.json"
        with open(plan_file, 'w', encoding='utf-8') as f:
            json.dump(INVALID_PLAN, f, indent=2)
        print(f" [OK] Plan file created: {plan_file}")

        # Validate the plan via handler
        print("\n[Step 2] Validating plan via handler...")
        result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
            'project_path': str(project_path),
            'plan_file_path': str(plan_file)
        }))

        # Parse response
        validation_text = result[0].text
        validation_data = json.loads(validation_text)

        score = validation_data['score']
        validation_result = validation_data['validation_result']
        issues = validation_data['issues']
        approved = validation_data['approved']

        print(f"\n[Results]")
        print(f" Score: {score}/100")
        print(f" Result: {validation_result}")
        print(f" Issues: {len(issues)}")
        print(f" Approved: {approved}")

        # Invalid plan should score < 70 (FAIL)
        assert score < 70, f"Invalid plan should score < 70, got {score}"
        assert validation_result in ['FAIL', 'NEEDS_REVISION'], \
            f"Invalid plan should FAIL or NEEDS_REVISION, got {validation_result}"
        assert approved == False, "Invalid plan should not be approved"

        # Should have multiple issues
        assert len(issues) > 5, f"Invalid plan should have > 5 issues, got {len(issues)}"

        # Should have critical issues
        critical_issues = [i for i in issues if i['severity'] == 'critical']
        assert len(critical_issues) > 0, "Invalid plan should have critical issues"

        # Check that specific known issues are detected
        issue_descriptions = [i['issue'] for i in issues]

        print(f"\n[Issues detected]")
        for issue in issues[:5]:  # Show first 5 issues
            print(f" [{issue['severity'].upper()}] {issue['section']}: {issue['issue'][:60]}...")

        print("\n[PASS] Invalid plan handler test completed successfully")
        print(f" [OK] Score {score} < 70 (FAIL)")
        print(f" [OK] Result: {validation_result}")
        print(f" [OK] {len(issues)} issues detected")
        print(f" [OK] {len(critical_issues)} critical issues detected")
        print(f" [OK] Plan correctly rejected (approved=False)")

    finally:
        # Cleanup
        if temp_path.exists():
            shutil.rmtree(temp_path)


def test_validate_missing_file():
    """
    Test error handling when plan file doesn't exist.

    Validates that handler returns proper error response (not crash)
    when given a non-existent plan file path.
    """
    print("\n" + "="*70)
    print("TEST: Validate Missing File Error Handling")
    print("="*70)

    project_path = DOCS_MCP_PATH

    print("\n[Step 1] Attempting to validate non-existent file...")
    result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
        'project_path': str(project_path),
        'plan_file_path': 'nonexistent-plan.json'
    }))

    response_text = result[0].text

    print(f"\n[Response]")
    # Safely print response (avoid Unicode errors on Windows)
    try:
        print(f" {response_text[:200]}...")
    except UnicodeEncodeError:
        print(f" [Response contains special characters, length: {len(response_text)}]")

    # Should return error response (not crash)
    assert 'error' in response_text.lower() or 'not found' in response_text.lower(), \
        "Should return error response for missing file"

    print("\n[PASS] Missing file error handling test completed successfully")
    print(" [OK] Handler returned error response (didn't crash)")
    print(" [OK] Error message is informative")


if __name__ == "__main__":
    print("\n" + "="*70)
    print("VALIDATE PLAN HANDLER TEST SUITE (Phase 5.3)")
    print("="*70)

    try:
        # Run handler tests
        test_validate_valid_plan()
        test_validate_invalid_plan()
        test_validate_missing_file()

        print("\n" + "="*70)
        print("[PASS] ALL HANDLER TESTS PASSED")
        print("="*70)
        print("\nTest Coverage:")
        print(" [OK] Valid plan scored correctly (>= 85)")
        print(" [OK] Invalid plan scored correctly (< 70)")
        print(" [OK] Issues properly structured and categorized")
        print(" [OK] Error handling for missing files")
        print("\nTotal: 3 test functions, all passing")
        exit(0)

    except AssertionError as e:
        print(f"\n[FAIL] TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        exit(1)

    except Exception as e:
        print(f"\n[FAIL] UNEXPECTED ERROR: {e}")
        import traceback
        traceback.print_exc()
        exit(1)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/srwlli/docs-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.