#!/usr/bin/env python3
"""
End-to-End Integration Tests for Planning Workflow System (Phase 5).
Tests the complete planning workflow: analyze → validate → review → (iterate) → approve
Validates that all 4 tools work together seamlessly.
"""
import sys
from pathlib import Path
import asyncio
import json
import shutil
from typing import Dict, Any
# Add this file's directory to sys.path so local modules (tool_handlers, type_defs) can be imported
sys.path.insert(0, str(Path(__file__).parent))
import tool_handlers
from type_defs import ValidationResultDict, PreparationSummaryDict
# Module-level constants
DOCS_MCP_PATH = Path(__file__).parent # Use docs-mcp itself as test project
MIN_APPROVAL_SCORE = 90 # Minimum score for plan approval
MAX_ITERATIONS = 5 # Maximum review loop iterations
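# Illustrative sketch only (not exercised by the tests below): how MIN_APPROVAL_SCORE and
# MAX_ITERATIONS would bound the review loop described in the module docstring. The
# validation call mirrors the handler usage in the tests; the refinement step is a
# placeholder, since in the real workflow an AI agent revises the plan between validations.
def _illustrative_review_loop(project_path: Path, plan_file: Path) -> int:
    """Validate a plan repeatedly, stopping at approval or after MAX_ITERATIONS attempts."""
    score = 0
    for _ in range(MAX_ITERATIONS):
        result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
            'project_path': str(project_path),
            'plan_file_path': str(plan_file)
        }))
        score = json.loads(result[0].text)['score']
        if score >= MIN_APPROVAL_SCORE:
            break  # Approved; no further iterations needed
        # Placeholder: a real loop would refine plan_file here before re-validating.
    return score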
# Test Fixture: Sample Project (TEST-002)
def get_sample_project_path() -> Path:
"""
Returns path to docs-mcp project itself for testing.
Using docs-mcp as the test project is ideal because:
- Has foundation docs (README, ARCHITECTURE, CLAUDE.md)
- Has standards directory (coderef/standards/)
- Has changelog (coderef/changelog/CHANGELOG.json)
- Has sufficient complexity (~50 files)
- Already available in test environment
"""
return DOCS_MCP_PATH
# Test Fixtures: Partial Mock Plans (TEST-003)
# These are minimal plans designed to trigger specific validation scores.
# Scoring algorithm: 100 - (10*critical + 5*major + 1*minor)
# Thresholds: PASS (>=90), PASS_WITH_WARNINGS (>=85), NEEDS_REVISION (>=70), FAIL (<70)
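# Minimal sketch of the scoring arithmetic documented above, used only to sanity-check the
# "Expected score" comments on the mock plans below. It is NOT the validator's actual
# implementation; issue detection and weighting live in the validation tool itself.
def _expected_score(critical: int, major: int, minor: int) -> int:
    """Apply the documented formula: 100 - (10*critical + 5*major + 1*minor), floored at 0."""
    return max(0, 100 - (10 * critical + 5 * major + 1 * minor))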
MOCK_PLAN_GOOD: Dict[str, Any] = {
"META_DOCUMENTATION": {
"plan_id": "MOCK-GOOD",
"plan_name": "Good Mock Plan",
"status": "draft",
"estimated_effort": "2 hours"
},
"UNIVERSAL_PLANNING_STRUCTURE": {
"0_preparation": {
"foundation_docs": ["README.md"],
"standards": ["coding-standards.md"],
"patterns": ["auth-pattern.md"]
},
"1_executive_summary": {
"feature_overview": "Test feature for validation",
"value_proposition": "Provides test coverage",
"real_world_analogy": "Like a safety net",
"primary_use_cases": ["Testing"],
"success_metrics": ["Tests pass", "Coverage above 90%"] # Measurable
},
"2_risk_assessment": {
"risks": ["Test may fail"]
},
"3_current_state_analysis": {
"current_state": "No tests exist"
},
"4_key_features": {
"features": ["Test framework"]
},
"5_task_id_system": {
"prefix": "TEST",
"format": "TEST-NNN"
},
"6_implementation_phases": {
"phase_1": {
"tasks": [
{
"id": "TEST-001",
"description": "Short desc", # 2 words - major issue (-5)
"depends_on": []
},
{
"id": "TEST-002",
"description": "Another short task description here today", # 6 words - minor issue (-1)
"depends_on": ["TEST-001"]
}
]
}
},
"7_testing_strategy": {
"unit_tests": ["test basic functionality"],
"integration_tests": ["test workflow"],
"edge_cases": ["null", "empty", "invalid", "error", "boundary"] # 5 edge cases
},
"8_success_criteria": {
"criteria": ["Tests pass", "No bugs"] # Only 1 number in plan (90%) - major issue (-5)
},
"9_implementation_checklist": {
"items": ["Complete tests"]
}
}
}
# Expected score: 100 - 5 (major: short desc) - 5 (major: vague success criteria) - 1 (minor: short desc) - 1 (minor: <20 word desc) = 88
MOCK_PLAN_FLAWED: Dict[str, Any] = {
"META_DOCUMENTATION": {
"plan_id": "MOCK-FLAWED",
"plan_name": "Flawed Mock Plan",
"status": "draft",
"estimated_effort": "TBD" # Placeholder - major issue (-5)
},
"UNIVERSAL_PLANNING_STRUCTURE": {
"0_preparation": {
"foundation_docs": []
},
"1_executive_summary": {
"feature_overview": "Test",
"value_proposition": "Value",
"real_world_analogy": "Analogy",
"primary_use_cases": ["Use case"],
"success_metrics": ["Success"]
},
"2_risk_assessment": {
"risks": ["Risk"]
},
"3_current_state_analysis": {
"current_state": "State"
},
"4_key_features": {
"features": ["Feature"]
},
"5_task_id_system": {
"prefix": "TST"
},
"6_implementation_phases": {
"phase_1": {
"tasks": [
{
"id": "TST-001",
"description": "Short", # 1 word - major issue (-5)
"depends_on": []
},
{
"id": "TST-002",
"description": "What should we do here?", # Question - major issue (-5)
"depends_on": []
}
]
}
},
# Missing 7_testing_strategy - critical issue (-10)
"8_success_criteria": {
"criteria": ["Maybe pass"] # Ambiguous "maybe" - major issue (-5), no metrics - major issue (-5)
},
"9_implementation_checklist": {
"items": ["Item"]
}
}
}
# Expected score: 100 - 10 (critical: missing testing strategy) - 5*5 (5 major issues) = 65
MOCK_PLAN_FAILED: Dict[str, Any] = {
"META_DOCUMENTATION": {
"plan_id": "MOCK-FAILED",
"plan_name": "Failed Mock Plan",
"status": "TBD", # Placeholder - major issue (-5)
"estimated_effort": "TBD" # Placeholder - counts as same major
},
"UNIVERSAL_PLANNING_STRUCTURE": {
# Missing 0_preparation - critical (-10)
"1_executive_summary": {
"feature_overview": "Overview"
},
# Missing 2_risk_assessment - critical (-10)
# Missing 3_current_state_analysis - critical (-10)
"4_key_features": {
"features": []
},
"5_task_id_system": {
"prefix": "FAIL"
},
"6_implementation_phases": {
"phase_1": {
"tasks": [
{
"id": "FAIL-001",
"description": "Do something", # 2 words - major (-5)
"depends_on": ["FAIL-002"] # Circular dependency
},
{
"id": "FAIL-002",
"description": "X", # 1 word - major (same category)
"depends_on": ["FAIL-001"] # Circular dependency - critical (-10)
}
]
}
}
# Missing 7_testing_strategy - critical (-10)
# Missing 8_success_criteria - critical (-10)
# Missing 9_implementation_checklist - critical (-10)
}
}
# Expected score: 100 - 7*10 (7 critical) - 2*5 (2 major: placeholders, short descriptions) = 20
# Note: The validator may count additional issues, so the actual score can be lower (the test only asserts < 20)
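# Sanity checks of the expected-score arithmetic in the comments above (comment math only;
# the real validator may count additional issues, which is why the tests assert ranges).
assert _expected_score(critical=0, major=2, minor=2) == 88  # MOCK_PLAN_GOOD
assert _expected_score(critical=1, major=5, minor=0) == 65  # MOCK_PLAN_FLAWED
assert _expected_score(critical=7, major=2, minor=0) == 20  # MOCK_PLAN_FAILED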
def test_e2e_workflow_complete():
"""
Test complete end-to-end workflow: analyze → validate → review → approve.
This test validates the full planning workflow:
1. Analyze project to gather foundation docs and standards
2. Create a mock plan (simulating AI planning)
3. Validate the plan (score it)
4. Generate review report
5. Verify approval logic (score >= 90)
This is the primary integration test for the planning workflow system.
"""
print("\n" + "="*70)
print("TEST: E2E Workflow Complete")
print("="*70)
project_path = get_sample_project_path()
print(f"Using test project: {project_path}")
# Create temporary directory WITHIN the project for test outputs (security requirement)
temp_path = project_path / '.test_output'
temp_path.mkdir(exist_ok=True)
try:
# Step 1: Analyze project (Tool #1)
print("\n[Step 1] Analyzing project...")
result = asyncio.run(tool_handlers.handle_analyze_project_for_planning({
'project_path': str(project_path)
}))
analysis_text = result[0].text
analysis_data = json.loads(analysis_text)
# analysis_data IS the preparation summary (no nesting)
assert 'foundation_docs' in analysis_data
assert 'coding_standards' in analysis_data
assert 'technology_stack' in analysis_data
print(f" [OK] Project analyzed - found {len(analysis_data['foundation_docs'].get('available', []))} foundation docs")
# Step 2: Create mock plan file (simulates AI planning)
print("\n[Step 2] Creating mock plan file...")
plan_file = temp_path / "test-plan.json"
with open(plan_file, 'w', encoding='utf-8') as f:
json.dump(MOCK_PLAN_GOOD, f, indent=2)
print(f" [OK] Plan file created: {plan_file}")
# Step 3: Validate plan (Tool #3)
print("\n[Step 3] Validating plan...")
result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
'project_path': str(project_path),
'plan_file_path': str(plan_file)
}))
validation_text = result[0].text
validation_data = json.loads(validation_text)
assert 'score' in validation_data
assert 'validation_result' in validation_data
assert 'issues' in validation_data
assert 'approved' in validation_data
score = validation_data['score']
validation_result = validation_data['validation_result']
approved = validation_data['approved']
print(f" [OK] Plan validated - Score: {score}/100, Result: {validation_result}, Approved: {approved}")
# Step 4: Generate review report (Tool #4)
print("\n[Step 4] Generating review report...")
review_output_path = temp_path / "review-report.md"
result = asyncio.run(tool_handlers.handle_generate_plan_review_report({
'project_path': str(project_path),
'plan_file_path': str(plan_file),
'output_path': str(review_output_path)
}))
report_response = result[0].text
# Handler returns formatted text, not JSON
assert '✅ Review report generated successfully!' in report_response
assert f'Score: {score}/100' in report_response
# Verify file was created
assert review_output_path.exists()
# Read report content
report_content = review_output_path.read_text(encoding='utf-8')
assert f'{score}/100' in report_content
assert 'Implementation Plan Review Report' in report_content
print(f" [OK] Review report generated: {review_output_path}")
# Step 5: Verify approval logic
print("\n[Step 5] Verifying approval logic...")
if score >= MIN_APPROVAL_SCORE:
            assert approved is True
assert 'PASS' in validation_result
assert 'APPROVED FOR IMPLEMENTATION' in report_content
print(f" [OK] Plan approved (score {score} >= {MIN_APPROVAL_SCORE})")
else:
            assert approved is False
assert 'NOT APPROVED' in report_content
print(f" [OK] Plan not approved (score {score} < {MIN_APPROVAL_SCORE})")
print("\n" + "="*70)
print("[PASS] E2E workflow test completed successfully")
print("="*70)
print("\nWorkflow verified:")
print(" [OK] Tool #1 (analyze_project_for_planning) - Discovered foundation docs")
print(f" [OK] Tool #3 (validate_implementation_plan) - Scored plan at {score}/100")
print(" [OK] Tool #4 (generate_plan_review_report) - Generated markdown report")
print(f" [OK] Approval logic - Correctly {'approved' if approved else 'rejected'} plan")
print(f" [OK] All 4 tools working together seamlessly")
finally:
# Cleanup temp directory
if temp_path.exists():
shutil.rmtree(temp_path)
def test_review_loop_iteration():
"""
Test simulated review loop: validate → score → refine → re-validate.
Simulates the procedural AI review loop workflow:
    1. Start with MOCK_PLAN_FAILED (score below 20)
    2. Validate and get issues
    3. "Refine" to MOCK_PLAN_FLAWED (score ~65)
4. Validate again
5. "Refine" to MOCK_PLAN_GOOD (score ~88)
6. Verify score improves each iteration
This tests that plans can be iteratively improved across multiple validations.
"""
print("\n" + "="*70)
print("TEST: Review Loop Iteration")
print("="*70)
project_path = get_sample_project_path()
# Create temporary directory WITHIN the project (security requirement)
temp_path = project_path / '.test_output_iteration'
temp_path.mkdir(exist_ok=True)
plan_file = temp_path / "iterative-plan.json"
try:
# Iteration 1: Failed plan
print("\n[Iteration 1] Testing failed plan...")
with open(plan_file, 'w', encoding='utf-8') as f:
json.dump(MOCK_PLAN_FAILED, f, indent=2)
result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
'project_path': str(project_path),
'plan_file_path': str(plan_file)
}))
validation_data = json.loads(result[0].text)
score_1 = validation_data['score']
assert score_1 < 20 # Should be FAIL (very low score)
print(f" [OK] Iteration 1: Score {score_1}/100 (FAIL)")
# Iteration 2: Flawed plan (improvement)
print("\n[Iteration 2] Testing flawed plan...")
with open(plan_file, 'w', encoding='utf-8') as f:
json.dump(MOCK_PLAN_FLAWED, f, indent=2)
result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
'project_path': str(project_path),
'plan_file_path': str(plan_file)
}))
validation_data = json.loads(result[0].text)
score_2 = validation_data['score']
assert 40 <= score_2 < 70 # Should be FAIL but improved
assert score_2 > score_1 # Must improve
print(f" [OK] Iteration 2: Score {score_2}/100 (FAIL but improved) - improved by {score_2 - score_1} points")
# Iteration 3: Good plan (near approval)
print("\n[Iteration 3] Testing good plan...")
with open(plan_file, 'w', encoding='utf-8') as f:
json.dump(MOCK_PLAN_GOOD, f, indent=2)
result = asyncio.run(tool_handlers.handle_validate_implementation_plan({
'project_path': str(project_path),
'plan_file_path': str(plan_file)
}))
validation_data = json.loads(result[0].text)
score_3 = validation_data['score']
assert 85 <= score_3 < 90 # Should be PASS_WITH_WARNINGS (near approval)
assert score_3 > score_2 # Must improve
print(f" [OK] Iteration 3: Score {score_3}/100 (PASS_WITH_WARNINGS) - improved by {score_3 - score_2} points")
print("\n" + "="*70)
print("[PASS] Review loop iteration test completed successfully")
print("="*70)
print("\nIterative improvement verified:")
print(f" [OK] Iteration 1: {score_1}/100 (FAIL)")
print(f" [OK] Iteration 2: {score_2}/100 (NEEDS_REVISION) +{score_2-score_1}")
print(f" [OK] Iteration 3: {score_3}/100 (PASS_WITH_WARNINGS) +{score_3-score_2}")
print(f" [OK] Total improvement: {score_3-score_1} points across 3 iterations")
finally:
# Cleanup temp directory
if temp_path.exists():
shutil.rmtree(temp_path)
if __name__ == "__main__":
print("\n" + "="*70)
print("PLANNING WORKFLOW E2E TEST SUITE (Phase 5)")
print("="*70)
try:
# Run E2E tests
test_e2e_workflow_complete()
test_review_loop_iteration()
print("\n" + "="*70)
print("[PASS] ALL E2E TESTS PASSED")
print("="*70)
print("\nTest Coverage:")
print(" [OK] Complete workflow (analyze -> validate -> review -> approve)")
print(" [OK] Review loop iteration (failed -> flawed -> good)")
print(" [OK] All 4 planning tools working together")
print("\nTotal: 2 test functions, all passing")
        sys.exit(0)
except AssertionError as e:
print(f"\n[FAIL] TEST FAILED: {e}")
import traceback
traceback.print_exc()
        sys.exit(1)
except Exception as e:
print(f"\n[FAIL] UNEXPECTED ERROR: {e}")
import traceback
traceback.print_exc()
        sys.exit(1)