Documentation Generator MCP Server

by srwlli
phase-3-quality-system-plan.json (92.9 kB)
{ "$schema": "./tool-implementation-template-schema.json", "META_DOCUMENTATION": { "plan_id": "PHASE-3", "plan_name": "Phase 3: Quality & Validation System (Tool #3: validate_implementation_plan)", "status": "planning", "created_date": "2025-10-10", "dependencies": [ "Phase 1 (Tool #1: get_planning_template) - COMPLETED (commit 45e146b)", "Phase 2 (Tool #2: analyze_project_for_planning) - COMPLETED (commit 7d6d18a)" ], "estimated_effort": "4-5 hours", "actual_effort": null, "commits": [], "context": { "meta_plan": "coderef/planning-workflow/planning-workflow-system-meta-plan.json", "current_progress": "Phases 1 & 2 complete. Foundation infrastructure (constants, TypedDicts, validation) ready. Tool #1 (get_planning_template) working. Tool #2 (analyze_project_for_planning) implemented with PlanningAnalyzer class. Ready for Tool #3 implementation." } }, "UNIVERSAL_PLANNING_STRUCTURE": { "0_preparation": { "purpose": "Phase 3 implements Tool #3: validate_implementation_plan. This tool validates implementation plans against 23 quality checklist items with 0-100 scoring. Creates PlanValidator class in generators/plan_validator.py. Enables iterative review loop where AI self-validates plans and refines until score ≥ 85. Built on Phase 1 & 2 foundation.", "foundation_documents_consulted": { "planning_template": { "file": "context/feature-implementation-planning-standard.json", "relevant_sections": [ "QUALITY_CHECKLIST_FOR_PLANS (lines 1111-1145) - Validation rules to implement", "UNIVERSAL_PLANNING_STRUCTURE - Structure to validate against", "COMMON_MISTAKES_TO_AVOID - Patterns that indicate issues" ], "key_insights": [ "Plan must have all 10 sections: META_DOCUMENTATION + sections 0-9", "Quality checklist has 3 categories: completeness (9 items), quality (8 items), autonomy (6 items) = 23 checklist items", "Common mistakes document anti-patterns to detect" ] }, "architecture": { "file": "coderef/foundation-docs/ARCHITECTURE.md", "relevant_sections": [ "Module Architecture - Generator pattern, error handling", "Error Response Factory (ARCH-001)", "Structured logging (ARCH-003)" ], "key_insights": [ "PlanValidator as standalone class (like PlanningAnalyzer in Phase 2)", "Use ErrorResponse factory for all errors", "Log validation operations for observability" ] }, "type_defs": { "file": "type_defs.py", "relevant_sections": [ "Lines 270-284: ValidationIssueDict and ValidationResultDict already defined in Phase 1", "ValidationIssueDict: severity, section, issue, suggestion", "ValidationResultDict: validation_result, score, issues, checklist_results, approved" ], "key_insights": [ "Return types already defined - just need to return valid dict", "validation_result can be: PASS, PASS_WITH_WARNINGS, NEEDS_REVISION, FAIL", "approved=true only if score ≥ 90" ] }, "phase_2_reference": { "file": "coderef/planning-workflow/phase-2-core-automation-plan.json", "relevant_sections": [ "PlanningAnalyzer as reference for class structure", "Task breakdown pattern with INFRA, SCAN, PATTERN, TOOL, TEST prefixes", "Testing approach with fixtures" ], "key_insights": [ "Follow similar structure: __init__, main validate() method, helper validators", "Use Path for file operations", "Progress logging for long operations" ] } }, "coding_standards_and_conventions": { "behavior_standards": { "file": "coderef/standards/BEHAVIOR-STANDARDS.md", "patterns_to_follow": [ "Error handling: Try-except with ErrorResponse factory (ARCH-001)", "Logging: Use structured logging with extra fields (ARCH-003)", "Validation: Validate inputs at 
boundaries (REF-003)", "Constants: Use enums and constants, no magic strings (REF-002)" ] }, "validation_patterns": { "file": "validation.py", "patterns": [ "validate_plan_file_path() - Use for plan file path validation", "validate_project_path_input() - Use for project path validation", "Raise ValueError with clear messages on validation failures" ] }, "project_specific": [ "Follow handler registry pattern (QUA-002)", "Use TypedDict for return types (QUA-001)", "Security: Path traversal protection on all user inputs", "Performance: Validation should complete in < 2 seconds for typical plans" ] }, "reference_components_for_implementation": { "primary_references": { "component": "PlanningAnalyzer (generators/planning_analyzer.py)", "why_similar": "Standalone class with project_path, multiple scanner methods, aggregated results", "reusable_patterns": [ "Class structure: __init__(paths) → main method → helper methods", "Error handling with try-except blocks", "Progress logging", "Result aggregation into TypedDict" ] }, "secondary_references": { "component": "AuditGenerator (generators/audit_generator.py)", "why_similar": "Validation/checking logic, scoring algorithm, issue detection", "reusable_patterns": [ "Scoring algorithm: base score - deductions", "Issue categorization by severity (critical, major, minor)", "Pattern matching with regex", "Detailed violation reporting" ] } }, "key_patterns_identified": [ "Validation Pattern: Check condition → if fail, create ValidationIssueDict → append to issues list", "Scoring Pattern: Start at 100 → deduct points based on severity (critical: -10, major: -5, minor: -1)", "Result Determination: score ≥ 90 = PASS, 85-89 = PASS_WITH_WARNINGS, 70-84 = NEEDS_REVISION, < 70 = FAIL", "Error Handling: Try-except with ErrorResponse factory, specific error types", "Logging Pattern: Log operation start, progress, completion with stats", "JSON Loading: Use json.load() with error handling for malformed JSON" ], "technology_stack_context": { "language": "Python 3.11+", "framework": "MCP (Model Context Protocol)", "key_libraries": [ "pathlib - Path manipulation and file operations", "json - JSON loading and parsing", "re - Pattern matching for validation rules", "typing - ValidationIssueDict, ValidationResultDict TypedDicts", "logging - Structured logging via logger_config" ], "testing": "Python unittest pattern (asyncio test functions)", "deployment": "MCP server via stdio transport" }, "project_structure_relevant_to_task": { "files_to_modify": [ "server.py - Add validate_implementation_plan tool definition (~45 lines)", "tool_handlers.py - Add handle_validate_implementation_plan handler (~70-80 lines)", "generators/ - NEW: plan_validator.py (~300-400 lines)" ], "files_to_reference": [ "constants.py - Use ValidationSeverity enum (already exists from Phase 1)", "type_defs.py - Use ValidationIssueDict, ValidationResultDict (already exists from Phase 1)", "validation.py - Use validate_plan_file_path, validate_project_path_input", "error_responses.py - Use ErrorResponse factory (ARCH-001)", "logger_config.py - Use logger, log_tool_call, log_error (ARCH-003)" ], "new_test_files": [ "test_validate_plan.py - Comprehensive tests for Tool #3" ] }, "dependencies_and_relationships": { "depends_on": [ "Phase 1: Foundation complete (ValidationIssueDict, ValidationResultDict, ValidationSeverity, validate_plan_file_path)", "Phase 2: PlanningAnalyzer pattern established", "context/feature-implementation-planning-standard.json - Template to validate against" ], "blocks": [ "Phase 4: 
Tool #4 (generate_plan_review_report) - Needs validation results", "Phase 5: End-to-end workflow testing - Needs working validator" ], "enables": [ "Automated plan quality validation with 0-100 scoring", "Iterative review loop (validate → refine → re-validate until score ≥ 85)", "AI self-review capability", "Prevention of flawed plans from reaching execution" ] }, "potential_risks_and_gaps": { "accuracy_risks": [ { "risk": "False positives (flagging valid plans as flawed)", "likelihood": "MEDIUM - Heuristic-based validation", "impact": "MEDIUM - Frustrates users, wastes time", "mitigation": "Start with conservative rules; iterate based on testing; provide clear issue messages with examples" }, { "risk": "False negatives (missing real issues)", "likelihood": "MEDIUM - Complex validation logic", "impact": "HIGH - Flawed plans reach execution", "mitigation": "Comprehensive testing with intentionally flawed plans; review against quality checklist" } ], "complexity_risks": [ { "risk": "Validation rules are complex and hard to maintain", "likelihood": "MEDIUM - 25+ checklist items to implement", "impact": "MEDIUM - Hard to update/extend", "mitigation": "Modular validator methods (one per category); clear rule documentation; test each rule independently" } ], "performance_risks": [ { "risk": "Slow validation on large plans (1000+ tasks)", "likelihood": "LOW - Validation is mostly JSON parsing and regex", "impact": "LOW - User waits a few seconds", "mitigation": "Target < 2 seconds for typical plans; log duration; optimize if needed" } ], "technical_gaps": [ { "gap": "Circular dependency detection requires graph algorithm", "impact": "Need to implement DAG validation for task dependencies", "resolution": "Simple DFS-based cycle detection; well-known algorithm" } ] } }, "1_executive_summary": { "feature_overview": "Phase 3 implements Tool #3: validate_implementation_plan. This tool validates implementation plans against the feature-implementation-planning-standard.json quality checklist (25+ items). Returns a score (0-100) and list of issues categorized by severity (critical/major/minor). Enables iterative review loop where AI validates its own plans, identifies issues, and refines until score ≥ 85 before presenting to user. Prevents flawed plans from reaching execution.", "value_proposition": "Without this tool, plans go straight to user review or execution without automated quality checks. With this tool: (1) AI self-validates plans before user sees them, (2) Objective 0-100 scoring provides clear quality metric, (3) Specific issue suggestions enable targeted refinement, (4) Review loop prevents plans with critical flaws from reaching execution, (5) Users only see plans that have passed quality gates. This automation ensures consistent, high-quality plans and reduces user review time.", "real_world_analogy": "Like automated building code inspection before construction. Instead of building inspector manually reviewing blueprints (slow, subjective), automated system checks blueprints against building codes (fast, objective). Catches structural issues (missing sections), code violations (placeholders, vague descriptions), and safety problems (circular dependencies, missing edge cases). Returns inspection report with score and specific violations to fix. 
Building can't start until inspection passes.", "primary_use_cases": [ { "use_case": "AI self-review during planning", "workflow": "AI generates plan draft → calls validate_implementation_plan → receives score 75 with 4 major issues → AI reads issue suggestions → AI refines plan → re-validates → score 87 → presents to user" }, { "use_case": "Quality gate before user review", "workflow": "AI completes plan → validates → score < 85 → AI continues refining (max 5 iterations) → only presents plan to user when score ≥ 85" }, { "use_case": "User-initiated validation", "workflow": "User creates plan manually → asks AI to validate → AI runs validate_implementation_plan → provides detailed feedback → user fixes issues" } ], "success_metrics": { "accuracy": "100% detection rate on test plans with known flaws (critical: missing sections, major: placeholders, minor: vague descriptions)", "performance": "Validation completes in < 2 seconds for plans with < 100 tasks, < 10 seconds for plans with < 1000 tasks", "scoring_consistency": "Same plan validated multiple times produces same score (deterministic scoring)", "false_positive_rate": "< 5% (< 1 in 20 flagged issues are not actually problems)" } }, "2_risk_assessment": { "overall_risk_level": "MEDIUM - Complex validation logic with potential for false positives/negatives", "risk_breakdown": { "technical_complexity": { "level": "MEDIUM-HIGH", "factors": [ "25+ validation rules to implement (complex logic)", "Multiple validation categories (structure, completeness, quality, autonomy)", "Circular dependency detection requires graph algorithm", "Scoring algorithm must be fair and consistent", "Issue suggestions must be specific and helpful" ], "mitigation": "Break into modular validator methods; comprehensive testing with edge cases; iterative refinement based on testing" }, "accuracy_impact": { "level": "HIGH", "factors": [ "False positives frustrate users (flagging valid plans)", "False negatives allow flawed plans through (defeats purpose)", "Subjective rules (e.g., 'clear descriptions') hard to validate objectively", "Edge cases may not be caught" ], "mitigation": "Conservative rules initially; test with diverse plans (perfect, flawed, minimal); provide clear examples in issue suggestions; iterate based on feedback" }, "performance_impact": { "level": "LOW", "factors": [ "JSON parsing and regex matching is fast", "Validation may be slow on very large plans (1000+ tasks)", "Circular dependency detection could be O(n²) in worst case" ], "mitigation": "Target < 2 seconds for typical plans; optimize graph algorithm; log duration; set reasonable limits (plans with 1000+ tasks are too large anyway)" }, "maintainability_impact": { "level": "MEDIUM", "factors": [ "Many validation rules to maintain (25+ rules)", "Rules may need updates as planning standard evolves", "Scoring weights may need tuning" ], "mitigation": "Modular design (one method per category); clear documentation of rules; comprehensive tests to prevent regression" } }, "deployment_risks": { "backwards_compatibility": "NONE - Purely additive; no existing tools affected", "rollback_plan": "Remove tool from server.py list_tools(); remove handler registration; existing system continues working", "testing_requirements": "Test with: perfect plan (score 100), flawed plan (score < 70), minimal plan (score 40), plan with circular deps, plan with missing sections" }, "dependencies_and_external_factors": { "hard_dependencies": [ "Phase 1 complete (ValidationIssueDict, ValidationResultDict, 
ValidationSeverity)", "Python pathlib, json, re libraries (standard library - always available)" ], "soft_dependencies": [ "Planning template (context/feature-implementation-planning-standard.json) - must exist", "Plans follow template structure - gracefully handles non-conforming plans" ], "external_factors": [ "Plan file size affects validation time (expected)", "JSON parsing speed depends on Python version (minimal impact)" ] } }, "3_current_state_analysis": { "existing_infrastructure": { "from_phase_1": [ "✅ ValidationIssueDict TypedDict - Structure for individual issues", "✅ ValidationResultDict TypedDict - Return type for validator", "✅ ValidationSeverity enum - CRITICAL, MAJOR, MINOR", "✅ validate_plan_file_path() - Path validation with security checks", "✅ validate_project_path_input() - Project path validation", "✅ ErrorResponse factory - Consistent error handling", "✅ Structured logging - logger, log_tool_call, log_error", "✅ Handler registry pattern - TOOL_HANDLERS dict for registration" ], "from_existing_patterns": [ "✅ PlanningAnalyzer pattern (Phase 2) - Standalone class structure", "✅ AuditGenerator scoring algorithm - Point deduction approach", "✅ JSON loading with error handling - Template for plan loading" ] }, "files_to_create": [ { "file": "generators/plan_validator.py", "purpose": "PlanValidator class - Plan validation and scoring", "size_estimate": "300-400 lines", "structure": "class PlanValidator with __init__, validate(), and 5 validator methods" }, { "file": "test_validate_plan.py", "purpose": "Comprehensive tests for Tool #3", "size_estimate": "200-250 lines", "structure": "10-12 test functions covering success cases, edge cases, errors" } ], "files_to_modify": [ { "file": "server.py", "section": "list_tools() function", "change": "Add validate_implementation_plan Tool definition", "lines_added": "~45 lines (1 Tool object)", "location": "After analyze_project_for_planning tool definition" }, { "file": "tool_handlers.py", "section": "Handler functions + TOOL_HANDLERS dict", "change": "Add handle_validate_implementation_plan handler + register", "lines_added": "~70-80 lines (handler function) + 1 line (registration)", "imports_added": [ "from generators.plan_validator import PlanValidator" ] } ], "integration_points": { "input_validation": "validate_project_path_input(), validate_plan_file_path() - Reuse from Phase 1", "error_handling": "ErrorResponse.invalid_input(), .not_found(), .malformed_json() - Reuse factory", "logging": "log_tool_call('validate_implementation_plan'), logger.info() - Reuse logging", "return_type": "ValidationResultDict - Already defined in type_defs.py" }, "testing_infrastructure": { "test_patterns": "Follow test_analyze_project_basic.py structure from Phase 2", "test_fixtures": [ "Create perfect_plan.json - All sections, no issues (score 100)", "Create flawed_plan.json - Missing sections, placeholders, circular deps (score 60)", "Create minimal_plan.json - Barely functional (score 40)" ], "validation_benchmarks": "Track and assert validation duration < 2s for typical plans" } }, "4_key_features": { "feature_1_structure_validation": { "description": "Validates plan has all required sections from UNIVERSAL_PLANNING_STRUCTURE (sections 0-9 + META_DOCUMENTATION). 
Checks each section exists and has required subsections.", "technical_approach": "Load plan JSON → check top-level keys → verify UNIVERSAL_PLANNING_STRUCTURE exists → check for sections 0-9 within it → flag missing sections as CRITICAL issues", "user_benefit": "Ensures plans are complete; prevents missing critical sections like testing or success criteria", "implementation_notes": "Simple key existence check; most straightforward validator" }, "feature_2_completeness_validation": { "description": "Validates no placeholder text (TBD, TODO, [placeholder], 'Coming soon', 'Fill this in'). Checks all task IDs are present and unique. Ensures all task dependencies reference valid task IDs.", "technical_approach": "Regex search for placeholder patterns in all text fields → check task_id uniqueness → validate depends_on references → flag placeholders as MAJOR issues, invalid dependencies as CRITICAL", "user_benefit": "Prevents plans with unfinished sections or broken dependencies from reaching execution", "implementation_notes": "Placeholder patterns: r'\\b(TBD|TODO|\\[placeholder\\]|Coming soon|Fill this in)\\b'" }, "feature_3_quality_validation": { "description": "Validates task descriptions are clear and specific (<10 words = MAJOR issue, 10-20 words = MINOR, 20+ words = no issue). Checks success criteria are measurable (contain numbers/metrics). Validates 5-10 edge case scenarios documented.", "technical_approach": "Word count analysis for task descriptions → regex for numbers/metrics in success criteria → count edge cases → score quality and flag issues", "user_benefit": "Ensures plans are detailed enough for autonomous execution; prevents vague plans", "implementation_notes": "Measurable pattern: r'\\d+|\\b(\\d+\\.\\d+|percent|%|seconds?|ms|minutes?|hours?)\\b'" }, "feature_4_autonomy_validation": { "description": "Validates zero ambiguity - no phrases like 'might', 'could', 'maybe', 'TBD'. Checks for no questions in task descriptions ('Should we...?', 'What about...?'). Validates all technical decisions documented.", "technical_approach": "Regex search for ambiguous phrases → regex for questions → flag as CRITICAL if in critical sections (exec summary, tasks), MAJOR elsewhere", "user_benefit": "Ensures plans are implementable without human clarification; AI can execute autonomously", "implementation_notes": "Ambiguous patterns: r'\\b(might|could|maybe|possibly|perhaps|unclear|TBD)\\b'" }, "feature_5_dependency_validation": { "description": "Validates no circular dependencies (A depends on B, B depends on A). Checks task dependencies form valid DAG (directed acyclic graph). Ensures all dependencies can be resolved.", "technical_approach": "Build dependency graph from task IDs and depends_on fields → DFS cycle detection → flag circular dependencies as CRITICAL → verify all depends_on IDs exist", "user_benefit": "Prevents impossible execution orders; ensures tasks can be executed sequentially", "implementation_notes": "Use DFS with recursion stack to detect cycles; well-known graph algorithm" }, "feature_6_scoring_algorithm": { "description": "Calculates 0-100 score based on issue severity. Starts at 100, deducts points: critical=-10, major=-5, minor=-1. 
Determines result: PASS (90-100), PASS_WITH_WARNINGS (85-89), NEEDS_REVISION (70-84), FAIL (0-69).", "technical_approach": "Aggregate all issues → count by severity → calculate_score(issues) → determine result from score → set approved=true if score ≥ 90", "user_benefit": "Objective quality metric; clear threshold for approval; comparable across plans", "implementation_notes": "Score = max(0, 100 - (critical*10 + major*5 + minor*1))" }, "feature_7_detailed_issue_reporting": { "description": "For each issue, provides: severity (critical/major/minor), section (which plan section), issue (description of problem), suggestion (how to fix). Enables targeted refinement.", "technical_approach": "Each validator creates ValidationIssueDict with all 4 fields → aggregate into issues list → return in ValidationResultDict", "user_benefit": "AI knows exactly what to fix and how; enables iterative refinement", "implementation_notes": "Suggestions should be specific with examples, not vague advice" }, "feature_8_checklist_results": { "description": "Returns checklist_results dict mapping each quality checklist item to pass/fail status. Provides detailed breakdown beyond just score.", "technical_approach": "Track each checklist item separately (e.g., 'all_sections_present': True, 'no_placeholders': False) → return dict in ValidationResultDict", "user_benefit": "User can see exactly which checklist items passed/failed; transparency in scoring", "implementation_notes": "23 checklist items from template quality checklist; each gets boolean pass/fail" } }, "5_task_id_system": { "prefix_definitions": { "INFRA": "Infrastructure setup for PlanValidator class", "VALID": "Validation method implementations", "TOOL": "MCP tool definition and handler", "TEST": "Testing and validation", "DOC": "Documentation updates" }, "task_id_format": "PREFIX-NNN (e.g., INFRA-001, VALID-002)", "dependency_notation": "depends_on: [TASK-ID, ...]", "task_relationships": "Tasks within a phase are ordered by dependencies; tests depend on implementation; documentation depends on completion" }, "6_implementation_phases": { "phase_1_infrastructure_setup": { "goal": "Create PlanValidator class structure and basic infrastructure", "duration": "1 hour", "tasks": [ { "id": "INFRA-001", "title": "Create generators/plan_validator.py file", "description": "Create new file generators/plan_validator.py with PlanValidator class skeleton", "technical_details": "class PlanValidator: with __init__(self, plan_path: Path) method. Import Path, ValidationResultDict, ValidationIssueDict, ValidationSeverity, logger. 
Add docstring explaining purpose.", "code_template": [ "from pathlib import Path", "from typing import List", "import json", "import re", "import time", "from type_defs import ValidationResultDict, ValidationIssueDict", "from constants import ValidationSeverity", "from logger_config import logger", "", "class PlanValidator:", " \"\"\"Validates implementation plans against quality checklist.\"\"\"", " ", " def __init__(self, plan_path: Path):", " \"\"\"Initialize validator with path to plan file.\"\"\"", " self.plan_path = plan_path", " self.plan_data = None", " self.issues: List[ValidationIssueDict] = []" ], "depends_on": [], "acceptance_criteria": [ "File exists at generators/plan_validator.py", "Class PlanValidator defined with __init__ method", "__init__ accepts plan_path and stores as self.plan_path", "File imports: Path, ValidationResultDict, ValidationIssueDict, ValidationSeverity, logger" ], "estimated_effort": "15 minutes" }, { "id": "INFRA-002", "title": "Implement validate() main method signature", "description": "Create validate() method that orchestrates all validation operations. Returns ValidationResultDict. Initially returns placeholder dict structure.", "technical_details": "def validate(self) -> ValidationResultDict: Load plan JSON → run validators → calculate score → determine result → return ValidationResultDict. Log operation start and end.", "code_template": [ "def validate(self) -> ValidationResultDict:", " \"\"\"Validate plan and return results.\"\"\"", " start_time = time.time()", " logger.info(f'Starting validation of plan: {self.plan_path}')", " ", " # Load plan JSON", " self._load_plan()", " ", " # Run validators (to be implemented)", " self.validate_structure()", " self.validate_completeness()", " self.validate_quality()", " self.validate_autonomy()", " ", " # Check for circular dependencies", " if 'UNIVERSAL_PLANNING_STRUCTURE' in self.plan_data:", " structure = self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", " if '6_implementation_phases' in structure:", " self._validate_no_circular_dependencies(structure['6_implementation_phases'])", " ", " # Calculate score and determine result", " score = self.calculate_score()", " result = self.determine_result(score)", " approved = score >= 90", " ", " # Build checklist results", " checklist_results = self._build_checklist_results()", " ", " # Track performance", " duration = time.time() - start_time", " logger.info(f'Validation complete: score={score}, result={result}, issues={len(self.issues)}, duration={duration:.2f}s')", " ", " return ValidationResultDict(", " validation_result=result,", " score=score,", " issues=self.issues,", " checklist_results=checklist_results,", " approved=approved", " )" ], "depends_on": ["INFRA-001"], "acceptance_criteria": [ "validate() method exists and returns ValidationResultDict", "All required keys present in return dict (5 keys)", "Method logs 'Starting validation' at start", "Method logs 'Validation complete' at end with score, result, issues count" ], "estimated_effort": "20 minutes" }, { "id": "INFRA-003", "title": "Add method stubs for all validator methods", "description": "Create method signatures for: validate_structure(), validate_completeness(), validate_quality(), validate_autonomy(), plus helpers: _load_plan(), calculate_score(), determine_result(), _build_checklist_results(). Each returns appropriate type.", "technical_details": "Add 8 method stubs with docstrings. Each method logs 'Running [method_name]...' and returns empty/placeholder data. 
This establishes the complete interface.", "code_template": [ "def _load_plan(self):", " \"\"\"Load and parse plan JSON file.\"\"\"", " try:", " with open(self.plan_path, 'r', encoding='utf-8') as f:", " self.plan_data = json.load(f)", " except json.JSONDecodeError as e:", " raise ValueError(f'Invalid JSON in plan file: {str(e)}')", " except FileNotFoundError:", " raise FileNotFoundError(f'Plan file not found: {self.plan_path}')", "", "def validate_structure(self):", " \"\"\"Validate plan has all required sections.\"\"\"", " logger.debug('Validating structure...')", " # To be implemented", "", "def validate_completeness(self):", " \"\"\"Validate no placeholders, all task IDs valid.\"\"\"", " logger.debug('Validating completeness...')", " # To be implemented", "", "def validate_quality(self):", " \"\"\"Validate task descriptions clear, success criteria measurable.\"\"\"", " logger.debug('Validating quality...')", " # To be implemented", "", "def validate_autonomy(self):", " \"\"\"Validate no ambiguity, implementable without clarification.\"\"\"", " logger.debug('Validating autonomy...')", " # To be implemented", "", "def calculate_score(self) -> int:", " \"\"\"Calculate 0-100 score based on issues.\"\"\"", " score = 100", " for issue in self.issues:", " if issue['severity'] == 'critical':", " score -= 10", " elif issue['severity'] == 'major':", " score -= 5", " elif issue['severity'] == 'minor':", " score -= 1", " return max(0, score)", "", "def determine_result(self, score: int) -> str:", " \"\"\"Determine validation result from score.\"\"\"", " if score >= 90:", " return 'PASS'", " elif score >= 85:", " return 'PASS_WITH_WARNINGS'", " elif score >= 70:", " return 'NEEDS_REVISION'", " else:", " return 'FAIL'", "", "def _build_checklist_results(self) -> dict:", " \"\"\"Build checklist results dict.\"\"\"", " # To be implemented - map each checklist item to pass/fail", " return {}" ], "depends_on": ["INFRA-002"], "acceptance_criteria": [ "All 8 helper methods exist with docstrings", "Each method has correct return type annotation", "validate() calls all 4 validation methods", "Code runs without errors (returns empty issues)", "calculate_score() implements scoring algorithm correctly", "determine_result() implements result thresholds correctly" ], "estimated_effort": "30 minutes" } ] }, "phase_2_structure_and_completeness_validation": { "goal": "Implement structure and completeness validators", "duration": "1.5 hours", "tasks": [ { "id": "VALID-001", "title": "Implement validate_structure() method", "description": "Validates plan has all required sections: META_DOCUMENTATION + UNIVERSAL_PLANNING_STRUCTURE with sections 0-9. Flags missing sections as CRITICAL issues.", "technical_details": "Check self.plan_data for keys: 'META_DOCUMENTATION', 'UNIVERSAL_PLANNING_STRUCTURE'. Within UNIVERSAL_PLANNING_STRUCTURE, check for '0_preparation', '1_executive_summary', ..., '9_implementation_checklist'. 
For each missing section, create ValidationIssueDict with severity='critical', section='structure', issue='Missing section X', suggestion='Add section X to plan'.", "code_template": [ "def validate_structure(self):", " \"\"\"Validate plan has all required sections.\"\"\"", " logger.debug('Validating structure...')", " ", " required_sections = [", " '0_preparation',", " '1_executive_summary',", " '2_risk_assessment',", " '3_current_state_analysis',", " '4_key_features',", " '5_task_id_system',", " '6_implementation_phases',", " '7_testing_strategy',", " '8_success_criteria',", " '9_implementation_checklist'", " ]", " ", " # Check META_DOCUMENTATION", " if 'META_DOCUMENTATION' not in self.plan_data:", " self.issues.append({", " 'severity': 'critical',", " 'section': 'structure',", " 'issue': 'Missing META_DOCUMENTATION section',", " 'suggestion': 'Add META_DOCUMENTATION section with plan_id, plan_name, status, estimated_effort'", " })", " ", " # Check UNIVERSAL_PLANNING_STRUCTURE", " if 'UNIVERSAL_PLANNING_STRUCTURE' not in self.plan_data:", " self.issues.append({", " 'severity': 'critical',", " 'section': 'structure',", " 'issue': 'Missing UNIVERSAL_PLANNING_STRUCTURE section',", " 'suggestion': 'Add UNIVERSAL_PLANNING_STRUCTURE section containing sections 0-9'", " })", " return # Can't check subsections if parent missing", " ", " # Check each required section 0-9", " structure = self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", " for section in required_sections:", " if section not in structure:", " self.issues.append({", " 'severity': 'critical',", " 'section': 'structure',", " 'issue': f'Missing section {section}',", " 'suggestion': f'Add section {section} to UNIVERSAL_PLANNING_STRUCTURE'", " })" ], "depends_on": ["INFRA-003"], "acceptance_criteria": [ "Checks for META_DOCUMENTATION presence", "Checks for UNIVERSAL_PLANNING_STRUCTURE presence", "Checks for all 10 required sections (0-9)", "Creates ValidationIssueDict for each missing section", "Issues have severity='critical', section='structure'", "Suggestions are specific and actionable" ], "estimated_effort": "30 minutes" }, { "id": "VALID-002", "title": "Implement validate_completeness() method", "description": "Validates no placeholder text (TBD, TODO, [placeholder], etc.). Checks all task IDs are present and unique. Validates all depends_on references point to valid task IDs.", "technical_details": "Regex search plan_data JSON string for placeholder patterns → flag as MAJOR. Extract all task IDs from implementation_phases → check uniqueness → flag duplicates as CRITICAL. 
Extract all depends_on values → verify each references existing task ID → flag invalid as CRITICAL.", "code_template": [ "def validate_completeness(self):", " \"\"\"Validate no placeholders, all task IDs valid.\"\"\"", " logger.debug('Validating completeness...')", " ", " # Check for placeholder text", " plan_json_str = json.dumps(self.plan_data)", " placeholder_pattern = r'\\b(TBD|TODO|\\[placeholder\\]|Coming soon|Fill this in|to be determined)\\b'", " matches = re.finditer(placeholder_pattern, plan_json_str, re.IGNORECASE)", " for match in matches:", " self.issues.append({", " 'severity': 'major',", " 'section': 'completeness',", " 'issue': f'Placeholder text found: \"{match.group()}\"',", " 'suggestion': 'Replace placeholder with actual content'", " })", " ", " # Validate task IDs if implementation_phases exists", " if 'UNIVERSAL_PLANNING_STRUCTURE' in self.plan_data:", " structure = self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", " if '6_implementation_phases' in structure:", " self._validate_task_ids(structure['6_implementation_phases'])", "", "def _validate_task_ids(self, phases_data):", " \"\"\"Validate task IDs are unique and dependencies are valid.\"\"\"", " task_ids = set()", " dependencies = []", " ", " # Extract all task IDs and dependencies", " for phase_key, phase in phases_data.items():", " if isinstance(phase, dict) and 'tasks' in phase:", " for task in phase['tasks']:", " if isinstance(task, dict) and 'id' in task:", " task_id = task['id']", " ", " # Check uniqueness", " if task_id in task_ids:", " self.issues.append({", " 'severity': 'critical',", " 'section': 'completeness',", " 'issue': f'Duplicate task ID: {task_id}',", " 'suggestion': 'Each task ID must be unique'", " })", " task_ids.add(task_id)", " ", " # Collect dependencies", " if 'depends_on' in task and task['depends_on']:", " for dep in task['depends_on']:", " dependencies.append((task_id, dep))", " ", " # Validate dependencies reference existing tasks", " for task_id, dep_id in dependencies:", " if dep_id not in task_ids:", " self.issues.append({", " 'severity': 'critical',", " 'section': 'completeness',", " 'issue': f'Task {task_id} depends on non-existent task {dep_id}',", " 'suggestion': f'Ensure task {dep_id} exists or remove dependency'", " })" ], "depends_on": ["VALID-001"], "acceptance_criteria": [ "Detects placeholder text: TBD, TODO, [placeholder], Coming soon, Fill this in", "Case-insensitive matching", "Creates MAJOR severity issue for each placeholder found", "Validates task IDs are unique", "Validates depends_on references point to existing tasks", "Creates CRITICAL severity issue for duplicate IDs or invalid dependencies" ], "estimated_effort": "45 minutes" }, { "id": "VALID-003", "title": "Implement validate_task_id_dependencies_no_cycles() helper", "description": "Implements circular dependency detection using DFS. Checks that task dependencies form a valid DAG (directed acyclic graph). Flags circular dependencies as CRITICAL.", "technical_details": "Build dependency graph from task IDs and depends_on → use DFS with recursion stack to detect cycles → flag circular dependencies. 
Well-known cycle detection algorithm.", "code_template": [ "def _validate_no_circular_dependencies(self, phases_data):", " \"\"\"Detect circular dependencies using DFS.\"\"\"", " # Build adjacency list", " graph = {}", " task_ids = set()", " ", " for phase_key, phase in phases_data.items():", " if isinstance(phase, dict) and 'tasks' in phase:", " for task in phase['tasks']:", " if isinstance(task, dict) and 'id' in task:", " task_id = task['id']", " task_ids.add(task_id)", " graph[task_id] = task.get('depends_on', [])", " ", " # DFS cycle detection", " visited = set()", " rec_stack = set()", " ", " def has_cycle(node):", " visited.add(node)", " rec_stack.add(node)", " ", " for neighbor in graph.get(node, []):", " if neighbor not in visited:", " if has_cycle(neighbor):", " return True", " elif neighbor in rec_stack:", " # Cycle detected", " self.issues.append({", " 'severity': 'critical',", " 'section': 'completeness',", " 'issue': f'Circular dependency detected involving task {node} and {neighbor}',", " 'suggestion': 'Remove circular dependency to create valid execution order'", " })", " return True", " ", " rec_stack.remove(node)", " return False", " ", " for task_id in task_ids:", " if task_id not in visited:", " has_cycle(task_id)" ], "depends_on": ["VALID-002"], "acceptance_criteria": [ "Builds dependency graph from task IDs and depends_on", "Implements DFS with recursion stack for cycle detection", "Detects circular dependencies (A→B→A, A→B→C→A)", "Creates CRITICAL severity issue for each cycle found", "Does not create false positives (valid DAGs pass)" ], "estimated_effort": "30 minutes" } ] }, "phase_3_quality_and_autonomy_validation": { "goal": "Implement quality and autonomy validators", "duration": "1.5 hours", "tasks": [ { "id": "VALID-004", "title": "Implement validate_quality() method", "description": "Validates task descriptions are clear (>20 words), success criteria are measurable (contain numbers/metrics), edge cases are documented (5-10 scenarios).", "technical_details": "Iterate through tasks → count words in description → flag if <10 (MAJOR) or 10-20 (MINOR). Check success_criteria for numbers/metrics → flag if none (MAJOR). 
Count edge case scenarios in testing_strategy → flag if <5 (MAJOR) or 5-10 but sparse (MINOR).", "code_template": [ "def validate_quality(self):", " \"\"\"Validate task descriptions clear, success criteria measurable.\"\"\"", " logger.debug('Validating quality...')", " ", " if 'UNIVERSAL_PLANNING_STRUCTURE' not in self.plan_data:", " return", " ", " structure = self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", " ", " # Validate task descriptions", " if '6_implementation_phases' in structure:", " self._validate_task_descriptions(structure['6_implementation_phases'])", " ", " # Validate success criteria", " if '8_success_criteria' in structure:", " self._validate_success_criteria(structure['8_success_criteria'])", " ", " # Validate edge cases", " if '7_testing_strategy' in structure:", " self._validate_edge_cases(structure['7_testing_strategy'])", "", "def _validate_task_descriptions(self, phases_data):", " \"\"\"Check task descriptions are clear and specific.\"\"\"", " for phase_key, phase in phases_data.items():", " if isinstance(phase, dict) and 'tasks' in phase:", " for task in phase['tasks']:", " if isinstance(task, dict) and 'description' in task:", " desc = task['description']", " word_count = len(desc.split())", " task_id = task.get('id', 'unknown')", " ", " if word_count < 10:", " self.issues.append({", " 'severity': 'major',", " 'section': 'quality',", " 'issue': f'Task {task_id} description too short ({word_count} words)',", " 'suggestion': 'Expand description to at least 10 words with specific details'", " })", " elif word_count < 20:", " self.issues.append({", " 'severity': 'minor',", " 'section': 'quality',", " 'issue': f'Task {task_id} description could be more detailed ({word_count} words)',", " 'suggestion': 'Consider expanding description to 20+ words for clarity'", " })", "", "def _validate_success_criteria(self, criteria_data):", " \"\"\"Check success criteria are measurable.\"\"\"", " criteria_json = json.dumps(criteria_data)", " # Look for numbers, percentages, time units", " measurable_pattern = r'\\d+|\\b(\\d+\\.\\d+|percent|%|seconds?|ms|minutes?|hours?|>=|<=|>|<)\\b'", " matches = re.findall(measurable_pattern, criteria_json, re.IGNORECASE)", " ", " if len(matches) < 3: # Should have several measurable criteria", " self.issues.append({", " 'severity': 'major',", " 'section': 'quality',", " 'issue': 'Success criteria lack measurable metrics',", " 'suggestion': 'Add specific metrics (numbers, percentages, thresholds) to success criteria'", " })", "", "def _validate_edge_cases(self, testing_data):", " \"\"\"Check edge cases are documented.\"\"\"", " edge_case_json = json.dumps(testing_data)", " # Look for edge case mentions", " edge_case_pattern = r'edge.?case|scenario|boundary|invalid|empty|null|error|exception'", " matches = re.findall(edge_case_pattern, edge_case_json, re.IGNORECASE)", " ", " if len(matches) < 5:", " self.issues.append({", " 'severity': 'major',", " 'section': 'quality',", " 'issue': f'Insufficient edge case coverage (found {len(matches)} mentions, need 5+)',", " 'suggestion': 'Document at least 5-10 edge case scenarios in testing strategy'", " })" ], "depends_on": ["VALID-003"], "acceptance_criteria": [ "Validates task description word count", "Flags <10 words as MAJOR, 10-20 as MINOR", "Validates success criteria contain measurable metrics", "Flags lack of metrics (numbers, %, time units) as MAJOR", "Validates edge case coverage", "Flags <5 edge case mentions as MAJOR" ], "estimated_effort": "45 minutes" }, { "id": "VALID-005", "title": "Implement 
validate_autonomy() method", "description": "Validates no ambiguous phrases (might, could, maybe, TBD). Checks for no questions in task descriptions. Validates all technical decisions documented.", "technical_details": "Regex search for ambiguous phrases → flag as CRITICAL in exec summary/tasks, MAJOR elsewhere. Regex for questions ('Should we...?', 'What about...?') → flag as MAJOR. Check for decision documentation keywords.", "code_template": [ "def validate_autonomy(self):", " \"\"\"Validate no ambiguity, implementable without clarification.\"\"\"", " logger.debug('Validating autonomy...')", " ", " plan_json_str = json.dumps(self.plan_data)", " ", " # Check for ambiguous phrases", " ambiguous_pattern = r'\\b(might|could|maybe|possibly|perhaps|unclear|TBD|to be determined|needs clarification)\\b'", " matches = list(re.finditer(ambiguous_pattern, plan_json_str, re.IGNORECASE))", " ", " for match in matches[:5]: # Limit to first 5 to avoid spam", " self.issues.append({", " 'severity': 'major', # Could be CRITICAL in exec summary/tasks", " 'section': 'autonomy',", " 'issue': f'Ambiguous phrase found: \"{match.group()}\"',", " 'suggestion': 'Replace with definitive language - make clear decisions'", " })", " ", " # Check for questions", " question_pattern = r'(Should we|What about|What if|How do we|Which|\\?)'", " matches = list(re.finditer(question_pattern, plan_json_str, re.IGNORECASE))", " ", " for match in matches[:5]: # Limit to first 5", " self.issues.append({", " 'severity': 'major',", " 'section': 'autonomy',", " 'issue': f'Question found in plan: \"{match.group()}\"',", " 'suggestion': 'Answer the question in the plan - no unresolved questions'", " })" ], "depends_on": ["VALID-004"], "acceptance_criteria": [ "Detects ambiguous phrases: might, could, maybe, possibly, perhaps, unclear, TBD", "Case-insensitive matching", "Creates MAJOR severity issue for ambiguous phrases", "Detects questions: Should we, What about, What if, ?", "Creates MAJOR severity issue for questions", "Limits issue spam (max 5 per category)" ], "estimated_effort": "30 minutes" } ] }, "phase_4_scoring_and_checklist": { "goal": "Finalize scoring and checklist results", "duration": "45 minutes", "tasks": [ { "id": "VALID-006", "title": "Implement _build_checklist_results() method", "description": "Builds checklist_results dict mapping each quality checklist item to pass/fail. 
Provides transparency in scoring beyond just numeric score.", "technical_details": "For each checklist item (23 total from template), determine pass/fail based on validation results → return dict with item names as keys, boolean pass/fail as values.", "code_template": [ "def _build_checklist_results(self) -> dict:", " \"\"\"Build checklist results dict mapping all 23 checklist items to pass/fail.\"\"\"", " results = {}", " ", " # Completeness checklist items (9 items)", " results['executive_summary_complete'] = self._check_executive_summary_complete()", " results['risk_assessment_present'] = self._check_section_present('2_risk_assessment')", " results['current_state_documented'] = self._check_section_present('3_current_state_analysis')", " results['key_features_defined'] = self._check_section_present('4_key_features')", " results['all_tasks_have_ids'] = not self._has_issues_matching('task.*without.*id', ignore_case=True)", " results['phases_defined'] = self._check_section_present('6_implementation_phases')", " results['testing_strategy_present'] = self._check_section_present('7_testing_strategy')", " results['success_criteria_defined'] = self._check_section_present('8_success_criteria')", " results['implementation_checklist_present'] = self._check_section_present('9_implementation_checklist')", " ", " # Quality checklist items (8 items)", " results['no_placeholder_text'] = not self._has_issues_matching('Placeholder text found')", " results['task_descriptions_imperative'] = not self._has_issues_matching('description.*not.*imperative', ignore_case=True)", " results['success_criteria_measurable'] = not self._has_issues_matching('Success criteria lack')", " results['edge_cases_comprehensive'] = not self._has_issues_matching('Insufficient edge case')", " results['effort_estimates_realistic'] = not self._has_issues_matching('effort.*unrealistic', ignore_case=True)", " results['dependencies_valid'] = not self._has_issues_matching('depends on non-existent')", " results['security_addressed'] = not self._has_issues_matching('security.*not.*addressed', ignore_case=True)", " results['performance_targets_specified'] = not self._has_issues_matching('performance.*not.*specified', ignore_case=True)", " ", " # Autonomy checklist items (6 items)", " results['no_ambiguous_phrases'] = not self._has_issues_matching('Ambiguous phrase')", " results['no_questions'] = not self._has_issues_matching('Question found')", " results['edge_case_behavior_defined'] = not self._has_issues_matching('edge case.*undefined', ignore_case=True)", " results['acceptance_criteria_clear'] = not self._has_issues_matching('acceptance.*unclear', ignore_case=True)", " results['review_gates_specified'] = not self._has_issues_matching('review.*gate.*missing', ignore_case=True)", " results['technical_decisions_documented'] = not self._has_issues_matching('decision.*not.*documented', ignore_case=True)", " ", " # Additional validation", " results['no_circular_dependencies'] = not self._has_issues_matching('Circular dependency')", " ", " return results", "", "def _check_executive_summary_complete(self) -> bool:", " \"\"\"Check if executive summary has all required fields.\"\"\"", " if 'UNIVERSAL_PLANNING_STRUCTURE' not in self.plan_data:", " return False", " structure = self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", " if '1_executive_summary' not in structure:", " return False", " summary = structure['1_executive_summary']", " required_fields = ['feature_overview', 'value_proposition', 'real_world_analogy', 'primary_use_cases', 
'success_metrics']", " return all(field in summary for field in required_fields)", "", "def _check_section_present(self, section_name: str) -> bool:", " \"\"\"Check if a specific section is present.\"\"\"", " if 'UNIVERSAL_PLANNING_STRUCTURE' not in self.plan_data:", " return False", " return section_name in self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", "", "def _all_sections_present(self) -> bool:", " \"\"\"Check if all required sections are present.\"\"\"", " if 'UNIVERSAL_PLANNING_STRUCTURE' not in self.plan_data:", " return False", " structure = self.plan_data['UNIVERSAL_PLANNING_STRUCTURE']", " required = ['0_preparation', '1_executive_summary', '2_risk_assessment',", " '3_current_state_analysis', '4_key_features', '5_task_id_system',", " '6_implementation_phases', '7_testing_strategy', '8_success_criteria',", " '9_implementation_checklist']", " return all(section in structure for section in required)", "", "def _has_issues_in_section(self, section: str) -> bool:", " \"\"\"Check if any issues in given section.\"\"\"", " return any(issue['section'] == section for issue in self.issues)", "", "def _has_critical_in_section(self, section: str) -> bool:", " \"\"\"Check if any critical issues in given section.\"\"\"", " return any(issue['section'] == section and issue['severity'] == 'critical' ", " for issue in self.issues)", "", "def _has_issues_matching(self, pattern: str, ignore_case: bool = False) -> bool:", " \"\"\"Check if any issues match pattern.\"\"\"", " import re", " flags = re.IGNORECASE if ignore_case else 0", " regex = re.compile(pattern, flags)", " return any(regex.search(issue['issue']) for issue in self.issues)" ], "depends_on": ["VALID-005"], "acceptance_criteria": [ "Returns dict with checklist item names as keys", "Each value is boolean (True = pass, False = fail)", "Maps all 24 checklist items (23 from template + circular dependencies)", "Completeness items: 9 checks (executive_summary_complete, risk_assessment_present, etc.)", "Quality items: 8 checks (no_placeholder_text, task_descriptions_imperative, etc.)", "Autonomy items: 6 checks (no_ambiguous_phrases, no_questions, etc.)", "Additional: 1 check (no_circular_dependencies)", "Uses validation results to determine pass/fail", "Helper methods work correctly (_check_executive_summary_complete, _check_section_present, etc.)" ], "estimated_effort": "30 minutes" }, { "id": "VALID-007", "title": "Test scoring algorithm with edge cases", "description": "Verify scoring algorithm handles edge cases: no issues (score 100), many critical issues (score 0), mixed issues (calculate correctly).", "technical_details": "Create test cases with known issue counts → verify calculate_score() returns expected values → verify determine_result() returns correct result strings.", "code_template": [ "# Test in test_validate_plan.py:", "def test_calculate_score_perfect():", " validator = PlanValidator(Path('perfect_plan.json'))", " validator.issues = [] # No issues", " assert validator.calculate_score() == 100", "", "def test_calculate_score_critical_issues():", " validator = PlanValidator(Path('test.json'))", " validator.issues = [", " {'severity': 'critical', 'section': 'test', 'issue': 'test', 'suggestion': 'test'},", " {'severity': 'critical', 'section': 'test', 'issue': 'test', 'suggestion': 'test'},", " {'severity': 'critical', 'section': 'test', 'issue': 'test', 'suggestion': 'test'}", " ]", " assert validator.calculate_score() == 70 # 100 - (3 * 10)", "", "def test_calculate_score_mixed_issues():", " validator = 
PlanValidator(Path('test.json'))", " validator.issues = [", " {'severity': 'critical', 'section': 'test', 'issue': 'test', 'suggestion': 'test'},", " {'severity': 'major', 'section': 'test', 'issue': 'test', 'suggestion': 'test'},", " {'severity': 'major', 'section': 'test', 'issue': 'test', 'suggestion': 'test'},", " {'severity': 'minor', 'section': 'test', 'issue': 'test', 'suggestion': 'test'}", " ]", " assert validator.calculate_score() == 79 # 100 - 10 - 5 - 5 - 1", "", "def test_determine_result_thresholds():", " validator = PlanValidator(Path('test.json'))", " assert validator.determine_result(95) == 'PASS'", " assert validator.determine_result(87) == 'PASS_WITH_WARNINGS'", " assert validator.determine_result(75) == 'NEEDS_REVISION'", " assert validator.determine_result(65) == 'FAIL'", "", "def test_calculate_score_excessive_issues():", " \"\"\"Test that score is clamped to 0, not negative.\"\"\"", " validator = PlanValidator(Path('test.json'))", " validator.issues = [", " {'severity': 'critical', 'section': 'test', 'issue': 'test', 'suggestion': 'test'}", " for _ in range(15) # 15 critical = -150 points", " ]", " score = validator.calculate_score()", " assert score == 0 # Should be clamped to 0, not -50" ], "depends_on": ["VALID-006"], "acceptance_criteria": [ "Scoring algorithm produces correct results for edge cases", "Score of 100 with 0 issues", "Score of 70 with 3 critical issues", "Score of 79 with mixed issues (1 critical, 2 major, 1 minor)", "Score clamped to 0 (not negative) with excessive issues (15 critical = -150 points)", "Result determination thresholds correct (90+, 85-89, 70-84, <70)" ], "estimated_effort": "15 minutes" } ] }, "phase_5_mcp_tool_integration": { "goal": "Add MCP tool definition and handler", "duration": "1 hour", "tasks": [ { "id": "TOOL-001", "title": "Add validate_implementation_plan tool definition in server.py", "description": "Add Tool object to list_tools() for validate_implementation_plan. Define input schema (project_path, plan_file_path required). Add comprehensive description.", "technical_details": "Add Tool definition after analyze_project_for_planning. Input schema: project_path (string), plan_file_path (string). Both required. Description explains validation, scoring, and review loop purpose.", "code_template": [ "Tool(", " name='validate_implementation_plan',", " description='Validates implementation plan JSON against feature-implementation-planning-standard.json quality checklist. Scores plan 0-100 based on completeness, quality, and autonomy. Identifies issues by severity (critical/major/minor) with specific fix suggestions. 
Enables iterative review loop - AI validates plan, refines based on feedback, re-validates until score >= 85 before presenting to user.',", " inputSchema={", " 'type': 'object',", " 'properties': {", " 'project_path': {", " 'type': 'string',", " 'description': 'Absolute path to project directory containing the plan file'", " },", " 'plan_file_path': {", " 'type': 'string',", " 'description': 'Relative path to plan JSON file within project (e.g., feature-auth-plan.json)'", " }", " },", " 'required': ['project_path', 'plan_file_path']", " }", ")" ], "depends_on": ["VALID-007"], "acceptance_criteria": [ "Tool definition added to server.py list_tools()", "Input schema requires project_path and plan_file_path (both strings)", "Description clearly explains validation purpose and review loop", "Tool appears in MCP tool list when server starts" ], "estimated_effort": "15 minutes" }, { "id": "TOOL-002", "title": "Implement handle_validate_implementation_plan in tool_handlers.py", "description": "Create handler function following standard pattern: validate inputs, create PlanValidator instance, call validate(), return JSON response. Handle errors (ValueError, FileNotFoundError, json.JSONDecodeError, Exception).", "technical_details": "async def handle_validate_implementation_plan(arguments: dict) -> list[TextContent]: Validate inputs → build plan path → create PlanValidator → call validate() → return ValidationResultDict as JSON. Catch errors and use ErrorResponse factory.", "code_template": [ "async def handle_validate_implementation_plan(arguments: dict) -> list[TextContent]:", " \"\"\"Handle validate_implementation_plan tool call.\"\"\"", " try:", " # Log invocation", " log_tool_call('validate_implementation_plan', args_keys=list(arguments.keys()))", " ", " # Validate inputs", " project_path_str = arguments.get('project_path', '')", " plan_file_str = arguments.get('plan_file_path', '')", " ", " project_path = Path(validate_project_path_input(project_path_str)).resolve()", " plan_path = validate_plan_file_path(project_path, plan_file_str)", " ", " logger.info(f'Validating plan: {plan_path}')", " ", " # Check plan file exists", " if not plan_path.exists():", " return ErrorResponse.not_found(", " f'Plan file not found: {plan_file_str}',", " 'Ensure plan file exists in project directory'", " )", " ", " # Create validator and validate", " validator = PlanValidator(plan_path)", " result = validator.validate()", " ", " logger.info(", " f'Validation complete: score={result[\"score\"]}, result={result[\"validation_result\"]}, issues={len(result[\"issues\"])}',", " extra={'score': result['score'], 'result': result['validation_result']}", " )", " ", " # Return result as JSON", " return [TextContent(type='text', text=json.dumps(result, indent=2))]", " ", " except ValueError as e:", " log_error('validate_plan_validation_error', str(e))", " return ErrorResponse.invalid_input(", " str(e),", " 'Check project_path and plan_file_path values'", " )", " except json.JSONDecodeError as e:", " log_error('validate_plan_json_error', str(e))", " return ErrorResponse.malformed_json(", " f'Plan file has invalid JSON: {str(e)}'", " )", " except FileNotFoundError as e:", " log_error('validate_plan_file_not_found', str(e))", " return ErrorResponse.not_found(", " str(e),", " 'Ensure plan file exists'", " )", " except Exception as e:", " log_error('validate_plan_error', str(e))", " return ErrorResponse.generic_error(", " f'Failed to validate plan: {str(e)}'", " )" ], "depends_on": ["TOOL-001"], "acceptance_criteria": [ "Handler 
validates inputs using validate_project_path_input, validate_plan_file_path", "Creates PlanValidator instance with plan_path", "Calls validator.validate() and gets ValidationResultDict", "Returns JSON-formatted TextContent", "Handles all error types with appropriate ErrorResponse", "Logs tool invocation, validation results, and errors" ], "estimated_effort": "30 minutes" }, { "id": "TOOL-003", "title": "Register handler in TOOL_HANDLERS dict", "description": "Add 'validate_implementation_plan': handle_validate_implementation_plan to TOOL_HANDLERS dict in tool_handlers.py", "technical_details": "Add registration after analyze_project_for_planning handler registration", "code_template": [ "TOOL_HANDLERS = {", " # ... existing handlers ...", " 'analyze_project_for_planning': handle_analyze_project_for_planning,", " 'validate_implementation_plan': handle_validate_implementation_plan,", "}" ], "depends_on": ["TOOL-002"], "acceptance_criteria": [ "Handler registered in TOOL_HANDLERS dict", "Key matches tool name exactly", "Tool can be invoked via MCP" ], "estimated_effort": "5 minutes" }, { "id": "TOOL-004", "title": "Add required imports to tool_handlers.py", "description": "Add import for PlanValidator in tool_handlers.py", "technical_details": "Add 'from generators.plan_validator import PlanValidator' to imports section", "code_template": [ "from generators.plan_validator import PlanValidator" ], "depends_on": ["TOOL-003"], "acceptance_criteria": [ "Import added to tool_handlers.py", "No import errors when server starts" ], "estimated_effort": "2 minutes" } ] }, "phase_6_comprehensive_testing": { "goal": "Create comprehensive test suite for Tool #3", "duration": "1 hour", "tasks": [ { "id": "TEST-001", "title": "Create test_validate_plan.py with test fixtures", "description": "Create test file with async test functions. Create test fixtures: perfect_plan.json (score 100), flawed_plan.json (score 60), minimal_plan.json (score 40) with specific characteristics.", "technical_details": "Create test file following test_analyze_project_basic.py pattern. 
Create test_fixtures/ directory with 3 plan JSON files representing different quality levels.", "test_fixture_specifications": { "perfect_plan.json": { "target_score": 100, "characteristics": [ "All 11 sections present (META_DOCUMENTATION + 0-9)", "No placeholders (no TBD, TODO, [placeholder])", "All task descriptions >20 words", "5+ edge case scenarios documented", "Measurable success criteria with specific metrics (numbers, percentages)", "No ambiguous phrases (no 'might', 'could', 'maybe')", "No questions in descriptions", "No circular dependencies", "All task IDs unique following PREFIX-NNN format", "All depends_on references valid" ] }, "flawed_plan.json": { "target_score": "55-65 (multiple major/critical issues)", "characteristics": [ "Missing section 5 (5_task_id_system) - CRITICAL issue", "Contains 'TBD' placeholder text in 2 locations - MAJOR issues", "2 tasks with <10 word descriptions - MAJOR issues", "Only 2 edge cases documented (need 5+) - MAJOR issue", "Success criteria lack metrics (no numbers) - MAJOR issue", "Contains ambiguous phrases: 'might' and 'maybe' in 2 places - MAJOR issues", "One circular dependency: SETUP-001→SETUP-002→SETUP-001 - CRITICAL issue", "All other sections present to avoid too many critical issues" ] }, "minimal_plan.json": { "target_score": "35-45 (barely functional)", "characteristics": [ "Only META_DOCUMENTATION + sections 0, 1, and 6 present", "Missing sections 2-5, 7-9 - 7 CRITICAL issues", "Multiple placeholders: 5+ instances of TBD, TODO - MAJOR issues", "All task descriptions <10 words - MAJOR issues", "No edge cases documented - MAJOR issue", "Generic success criteria with no metrics - MAJOR issue", "Many questions like 'Should we...?' - MAJOR issues", "Ambiguous language throughout" ] } }, "depends_on": ["TOOL-004"], "acceptance_criteria": [ "test_validate_plan.py file exists", "Imports tool_handlers and asyncio", "3 test fixture files created (perfect, flawed, minimal)", "perfect_plan.json scores 100 with all sections, no issues", "flawed_plan.json scores 55-65 with specific intentional flaws", "minimal_plan.json scores 35-45 with many missing sections", "Each fixture matches specification characteristics exactly" ], "estimated_effort": "30 minutes" }, { "id": "TEST-002", "title": "Test perfect plan validation", "description": "Test that a perfect plan (all sections, no issues) scores 100 with validation_result='PASS', issues=[], approved=true.", "technical_details": "Create perfect_plan.json with all sections, no placeholders, clear descriptions. Call tool. Assert score=100, result=PASS, issues=[], approved=true.", "depends_on": ["TEST-001"], "acceptance_criteria": [ "Test calls handle_validate_implementation_plan with perfect plan", "Verifies score=100", "Verifies validation_result='PASS'", "Verifies issues=[] (empty list)", "Verifies approved=true", "Test passes" ], "estimated_effort": "10 minutes" }, { "id": "TEST-003", "title": "Test flawed plan validation", "description": "Test that a flawed plan (missing sections, placeholders, vague descriptions) scores low with specific issues identified.", "technical_details": "Create flawed_plan.json with missing section 5, placeholder text 'TBD', short descriptions. Call tool. 
Assert score < 70, result=FAIL or NEEDS_REVISION, issues contains specific problems.", "depends_on": ["TEST-002"], "acceptance_criteria": [ "Test verifies low score (< 70)", "Verifies validation_result='FAIL' or 'NEEDS_REVISION'", "Verifies issues list is non-empty", "Verifies issues contain: missing section, placeholder text, description quality", "Test passes" ], "estimated_effort": "15 minutes" }, { "id": "TEST-004", "title": "Test circular dependency detection", "description": "Test that circular dependencies are detected and flagged as CRITICAL. Create plan with A→B→A dependency cycle.", "technical_details": "Create plan with tasks: SETUP-001 depends_on SETUP-002, SETUP-002 depends_on SETUP-001 (circular). Call tool. Assert issues contains circular dependency error with severity='critical'.", "depends_on": ["TEST-003"], "acceptance_criteria": [ "Test creates plan with circular dependency", "Verifies circular dependency issue is detected", "Verifies severity='critical'", "Verifies issue message mentions both task IDs", "Test passes" ], "estimated_effort": "15 minutes" } ] } }, "7_testing_strategy": { "unit_testing": { "approach": "Test each validator method independently with controlled inputs", "test_cases": [ "validate_structure: Plan with all sections present → no issues", "validate_structure: Plan missing section 5 → critical issue", "validate_completeness: Plan with no placeholders → no issues", "validate_completeness: Plan with 'TBD' text → major issue", "validate_completeness: Task IDs all unique → no issues", "validate_completeness: Duplicate task ID → critical issue", "validate_completeness: Circular dependency A→B→A → critical issue", "validate_quality: Task descriptions >20 words → no issues", "validate_quality: Task description <10 words → major issue", "validate_autonomy: No ambiguous phrases → no issues", "validate_autonomy: Contains 'might' → major issue", "calculate_score: 0 issues → score 100", "calculate_score: 3 critical issues → score 70", "calculate_score: Mixed issues (1 critical, 2 major, 1 minor) → score 79", "determine_result: score 95 → PASS", "determine_result: score 87 → PASS_WITH_WARNINGS", "determine_result: score 75 → NEEDS_REVISION", "determine_result: score 65 → FAIL" ], "assertions": [ "Return types match ValidationResultDict structure", "Scoring algorithm is consistent (same plan = same score)", "Issue messages are specific and helpful", "Suggestions are actionable", "No false positives on perfect plans" ] }, "integration_testing": { "approach": "Test complete validate() workflow with realistic plan fixtures", "test_cases": [ "Validate perfect plan (all sections, no issues) → score 100, PASS", "Validate flawed plan (missing sections, placeholders) → score 60, FAIL", "Validate minimal plan (barely functional) → score 40, FAIL", "Validate plan with circular dependencies → critical issue, low score", "Validate plan with missing sections → critical issues" ], "assertions": [ "All 5 keys present in ValidationResultDict", "Results are coherent (low score = many issues)", "Issues identified are accurate", "Performance < 2 seconds for typical plans" ] }, "error_handling_testing": { "test_cases": [ "Invalid project_path (relative path) → ErrorResponse.invalid_input", "Plan file not found → ErrorResponse.not_found", "Malformed JSON in plan file → ErrorResponse.malformed_json", "Path traversal attempt (../../../etc/passwd) → ErrorResponse.invalid_input" ], "assertions": [ "All errors use ErrorResponse factory", "Errors are logged with log_error()", "Validation 
doesn't crash on errors", "Useful error messages returned to user" ] }, "edge_case_testing": { "test_cases": [ "Empty plan file (0 bytes) → malformed JSON error", "Plan with only META_DOCUMENTATION (no UNIVERSAL_PLANNING_STRUCTURE) → critical issue", "Plan with 1000+ tasks → validates in < 10 seconds", "Plan with very long task descriptions (500+ words) → no issues", "Plan with unicode characters in descriptions → no issues", "Plan with nested circular dependencies (A→B→C→A) → critical issue" ] }, "performance_testing": { "test_cases": [ "Small plan (< 10 tasks) → < 0.5 seconds", "Medium plan (10-100 tasks) → < 2 seconds", "Large plan (100-1000 tasks) → < 10 seconds" ], "metrics": [ "Total validation duration", "Duration per validator method", "Issues processed per second" ], "performance_targets": { "small_plan": "< 0.5 seconds", "medium_plan": "< 2 seconds", "large_plan": "< 10 seconds" } } }, "8_success_criteria": { "functional_requirements": [ { "criterion": "Tool validates Phase 1 plan correctly", "validation": "Run validate_implementation_plan on Phase 1 plan; verify score 95+, no false issues", "priority": "CRITICAL" }, { "criterion": "Tool catches all intentional flaws in test plans", "validation": "Create test plan with 10 known flaws; verify all 10 detected", "priority": "CRITICAL" }, { "criterion": "Circular dependency detection works", "validation": "Test with A→B→A, A→B→C→A; verify both detected", "priority": "HIGH" }, { "criterion": "Scoring algorithm is consistent", "validation": "Validate same plan 10 times; verify score is identical each time", "priority": "HIGH" }, { "criterion": "Tool handles errors gracefully", "validation": "Test with invalid paths, malformed JSON; verify no crashes, helpful error messages", "priority": "HIGH" } ], "quality_requirements": [ { "criterion": "Code follows all architecture patterns", "validation": "Code review: ARCH-001 (ErrorResponse), QUA-001 (TypedDict), QUA-002 (handler registry), REF-002 (constants), REF-003 (validation), ARCH-003 (logging)", "priority": "CRITICAL" }, { "criterion": "All error paths use ErrorResponse factory", "validation": "Verify 4 error types handled: ValueError, FileNotFoundError, json.JSONDecodeError, Exception", "priority": "CRITICAL" }, { "criterion": "Returns valid ValidationResultDict", "validation": "Call tool; verify return type has all 5 required keys: validation_result, score, issues, checklist_results, approved", "priority": "CRITICAL" }, { "criterion": "Issue suggestions are actionable", "validation": "Review 10 sample issues; verify each has specific suggestion with examples", "priority": "HIGH" } ], "performance_requirements": [ { "criterion": "Validation completes in < 2s for typical plans", "validation": "Test with 50-task plan; measure duration; assert < 2 seconds", "priority": "HIGH" }, { "criterion": "Validation completes in < 10s for large plans", "validation": "Test with 500-task plan; measure duration; assert < 10 seconds", "priority": "MEDIUM" } ], "security_requirements": [ { "criterion": "Path traversal prevention", "validation": "Test with ../../../etc/passwd; verify blocked by validation", "priority": "CRITICAL" }, { "criterion": "Plan path validation", "validation": "Test with relative paths, non-existent paths; verify ErrorResponse.invalid_input", "priority": "CRITICAL" } ] }, "9_implementation_checklist": { "pre_implementation": [ "☐ Review Phase 3 plan for completeness", "☐ Get user approval on Phase 3 approach" ], "infrastructure_setup": [ "☐ INFRA-001: Create 
generators/plan_validator.py with PlanValidator class", "☐ INFRA-002: Implement validate() main method signature", "☐ INFRA-003: Add method stubs for all validator methods" ], "structure_and_completeness": [ "☐ VALID-001: Implement validate_structure() method", "☐ VALID-002: Implement validate_completeness() method", "☐ VALID-003: Implement circular dependency detection" ], "quality_and_autonomy": [ "☐ VALID-004: Implement validate_quality() method", "☐ VALID-005: Implement validate_autonomy() method" ], "scoring_and_checklist": [ "☐ VALID-006: Implement _build_checklist_results() method", "☐ VALID-007: Test scoring algorithm with edge cases" ], "mcp_integration": [ "☐ TOOL-001: Add validate_implementation_plan tool definition in server.py", "☐ TOOL-002: Implement handle_validate_implementation_plan handler", "☐ TOOL-003: Register handler in TOOL_HANDLERS dict", "☐ TOOL-004: Add required imports to tool_handlers.py" ], "testing": [ "☐ TEST-001: Create test_validate_plan.py with fixtures", "☐ TEST-002: Test perfect plan validation", "☐ TEST-003: Test flawed plan validation", "☐ TEST-004: Test circular dependency detection" ], "validation": [ "☐ Run all tests and verify 100% pass", "☐ Test on Phase 1 plan - verify score 95+", "☐ Test on flawed plan - verify issues detected", "☐ Test circular dependencies - verify detection", "☐ Performance test: verify < 2s for typical plans", "☐ Security test: verify path traversal blocked", "☐ Error handling test: verify all error types handled" ], "finalization": [ "☐ Code review for architecture compliance", "☐ Commit Phase 3 implementation", "☐ Update meta plan: mark Phase 3 tasks complete", "☐ Proceed to Phase 4 (Tool #4: generate_plan_review_report)" ] } } }
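
Note on the scoring rules implied above: the calculate_score and determine_result test cases in 7_testing_strategy (3 critical issues → 70; 1 critical + 2 major + 1 minor → 79; 95 → PASS; 87 → PASS_WITH_WARNINGS; 75 → NEEDS_REVISION; 65 → FAIL) are consistent with per-issue penalties of 10/5/1 and result thresholds at 90/85/70. The sketch below only illustrates that inference; the exact constants are decided in the VALID scoring tasks and are not fixed by this plan.

    # Illustrative sketch only -- penalty weights and thresholds are inferred from the
    # test cases in 7_testing_strategy, not confirmed elsewhere in this plan.
    SEVERITY_PENALTIES = {'critical': 10, 'major': 5, 'minor': 1}

    def calculate_score(issues: list[dict]) -> int:
        """Start at 100 and subtract a penalty per issue, clamped to 0."""
        score = 100
        for issue in issues:
            score -= SEVERITY_PENALTIES.get(issue['severity'], 0)
        return max(score, 0)

    def determine_result(score: int) -> str:
        """Map a 0-100 score onto a validation_result value."""
        if score >= 90:
            return 'PASS'
        if score >= 85:
            return 'PASS_WITH_WARNINGS'
        if score >= 70:
            return 'NEEDS_REVISION'
        return 'FAIL'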
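
For the circular dependency checks exercised in TEST-004 and the edge-case tests (SETUP-001→SETUP-002→SETUP-001 and A→B→C→A), a depth-first traversal over the depends_on graph is one straightforward approach. The sketch below assumes tasks are dicts with 'id' and 'depends_on' keys, as used throughout this plan; the helper name find_dependency_cycles is hypothetical.

    # Illustrative sketch of the cycle check behind VALID-003 / TEST-004.
    def find_dependency_cycles(tasks: list[dict]) -> list[list[str]]:
        """Return each cycle found in the depends_on graph as a list of task IDs."""
        graph = {t['id']: t.get('depends_on', []) for t in tasks}
        WHITE, GRAY, BLACK = 0, 1, 2
        color = {task_id: WHITE for task_id in graph}
        cycles: list[list[str]] = []

        def visit(node: str, path: list[str]) -> None:
            color[node] = GRAY
            path.append(node)
            for dep in graph.get(node, []):
                if dep not in graph:
                    continue  # unknown reference; flagged separately as an invalid depends_on
                if color[dep] == GRAY:
                    # dep is on the current path, so the slice from dep to here is a cycle,
                    # e.g. SETUP-001 -> SETUP-002 -> SETUP-001
                    cycles.append(path[path.index(dep):] + [dep])
                elif color[dep] == WHITE:
                    visit(dep, path)
            path.pop()
            color[node] = BLACK

        for task_id in graph:
            if color[task_id] == WHITE:
                visit(task_id, [])
        return cycles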
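
The validate_completeness and validate_autonomy checks described above scan task text for placeholders (TBD, TODO, [placeholder]), ambiguous phrases ('might', 'could', 'maybe'), and unresolved questions. A minimal sketch of such a scan follows; the phrase lists, helper name scan_description, and issue wording are assumptions, while the issue dict keys (severity, section, issue, suggestion) follow ValidationIssueDict.

    # Illustrative sketch of the text-level checks; lists and messages are assumptions.
    import re

    PLACEHOLDER_MARKERS = ('TBD', 'TODO', '[placeholder]')
    AMBIGUOUS_PHRASES = ('might', 'could', 'maybe')

    def scan_description(section: str, task_id: str, text: str) -> list[dict]:
        issues: list[dict] = []
        lowered = text.lower()
        for marker in PLACEHOLDER_MARKERS:
            if marker.lower() in lowered:
                issues.append({
                    'severity': 'major',
                    'section': section,
                    'issue': f'{task_id}: description contains placeholder "{marker}"',
                    'suggestion': 'Replace the placeholder with concrete implementation details',
                })
        for phrase in AMBIGUOUS_PHRASES:
            if re.search(rf'\b{phrase}\b', text, re.IGNORECASE):
                issues.append({
                    'severity': 'major',
                    'section': section,
                    'issue': f'{task_id}: ambiguous phrase "{phrase}" leaves a decision open',
                    'suggestion': 'State the chosen approach so the plan can be executed autonomously',
                })
        if '?' in text:
            issues.append({
                'severity': 'major',
                'section': section,
                'issue': f'{task_id}: description contains an unresolved question',
                'suggestion': 'Answer the question in the plan instead of deferring it',
            })
        return issues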
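
The TEST-002 acceptance criteria translate directly into an async test against the handler. The sketch below assumes the fixture lives under test_fixtures/ inside the project directory and uses a placeholder absolute project path; it is a usage illustration, not the final test file.

    # Minimal sketch of the perfect-plan test described in TEST-002.
    import asyncio
    import json

    from tool_handlers import handle_validate_implementation_plan

    async def test_perfect_plan() -> None:
        # Placeholder absolute path -- substitute the real project root when running.
        arguments = {
            'project_path': '/absolute/path/to/project',
            'plan_file_path': 'test_fixtures/perfect_plan.json',
        }
        content = await handle_validate_implementation_plan(arguments)
        result = json.loads(content[0].text)
        assert result['score'] == 100
        assert result['validation_result'] == 'PASS'
        assert result['issues'] == []
        assert result['approved'] is True

    if __name__ == '__main__':
        asyncio.run(test_perfect_plan())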
