# /src/executor.py
import json
import logging
import time
import os
from patchright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError, Error as PlaywrightError, expect
from typing import Optional, Dict, Any, Tuple, List
from pydantic import BaseModel, Field
import re
from PIL import Image
from pixelmatch.contrib.PIL import pixelmatch
import io
from ..browser.browser_controller import BrowserController # Re-use for browser setup/teardown
from ..llm.llm_client import LLMClient
from ..agents.recorder_agent import WebAgent
from ..utils.image_utils import compare_images
# Define a short timeout specifically for selector validation during healing
HEALING_SELECTOR_VALIDATION_TIMEOUT_MS = 2000
class HealingSelectorSuggestion(BaseModel):
"""Schema for the LLM's suggested replacement selector during healing."""
new_selector: Optional[str] = Field(None, description="The best suggested alternative CSS selector based on visual and DOM context, or null if no suitable alternative is found.")
reasoning: str = Field(..., description="Explanation for the suggested selector choice or the reason why healing could not determine a better selector.")
logger = logging.getLogger(__name__)
class TestExecutor:
"""
Executes a recorded test case from a JSON file deterministically using Playwright.
"""
def __init__(self,
llm_client: Optional[LLMClient],
headless: bool = True,
default_timeout: int = 5000, # Default timeout for actions/assertions
enable_healing: bool = False, # Flag for healing
healing_mode: str = 'soft', # Healing mode ('soft' or 'hard')
healing_retries: int = 1, # Max soft healing attempts per step
baseline_dir: str = "./visual_baselines", # Add baseline dir
pixel_threshold: float = 0.01, # Default 1% pixel difference threshold
get_performance: bool = False,
get_network_requests: bool = False
):
self.headless = headless
self.default_timeout = default_timeout # Milliseconds
self.llm_client = llm_client
self.browser_controller: Optional[BrowserController] = None
self.page: Optional[Page] = None
self.enable_healing = enable_healing
self.healing_mode = healing_mode
self.healing_retries_per_step = healing_retries
self.healing_attempts_log: List[Dict] = [] # To store healing attempts info
self.get_performance = get_performance
self.get_network_requests = get_network_requests
logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms).")
log_message = ""
if self.enable_healing:
log_message += f" with Healing ENABLED (mode={self.healing_mode}, retries={self.healing_retries_per_step})"
if not self.llm_client:
logger.warning("Self-healing enabled, but LLMClient not provided. Healing will not function.")
else:
log_message += f" using LLM provider '{self.llm_client.provider}'."
else:
log_message += "."
logger.info(log_message)
if not self.llm_client and not headless: # Vision verification needs LLM
logger.warning("TestExecutor initialized without LLMClient. Vision-based assertions ('assert_passed_verification') will fail.")
elif self.llm_client:
logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms) with LLMClient for provider '{self.llm_client.provider}'.")
else:
logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms). LLMClient not provided (headless mode or vision assertions not needed).")
self.baseline_dir = os.path.abspath(baseline_dir)
self.pixel_threshold = pixel_threshold # Store threshold
logger.info(f"TestExecutor initialized (visual baseline dir: {self.baseline_dir}, pixel threshold: {self.pixel_threshold*100:.2f}%)")
os.makedirs(self.baseline_dir, exist_ok=True) # Ensure baseline dir exists
def _get_locator(self, selector: str):
"""Helper to get a Playwright locator, handling potential errors."""
if not self.page:
raise PlaywrightError("Page is not initialized.")
if not selector:
raise ValueError("Selector cannot be empty.")
is_likely_xpath = selector.startswith(('/', '(', '//')) or \
('/' in selector and not any(c in selector for c in ['#', '.', '[', '>', '+', '~']))
        # If it looks like XPath but lacks an explicit engine prefix, add 'xpath=' so
        # Playwright does not try to interpret it as a CSS selector.
        processed_selector = selector
        if is_likely_xpath and not selector.startswith(('css=', 'xpath=')):
            logger.warning(f"Selector '{selector}' looks like XPath but lacks a prefix. Assuming XPath and adding 'xpath=' prefix.")
            processed_selector = f"xpath={selector}"
try:
logger.debug(f"Attempting to locate using: '{processed_selector}'")
return self.page.locator(processed_selector).first
except Exception as e:
# Catch errors during locator creation itself (e.g., invalid selector syntax)
logger.error(f"Failed to create locator for processed selector: '{processed_selector}'. Original: '{selector}'. Error: {e}")
# Re-raise using the processed selector in the message for clarity
raise PlaywrightError(f"Invalid selector syntax or error creating locator: '{processed_selector}'. Error: {e}") from e
def _load_baseline(self, baseline_id: str) -> Tuple[Optional[Image.Image], Optional[Dict]]:
"""Loads the baseline image and metadata."""
metadata_path = os.path.join(self.baseline_dir, f"{baseline_id}.json")
image_path = os.path.join(self.baseline_dir, f"{baseline_id}.png") # Assume PNG
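        # Expected baseline layout (illustrative id), produced by whatever tool captured the baseline:
        #   <baseline_dir>/login_page.png   -> screenshot used for the pixel comparison
        #   <baseline_dir>/login_page.json  -> metadata such as "viewport_size", "url_captured", "selector_captured"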
if not os.path.exists(metadata_path) or not os.path.exists(image_path):
logger.error(f"Baseline files not found for ID '{baseline_id}' in {self.baseline_dir}")
return None, None
try:
with open(metadata_path, 'r', encoding='utf-8') as f:
metadata = json.load(f)
baseline_img = Image.open(image_path).convert("RGBA") # Load and ensure RGBA
logger.info(f"Loaded baseline '{baseline_id}' (Image: {image_path}, Metadata: {metadata_path})")
return baseline_img, metadata
except Exception as e:
logger.error(f"Error loading baseline files for ID '{baseline_id}': {e}", exc_info=True)
return None, None
def _attempt_soft_healing(
self,
failed_step: Dict[str, Any],
failed_selector: Optional[str],
error_message: str
) -> Tuple[bool, Optional[str], str]:
"""
Attempts to find a new selector using the LLM based on the failed step's context and validate it.
Returns:
Tuple[bool, Optional[str], str]: (healing_success, new_selector, reasoning)
"""
if not self.llm_client:
logger.error("Soft Healing: LLMClient not available.")
return False, None, "LLMClient not configured for healing."
if not self.browser_controller or not self.page:
logger.error("Soft Healing: BrowserController or Page not available.")
return False, None, "Browser state unavailable for healing."
logger.info(f"Soft Healing: Gathering context for step {failed_step.get('step_id')}")
try:
current_url = self.browser_controller.get_current_url()
screenshot_bytes = self.browser_controller.take_screenshot()
dom_state = self.browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1)
dom_context_str = "DOM context could not be retrieved."
if dom_state and dom_state.element_tree:
dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification')
else:
logger.warning("Soft Healing: Failed to get valid DOM state.")
if not screenshot_bytes:
logger.error("Soft Healing: Failed to capture screenshot.")
return False, None, "Failed to capture screenshot for context."
except Exception as e:
logger.error(f"Soft Healing: Error gathering context: {e}", exc_info=True)
return False, None, f"Error gathering context: {e}"
# Construct the prompt
prompt = f"""You are an AI Test Self-Healing Assistant. A step in an automated test failed, likely due to an incorrect or outdated CSS selector. Your goal is to analyze the current page state and suggest a more robust replacement selector for the intended element.
**Failed Test Step Information:**
- Step Description: "{failed_step.get('description', 'N/A')}"
- Original Action: "{failed_step.get('action', 'N/A')}"
- Failed Selector: `{failed_selector or 'N/A'}`
- Error Message: "{error_message}"
**Current Page State:**
- URL: {current_url}
- Attached Screenshot: Analyze the visual layout to identify the target element corresponding to the step description.
- HTML Context (Visible elements, interactive `[index]`, static `(Static)`):
```html
{dom_context_str}
```
**Your Task:**
1. Based on the step description, the original action, the visual screenshot, AND the HTML context, identify the element the test likely intended to interact with.
2. Suggest a **single, robust CSS selector** for this element using **NATIVE attributes** (like `id`, `name`, `data-testid`, `data-cy`, `aria-label`, `placeholder`, unique visible text combined with tag, stable class combinations).
3. **CRITICAL: Do NOT suggest selectors based on `data-ai-id` or unstable attributes (e.g., dynamic classes, complex positional selectors like :nth-child unless absolutely necessary and combined with other stable attributes).**
4. Prioritize standard, semantic, and test-specific attributes (`id`, `data-testid`, `name`).
5. If you cannot confidently identify the intended element or find a robust selector, return `null` for `new_selector`.
**Output Format:** Respond ONLY with a JSON object matching the following schema:
```json
{{
"new_selector": "YOUR_SUGGESTED_CSS_SELECTOR_OR_NULL",
"reasoning": "Explain your choice of selector, referencing visual cues, HTML attributes, and the original step description. If returning null, explain why."
}}
```
"""
try:
logger.info("Soft Healing: Requesting selector suggestion from LLM...")
response_obj = self.llm_client.generate_json(
HealingSelectorSuggestion,
prompt,
image_bytes=screenshot_bytes
)
if isinstance(response_obj, HealingSelectorSuggestion):
if response_obj.new_selector:
suggested_selector = response_obj.new_selector
logger.info(f"Soft Healing: LLM suggested new selector: '{response_obj.new_selector}'. Reasoning: {response_obj.reasoning}")
logger.info(f"Soft Healing: Validating suggested selector '{suggested_selector}'...")
validation_passed = False
validation_reasoning_suffix = ""
try:
                            # Quick existence check: wait briefly for the first match to attach,
                            # then count all matches (the count itself is immediate).
                            self.page.locator(suggested_selector).first.wait_for(
                                state="attached", timeout=HEALING_SELECTOR_VALIDATION_TIMEOUT_MS
                            )
                            count = self.page.locator(suggested_selector).count()
if count > 0:
validation_passed = True
logger.info(f"Soft Healing: Validation PASSED. Selector '{suggested_selector}' found {count} element(s).")
if count > 1:
logger.warning(f"Soft Healing: Suggested selector '{suggested_selector}' found {count} elements (expected 1). Will target the first.")
else: # count == 0
logger.warning(f"Soft Healing: Validation FAILED. Selector '{suggested_selector}' found 0 elements within {HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms.")
validation_reasoning_suffix = " [Validation Failed: Selector found 0 elements]"
except PlaywrightTimeoutError:
logger.warning(f"Soft Healing: Validation TIMEOUT ({HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms) checking selector '{suggested_selector}'.")
validation_reasoning_suffix = f" [Validation Failed: Timeout after {HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms]"
except PlaywrightError as e: # Catch invalid selector syntax errors
logger.warning(f"Soft Healing: Validation FAILED. Invalid selector syntax for '{suggested_selector}'. Error: {e}")
validation_reasoning_suffix = f" [Validation Failed: Invalid selector syntax - {e}]"
except Exception as e:
logger.error(f"Soft Healing: Unexpected error during selector validation for '{suggested_selector}': {e}", exc_info=True)
validation_reasoning_suffix = f" [Validation Error: {type(e).__name__}]"
# --- End Validation Step ---
# Return success only if validation passed
if validation_passed:
return True, suggested_selector, response_obj.reasoning
else:
# Update reasoning with validation failure details
return False, None, response_obj.reasoning + validation_reasoning_suffix
else:
logger.warning(f"Soft Healing: LLM could not suggest a new selector. Reasoning: {response_obj.reasoning}")
return False, None, response_obj.reasoning
elif isinstance(response_obj, str): # LLM returned an error string
logger.error(f"Soft Healing: LLM returned an error: {response_obj}")
return False, None, f"LLM Error: {response_obj}"
else: # Unexpected response type
logger.error(f"Soft Healing: Unexpected response type from LLM: {type(response_obj)}")
return False, None, f"Unexpected LLM response type: {type(response_obj)}"
except Exception as llm_e:
logger.error(f"Soft Healing: Error during LLM communication: {llm_e}", exc_info=True)
return False, None, f"LLM communication error: {llm_e}"
def _trigger_hard_healing(self, feature_description: str, original_file_path: str) -> None:
"""
Closes the current browser and triggers the WebAgent to re-record the test.
"""
logger.warning("--- Triggering Hard Healing (Re-Recording) ---")
if not feature_description:
logger.error("Hard Healing: Cannot re-record without the original feature description.")
return
if not self.llm_client:
logger.error("Hard Healing: Cannot re-record without an LLMClient.")
return
# 1. Close current browser
try:
if self.browser_controller:
self.browser_controller.close()
self.browser_controller = None
self.page = None
logger.info("Hard Healing: Closed executor browser.")
except Exception as close_err:
logger.error(f"Hard Healing: Error closing executor browser: {close_err}")
# Continue anyway, try to re-record
# 2. Instantiate Recorder Agent
# NOTE: Assume re-recording is automated. Add flag if interactive needed.
try:
logger.info("Hard Healing: Initializing WebAgent for automated re-recording...")
# Use the existing LLM client
recorder_agent = WebAgent(
llm_client=self.llm_client,
headless=False, # Re-recording needs visible browser initially
is_recorder_mode=True,
automated_mode=True, # Run re-recording automatically
# Pass original filename stem to maybe overwrite or create variant
filename=os.path.splitext(os.path.basename(original_file_path))[0] + "_healed_"
)
# 3. Run Recorder
logger.info(f"Hard Healing: Starting re-recording for feature: '{feature_description}'")
recording_result = recorder_agent.record(feature_description)
# 4. Log Outcome
if recording_result.get("success"):
logger.info(f"✅ Hard Healing: Re-recording successful. New test file saved to: {recording_result.get('output_file')}")
else:
logger.error(f"❌ Hard Healing: Re-recording FAILED. Message: {recording_result.get('message')}")
except Exception as record_err:
logger.critical(f"❌ Hard Healing: Critical error during re-recording setup or execution: {record_err}", exc_info=True)
def run_test(self, json_file_path: str) -> Dict[str, Any]:
"""Loads and executes the test steps from the JSON file."""
start_time = time.time()
self.healing_attempts_log = [] # Reset log for this run
any_step_successfully_healed = False
run_status = {
"test_file": json_file_path,
"status": "FAIL", # Default to fail
"message": "Execution initiated.",
"steps_executed": 0,
"failed_step": None,
"error_details": None,
"screenshot_on_failure": None,
"console_messages_on_failure": [],
"all_console_messages": [],
"performance_timing": None,
"network_requests": [],
"duration_seconds": 0.0,
"healing_enabled": self.enable_healing,
"healing_mode": self.healing_mode if self.enable_healing else "disabled",
"healing_attempts": self.healing_attempts_log, # Reference the list
"healed_file_saved": False,
"healed_steps_count": 0,
"visual_assertion_results": []
}
try:
# --- Load Test Data ---
logger.info(f"Loading test case from: {json_file_path}")
if not os.path.exists(json_file_path):
raise FileNotFoundError(f"Test file not found: {json_file_path}")
with open(json_file_path, 'r', encoding='utf-8') as f:
test_data = json.load(f)
modified_test_data = test_data.copy()
steps = modified_test_data.get("steps", [])
            # Determine the viewport from the first visual baseline referenced by the test (if any),
            # so the executor browser matches the size the baseline was captured at.
            viewport = None
            for step in steps:
                if step.get("action") != "assert_visual_match":
                    continue
                baseline_id = step.get('parameters', {}).get('baseline_id')
                if not baseline_id:
                    continue
                baseline_meta_path = os.path.join(self.baseline_dir, f"{baseline_id}.json")
                if os.path.exists(baseline_meta_path):
                    with open(baseline_meta_path, 'r', encoding='utf-8') as meta_file:
                        viewport = json.load(meta_file).get("viewport_size")
                    break
test_name = modified_test_data.get("test_name", "Unnamed Test")
feature_description = modified_test_data.get("feature_description", "")
first_navigation_done = False
run_status["test_name"] = test_name
logger.info(f"Executing test: '{test_name}' with {len(steps)} steps.")
if not steps:
raise ValueError("No steps found in the test file.")
# --- Setup Browser ---
self.browser_controller = BrowserController(headless=self.headless, viewport_size=viewport)
# Set default timeout before starting the page
self.browser_controller.default_action_timeout = self.default_timeout
self.browser_controller.default_navigation_timeout = max(self.default_timeout, 30000) # Ensure navigation timeout is reasonable
self.browser_controller.start()
self.page = self.browser_controller.page
if not self.page:
raise PlaywrightError("Failed to initialize browser page.")
# Re-apply default timeout to the page context AFTER it's created
self.page.set_default_timeout(self.default_timeout)
logger.info(f"Browser page initialized with default action timeout: {self.default_timeout}ms")
self.browser_controller.clear_console_messages()
self.browser_controller.clear_network_requests()
# --- Execute Steps ---
for i, step in enumerate(steps):
step_id = step.get("step_id", i + 1)
action = step.get("action")
original_selector = step.get("selector")
params = step.get("parameters", {})
description = step.get("description", f"Step {step_id}")
wait_after = step.get("wait_after_secs", 0) # Get wait time
run_status["steps_executed"] = i + 1 # Track steps attempted
logger.info(f"--- Executing Step {step_id}: {action} - {description} ---")
if original_selector: logger.info(f"Original Selector: {original_selector}")
if params: logger.info(f"Parameters: {params}")
# --- Healing Loop ---
step_healed = False
current_healing_attempts = 0
current_selector = original_selector # Start with the recorded selector
last_error = None # Store the last error encountered
successful_healed_selector_for_step = None
run_status["visual_assertion_results"] = []
while not step_healed and current_healing_attempts <= self.healing_retries_per_step:
try:
if action == "navigate":
url = params.get("url")
if not url: raise ValueError("Missing 'url' parameter for navigate.")
self.browser_controller.goto(url)# Uses default navigation timeout from context
if not first_navigation_done:
if self.get_performance:
run_status["performance_timing"] = self.browser_controller.page_performance_timing
first_navigation_done = True
elif action == "click":
if not current_selector: raise ValueError("Missing 'current_selector' for click.")
locator = self._get_locator(current_selector)
locator.click(timeout=self.default_timeout) # Explicit timeout for action
elif action == "type":
text = params.get("text")
if not current_selector: raise ValueError("Missing 'current_selector' for type.")
if text is None: raise ValueError("Missing 'text' parameter for type.")
locator = self._get_locator(current_selector)
locator.fill(text, timeout=self.default_timeout) # Use fill for robustness
elif action == "scroll": # Less common, but support if recorded
direction = params.get("direction")
if direction not in ["up", "down"]: raise ValueError("Invalid 'direction'.")
amount = "window.innerHeight" if direction=="down" else "-window.innerHeight"
self.page.evaluate(f"window.scrollBy(0, {amount})")
elif action == "check":
if not current_selector: raise ValueError("Missing 'current_selector' for check action.")
# Use the browser_controller method which handles locator/timeout
self.browser_controller.check(current_selector)
elif action == "uncheck":
if not current_selector: raise ValueError("Missing 'current_selector' for uncheck action.")
# Use the browser_controller method
self.browser_controller.uncheck(current_selector)
elif action == "select":
option_label = params.get("option_label")
option_value = params.get("option_value") # Support value too if recorded
option_index_str = params.get("option_index") # Support index if recorded
option_param = None
param_type = None
if option_label is not None:
option_param = {"label": option_label}
param_type = f"label '{option_label}'"
elif option_value is not None:
option_param = {"value": option_value}
param_type = f"value '{option_value}'"
elif option_index_str is not None and option_index_str.isdigit():
option_param = {"index": int(option_index_str)}
param_type = f"index {option_index_str}"
else:
raise ValueError("Missing 'option_label', 'option_value', or 'option_index' parameter for select action.")
if not current_selector: raise ValueError("Missing 'current_selector' for select action.")
logger.info(f"Selecting option by {param_type} in element: {current_selector}")
locator = self._get_locator(current_selector)
locator.select_option(**option_param, timeout=self.default_timeout)
elif action == "wait": # Generic wait action
timeout_s = params.get("timeout_seconds")
target_url = params.get("url")
element_state = params.get("state") # e.g., 'visible', 'hidden'
wait_selector = current_selector # Use current (potentially healed) selector if waiting for element
if timeout_s is not None and not target_url and not element_state:
# Simple time wait
logger.info(f"Waiting for {timeout_s} seconds...")
self.page.wait_for_timeout(timeout_s * 1000)
elif wait_selector and element_state:
# Wait for element state
logger.info(f"Waiting for element '{wait_selector}' to be '{element_state}' (max {self.default_timeout}ms)...")
locator = self._get_locator(wait_selector)
locator.wait_for(state=element_state, timeout=self.default_timeout)
elif target_url:
# Wait for URL
logger.info(f"Waiting for URL matching '{target_url}' (max {self.browser_controller.default_navigation_timeout}ms)...")
self.page.wait_for_url(target_url, timeout=self.browser_controller.default_navigation_timeout)
else:
raise ValueError("Invalid parameters for 'wait' action. Need timeout_seconds OR (selector and state) OR url.")
elif action == "wait_for_load_state":
state = params.get("state", "load")
self.page.wait_for_load_state(state, timeout=self.browser_controller.default_navigation_timeout) # Use navigation timeout
elif action == "wait_for_selector": # Explicit wait
wait_state = params.get("state", "visible")
timeout = params.get("timeout_ms", self.default_timeout)
if not current_selector: raise ValueError("Missing 'current_selector' for wait_for_selector.")
locator = self._get_locator(current_selector)
locator.wait_for(state=wait_state, timeout=timeout)
elif action == "key_press":
keys = params.get("keys")
if not current_selector: raise ValueError("Missing 'selector' for key_press.")
if not keys: raise ValueError("Missing 'keys' parameter for key_press.")
# Use controller method or locator directly
locator = self._get_locator(current_selector)
locator.press(keys, timeout=self.default_timeout)
# self.browser_controller.press(current_selector, keys) # Alt: if using controller method
elif action == "drag_and_drop":
target_selector = params.get("target_selector")
source_selector = current_selector # Source is in the main 'selector' field
if not source_selector: raise ValueError("Missing source 'selector' for drag_and_drop.")
if not target_selector: raise ValueError("Missing 'target_selector' in parameters for drag_and_drop.")
# Use controller method or locators directly
source_locator = self._get_locator(source_selector)
target_locator = self._get_locator(target_selector)
source_locator.drag_to(target_locator, timeout=self.default_timeout)
# self.browser_controller.drag_and_drop(source_selector, target_selector) # Alt: if using controller
# --- Assertions ---
elif action == "assert_text_contains":
expected_text = params.get("expected_text")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if expected_text is None: raise ValueError("Missing 'expected_text'.")
locator = self._get_locator(current_selector)
expect(locator).to_contain_text(expected_text, timeout=self.default_timeout)
elif action == "assert_text_equals":
expected_text = params.get("expected_text")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if expected_text is None: raise ValueError("Missing 'expected_text'.")
locator = self._get_locator(current_selector)
expect(locator).to_have_text(expected_text, timeout=self.default_timeout)
elif action == "assert_visible":
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
locator = self._get_locator(current_selector)
expect(locator).to_be_visible(timeout=self.default_timeout)
elif action == "assert_hidden":
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
locator = self._get_locator(current_selector)
expect(locator).to_be_hidden(timeout=self.default_timeout)
elif action == "assert_attribute_equals":
attr_name = params.get("attribute_name")
expected_value = params.get("expected_value")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if not attr_name: raise ValueError("Missing 'attribute_name'.")
if expected_value is None: raise ValueError("Missing 'expected_value'.")
locator = self._get_locator(current_selector)
expect(locator).to_have_attribute(attr_name, expected_value, timeout=self.default_timeout)
elif action == "assert_element_count":
expected_count = params.get("expected_count")
if not current_selector: raise ValueError("Missing 'current_selector' for assertion.")
if expected_count is None: raise ValueError("Missing 'expected_count'.")
if not isinstance(expected_count, int): raise ValueError("'expected_count' must be an integer.") # Add type check
# --- FIX: Get locator for count without using .first ---
# Apply the same current_selector processing as in _get_locator if needed
is_likely_xpath = current_selector.startswith(('/', '(', '//')) or \
('/' in current_selector and not any(c in current_selector for c in ['#', '.', '[', '>', '+', '~']))
processed_selector = current_selector
if is_likely_xpath and not current_selector.startswith(('css=', 'xpath=')):
processed_selector = f"xpath={current_selector}"
# Get the locator for potentially MULTIPLE elements
count_locator = self.page.locator(processed_selector)
# --- End FIX ---
logger.info(f"Asserting count of elements matching '{processed_selector}' to be {expected_count}")
expect(count_locator).to_have_count(expected_count, timeout=self.default_timeout)
elif action == "assert_checked":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_checked.")
locator = self._get_locator(current_selector)
# Use Playwright's dedicated assertion for checked state
expect(locator).to_be_checked(timeout=self.default_timeout)
elif action == "assert_not_checked":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_not_checked.")
locator = self._get_locator(current_selector)
# Use .not modifier with the checked assertion
expect(locator).not_to_be_checked(timeout=self.default_timeout)
elif action == "assert_disabled":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_disabled.")
locator = self._get_locator(current_selector)
# Use Playwright's dedicated assertion for disabled state
expect(locator).to_be_disabled(timeout=self.default_timeout)
elif action == "assert_enabled":
if not current_selector: raise ValueError("Missing 'current_selector' for assert_enabled.")
locator = self._get_locator(current_selector)
expect(locator).to_be_enabled(timeout=self.default_timeout)
elif action == "task_replanned":
pass
elif action == "assert_visual_match":
baseline_id = params.get("baseline_id")
element_selector = step.get("selector") # Use step's selector if available
use_llm = params.get("use_llm_fallback", True)
# Allow overriding threshold per step
step_threshold = params.get("pixel_threshold", self.pixel_threshold)
if not baseline_id:
raise ValueError("Missing 'baseline_id' parameter for assert_visual_match.")
logger.info(f"--- Performing Visual Assertion: '{baseline_id}' (Selector: {element_selector}, Threshold: {step_threshold*100:.2f}%, LLM: {use_llm}) ---")
# 1. Load Baseline
baseline_img, baseline_meta = self._load_baseline(baseline_id)
if not baseline_img or not baseline_meta:
raise FileNotFoundError(f"Baseline '{baseline_id}' not found or failed to load.")
# 2. Capture Current State
current_screenshot_bytes = None
if element_selector:
current_screenshot_bytes = self.browser_controller.take_screenshot_element(element_selector)
else:
current_screenshot_bytes = self.browser_controller.take_screenshot() # Full page
if not current_screenshot_bytes:
raise PlaywrightError("Failed to capture current screenshot for visual comparison.")
try:
# Create a BytesIO buffer to treat the bytes like a file
buffer = io.BytesIO(current_screenshot_bytes)
# Open the image from the buffer using Pillow
img = Image.open(buffer)
# Ensure the image is in RGBA format for consistency,
# especially important for pixel comparisons that might expect an alpha channel.
logger.info("received")
current_img = img.convert("RGBA")
except Exception as e:
logger.error(f"Failed to convert bytes to PIL Image: {e}", exc_info=True)
current_img = None
if not current_img:
raise RuntimeError("Failed to process current screenshot bytes into an image.")
# 3. Pre-check Dimensions
if baseline_img.size != current_img.size:
size_mismatch_msg = f"Visual Assertion Failed: Image dimensions mismatch for '{baseline_id}'. Baseline: {baseline_img.size}, Current: {current_img.size}."
logger.error(size_mismatch_msg)
# Save current image for debugging
ts = time.strftime("%Y%m%d_%H%M%S")
current_img_path = os.path.join("output", f"visual_fail_{baseline_id}_current_{ts}.png")
current_img.save(current_img_path)
logger.info(f"Saved current image (dimension mismatch) to: {current_img_path}")
raise AssertionError(size_mismatch_msg) # Fail the assertion
# 4. Pixel Comparison
img_diff = Image.new("RGBA", baseline_img.size) # Image to store diff pixels
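                        # Note: pixelmatch's `threshold=0.1` below is the per-pixel colour-distance tolerance;
                        # it is independent of `step_threshold`, the allowed fraction of mismatched pixels
                        # for the whole image, which is checked further down.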
try:
mismatched_pixels = pixelmatch(baseline_img, current_img, img_diff, includeAA=True, threshold=0.1) # Use default pixelmatch threshold first
except Exception as pm_error:
logger.error(f"Error during pixelmatch comparison for '{baseline_id}': {pm_error}", exc_info=True)
raise RuntimeError(f"Pixelmatch library error: {pm_error}") from pm_error
total_pixels = baseline_img.width * baseline_img.height
diff_ratio = mismatched_pixels / total_pixels if total_pixels > 0 else 0
logger.info(f"Pixel comparison for '{baseline_id}': Mismatched Pixels = {mismatched_pixels}, Total Pixels = {total_pixels}, Difference = {diff_ratio*100:.4f}%")
# 5. Check against threshold
pixel_match_passed = diff_ratio <= step_threshold
llm_reasoning = None
diff_image_path = None
if pixel_match_passed:
logger.info(f"✅ Visual Assertion PASSED (Pixel Diff <= Threshold) for '{baseline_id}'.")
# Step completed successfully
else:
logger.warning(f"Visual Assertion: Pixel difference ({diff_ratio*100:.4f}%) exceeds threshold ({step_threshold*100:.2f}%) for '{baseline_id}'.")
# Save diff image regardless of LLM outcome
ts = time.strftime("%Y%m%d_%H%M%S")
diff_image_path = os.path.join("output", f"visual_diff_{baseline_id}_{ts}.png")
try:
img_diff.save(diff_image_path)
logger.info(f"Saved pixel difference image to: {diff_image_path}")
except Exception as save_err:
logger.error(f"Failed to save diff image: {save_err}")
diff_image_path = None # Mark as failed
# 6. LLM Fallback
if use_llm and self.llm_client:
logger.info(f"Attempting LLM visual comparison fallback for '{baseline_id}'...")
baseline_bytes = io.BytesIO()
baseline_img.save(baseline_bytes, format='PNG')
baseline_bytes = baseline_bytes.getvalue()
# --- UPDATED LLM PROMPT for Stitched Image ---
llm_prompt = f"""Analyze the combined image provided below for the purpose of automated software testing.
The LEFT half (labeled '1: Baseline') is the established baseline screenshot.
The RIGHT half (labeled '2: Current') is the current state screenshot.
Compare these two halves to determine if they are SEMANTICALLY equivalent from a user's perspective.
IGNORE minor differences like:
- Anti-aliasing variations
- Single-pixel shifts
- Tiny rendering fluctuations
- Small, insignificant dynamic content changes (e.g., blinking cursors, exact timestamps if not the focus).
FOCUS ON significant differences like:
- Layout changes (elements moved, resized, missing, added)
- Major color changes of key elements
- Text content changes (errors, different labels, etc.)
- Missing or fundamentally different images/icons.
Baseline ID: "{baseline_id}"
Captured URL (Baseline): "{baseline_meta.get('url_captured', 'N/A')}"
Selector (Baseline): "{baseline_meta.get('selector_captured', 'Full Page')}"
Based on these criteria, are the two halves (baseline vs. current) functionally and visually equivalent enough to PASS a visual regression test?
Respond ONLY with "YES" or "NO", followed by a brief explanation justifying your answer by referencing differences between the left and right halves.
Example YES: YES - The left (baseline) and right (current) images are visually equivalent. Minor text rendering differences are ignored.
Example NO: NO - The primary call-to-action button visible on the left (baseline) is missing on the right (current).
"""
# --- END UPDATED PROMPT ---
try:
# No change here, compare_images handles the stitching internally
llm_response = compare_images(llm_prompt, baseline_bytes, current_screenshot_bytes, self.llm_client)
logger.info(f"LLM visual comparison response for '{baseline_id}': {llm_response}")
llm_reasoning = llm_response # Store reasoning
if llm_response.strip().upper().startswith("YES"):
logger.info(f"✅ Visual Assertion PASSED (LLM Override) for '{baseline_id}'.")
pixel_match_passed = True # Override pixel result
elif llm_response.strip().upper().startswith("NO"):
logger.warning(f"Visual Assertion: LLM confirmed significant difference for '{baseline_id}'.")
pixel_match_passed = False # Confirm failure
else:
logger.warning(f"Visual Assertion: LLM response unclear for '{baseline_id}'. Treating as failure.")
pixel_match_passed = False
except Exception as llm_err:
logger.error(f"LLM visual comparison failed: {llm_err}", exc_info=True)
llm_reasoning = f"LLM Error: {llm_err}"
pixel_match_passed = False # Treat LLM error as failure
else: # LLM fallback not enabled or LLM not available
logger.warning(f"Visual Assertion: LLM fallback skipped for '{baseline_id}'. Failing based on pixel difference.")
pixel_match_passed = False
# 7. Handle Final Failure
if not pixel_match_passed:
failure_msg = f"Visual Assertion Failed for '{baseline_id}'. Pixel diff: {diff_ratio*100:.4f}% (Threshold: {step_threshold*100:.2f}%)."
if llm_reasoning: failure_msg += f" LLM Reason: {llm_reasoning}"
logger.error(failure_msg)
# Add details to run_status before raising
visual_failure_details = {
"baseline_id": baseline_id,
"pixel_difference_ratio": diff_ratio,
"pixel_threshold": step_threshold,
"mismatched_pixels": mismatched_pixels,
"diff_image_path": diff_image_path,
"llm_reasoning": llm_reasoning
}
# We need to store this somewhere accessible when raising the final error
# Let's add it directly to the step dict temporarily? Or a dedicated failure context?
# For now, log it and include basics in the AssertionError
run_status["visual_failure_details"] = visual_failure_details # Add to main run status
raise AssertionError(failure_msg) # Fail the step
visual_result = {
"step_id": step_id,
"baseline_id": baseline_id,
"status": "PASS" if pixel_match_passed else "FAIL",
"pixel_difference_ratio": diff_ratio,
"mismatched_pixels": mismatched_pixels,
"pixel_threshold": step_threshold,
"llm_override": use_llm and not pixel_match_passed and llm_response.strip().upper().startswith("YES") if 'llm_response' in locals() else False,
"llm_reasoning": llm_reasoning,
"diff_image_path": diff_image_path,
"element_selector": element_selector
}
run_status["visual_assertion_results"].append(visual_result)
elif action == "assert_passed_verification" or action == "assert_llm_verification":
if not self.llm_client:
raise PlaywrightError("LLMClient not available for vision-based verification step.")
if not description:
raise ValueError("Missing 'description' field for 'assert_passed_verification' step.")
if not self.browser_controller:
raise PlaywrightError("BrowserController not available for state gathering.")
logger.info("Performing vision-based verification with DOM context...")
# --- Gather Context ---
screenshot_bytes = self.browser_controller.take_screenshot()
current_url = self.browser_controller.get_current_url()
dom_context_str = "DOM context could not be retrieved." # Default
try:
dom_state = self.browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1) # No highlight during execution verification
if dom_state and dom_state.element_tree:
# Use 'verification' purpose for potentially richer context
dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification')
else:
logger.warning("Failed to get valid DOM state for vision verification.")
except Exception as dom_err:
logger.error(f"Error getting DOM context for vision verification: {dom_err}", exc_info=True)
# --------------------
if not screenshot_bytes:
raise PlaywrightError("Failed to capture screenshot for vision verification.")
prompt = f"""Analyze the provided webpage screenshot AND the accompanying HTML context.
The goal during testing was to verify the following condition: "{description}"
Current URL: {current_url}
HTML Context (Visible elements, interactive elements marked with `[index]`, static with `(Static)`):
```html
{dom_context_str}
```
Based on BOTH the visual evidence in the screenshot AND the HTML context (Prioritize html context more as screenshot will have some delay from when it was asked and when it was taken), is the verification condition "{description}" currently met?
If you think due to the delay in html AND screenshot, state might have changed from where the condition was met, then also respond with YES
IMPORTANT: Consider that elements might be in a loading state (e.g., placeholders described) OR a fully loaded state (e.g., actual images shown visually). If the current state reasonably fulfills the ultimate goal implied by the description (even if the exact visual differs due to loading, like placeholders becoming images), respond YES.
Respond with only "YES" or "NO", followed by a brief explanation justifying your answer using evidence from the screenshot and/or HTML context.
Example Response (Success): YES - The 'Welcome, User!' message [Static id='s15'] is visible in the HTML and visually present at the top of the screenshot.
Example Response (Failure): NO - The HTML context shows an error message element [12] and the screenshot visually confirms the 'Invalid credentials' error.
Example Response (Success - Placeholder Intent): YES - The description asked for 5 placeholders, but the screenshot and HTML show 5 fully loaded images within the expected containers ('div.image-container'). This fulfills the intent of ensuring the 5 image sections are present and populated.
"""
llm_response = self.llm_client.generate_multimodal(prompt, screenshot_bytes)
logger.debug(f"Vision verification LLM response: {llm_response}")
if llm_response.strip().upper().startswith("YES"):
logger.info("✅ Vision verification PASSED (with DOM context).")
elif llm_response.strip().upper().startswith("NO"):
logger.error(f"❌ Vision verification FAILED (with DOM context). LLM Reasoning: {llm_response}")
raise AssertionError(f"Vision verification failed: Condition '{description}' not met. LLM Reason: {llm_response}")
elif llm_response.startswith("Error:"):
logger.error(f"❌ Vision verification FAILED due to LLM error: {llm_response}")
raise PlaywrightError(f"Vision verification LLM error: {llm_response}")
else:
logger.error(f"❌ Vision verification FAILED due to unclear LLM response: {llm_response}")
raise AssertionError(f"Vision verification failed: Unclear LLM response. Response: {llm_response}")
# --- Add more actions/assertions as needed ---
else:
logger.warning(f"Unsupported action type '{action}' found in step {step_id}. Skipping.")
# Optionally treat as failure: raise ValueError(f"Unsupported action: {action}")
step_healed = True
log_suffix = ""
if current_healing_attempts > 0:
# Store the selector that *worked* (which is current_selector)
successful_healed_selector_for_step = current_selector
log_suffix = f" (Healed after {current_healing_attempts} attempt(s) using selector '{current_selector}')"
logger.info(f"Step {step_id} completed successfully{log_suffix}.")
logger.info(f"Step {step_id} completed successfully.")
# Optional wait after successful step execution
if wait_after > 0:
logger.debug(f"Waiting for {wait_after}s after step {step_id}...")
time.sleep(wait_after)
except (PlaywrightError, PlaywrightTimeoutError, ValueError, AssertionError) as e:
# Catch Playwright errors, input errors, and assertion failures (from expect)
last_error = e # Store the error
error_type = type(e).__name__
error_msg = str(e)
logger.warning(f"Attempt {current_healing_attempts + 1} for Step {step_id} failed. Error: {error_type}: {error_msg}")
# --- Healing Decision Logic ---
is_healable_error = isinstance(e, (PlaywrightTimeoutError, PlaywrightError)) and current_selector is not None
# Refine healable conditions:
# - Timeout finding/interacting with an element
# - Element detached, not visible, not interactable (if selector exists)
# - Exclude navigation errors, value errors from missing params, count mismatches
if isinstance(e, ValueError) or (isinstance(e, AssertionError) and "count" in error_msg.lower()):
is_healable_error = False
if action == "navigate":
is_healable_error = False
if action == "assert_visual_match":
is_healable_error = False
can_attempt_healing = self.enable_healing and is_healable_error and current_healing_attempts < self.healing_retries_per_step
if can_attempt_healing:
logger.info(f"Attempting Healing (Mode: {self.healing_mode}) for Step {step_id}...")
healing_success = False
new_selector = None
healing_log_entry = {
"step_id": step_id,
"attempt": current_healing_attempts + 1,
"mode": self.healing_mode,
"success": False,
"original_selector": original_selector,
"failed_selector": current_selector,
"error": f"{error_type}: {error_msg}",
"new_selector": None,
"reasoning": None,
}
if self.healing_mode == 'soft':
healing_success, new_selector, reasoning = self._attempt_soft_healing(step, current_selector, error_msg)
healing_log_entry["new_selector"] = new_selector
healing_log_entry["reasoning"] = reasoning
if healing_success:
logger.info(f"Soft healing successful for Step {step_id}. New selector: '{new_selector}'")
current_selector = new_selector # Update selector for the next loop iteration
healing_log_entry["success"] = True
else:
logger.warning(f"Soft healing failed for Step {step_id}. Reason: {reasoning}")
# Let the loop proceed to final failure state below
elif self.healing_mode == 'hard':
logger.warning(f"Hard Healing triggered for Step {step_id} due to error: {error_msg}")
if self.browser_controller:
self.browser_controller.clear_console_messages()
self.browser_controller.clear_network_requests()
healing_log_entry["mode"] = "hard" # Log mode
healing_log_entry["success"] = True # Mark attempt as 'successful' in triggering re-record
self.healing_attempts_log.append(healing_log_entry) # Log before triggering
self._trigger_hard_healing(feature_description, json_file_path)
run_status["status"] = "HEALING_TRIGGERED"
run_status["message"] = f"Hard Healing (re-recording) triggered on Step {step_id}."
run_status["failed_step"] = step # Store the step that triggered it
run_status["error_details"] = f"Hard healing triggered by {error_type}: {error_msg}"
return run_status # Stop execution and return status
self.healing_attempts_log.append(healing_log_entry) # Log soft healing attempt
if healing_success:
current_healing_attempts += 1
continue # Go to the next iteration of the while loop to retry with new selector
else:
# Soft healing failed, break the while loop to handle final failure
current_healing_attempts = self.healing_retries_per_step + 1
else:
# Healing not enabled, max attempts reached, or not a healable error
logger.error(f"❌ Step {step_id} failed permanently. Healing skipped or failed.")
raise last_error # Re-raise the last error to trigger final failure handling
# --- End Healing Loop ---
if successful_healed_selector_for_step:
logger.info(f"Persisting healed selector for Step {step_id}: '{successful_healed_selector_for_step}'")
# Modify the step in the IN-MEMORY list 'steps'
if i < len(steps): # Check index boundary
steps[i]['selector'] = successful_healed_selector_for_step
any_step_successfully_healed = True
run_status["healed_steps_count"] += 1
else:
logger.error(f"Index {i} out of bounds for steps list while persisting healed selector for step {step_id}.")
# If the while loop finished because max attempts were reached without success
if not step_healed:
logger.error(f"❌ Step {step_id} ('{description}') Failed definitively after {current_healing_attempts} attempt(s).")
run_status["status"] = "FAIL"
run_status["message"] = f"Test failed on step {step_id}: {description}"
run_status["failed_step"] = step
# Use the last captured error
error_type = type(last_error).__name__ if last_error else "UnknownError"
error_msg = str(last_error) if last_error else "Step failed after healing attempts."
run_status["error_details"] = f"{error_type}: {error_msg}"
if run_status["status"] == "FAIL" and step.get("action") == "assert_visual_match" and "visual_failure_details" in run_status:
run_status["error_details"] += f"\nVisual Failure Details: {run_status['visual_failure_details']}"
# Failure Handling (Screenshot/Logs)
try:
ts = time.strftime("%Y%m%d_%H%M%S")
safe_test_name = re.sub(r'[^\w\-]+', '_', test_name)[:50]
screenshot_path = os.path.join("output", f"failure_{safe_test_name}_step{step_id}_{ts}.png")
if self.browser_controller and self.browser_controller.save_screenshot(screenshot_path):
run_status["screenshot_on_failure"] = screenshot_path
logger.info(f"Failure screenshot saved to: {screenshot_path}")
if self.browser_controller:
run_status["all_console_messages"] = self.browser_controller.get_console_messages()
run_status["console_messages_on_failure"] = [
msg for msg in run_status["all_console_messages"]
if msg['type'] in ['error', 'warning']
][-5:]
except Exception as fail_handle_e:
logger.error(f"Error during failure handling: {fail_handle_e}")
# Stop the entire test execution
logger.info("Stopping test execution due to permanent step failure.")
return run_status # Return immediately
# If loop completes without breaking due to permanent failure
logger.info("--- Setting final status to PASS ---")
run_status["status"] = "PASS"
run_status["message"] = "✅ Test executed successfully."
if any_step_successfully_healed:
run_status["message"] += f" ({run_status['healed_steps_count']} step(s) healed)."
logger.info(run_status["message"])
except (FileNotFoundError, ValueError, json.JSONDecodeError) as e:
logger.error(f"Error loading or parsing test file '{json_file_path}': {e}")
run_status["message"] = f"Failed to load/parse test file: {e}"
run_status["error_details"] = f"{type(e).__name__}: {str(e)}"
# status is already FAIL by default
except PlaywrightError as e:
logger.critical(f"A Playwright error occurred during execution: {e}", exc_info=True)
if run_status["error_details"] is None: # If this is the first detailed error
run_status["message"] = f"Playwright error: {str(e)}"
run_status["error_details"] = f"{type(e).__name__}: {str(e)}"
run_status["status"] = "FAIL" # Ensure status is Fail
except Exception as e:
logger.critical(f"An unexpected error occurred during execution: {e}", exc_info=True)
if run_status["error_details"] is None: # If this is the first detailed error
run_status["message"] = f"Unexpected execution error: {str(e)}"
run_status["error_details"] = f"{type(e).__name__}: {str(e)}" # Ensure error_details is set
run_status["status"] = "FAIL" # Ensure status is Fail
finally:
logger.info("--- Ending Test Execution ---")
if self.browser_controller:
                if self.get_network_requests:
                    try:
                        run_status["network_requests"] = self.browser_controller.get_network_requests()
                    except Exception:
                        logger.error("Failed to retrieve final network requests.")
                # Performance timing is normally captured after the first navigation; fetch it here if still missing.
                if run_status.get("performance_timing") is None and self.get_performance:
                    try:
                        run_status["performance_timing"] = self.browser_controller.get_performance_timing()
                    except Exception:
                        logger.error("Failed to retrieve final performance timing.")
                # Console messages are captured on failure; otherwise collect them here.
                if not run_status.get("all_console_messages"):
                    try:
                        run_status["all_console_messages"] = self.browser_controller.get_console_messages()
                    except Exception:
                        logger.error("Failed to retrieve final console messages.")
self.browser_controller.close()
self.browser_controller = None
self.page = None
end_time = time.time()
run_status["duration_seconds"] = round(end_time - start_time, 2)
run_status["healing_attempts"] = self.healing_attempts_log
            if any_step_successfully_healed and run_status["status"] == "PASS":  # Save only if soft healing succeeded and the run passed (hard healing never reaches PASS)
try:
logger.info(f"Saving updated test file with {run_status['healed_steps_count']} healed step(s) to: {json_file_path}")
# modified_test_data should contain the updated steps list
with open(json_file_path, 'w', encoding='utf-8') as f:
json.dump(modified_test_data, f, indent=2, ensure_ascii=False)
run_status["healed_file_saved"] = True
logger.info(f"Successfully saved healed test file: {json_file_path}")
# Adjust final message if test passed after healing
if run_status["status"] == "PASS":
run_status["message"] = f"✅ Test passed with {run_status['healed_steps_count']} step(s) healed. Updated test file saved."
except Exception as save_err:
logger.error(f"Failed to save healed test file '{json_file_path}': {save_err}", exc_info=True)
run_status["healed_file_saved"] = False
# Add warning to message if save failed
if run_status["status"] == "PASS":
run_status["message"] += " (Warning: Failed to save healed selectors)"
logger.info(f"Execution finished in {run_status['duration_seconds']:.2f} seconds. Status: {run_status['status']}")
return run_status