Skip to main content
Glama

Web-QA

by GroundNG
executor.py69.6 kB
# /src/executor.py import json import logging import time import os from patchright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError, Error as PlaywrightError, expect from typing import Optional, Dict, Any, Tuple, List from pydantic import BaseModel, Field import re from PIL import Image from pixelmatch.contrib.PIL import pixelmatch import io from ..browser.browser_controller import BrowserController # Re-use for browser setup/teardown from ..llm.llm_client import LLMClient from ..agents.recorder_agent import WebAgent from ..utils.image_utils import compare_images # Define a short timeout specifically for selector validation during healing HEALING_SELECTOR_VALIDATION_TIMEOUT_MS = 2000 class HealingSelectorSuggestion(BaseModel): """Schema for the LLM's suggested replacement selector during healing.""" new_selector: Optional[str] = Field(None, description="The best suggested alternative CSS selector based on visual and DOM context, or null if no suitable alternative is found.") reasoning: str = Field(..., description="Explanation for the suggested selector choice or the reason why healing could not determine a better selector.") logger = logging.getLogger(__name__) class TestExecutor: """ Executes a recorded test case from a JSON file deterministically using Playwright. """ def __init__(self, llm_client: Optional[LLMClient], headless: bool = True, default_timeout: int = 5000, # Default timeout for actions/assertions enable_healing: bool = False, # Flag for healing healing_mode: str = 'soft', # Healing mode ('soft' or 'hard') healing_retries: int = 1, # Max soft healing attempts per step baseline_dir: str = "./visual_baselines", # Add baseline dir pixel_threshold: float = 0.01, # Default 1% pixel difference threshold get_performance: bool = False, get_network_requests: bool = False ): self.headless = headless self.default_timeout = default_timeout # Milliseconds self.llm_client = llm_client self.browser_controller: Optional[BrowserController] = None self.page: Optional[Page] = None self.enable_healing = enable_healing self.healing_mode = healing_mode self.healing_retries_per_step = healing_retries self.healing_attempts_log: List[Dict] = [] # To store healing attempts info self.get_performance = get_performance self.get_network_requests = get_network_requests logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms).") log_message = "" if self.enable_healing: log_message += f" with Healing ENABLED (mode={self.healing_mode}, retries={self.healing_retries_per_step})" if not self.llm_client: logger.warning("Self-healing enabled, but LLMClient not provided. Healing will not function.") else: log_message += f" using LLM provider '{self.llm_client.provider}'." else: log_message += "." logger.info(log_message) if not self.llm_client and not headless: # Vision verification needs LLM logger.warning("TestExecutor initialized without LLMClient. Vision-based assertions ('assert_passed_verification') will fail.") elif self.llm_client: logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms) with LLMClient for provider '{self.llm_client.provider}'.") else: logger.info(f"TestExecutor initialized (headless={headless}, timeout={default_timeout}ms). LLMClient not provided (headless mode or vision assertions not needed).") self.baseline_dir = os.path.abspath(baseline_dir) self.pixel_threshold = pixel_threshold # Store threshold logger.info(f"TestExecutor initialized (visual baseline dir: {self.baseline_dir}, pixel threshold: {self.pixel_threshold*100:.2f}%)") os.makedirs(self.baseline_dir, exist_ok=True) # Ensure baseline dir exists def _get_locator(self, selector: str): """Helper to get a Playwright locator, handling potential errors.""" if not self.page: raise PlaywrightError("Page is not initialized.") if not selector: raise ValueError("Selector cannot be empty.") is_likely_xpath = selector.startswith(('/', '(', '//')) or \ ('/' in selector and not any(c in selector for c in ['#', '.', '[', '>', '+', '~'])) # If it looks like XPath but doesn't have a prefix, add 'css=' # Playwright's locator treats "css=<xpath>" as an XPath selector. processed_selector = selector if is_likely_xpath and not selector.startswith(('css=', 'xpath=')): logger.warning(f"Selector '{selector}' looks like XPath but lacks prefix. Assuming XPath and adding 'css=' prefix.") processed_selector = f"xpath={selector}" try: logger.debug(f"Attempting to locate using: '{processed_selector}'") return self.page.locator(processed_selector).first except Exception as e: # Catch errors during locator creation itself (e.g., invalid selector syntax) logger.error(f"Failed to create locator for processed selector: '{processed_selector}'. Original: '{selector}'. Error: {e}") # Re-raise using the processed selector in the message for clarity raise PlaywrightError(f"Invalid selector syntax or error creating locator: '{processed_selector}'. Error: {e}") from e def _load_baseline(self, baseline_id: str) -> Tuple[Optional[Image.Image], Optional[Dict]]: """Loads the baseline image and metadata.""" metadata_path = os.path.join(self.baseline_dir, f"{baseline_id}.json") image_path = os.path.join(self.baseline_dir, f"{baseline_id}.png") # Assume PNG if not os.path.exists(metadata_path) or not os.path.exists(image_path): logger.error(f"Baseline files not found for ID '{baseline_id}' in {self.baseline_dir}") return None, None try: with open(metadata_path, 'r', encoding='utf-8') as f: metadata = json.load(f) baseline_img = Image.open(image_path).convert("RGBA") # Load and ensure RGBA logger.info(f"Loaded baseline '{baseline_id}' (Image: {image_path}, Metadata: {metadata_path})") return baseline_img, metadata except Exception as e: logger.error(f"Error loading baseline files for ID '{baseline_id}': {e}", exc_info=True) return None, None def _attempt_soft_healing( self, failed_step: Dict[str, Any], failed_selector: Optional[str], error_message: str ) -> Tuple[bool, Optional[str], str]: """ Attempts to find a new selector using the LLM based on the failed step's context and validate it. Returns: Tuple[bool, Optional[str], str]: (healing_success, new_selector, reasoning) """ if not self.llm_client: logger.error("Soft Healing: LLMClient not available.") return False, None, "LLMClient not configured for healing." if not self.browser_controller or not self.page: logger.error("Soft Healing: BrowserController or Page not available.") return False, None, "Browser state unavailable for healing." logger.info(f"Soft Healing: Gathering context for step {failed_step.get('step_id')}") try: current_url = self.browser_controller.get_current_url() screenshot_bytes = self.browser_controller.take_screenshot() dom_state = self.browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1) dom_context_str = "DOM context could not be retrieved." if dom_state and dom_state.element_tree: dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification') else: logger.warning("Soft Healing: Failed to get valid DOM state.") if not screenshot_bytes: logger.error("Soft Healing: Failed to capture screenshot.") return False, None, "Failed to capture screenshot for context." except Exception as e: logger.error(f"Soft Healing: Error gathering context: {e}", exc_info=True) return False, None, f"Error gathering context: {e}" # Construct the prompt prompt = f"""You are an AI Test Self-Healing Assistant. A step in an automated test failed, likely due to an incorrect or outdated CSS selector. Your goal is to analyze the current page state and suggest a more robust replacement selector for the intended element. **Failed Test Step Information:** - Step Description: "{failed_step.get('description', 'N/A')}" - Original Action: "{failed_step.get('action', 'N/A')}" - Failed Selector: `{failed_selector or 'N/A'}` - Error Message: "{error_message}" **Current Page State:** - URL: {current_url} - Attached Screenshot: Analyze the visual layout to identify the target element corresponding to the step description. - HTML Context (Visible elements, interactive `[index]`, static `(Static)`): ```html {dom_context_str} ``` **Your Task:** 1. Based on the step description, the original action, the visual screenshot, AND the HTML context, identify the element the test likely intended to interact with. 2. Suggest a **single, robust CSS selector** for this element using **NATIVE attributes** (like `id`, `name`, `data-testid`, `data-cy`, `aria-label`, `placeholder`, unique visible text combined with tag, stable class combinations). 3. **CRITICAL: Do NOT suggest selectors based on `data-ai-id` or unstable attributes (e.g., dynamic classes, complex positional selectors like :nth-child unless absolutely necessary and combined with other stable attributes).** 4. Prioritize standard, semantic, and test-specific attributes (`id`, `data-testid`, `name`). 5. If you cannot confidently identify the intended element or find a robust selector, return `null` for `new_selector`. **Output Format:** Respond ONLY with a JSON object matching the following schema: ```json {{ "new_selector": "YOUR_SUGGESTED_CSS_SELECTOR_OR_NULL", "reasoning": "Explain your choice of selector, referencing visual cues, HTML attributes, and the original step description. If returning null, explain why." }} ``` """ try: logger.info("Soft Healing: Requesting selector suggestion from LLM...") response_obj = self.llm_client.generate_json( HealingSelectorSuggestion, prompt, image_bytes=screenshot_bytes ) if isinstance(response_obj, HealingSelectorSuggestion): if response_obj.new_selector: suggested_selector = response_obj.new_selector logger.info(f"Soft Healing: LLM suggested new selector: '{response_obj.new_selector}'. Reasoning: {response_obj.reasoning}") logger.info(f"Soft Healing: Validating suggested selector '{suggested_selector}'...") validation_passed = False validation_reasoning_suffix = "" try: # Use page.locator() with a short timeout for existence check count = self.page.locator(suggested_selector).count() if count > 0: validation_passed = True logger.info(f"Soft Healing: Validation PASSED. Selector '{suggested_selector}' found {count} element(s).") if count > 1: logger.warning(f"Soft Healing: Suggested selector '{suggested_selector}' found {count} elements (expected 1). Will target the first.") else: # count == 0 logger.warning(f"Soft Healing: Validation FAILED. Selector '{suggested_selector}' found 0 elements within {HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms.") validation_reasoning_suffix = " [Validation Failed: Selector found 0 elements]" except PlaywrightTimeoutError: logger.warning(f"Soft Healing: Validation TIMEOUT ({HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms) checking selector '{suggested_selector}'.") validation_reasoning_suffix = f" [Validation Failed: Timeout after {HEALING_SELECTOR_VALIDATION_TIMEOUT_MS}ms]" except PlaywrightError as e: # Catch invalid selector syntax errors logger.warning(f"Soft Healing: Validation FAILED. Invalid selector syntax for '{suggested_selector}'. Error: {e}") validation_reasoning_suffix = f" [Validation Failed: Invalid selector syntax - {e}]" except Exception as e: logger.error(f"Soft Healing: Unexpected error during selector validation for '{suggested_selector}': {e}", exc_info=True) validation_reasoning_suffix = f" [Validation Error: {type(e).__name__}]" # --- End Validation Step --- # Return success only if validation passed if validation_passed: return True, suggested_selector, response_obj.reasoning else: # Update reasoning with validation failure details return False, None, response_obj.reasoning + validation_reasoning_suffix else: logger.warning(f"Soft Healing: LLM could not suggest a new selector. Reasoning: {response_obj.reasoning}") return False, None, response_obj.reasoning elif isinstance(response_obj, str): # LLM returned an error string logger.error(f"Soft Healing: LLM returned an error: {response_obj}") return False, None, f"LLM Error: {response_obj}" else: # Unexpected response type logger.error(f"Soft Healing: Unexpected response type from LLM: {type(response_obj)}") return False, None, f"Unexpected LLM response type: {type(response_obj)}" except Exception as llm_e: logger.error(f"Soft Healing: Error during LLM communication: {llm_e}", exc_info=True) return False, None, f"LLM communication error: {llm_e}" def _trigger_hard_healing(self, feature_description: str, original_file_path: str) -> None: """ Closes the current browser and triggers the WebAgent to re-record the test. """ logger.warning("--- Triggering Hard Healing (Re-Recording) ---") if not feature_description: logger.error("Hard Healing: Cannot re-record without the original feature description.") return if not self.llm_client: logger.error("Hard Healing: Cannot re-record without an LLMClient.") return # 1. Close current browser try: if self.browser_controller: self.browser_controller.close() self.browser_controller = None self.page = None logger.info("Hard Healing: Closed executor browser.") except Exception as close_err: logger.error(f"Hard Healing: Error closing executor browser: {close_err}") # Continue anyway, try to re-record # 2. Instantiate Recorder Agent # NOTE: Assume re-recording is automated. Add flag if interactive needed. try: logger.info("Hard Healing: Initializing WebAgent for automated re-recording...") # Use the existing LLM client recorder_agent = WebAgent( llm_client=self.llm_client, headless=False, # Re-recording needs visible browser initially is_recorder_mode=True, automated_mode=True, # Run re-recording automatically # Pass original filename stem to maybe overwrite or create variant filename=os.path.splitext(os.path.basename(original_file_path))[0] + "_healed_" ) # 3. Run Recorder logger.info(f"Hard Healing: Starting re-recording for feature: '{feature_description}'") recording_result = recorder_agent.record(feature_description) # 4. Log Outcome if recording_result.get("success"): logger.info(f"✅ Hard Healing: Re-recording successful. New test file saved to: {recording_result.get('output_file')}") else: logger.error(f"❌ Hard Healing: Re-recording FAILED. Message: {recording_result.get('message')}") except Exception as record_err: logger.critical(f"❌ Hard Healing: Critical error during re-recording setup or execution: {record_err}", exc_info=True) def run_test(self, json_file_path: str) -> Dict[str, Any]: """Loads and executes the test steps from the JSON file.""" start_time = time.time() self.healing_attempts_log = [] # Reset log for this run any_step_successfully_healed = False run_status = { "test_file": json_file_path, "status": "FAIL", # Default to fail "message": "Execution initiated.", "steps_executed": 0, "failed_step": None, "error_details": None, "screenshot_on_failure": None, "console_messages_on_failure": [], "all_console_messages": [], "performance_timing": None, "network_requests": [], "duration_seconds": 0.0, "healing_enabled": self.enable_healing, "healing_mode": self.healing_mode if self.enable_healing else "disabled", "healing_attempts": self.healing_attempts_log, # Reference the list "healed_file_saved": False, "healed_steps_count": 0, "visual_assertion_results": [] } try: # --- Load Test Data --- logger.info(f"Loading test case from: {json_file_path}") if not os.path.exists(json_file_path): raise FileNotFoundError(f"Test file not found: {json_file_path}") with open(json_file_path, 'r', encoding='utf-8') as f: test_data = json.load(f) modified_test_data = test_data.copy() steps = modified_test_data.get("steps", []) viewport = next((json.load(open(os.path.join(self.baseline_dir, f"{step.get('parameters', {}).get('baseline_id')}.json"))).get("viewport_size") for step in steps if step.get("action") == "assert_visual_match" and step.get('parameters', {}).get('baseline_id') and os.path.exists(os.path.join(self.baseline_dir, f"{step.get('parameters', {}).get('baseline_id')}.json"))), None) test_name = modified_test_data.get("test_name", "Unnamed Test") feature_description = modified_test_data.get("feature_description", "") first_navigation_done = False run_status["test_name"] = test_name logger.info(f"Executing test: '{test_name}' with {len(steps)} steps.") if not steps: raise ValueError("No steps found in the test file.") # --- Setup Browser --- self.browser_controller = BrowserController(headless=self.headless, viewport_size=viewport) # Set default timeout before starting the page self.browser_controller.default_action_timeout = self.default_timeout self.browser_controller.default_navigation_timeout = max(self.default_timeout, 30000) # Ensure navigation timeout is reasonable self.browser_controller.start() self.page = self.browser_controller.page if not self.page: raise PlaywrightError("Failed to initialize browser page.") # Re-apply default timeout to the page context AFTER it's created self.page.set_default_timeout(self.default_timeout) logger.info(f"Browser page initialized with default action timeout: {self.default_timeout}ms") self.browser_controller.clear_console_messages() self.browser_controller.clear_network_requests() # --- Execute Steps --- for i, step in enumerate(steps): step_id = step.get("step_id", i + 1) action = step.get("action") original_selector = step.get("selector") params = step.get("parameters", {}) description = step.get("description", f"Step {step_id}") wait_after = step.get("wait_after_secs", 0) # Get wait time run_status["steps_executed"] = i + 1 # Track steps attempted logger.info(f"--- Executing Step {step_id}: {action} - {description} ---") if original_selector: logger.info(f"Original Selector: {original_selector}") if params: logger.info(f"Parameters: {params}") # --- Healing Loop --- step_healed = False current_healing_attempts = 0 current_selector = original_selector # Start with the recorded selector last_error = None # Store the last error encountered successful_healed_selector_for_step = None run_status["visual_assertion_results"] = [] while not step_healed and current_healing_attempts <= self.healing_retries_per_step: try: if action == "navigate": url = params.get("url") if not url: raise ValueError("Missing 'url' parameter for navigate.") self.browser_controller.goto(url)# Uses default navigation timeout from context if not first_navigation_done: if self.get_performance: run_status["performance_timing"] = self.browser_controller.page_performance_timing first_navigation_done = True elif action == "click": if not current_selector: raise ValueError("Missing 'current_selector' for click.") locator = self._get_locator(current_selector) locator.click(timeout=self.default_timeout) # Explicit timeout for action elif action == "type": text = params.get("text") if not current_selector: raise ValueError("Missing 'current_selector' for type.") if text is None: raise ValueError("Missing 'text' parameter for type.") locator = self._get_locator(current_selector) locator.fill(text, timeout=self.default_timeout) # Use fill for robustness elif action == "scroll": # Less common, but support if recorded direction = params.get("direction") if direction not in ["up", "down"]: raise ValueError("Invalid 'direction'.") amount = "window.innerHeight" if direction=="down" else "-window.innerHeight" self.page.evaluate(f"window.scrollBy(0, {amount})") elif action == "check": if not current_selector: raise ValueError("Missing 'current_selector' for check action.") # Use the browser_controller method which handles locator/timeout self.browser_controller.check(current_selector) elif action == "uncheck": if not current_selector: raise ValueError("Missing 'current_selector' for uncheck action.") # Use the browser_controller method self.browser_controller.uncheck(current_selector) elif action == "select": option_label = params.get("option_label") option_value = params.get("option_value") # Support value too if recorded option_index_str = params.get("option_index") # Support index if recorded option_param = None param_type = None if option_label is not None: option_param = {"label": option_label} param_type = f"label '{option_label}'" elif option_value is not None: option_param = {"value": option_value} param_type = f"value '{option_value}'" elif option_index_str is not None and option_index_str.isdigit(): option_param = {"index": int(option_index_str)} param_type = f"index {option_index_str}" else: raise ValueError("Missing 'option_label', 'option_value', or 'option_index' parameter for select action.") if not current_selector: raise ValueError("Missing 'current_selector' for select action.") logger.info(f"Selecting option by {param_type} in element: {current_selector}") locator = self._get_locator(current_selector) locator.select_option(**option_param, timeout=self.default_timeout) elif action == "wait": # Generic wait action timeout_s = params.get("timeout_seconds") target_url = params.get("url") element_state = params.get("state") # e.g., 'visible', 'hidden' wait_selector = current_selector # Use current (potentially healed) selector if waiting for element if timeout_s is not None and not target_url and not element_state: # Simple time wait logger.info(f"Waiting for {timeout_s} seconds...") self.page.wait_for_timeout(timeout_s * 1000) elif wait_selector and element_state: # Wait for element state logger.info(f"Waiting for element '{wait_selector}' to be '{element_state}' (max {self.default_timeout}ms)...") locator = self._get_locator(wait_selector) locator.wait_for(state=element_state, timeout=self.default_timeout) elif target_url: # Wait for URL logger.info(f"Waiting for URL matching '{target_url}' (max {self.browser_controller.default_navigation_timeout}ms)...") self.page.wait_for_url(target_url, timeout=self.browser_controller.default_navigation_timeout) else: raise ValueError("Invalid parameters for 'wait' action. Need timeout_seconds OR (selector and state) OR url.") elif action == "wait_for_load_state": state = params.get("state", "load") self.page.wait_for_load_state(state, timeout=self.browser_controller.default_navigation_timeout) # Use navigation timeout elif action == "wait_for_selector": # Explicit wait wait_state = params.get("state", "visible") timeout = params.get("timeout_ms", self.default_timeout) if not current_selector: raise ValueError("Missing 'current_selector' for wait_for_selector.") locator = self._get_locator(current_selector) locator.wait_for(state=wait_state, timeout=timeout) elif action == "key_press": keys = params.get("keys") if not current_selector: raise ValueError("Missing 'selector' for key_press.") if not keys: raise ValueError("Missing 'keys' parameter for key_press.") # Use controller method or locator directly locator = self._get_locator(current_selector) locator.press(keys, timeout=self.default_timeout) # self.browser_controller.press(current_selector, keys) # Alt: if using controller method elif action == "drag_and_drop": target_selector = params.get("target_selector") source_selector = current_selector # Source is in the main 'selector' field if not source_selector: raise ValueError("Missing source 'selector' for drag_and_drop.") if not target_selector: raise ValueError("Missing 'target_selector' in parameters for drag_and_drop.") # Use controller method or locators directly source_locator = self._get_locator(source_selector) target_locator = self._get_locator(target_selector) source_locator.drag_to(target_locator, timeout=self.default_timeout) # self.browser_controller.drag_and_drop(source_selector, target_selector) # Alt: if using controller # --- Assertions --- elif action == "assert_text_contains": expected_text = params.get("expected_text") if not current_selector: raise ValueError("Missing 'current_selector' for assertion.") if expected_text is None: raise ValueError("Missing 'expected_text'.") locator = self._get_locator(current_selector) expect(locator).to_contain_text(expected_text, timeout=self.default_timeout) elif action == "assert_text_equals": expected_text = params.get("expected_text") if not current_selector: raise ValueError("Missing 'current_selector' for assertion.") if expected_text is None: raise ValueError("Missing 'expected_text'.") locator = self._get_locator(current_selector) expect(locator).to_have_text(expected_text, timeout=self.default_timeout) elif action == "assert_visible": if not current_selector: raise ValueError("Missing 'current_selector' for assertion.") locator = self._get_locator(current_selector) expect(locator).to_be_visible(timeout=self.default_timeout) elif action == "assert_hidden": if not current_selector: raise ValueError("Missing 'current_selector' for assertion.") locator = self._get_locator(current_selector) expect(locator).to_be_hidden(timeout=self.default_timeout) elif action == "assert_attribute_equals": attr_name = params.get("attribute_name") expected_value = params.get("expected_value") if not current_selector: raise ValueError("Missing 'current_selector' for assertion.") if not attr_name: raise ValueError("Missing 'attribute_name'.") if expected_value is None: raise ValueError("Missing 'expected_value'.") locator = self._get_locator(current_selector) expect(locator).to_have_attribute(attr_name, expected_value, timeout=self.default_timeout) elif action == "assert_element_count": expected_count = params.get("expected_count") if not current_selector: raise ValueError("Missing 'current_selector' for assertion.") if expected_count is None: raise ValueError("Missing 'expected_count'.") if not isinstance(expected_count, int): raise ValueError("'expected_count' must be an integer.") # Add type check # --- FIX: Get locator for count without using .first --- # Apply the same current_selector processing as in _get_locator if needed is_likely_xpath = current_selector.startswith(('/', '(', '//')) or \ ('/' in current_selector and not any(c in current_selector for c in ['#', '.', '[', '>', '+', '~'])) processed_selector = current_selector if is_likely_xpath and not current_selector.startswith(('css=', 'xpath=')): processed_selector = f"xpath={current_selector}" # Get the locator for potentially MULTIPLE elements count_locator = self.page.locator(processed_selector) # --- End FIX --- logger.info(f"Asserting count of elements matching '{processed_selector}' to be {expected_count}") expect(count_locator).to_have_count(expected_count, timeout=self.default_timeout) elif action == "assert_checked": if not current_selector: raise ValueError("Missing 'current_selector' for assert_checked.") locator = self._get_locator(current_selector) # Use Playwright's dedicated assertion for checked state expect(locator).to_be_checked(timeout=self.default_timeout) elif action == "assert_not_checked": if not current_selector: raise ValueError("Missing 'current_selector' for assert_not_checked.") locator = self._get_locator(current_selector) # Use .not modifier with the checked assertion expect(locator).not_to_be_checked(timeout=self.default_timeout) elif action == "assert_disabled": if not current_selector: raise ValueError("Missing 'current_selector' for assert_disabled.") locator = self._get_locator(current_selector) # Use Playwright's dedicated assertion for disabled state expect(locator).to_be_disabled(timeout=self.default_timeout) elif action == "assert_enabled": if not current_selector: raise ValueError("Missing 'current_selector' for assert_enabled.") locator = self._get_locator(current_selector) expect(locator).to_be_enabled(timeout=self.default_timeout) elif action == "task_replanned": pass elif action == "assert_visual_match": baseline_id = params.get("baseline_id") element_selector = step.get("selector") # Use step's selector if available use_llm = params.get("use_llm_fallback", True) # Allow overriding threshold per step step_threshold = params.get("pixel_threshold", self.pixel_threshold) if not baseline_id: raise ValueError("Missing 'baseline_id' parameter for assert_visual_match.") logger.info(f"--- Performing Visual Assertion: '{baseline_id}' (Selector: {element_selector}, Threshold: {step_threshold*100:.2f}%, LLM: {use_llm}) ---") # 1. Load Baseline baseline_img, baseline_meta = self._load_baseline(baseline_id) if not baseline_img or not baseline_meta: raise FileNotFoundError(f"Baseline '{baseline_id}' not found or failed to load.") # 2. Capture Current State current_screenshot_bytes = None if element_selector: current_screenshot_bytes = self.browser_controller.take_screenshot_element(element_selector) else: current_screenshot_bytes = self.browser_controller.take_screenshot() # Full page if not current_screenshot_bytes: raise PlaywrightError("Failed to capture current screenshot for visual comparison.") try: # Create a BytesIO buffer to treat the bytes like a file buffer = io.BytesIO(current_screenshot_bytes) # Open the image from the buffer using Pillow img = Image.open(buffer) # Ensure the image is in RGBA format for consistency, # especially important for pixel comparisons that might expect an alpha channel. logger.info("received") current_img = img.convert("RGBA") except Exception as e: logger.error(f"Failed to convert bytes to PIL Image: {e}", exc_info=True) current_img = None if not current_img: raise RuntimeError("Failed to process current screenshot bytes into an image.") # 3. Pre-check Dimensions if baseline_img.size != current_img.size: size_mismatch_msg = f"Visual Assertion Failed: Image dimensions mismatch for '{baseline_id}'. Baseline: {baseline_img.size}, Current: {current_img.size}." logger.error(size_mismatch_msg) # Save current image for debugging ts = time.strftime("%Y%m%d_%H%M%S") current_img_path = os.path.join("output", f"visual_fail_{baseline_id}_current_{ts}.png") current_img.save(current_img_path) logger.info(f"Saved current image (dimension mismatch) to: {current_img_path}") raise AssertionError(size_mismatch_msg) # Fail the assertion # 4. Pixel Comparison img_diff = Image.new("RGBA", baseline_img.size) # Image to store diff pixels try: mismatched_pixels = pixelmatch(baseline_img, current_img, img_diff, includeAA=True, threshold=0.1) # Use default pixelmatch threshold first except Exception as pm_error: logger.error(f"Error during pixelmatch comparison for '{baseline_id}': {pm_error}", exc_info=True) raise RuntimeError(f"Pixelmatch library error: {pm_error}") from pm_error total_pixels = baseline_img.width * baseline_img.height diff_ratio = mismatched_pixels / total_pixels if total_pixels > 0 else 0 logger.info(f"Pixel comparison for '{baseline_id}': Mismatched Pixels = {mismatched_pixels}, Total Pixels = {total_pixels}, Difference = {diff_ratio*100:.4f}%") # 5. Check against threshold pixel_match_passed = diff_ratio <= step_threshold llm_reasoning = None diff_image_path = None if pixel_match_passed: logger.info(f"✅ Visual Assertion PASSED (Pixel Diff <= Threshold) for '{baseline_id}'.") # Step completed successfully else: logger.warning(f"Visual Assertion: Pixel difference ({diff_ratio*100:.4f}%) exceeds threshold ({step_threshold*100:.2f}%) for '{baseline_id}'.") # Save diff image regardless of LLM outcome ts = time.strftime("%Y%m%d_%H%M%S") diff_image_path = os.path.join("output", f"visual_diff_{baseline_id}_{ts}.png") try: img_diff.save(diff_image_path) logger.info(f"Saved pixel difference image to: {diff_image_path}") except Exception as save_err: logger.error(f"Failed to save diff image: {save_err}") diff_image_path = None # Mark as failed # 6. LLM Fallback if use_llm and self.llm_client: logger.info(f"Attempting LLM visual comparison fallback for '{baseline_id}'...") baseline_bytes = io.BytesIO() baseline_img.save(baseline_bytes, format='PNG') baseline_bytes = baseline_bytes.getvalue() # --- UPDATED LLM PROMPT for Stitched Image --- llm_prompt = f"""Analyze the combined image provided below for the purpose of automated software testing. The LEFT half (labeled '1: Baseline') is the established baseline screenshot. The RIGHT half (labeled '2: Current') is the current state screenshot. Compare these two halves to determine if they are SEMANTICALLY equivalent from a user's perspective. IGNORE minor differences like: - Anti-aliasing variations - Single-pixel shifts - Tiny rendering fluctuations - Small, insignificant dynamic content changes (e.g., blinking cursors, exact timestamps if not the focus). FOCUS ON significant differences like: - Layout changes (elements moved, resized, missing, added) - Major color changes of key elements - Text content changes (errors, different labels, etc.) - Missing or fundamentally different images/icons. Baseline ID: "{baseline_id}" Captured URL (Baseline): "{baseline_meta.get('url_captured', 'N/A')}" Selector (Baseline): "{baseline_meta.get('selector_captured', 'Full Page')}" Based on these criteria, are the two halves (baseline vs. current) functionally and visually equivalent enough to PASS a visual regression test? Respond ONLY with "YES" or "NO", followed by a brief explanation justifying your answer by referencing differences between the left and right halves. Example YES: YES - The left (baseline) and right (current) images are visually equivalent. Minor text rendering differences are ignored. Example NO: NO - The primary call-to-action button visible on the left (baseline) is missing on the right (current). """ # --- END UPDATED PROMPT --- try: # No change here, compare_images handles the stitching internally llm_response = compare_images(llm_prompt, baseline_bytes, current_screenshot_bytes, self.llm_client) logger.info(f"LLM visual comparison response for '{baseline_id}': {llm_response}") llm_reasoning = llm_response # Store reasoning if llm_response.strip().upper().startswith("YES"): logger.info(f"✅ Visual Assertion PASSED (LLM Override) for '{baseline_id}'.") pixel_match_passed = True # Override pixel result elif llm_response.strip().upper().startswith("NO"): logger.warning(f"Visual Assertion: LLM confirmed significant difference for '{baseline_id}'.") pixel_match_passed = False # Confirm failure else: logger.warning(f"Visual Assertion: LLM response unclear for '{baseline_id}'. Treating as failure.") pixel_match_passed = False except Exception as llm_err: logger.error(f"LLM visual comparison failed: {llm_err}", exc_info=True) llm_reasoning = f"LLM Error: {llm_err}" pixel_match_passed = False # Treat LLM error as failure else: # LLM fallback not enabled or LLM not available logger.warning(f"Visual Assertion: LLM fallback skipped for '{baseline_id}'. Failing based on pixel difference.") pixel_match_passed = False # 7. Handle Final Failure if not pixel_match_passed: failure_msg = f"Visual Assertion Failed for '{baseline_id}'. Pixel diff: {diff_ratio*100:.4f}% (Threshold: {step_threshold*100:.2f}%)." if llm_reasoning: failure_msg += f" LLM Reason: {llm_reasoning}" logger.error(failure_msg) # Add details to run_status before raising visual_failure_details = { "baseline_id": baseline_id, "pixel_difference_ratio": diff_ratio, "pixel_threshold": step_threshold, "mismatched_pixels": mismatched_pixels, "diff_image_path": diff_image_path, "llm_reasoning": llm_reasoning } # We need to store this somewhere accessible when raising the final error # Let's add it directly to the step dict temporarily? Or a dedicated failure context? # For now, log it and include basics in the AssertionError run_status["visual_failure_details"] = visual_failure_details # Add to main run status raise AssertionError(failure_msg) # Fail the step visual_result = { "step_id": step_id, "baseline_id": baseline_id, "status": "PASS" if pixel_match_passed else "FAIL", "pixel_difference_ratio": diff_ratio, "mismatched_pixels": mismatched_pixels, "pixel_threshold": step_threshold, "llm_override": use_llm and not pixel_match_passed and llm_response.strip().upper().startswith("YES") if 'llm_response' in locals() else False, "llm_reasoning": llm_reasoning, "diff_image_path": diff_image_path, "element_selector": element_selector } run_status["visual_assertion_results"].append(visual_result) elif action == "assert_passed_verification" or action == "assert_llm_verification": if not self.llm_client: raise PlaywrightError("LLMClient not available for vision-based verification step.") if not description: raise ValueError("Missing 'description' field for 'assert_passed_verification' step.") if not self.browser_controller: raise PlaywrightError("BrowserController not available for state gathering.") logger.info("Performing vision-based verification with DOM context...") # --- Gather Context --- screenshot_bytes = self.browser_controller.take_screenshot() current_url = self.browser_controller.get_current_url() dom_context_str = "DOM context could not be retrieved." # Default try: dom_state = self.browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1) # No highlight during execution verification if dom_state and dom_state.element_tree: # Use 'verification' purpose for potentially richer context dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification') else: logger.warning("Failed to get valid DOM state for vision verification.") except Exception as dom_err: logger.error(f"Error getting DOM context for vision verification: {dom_err}", exc_info=True) # -------------------- if not screenshot_bytes: raise PlaywrightError("Failed to capture screenshot for vision verification.") prompt = f"""Analyze the provided webpage screenshot AND the accompanying HTML context. The goal during testing was to verify the following condition: "{description}" Current URL: {current_url} HTML Context (Visible elements, interactive elements marked with `[index]`, static with `(Static)`): ```html {dom_context_str} ``` Based on BOTH the visual evidence in the screenshot AND the HTML context (Prioritize html context more as screenshot will have some delay from when it was asked and when it was taken), is the verification condition "{description}" currently met? If you think due to the delay in html AND screenshot, state might have changed from where the condition was met, then also respond with YES IMPORTANT: Consider that elements might be in a loading state (e.g., placeholders described) OR a fully loaded state (e.g., actual images shown visually). If the current state reasonably fulfills the ultimate goal implied by the description (even if the exact visual differs due to loading, like placeholders becoming images), respond YES. Respond with only "YES" or "NO", followed by a brief explanation justifying your answer using evidence from the screenshot and/or HTML context. Example Response (Success): YES - The 'Welcome, User!' message [Static id='s15'] is visible in the HTML and visually present at the top of the screenshot. Example Response (Failure): NO - The HTML context shows an error message element [12] and the screenshot visually confirms the 'Invalid credentials' error. Example Response (Success - Placeholder Intent): YES - The description asked for 5 placeholders, but the screenshot and HTML show 5 fully loaded images within the expected containers ('div.image-container'). This fulfills the intent of ensuring the 5 image sections are present and populated. """ llm_response = self.llm_client.generate_multimodal(prompt, screenshot_bytes) logger.debug(f"Vision verification LLM response: {llm_response}") if llm_response.strip().upper().startswith("YES"): logger.info("✅ Vision verification PASSED (with DOM context).") elif llm_response.strip().upper().startswith("NO"): logger.error(f"❌ Vision verification FAILED (with DOM context). LLM Reasoning: {llm_response}") raise AssertionError(f"Vision verification failed: Condition '{description}' not met. LLM Reason: {llm_response}") elif llm_response.startswith("Error:"): logger.error(f"❌ Vision verification FAILED due to LLM error: {llm_response}") raise PlaywrightError(f"Vision verification LLM error: {llm_response}") else: logger.error(f"❌ Vision verification FAILED due to unclear LLM response: {llm_response}") raise AssertionError(f"Vision verification failed: Unclear LLM response. Response: {llm_response}") # --- Add more actions/assertions as needed --- else: logger.warning(f"Unsupported action type '{action}' found in step {step_id}. Skipping.") # Optionally treat as failure: raise ValueError(f"Unsupported action: {action}") step_healed = True log_suffix = "" if current_healing_attempts > 0: # Store the selector that *worked* (which is current_selector) successful_healed_selector_for_step = current_selector log_suffix = f" (Healed after {current_healing_attempts} attempt(s) using selector '{current_selector}')" logger.info(f"Step {step_id} completed successfully{log_suffix}.") logger.info(f"Step {step_id} completed successfully.") # Optional wait after successful step execution if wait_after > 0: logger.debug(f"Waiting for {wait_after}s after step {step_id}...") time.sleep(wait_after) except (PlaywrightError, PlaywrightTimeoutError, ValueError, AssertionError) as e: # Catch Playwright errors, input errors, and assertion failures (from expect) last_error = e # Store the error error_type = type(e).__name__ error_msg = str(e) logger.warning(f"Attempt {current_healing_attempts + 1} for Step {step_id} failed. Error: {error_type}: {error_msg}") # --- Healing Decision Logic --- is_healable_error = isinstance(e, (PlaywrightTimeoutError, PlaywrightError)) and current_selector is not None # Refine healable conditions: # - Timeout finding/interacting with an element # - Element detached, not visible, not interactable (if selector exists) # - Exclude navigation errors, value errors from missing params, count mismatches if isinstance(e, ValueError) or (isinstance(e, AssertionError) and "count" in error_msg.lower()): is_healable_error = False if action == "navigate": is_healable_error = False if action == "assert_visual_match": is_healable_error = False can_attempt_healing = self.enable_healing and is_healable_error and current_healing_attempts < self.healing_retries_per_step if can_attempt_healing: logger.info(f"Attempting Healing (Mode: {self.healing_mode}) for Step {step_id}...") healing_success = False new_selector = None healing_log_entry = { "step_id": step_id, "attempt": current_healing_attempts + 1, "mode": self.healing_mode, "success": False, "original_selector": original_selector, "failed_selector": current_selector, "error": f"{error_type}: {error_msg}", "new_selector": None, "reasoning": None, } if self.healing_mode == 'soft': healing_success, new_selector, reasoning = self._attempt_soft_healing(step, current_selector, error_msg) healing_log_entry["new_selector"] = new_selector healing_log_entry["reasoning"] = reasoning if healing_success: logger.info(f"Soft healing successful for Step {step_id}. New selector: '{new_selector}'") current_selector = new_selector # Update selector for the next loop iteration healing_log_entry["success"] = True else: logger.warning(f"Soft healing failed for Step {step_id}. Reason: {reasoning}") # Let the loop proceed to final failure state below elif self.healing_mode == 'hard': logger.warning(f"Hard Healing triggered for Step {step_id} due to error: {error_msg}") if self.browser_controller: self.browser_controller.clear_console_messages() self.browser_controller.clear_network_requests() healing_log_entry["mode"] = "hard" # Log mode healing_log_entry["success"] = True # Mark attempt as 'successful' in triggering re-record self.healing_attempts_log.append(healing_log_entry) # Log before triggering self._trigger_hard_healing(feature_description, json_file_path) run_status["status"] = "HEALING_TRIGGERED" run_status["message"] = f"Hard Healing (re-recording) triggered on Step {step_id}." run_status["failed_step"] = step # Store the step that triggered it run_status["error_details"] = f"Hard healing triggered by {error_type}: {error_msg}" return run_status # Stop execution and return status self.healing_attempts_log.append(healing_log_entry) # Log soft healing attempt if healing_success: current_healing_attempts += 1 continue # Go to the next iteration of the while loop to retry with new selector else: # Soft healing failed, break the while loop to handle final failure current_healing_attempts = self.healing_retries_per_step + 1 else: # Healing not enabled, max attempts reached, or not a healable error logger.error(f"❌ Step {step_id} failed permanently. Healing skipped or failed.") raise last_error # Re-raise the last error to trigger final failure handling # --- End Healing Loop --- if successful_healed_selector_for_step: logger.info(f"Persisting healed selector for Step {step_id}: '{successful_healed_selector_for_step}'") # Modify the step in the IN-MEMORY list 'steps' if i < len(steps): # Check index boundary steps[i]['selector'] = successful_healed_selector_for_step any_step_successfully_healed = True run_status["healed_steps_count"] += 1 else: logger.error(f"Index {i} out of bounds for steps list while persisting healed selector for step {step_id}.") # If the while loop finished because max attempts were reached without success if not step_healed: logger.error(f"❌ Step {step_id} ('{description}') Failed definitively after {current_healing_attempts} attempt(s).") run_status["status"] = "FAIL" run_status["message"] = f"Test failed on step {step_id}: {description}" run_status["failed_step"] = step # Use the last captured error error_type = type(last_error).__name__ if last_error else "UnknownError" error_msg = str(last_error) if last_error else "Step failed after healing attempts." run_status["error_details"] = f"{error_type}: {error_msg}" if run_status["status"] == "FAIL" and step.get("action") == "assert_visual_match" and "visual_failure_details" in run_status: run_status["error_details"] += f"\nVisual Failure Details: {run_status['visual_failure_details']}" # Failure Handling (Screenshot/Logs) try: ts = time.strftime("%Y%m%d_%H%M%S") safe_test_name = re.sub(r'[^\w\-]+', '_', test_name)[:50] screenshot_path = os.path.join("output", f"failure_{safe_test_name}_step{step_id}_{ts}.png") if self.browser_controller and self.browser_controller.save_screenshot(screenshot_path): run_status["screenshot_on_failure"] = screenshot_path logger.info(f"Failure screenshot saved to: {screenshot_path}") if self.browser_controller: run_status["all_console_messages"] = self.browser_controller.get_console_messages() run_status["console_messages_on_failure"] = [ msg for msg in run_status["all_console_messages"] if msg['type'] in ['error', 'warning'] ][-5:] except Exception as fail_handle_e: logger.error(f"Error during failure handling: {fail_handle_e}") # Stop the entire test execution logger.info("Stopping test execution due to permanent step failure.") return run_status # Return immediately # If loop completes without breaking due to permanent failure logger.info("--- Setting final status to PASS ---") run_status["status"] = "PASS" run_status["message"] = "✅ Test executed successfully." if any_step_successfully_healed: run_status["message"] += f" ({run_status['healed_steps_count']} step(s) healed)." logger.info(run_status["message"]) except (FileNotFoundError, ValueError, json.JSONDecodeError) as e: logger.error(f"Error loading or parsing test file '{json_file_path}': {e}") run_status["message"] = f"Failed to load/parse test file: {e}" run_status["error_details"] = f"{type(e).__name__}: {str(e)}" # status is already FAIL by default except PlaywrightError as e: logger.critical(f"A Playwright error occurred during execution: {e}", exc_info=True) if run_status["error_details"] is None: # If this is the first detailed error run_status["message"] = f"Playwright error: {str(e)}" run_status["error_details"] = f"{type(e).__name__}: {str(e)}" run_status["status"] = "FAIL" # Ensure status is Fail except Exception as e: logger.critical(f"An unexpected error occurred during execution: {e}", exc_info=True) if run_status["error_details"] is None: # If this is the first detailed error run_status["message"] = f"Unexpected execution error: {str(e)}" run_status["error_details"] = f"{type(e).__name__}: {str(e)}" # Ensure error_details is set run_status["status"] = "FAIL" # Ensure status is Fail finally: logger.info("--- Ending Test Execution ---") if self.browser_controller: if self.get_network_requests: try: run_status["network_requests"] = self.browser_controller.get_network_requests() except: logger.error("Failed to retrieve final network requests.") # Performance timing is captured after navigation, check if it exists if run_status.get("performance_timing") is None and self.get_performance is not False: try: run_status["performance_timing"] = self.browser_controller.get_performance_timing() except: logger.error("Failed to retrieve final performance timing.") # Console messages captured on failure or here if "all_console_messages" not in run_status or not run_status["all_console_messages"]: try: run_status["all_console_messages"] = self.browser_controller.get_console_messages() except: logger.error("Failed to retrieve final console messages.") self.browser_controller.close() self.browser_controller = None self.page = None end_time = time.time() run_status["duration_seconds"] = round(end_time - start_time, 2) run_status["healing_attempts"] = self.healing_attempts_log if any_step_successfully_healed and run_status["status"] != "HEALING_TRIGGERED" and run_status["status"] == "PASS": # Save if healing occurred and not hard-healing try: logger.info(f"Saving updated test file with {run_status['healed_steps_count']} healed step(s) to: {json_file_path}") # modified_test_data should contain the updated steps list with open(json_file_path, 'w', encoding='utf-8') as f: json.dump(modified_test_data, f, indent=2, ensure_ascii=False) run_status["healed_file_saved"] = True logger.info(f"Successfully saved healed test file: {json_file_path}") # Adjust final message if test passed after healing if run_status["status"] == "PASS": run_status["message"] = f"✅ Test passed with {run_status['healed_steps_count']} step(s) healed. Updated test file saved." except Exception as save_err: logger.error(f"Failed to save healed test file '{json_file_path}': {save_err}", exc_info=True) run_status["healed_file_saved"] = False # Add warning to message if save failed if run_status["status"] == "PASS": run_status["message"] += " (Warning: Failed to save healed selectors)" logger.info(f"Execution finished in {run_status['duration_seconds']:.2f} seconds. Status: {run_status['status']}") return run_status

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/GroundNG/QA-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server