# auth_agent.py (11.3 kB)
# File: record_auth_state_selectors.py
import time
import os
import logging
import getpass
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
from patchright.sync_api import Error as PlaywrightError, TimeoutError as PlaywrightTimeoutError
# Import necessary components from your project structure
from ..browser.browser_controller import BrowserController
from ..llm.llm_client import LLMClient # Assuming you have this initialized
from ..dom.views import DOMState # To type hint DOM state
# Configure root logging for this script. NOTE: this runs at import time, so
# importing the module also triggers basicConfig().
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Natural-language descriptions handed to the LLM (as `element_description`)
# so it can locate the login form controls in the page's DOM context.
USERNAME_FIELD_DESC = "the username input field"
PASSWORD_FIELD_DESC = "the password input field"
SUBMIT_BUTTON_DESC = "the login or submit button"
# Description of the element whose presence after submitting the form is
# taken as proof that the login succeeded.
LOGIN_SUCCESS_SELECTOR_DESC = "the logout button or link" # Description for verification element
# --- Output file path ---
# Default destination for the saved browser storage state; used as the default
# value of `auth_state_file` in record_selectors_and_save_auth_state.
AUTH_STATE_FILE = "auth_state.json"
# ---------------------
# --- Pydantic Schema for LLM Selector Response ---
# Response model passed to llm_client.generate_json() when asking the LLM for
# a selector; a successful call is expected to yield an instance of this class.
class LLMSelectorResponse(BaseModel):
    # CSS selector proposed by the LLM. The field is required (no default) but
    # may be null/None when the LLM could not identify a matching element.
    selector: Optional[str] = Field(..., description="The best CSS selector found for the described element, or null if not found/identifiable.")
    # Free-text justification from the LLM; it is logged next to the selector
    # (or next to the failure when no selector was returned).
    reasoning: str = Field(..., description="Explanation for the chosen selector or why none was found.")
# -----------------------------------------------
# --- Helper Function to Find Selector via LLM ---
def _build_selector_prompt(element_description: str, dom_context_str: str, current_url: str) -> str:
    """Render the prompt asking the LLM for a CSS selector for *element_description*."""
    # The template is deliberately flush-left so that no source indentation
    # leaks into the prompt. Code fences are plain ``` (no backslash): a
    # backslash-backtick is an invalid escape sequence and would also send a
    # stray backslash to the LLM.
    return f"""
You are an AI assistant identifying CSS selectors for web automation.
Based on the following HTML context and the element description, provide the most robust CSS selector.
**Current URL:** {current_url}
**Element to Find:** "{element_description}"
**HTML Context (Visible elements, interactive `[index]`, static `(Static)`):**
```html
{dom_context_str}
```
**Your Task:**
1. Analyze the HTML context to find the single element that best matches the description "{element_description}".
2. Provide the most stable and specific CSS selector for that element. Prioritize IDs, unique data attributes (like data-testid), or name attributes. Avoid relying solely on text or highly dynamic classes if possible.
3. If no suitable element is found, return null for the selector.
**Output Format:** Respond ONLY with a JSON object matching the following schema:
```json
{{
  "selector": "YOUR_SUGGESTED_CSS_SELECTOR_OR_NULL",
  "reasoning": "Explain your choice or why none was found."
}}
```
"""


def _validate_selector(page: Any, selector: str) -> bool:
    """Return True when *selector* matches at least one element on *page*."""
    try:
        count = len(page.query_selector_all(selector))
    except Exception as validate_err:
        # Covers malformed selectors as well as a page that has gone away.
        logger.error(f"❌ Validation ERROR for selector '{selector}': {validate_err}")
        return False

    if count == 0:
        logger.error(f"❌ Validation FAILED: Selector '{selector}' did not find any elements.")
        return False
    if count > 1:
        # Ambiguous but still usable: callers are expected to act on the first match.
        logger.warning(f"⚠️ Validation WARNING: Selector '{selector}' matched {count} elements. Using the first one.")
    else:
        logger.info(f"✅ Validation PASSED: Selector '{selector}' uniquely found the element.")
    return True


def find_element_selector_via_llm(
    llm_client: LLMClient,
    element_description: str,
    dom_state: Optional[DOMState],
    page: Any # Playwright Page object for validation
) -> Optional[str]:
    """
    Ask the LLM for a CSS selector matching a described element and validate it.

    The DOM context string is generated from ``dom_state.element_tree`` and sent
    to the LLM together with the current page URL. A returned selector is only
    accepted if it matches at least one element on ``page``.

    Args:
        llm_client: Client exposing ``generate_json(schema, prompt)``.
        element_description: Natural-language description of the target element.
        dom_state: DOM snapshot providing ``element_tree``; may be None.
        page: Page object used for ``url`` and ``query_selector_all``.

    Returns:
        The validated selector, or None when a prerequisite is missing, the LLM
        returns no selector or an error, validation fails, or any exception
        occurs (all failures are logged, none are raised).
    """
    if not llm_client:
        logger.error("LLMClient is not available.")
        return None
    if not dom_state or not dom_state.element_tree:
        logger.error(f"Cannot find selector for '{element_description}': DOM state is not available.")
        return None
    if not page:
        # Without a page the selector can never be validated, so the result
        # would be None anyway; bail out before paying for an LLM call.
        logger.error(f"Cannot find selector for '{element_description}': browser page is not available.")
        return None

    try:
        dom_context_str, _ = dom_state.element_tree.generate_llm_context_string(context_purpose='verification')
        prompt = _build_selector_prompt(element_description, dom_context_str, page.url)

        logger.debug(f"Sending prompt to LLM to find selector for: '{element_description}'")
        response_obj = llm_client.generate_json(LLMSelectorResponse, prompt)

        if isinstance(response_obj, str): # LLM Error string
            logger.error(f"LLM returned an error finding selector for '{element_description}': {response_obj}")
            return None
        if not isinstance(response_obj, LLMSelectorResponse):
            logger.error(f"Unexpected response type from LLM finding selector for '{element_description}': {type(response_obj)}")
            return None

        reasoning = response_obj.reasoning
        # Treat a whitespace-only selector the same as a missing one.
        selector = (response_obj.selector or "").strip()
        if not selector:
            logger.error(f"LLM could not find a selector for '{element_description}'. Reasoning: {reasoning}")
            return None

        logger.info(f"LLM suggested selector '{selector}' for '{element_description}'. Reasoning: {reasoning}")
        return selector if _validate_selector(page, selector) else None
    except Exception as e:
        logger.error(f"Error during LLM selector identification for '{element_description}': {e}", exc_info=True)
        return None
# --- End Helper Function ---
# --- Main Function ---
def record_selectors_and_save_auth_state(llm_client: LLMClient, login_url: str, auth_state_file: str = AUTH_STATE_FILE) -> bool:
    """
    Log in through a visible browser and persist the resulting session state.

    Prompts for credentials on the terminal, asks the LLM for the username,
    password and submit selectors on the login page, performs the login once
    the user confirms, waits for a logout element as proof of success, and
    writes the browser context's storage state to ``auth_state_file``.

    Args:
        llm_client: Client passed through to find_element_selector_via_llm.
        login_url: URL of the login page; an empty value aborts immediately.
        auth_state_file: Destination path for the saved storage state.

    Returns:
        True only if the storage state was written. Ordinary failures are
        logged and reported as False instead of being raised.
    """
    logger.info("--- Authentication State Generation (Recorder-Assisted Selectors) ---")
    if not login_url:
        logger.error("Login url not provided. Exiting...")
        return False

    # Get credentials before launching the browser so bad input costs nothing.
    try:
        username = input("Enter username (will be visible): ")
        if not username:
            raise ValueError("Username cannot be empty.")
        password = getpass.getpass(f"Enter password for '{username}' (input will be hidden): ")
        if not password:
            raise ValueError("Password cannot be empty.")
    except (EOFError, ValueError) as e:
        logger.error(f"\n❌ Input error: {e}. Aborting.")
        return False
    except Exception as e:
        logger.error(f"\n❌ Error reading input: {e}")
        return False

    logger.info("Initializing BrowserController (visible browser)...")
    # Must run non-headless for user interaction/visibility AND selector validation
    browser_controller = BrowserController(headless=False)
    final_success = False
    try:
        browser_controller.start()
        page = browser_controller.page
        if not page:
            raise RuntimeError("Failed to initialize browser page.")

        logger.info(f"Navigating browser to login page: {login_url}")
        browser_controller.goto(login_url)

        logger.info("Attempting to identify login form selectors using LLM...")
        # Give the page a moment to settle before getting DOM
        time.sleep(1)
        dom_state = browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1)

        # Resolve the three form controls in order; stop at the first miss so
        # no credentials are typed into a partially identified form.
        wanted = (
            ("username", USERNAME_FIELD_DESC),
            ("password", PASSWORD_FIELD_DESC),
            ("submit", SUBMIT_BUTTON_DESC),
        )
        selectors: Dict[str, str] = {}
        for key, description in wanted:
            found = find_element_selector_via_llm(llm_client, description, dom_state, page)
            if not found:
                logger.error(f"❌ Could not identify {description}. Aborting before login.")
                return False # `finally` below still closes the browser
            selectors[key] = found
        username_selector = selectors["username"]
        password_selector = selectors["password"]
        submit_selector = selectors["submit"]

        logger.info("Successfully identified all necessary login selectors.")
        logger.info(f"  Username Field: '{username_selector}'")
        logger.info(f"  Password Field: '{password_selector}'")
        logger.info(f"  Submit Button:  '{submit_selector}'")
        input("\n-> Press Enter to proceed with login using these selectors and your credentials...")

        # --- Execute Login (using identified selectors and secure credentials) ---
        logger.info(f"Typing username into: {username_selector}")
        browser_controller.type(username_selector, username)
        time.sleep(0.3)
        logger.info(f"Typing password into: {password_selector}")
        browser_controller.type(password_selector, password)
        time.sleep(0.3)
        logger.info(f"Clicking submit button: {submit_selector}")
        browser_controller.click(submit_selector)

        # --- Verify Login Success ---
        logger.info("Attempting to identify login success element selector using LLM...")
        # Re-fetch DOM state after potential page change/update
        time.sleep(1) # Wait briefly for page update
        post_login_dom_state = browser_controller.get_structured_dom(highlight_all_clickable_elements=False, viewport_expansion=-1)
        login_success_selector = find_element_selector_via_llm(llm_client, LOGIN_SUCCESS_SELECTOR_DESC, post_login_dom_state, page)
        if not login_success_selector:
            logger.error("❌ Login Verification Failed: Could not identify the confirmation element via LLM.")
            raise RuntimeError("Failed to identify login confirmation element.") # Treat as failure

        logger.info(f"Waiting for login confirmation element ({login_success_selector}) to appear...")
        try:
            # `.first` keeps this working when the selector matched several
            # elements (the helper accepts that case); a bare locator would
            # raise a strict-mode violation instead of waiting.
            page.locator(login_success_selector).first.wait_for(state="visible", timeout=15000)
            logger.info("✅ Login successful! Confirmation element found.")
        except PlaywrightTimeoutError:
            logger.error(f"❌ Login Failed: Confirmation element '{login_success_selector}' did not appear within timeout.")
            raise # Re-raise to be caught by the main handler

        # --- Save the storage state ---
        if browser_controller.context:
            logger.info(f"Saving authentication state to {auth_state_file}...")
            # Make sure the destination directory exists before writing.
            state_dir = os.path.dirname(auth_state_file)
            if state_dir:
                os.makedirs(state_dir, exist_ok=True)
            browser_controller.context.storage_state(path=auth_state_file)
            logger.info("✅ Successfully saved authentication state.")
            final_success = True
        else:
            logger.error("❌ Cannot save state: Browser context is not available.")
    except (PlaywrightError, ValueError, RuntimeError) as e:
        logger.error(f"❌ An error occurred: {type(e).__name__}: {e}", exc_info=False)
        if browser_controller.page:
            ts = time.strftime("%Y%m%d_%H%M%S")
            fail_path = f"output/record_auth_error_{ts}.png"
            # The screenshot is diagnostic only: never let a failure here
            # escape the handler and replace the False result with a crash.
            try:
                os.makedirs(os.path.dirname(fail_path), exist_ok=True)
                browser_controller.save_screenshot(fail_path)
                logger.info(f"Saved error screenshot to: {fail_path}")
            except Exception as shot_err:
                logger.warning(f"Could not save error screenshot to {fail_path}: {shot_err}")
    except Exception as e:
        logger.critical(f"❌ An unexpected critical error occurred: {e}", exc_info=True)
    finally:
        logger.info("Closing browser...")
        # A failing close must not mask the outcome computed above.
        try:
            browser_controller.close()
        except Exception as close_err:
            logger.warning(f"Error while closing browser: {close_err}")
    return final_success
# --- End Main Function ---