views.py•18.1 kB
# /src/dom/views.py
from dataclasses import dataclass, field, KW_ONLY # Use field for default_factory
from functools import cached_property
from typing import TYPE_CHECKING, Dict, List, Optional, Union, Literal, Tuple
import re # Added for selector generation
# Use relative imports if within the same package structure
from .history.view import CoordinateSet, HashedDomElement, ViewportInfo # Adjusted import
# Placeholder decorator if not using utils.time_execution_sync
def time_execution_sync(label):
def decorator(func):
def wrapper(*args, **kwargs):
# Basic logging
# logger.debug(f"Executing {label}...")
result = func(*args, **kwargs)
# logger.debug(f"Finished {label}.")
return result
return wrapper
return decorator
# Avoid circular import issues
if TYPE_CHECKING:
# This creates a forward reference issue if DOMElementNode itself is in this file.
# We need to define DOMElementNode before DOMBaseNode if DOMBaseNode references it.
# Let's adjust the structure slightly or use string hints.
pass # Forward reference handled by structure/string hints below
@dataclass(frozen=False)
class DOMBaseNode:
# Parent needs to be Optional and potentially use string hint if defined later
parent: Optional['DOMElementNode'] = None # Default to None
is_visible: bool = False # Provide default
@dataclass(frozen=False)
class DOMTextNode(DOMBaseNode):
# --- Field ordering within subclass matters less with KW_ONLY ---
# --- but arguments after the marker MUST be passed by keyword ---
_ : KW_ONLY # <--- Add KW_ONLY marker
# Fields defined in this class (now keyword-only)
text: str
type: str = 'TEXT_NODE'
def has_parent_with_highlight_index(self) -> bool:
current = self.parent
while current is not None:
if current.highlight_index is not None:
return True
current = current.parent
return False
# These visibility checks might be less useful now that JS handles it, but keep for now
def is_parent_in_viewport(self) -> bool:
if self.parent is None:
return False
return self.parent.is_in_viewport
def is_parent_top_element(self) -> bool:
if self.parent is None:
return False
return self.parent.is_top_element
# Define DOMElementNode *before* DOMBaseNode references it fully, or ensure Optional['DOMElementNode'] works
@dataclass(frozen=False)
class DOMElementNode(DOMBaseNode):
"""
Represents an element node in the processed DOM tree.
Includes information about interactivity, visibility, and structure.
"""
tag_name: str = ""
xpath: str = ""
attributes: Dict[str, str] = field(default_factory=dict)
# Use Union with string hint for forward reference if needed, or ensure DOMTextNode is defined first
children: List[Union['DOMElementNode', DOMTextNode]] = field(default_factory=list)
is_interactive: bool = False
is_top_element: bool = False
is_in_viewport: bool = False
shadow_root: bool = False
highlight_index: Optional[int] = None
page_coordinates: Optional[CoordinateSet] = None
viewport_coordinates: Optional[CoordinateSet] = None
viewport_info: Optional[ViewportInfo] = None
css_selector: Optional[str] = None # Added field for robust selector
def __repr__(self) -> str:
# ... (repr logic remains the same) ...
tag_str = f'<{self.tag_name}'
for key, value in self.attributes.items():
# Shorten long values in repr
value_repr = value if len(value) < 50 else value[:47] + '...'
tag_str += f' {key}="{value_repr}"'
tag_str += '>'
extras = []
if self.is_interactive: extras.append('interactive')
if self.is_top_element: extras.append('top')
if self.is_in_viewport: extras.append('in-viewport')
if self.shadow_root: extras.append('shadow-root')
if self.highlight_index is not None: extras.append(f'highlight:{self.highlight_index}')
if self.css_selector: extras.append(f'css:"{self.css_selector[:50]}..."') # Show generated selector
if extras:
tag_str += f' [{", ".join(extras)}]'
return tag_str
@cached_property
def hash(self) -> HashedDomElement:
""" Lazily computes and caches the hash of the element using HistoryTreeProcessor. """
# Use relative import within the method to avoid top-level circular dependencies
from .history.service import HistoryTreeProcessor
# Ensure HistoryTreeProcessor._hash_dom_element exists and is static or accessible
return HistoryTreeProcessor._hash_dom_element(self)
def get_all_text_till_next_clickable_element(self, max_depth: int = -1) -> str:
"""
Recursively collects all text content within this element, stopping descent
if a nested interactive element (with a highlight_index) is encountered.
"""
text_parts = []
def collect_text(node: Union['DOMElementNode', DOMTextNode], current_depth: int) -> None:
if max_depth != -1 and current_depth > max_depth:
return
# Check if the node itself is interactive and not the starting node
if isinstance(node, DOMElementNode) and node is not self and node.highlight_index is not None:
# Stop recursion down this path if we hit an interactive element
return
if isinstance(node, DOMTextNode):
# Only include visible text nodes
if node.is_visible:
text_parts.append(node.text)
elif isinstance(node, DOMElementNode):
# Recursively process children
for child in node.children:
collect_text(child, current_depth + 1)
# Start collection from the element itself
collect_text(self, 0)
# Join collected parts and clean up whitespace
return '\n'.join(filter(None, (tp.strip() for tp in text_parts))).strip()
@time_execution_sync('--clickable_elements_to_string')
def generate_llm_context_string(self,
include_attributes: Optional[List[str]] = None,
max_static_elements_action: int = 50, # Max static elements for action context
max_static_elements_verification: int = 150, # Allow more static elements for verification context
context_purpose: Literal['action', 'verification'] = 'action' # New parameter
) -> Tuple[str, Dict[str, 'DOMElementNode']]:
"""
Generates a string representation of VISIBLE elements tree for LLM context.
Clearly distinguishes interactive elements (with index) from static ones.
Assigns temporary IDs to static elements for later lookup.
Args:
include_attributes: List of specific attributes to include. If None, uses defaults.
max_static_elements_action: Max static elements for 'action' purpose.
max_static_elements_verification: Max static elements for 'verification' purpose.
context_purpose: 'action' (concise) or 'verification' (more inclusive static).
Returns:
Tuple containing:
- The formatted context string.
- A dictionary mapping temporary static IDs (e.g., "s1", "s2")
to the corresponding DOMElementNode objects.
"""
formatted_lines = []
processed_node_ids = set()
static_element_count = 0
nodes_processed_count = 0
static_id_counter = 1 # Counter for temporary static IDs
temp_static_id_map: Dict[str, 'DOMElementNode'] = {} # Map temporary ID to node
max_static_elements = max_static_elements_verification if context_purpose == 'verification' else max_static_elements_action
def get_direct_visible_text(node: DOMElementNode, max_len=10000) -> str:
"""Gets text directly within this node, ignoring children elements."""
texts = []
for child in node.children:
if isinstance(child, DOMTextNode) and child.is_visible:
texts.append(child.text.strip())
full_text = ' '.join(filter(None, texts))
if len(full_text) > max_len:
return full_text[:max_len-3] + "..."
return full_text
def get_parent_hint(node: DOMElementNode) -> Optional[str]:
"""Gets a hint string for the nearest identifiable parent."""
parent = node.parent
if isinstance(parent, DOMElementNode):
parent_attrs = parent.attributes
hint_parts = []
if parent_attrs.get('id'):
hint_parts.append(f"id=\"{parent_attrs['id'][:20]}\"") # Limit length
if parent_attrs.get('data-testid'):
hint_parts.append(f"data-testid=\"{parent_attrs['data-testid'][:20]}\"")
# Add class hint only if specific? Maybe too noisy. Start with id/testid.
# if parent_attrs.get('class'):
# stable_classes = [c for c in parent_attrs['class'].split() if len(c) > 3 and not c.isdigit()]
# if stable_classes: hint_parts.append(f"class=\"{stable_classes[0][:15]}...\"") # Show first stable class
if hint_parts:
return f"(inside: <{parent.tag_name} {' '.join(hint_parts)}>)"
return None
def process_node(node: Union['DOMElementNode', DOMTextNode], depth: int) -> None:
nonlocal static_element_count, nodes_processed_count, static_id_counter # Allow modification
# Skip if already processed or not an element
if not isinstance(node, DOMElementNode): return
nodes_processed_count += 1
node_id = id(node)
if node_id in processed_node_ids: return
processed_node_ids.add(node_id)
is_node_visible = node.is_visible
visibility_marker = "" if is_node_visible else " (Not Visible)"
should_add_current_node = False
line_to_add = ""
is_interactive = node.highlight_index is not None
temp_static_id_assigned = None # Track if ID was assigned to this node
indent = ' ' * depth
# --- Attribute Extraction (Common logic) ---
attributes_to_show = {}
default_attrs = ['id', 'name', 'class', 'aria-label', 'placeholder', 'role', 'type', 'value', 'title', 'alt', 'href', 'data-testid', 'data-value']
attrs_to_check = include_attributes if include_attributes else default_attrs
extract_attrs_for_this_node = is_interactive or (context_purpose == 'verification')
if extract_attrs_for_this_node:
for attr_key in attrs_to_check:
if attr_key in node.attributes and node.attributes[attr_key] is not None: # Check for not None
# Simple check to exclude extremely long class lists for brevity, unless it's ID/testid
if attr_key == 'class' and len(node.attributes[attr_key]) > 20 and context_purpose == 'action':
attributes_to_show[attr_key] = node.attributes[attr_key][:97] + "..."
else:
attributes_to_show[attr_key] = node.attributes[attr_key]
attrs_str = ""
if attributes_to_show:
parts = []
for key, value in attributes_to_show.items():
value_str = str(value) # Ensure it's a string
# Limit length for display
display_value = value_str if len(value_str) < 50 else value_str[:47] + '...'
# *** CORRECT HTML ESCAPING for attribute value strings ***
display_value = display_value.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"')
parts.append(f'{key}="{display_value}"')
attrs_str = " ".join(parts)
# --- Format line based on Interactive vs. Static ---
if is_interactive:
# == INTERACTIVE ELEMENT == (Always include)
text_content = node.get_all_text_till_next_clickable_element()
text_content = ' '.join(text_content.split()) if text_content else ""
# Truncate long text for display
if len(text_content) > 150: text_content = text_content[:147] + "..."
line_to_add = f"{indent}[{node.highlight_index}]<{node.tag_name}"
if attrs_str: line_to_add += f" {attrs_str}"
if text_content: line_to_add += f">{text_content}</{node.tag_name}>"
else: line_to_add += " />"
line_to_add += visibility_marker
should_add_current_node = True
elif static_element_count < max_static_elements:
# == VISIBLE STATIC ELEMENT ==
text_content = get_direct_visible_text(node)
include_this_static = False
# Determine if static node is relevant for verification
if context_purpose == 'verification':
common_static_tags = {'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'span', 'div', 'li', 'label', 'td', 'th', 'strong', 'em', 'dt', 'dd'}
# Include if common tag OR has text OR *has attributes calculated in attrs_str*
if node.tag_name in common_static_tags or text_content or attrs_str:
include_this_static = True
if not text_content:
include_this_static = False
if include_this_static:
# --- Assign temporary static ID ---
current_static_id = f"s{static_id_counter}"
temp_static_id_map[current_static_id] = node
temp_static_id_assigned = current_static_id # Mark that ID was assigned
static_id_counter += 1
# *** Start building the line ***
line_to_add = f"{indent}<{node.tag_name}"
# *** CRUCIAL: Add the calculated attributes string ***
if attrs_str:
line_to_add += f" {attrs_str}"
# --- Add the static ID attribute to the string ---
line_to_add += f' data-static-id="{current_static_id}"'
# *** Add the static marker ***
line_to_add += " (Static)"
line_to_add += visibility_marker
# *** Add parent hint ONLY if element lacks key identifiers ***
node_attrs = node.attributes # Use original attributes for this check
has_key_identifier = node_attrs.get('id') or node_attrs.get('data-testid') or node_attrs.get('name')
if not has_key_identifier:
parent_hint = get_parent_hint(node)
if parent_hint:
line_to_add += f" {parent_hint}"
# *** Add text content and close tag ***
if text_content:
line_to_add += f">{text_content}</{node.tag_name}>"
else:
line_to_add += " />"
should_add_current_node = True
static_element_count += 1
# --- Add the formatted line if needed ---
if should_add_current_node:
formatted_lines.append(line_to_add)
# logger.debug(f"Added line: {line_to_add}") # Optional debug
# --- ALWAYS Recurse into children (unless static limit hit) ---
# We recurse even if the parent wasn't added, because children might be visible/interactive
if static_element_count >= max_static_elements:
# Stop recursing down static branches if limit is hit
pass
else:
for child in node.children:
# Pass DOMElementNode or DOMTextNode directly
process_node(child, depth + 1)
# Start processing from the root element
process_node(self, 0)
# logger.debug(f"Finished generate_llm_context_string. Processed {nodes_processed_count} nodes. Added {len(formatted_lines)} lines.") # Log summary
output_str = '\n'.join(formatted_lines)
if static_element_count >= max_static_elements:
output_str += f"\n{ ' ' * 0 }... (Static element list truncated after {max_static_elements} entries)"
return output_str, temp_static_id_map
def get_file_upload_element(self, check_siblings: bool = True) -> Optional['DOMElementNode']:
# Check if current element is a file input
if self.tag_name == 'input' and self.attributes.get('type') == 'file':
return self
# Check children
for child in self.children:
if isinstance(child, DOMElementNode):
result = child.get_file_upload_element(check_siblings=False)
if result:
return result
# Check siblings only for the initial call
if check_siblings and self.parent:
for sibling in self.parent.children:
if sibling is not self and isinstance(sibling, DOMElementNode):
result = sibling.get_file_upload_element(check_siblings=False)
if result:
return result
return None
# Type alias for the selector map
SelectorMap = Dict[int, DOMElementNode]
@dataclass
class DOMState:
"""Holds the state of the processed DOM at a point in time."""
element_tree: DOMElementNode
selector_map: SelectorMap