from dataclasses import dataclass, field
from typing import Any
from bubus import BaseEvent
from pydantic import BaseModel
from browser_use.dom.views import DOMInteractedElement, SerializedDOMState
# Known placeholder image data for about:blank pages - a 4x4 white PNG.
# The value is the base64 text of the image (it starts with the encoded PNG signature),
# not raw bytes.
PLACEHOLDER_4PX_SCREENSHOT = (
    'iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAFElEQVR4nGP8//8/AwwwMSAB3BwAlm4DBfIlvvkAAAAASUVORK5CYII='
)
# Pydantic
class TabInfo(BaseModel):
    """Represents information about a browser tab.

    ``page_id``, ``url`` and ``title`` are required; every other field is
    optional and defaults to ``None``.
    """

    # Original fields
    page_id: int
    url: str
    title: str
    parent_page_id: int | None = None  # parent page that contains this popup or cross-origin iframe

    # Additional fields for compatibility with dictionary format
    id: str | None = None  # tab ID string like "tab_0"
    index: int | None = None  # tab index
class PageInfo(BaseModel):
    """Comprehensive page size and scroll information.

    All fields are required integers; none has a default.
    """

    # Current viewport dimensions
    viewport_width: int
    viewport_height: int

    # Total page dimensions
    page_width: int
    page_height: int

    # Current scroll position
    scroll_x: int
    scroll_y: int

    # Calculated scroll information
    # NOTE(review): presumably the scrollable distance remaining in each direction,
    # derived from the fields above - confirm against the code that builds this model.
    pixels_above: int
    pixels_below: int
    pixels_left: int
    pixels_right: int

    # Page statistics are now computed dynamically instead of stored
@dataclass
class BrowserStateSummary:
    """The summary of the browser's current state designed for an LLM to process.

    ``dom_state``, ``url``, ``title`` and ``tabs`` are required (and positional,
    in that order); all remaining fields have defaults.
    """

    # provided by SerializedDOMState:
    dom_state: SerializedDOMState

    url: str
    title: str
    tabs: list[TabInfo]
    # repr=False keeps the screenshot string out of the generated dataclass repr
    screenshot: str | None = field(default=None, repr=False)
    page_info: PageInfo | None = None  # Enhanced page information

    # Keep legacy fields for backward compatibility
    # (same names as the corresponding PageInfo fields)
    pixels_above: int = 0
    pixels_below: int = 0
    # default_factory gives every instance its own list instead of a shared one
    browser_errors: list[str] = field(default_factory=list)
    is_pdf_viewer: bool = False  # Whether the current page is a PDF viewer
    recent_events: str | None = None  # Text summary of recent browser events
@dataclass
class BrowserStateHistory:
    """The summary of the browser's state at a past point in time to use in LLM message history.

    Only the path to the screenshot is stored; the image itself is read from
    disk on demand by ``get_screenshot``.
    """

    url: str
    title: str
    tabs: list[TabInfo]
    interacted_element: list[DOMInteractedElement | None] | list[None]
    screenshot_path: str | None = None

    def get_screenshot(self) -> str | None:
        """Load the screenshot from disk and return it as a base64 string.

        Returns:
            The base64 text of the file's bytes, or ``None`` when no path is
            set or the file cannot be read (missing, a directory, permission
            denied, malformed path, ...).
        """
        if not self.screenshot_path:
            return None

        import base64
        from pathlib import Path

        # Read directly instead of checking exists() first: a single call avoids
        # the check-then-open race and still maps every I/O failure to None.
        try:
            screenshot_data = Path(self.screenshot_path).read_bytes()
        except (OSError, ValueError):
            # OSError: missing/unreadable file; ValueError: e.g. embedded NUL in the path.
            return None
        return base64.b64encode(screenshot_data).decode('utf-8')

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this history entry.

        Tabs are dumped with ``model_dump()``, interacted elements with their
        own ``to_dict()`` (falsy entries become ``None``); the other fields are
        copied as-is. Key order is preserved from the original implementation.
        """
        return {
            'tabs': [tab.model_dump() for tab in self.tabs],
            'screenshot_path': self.screenshot_path,
            'interacted_element': [el.to_dict() if el else None for el in self.interacted_element],
            'url': self.url,
            'title': self.title,
        }
class BrowserError(Exception):
    """Base class for all browser errors.

    Carries a human-readable ``message``, an optional ``details`` mapping and
    the event (if any) that was being handled when the error occurred.
    """

    message: str
    details: dict[str, Any] | None = None
    while_handling_event: BaseEvent[Any] | None = None

    def __init__(self, message: str, details: dict[str, Any] | None = None, event: BaseEvent[Any] | None = None):
        """Store the message, details and event; only ``message`` goes into ``Exception.args``."""
        super().__init__(message)
        self.message = message
        self.details = details
        self.while_handling_event = event

    def __str__(self) -> str:
        """Render the message with the event, plus the details when they are non-empty."""
        if not self.details:
            return f'{self.message} (while handling event: {self.while_handling_event})'
        return f'{self.message} ({self.details}) during: {self.while_handling_event}'
class URLNotAllowedError(BrowserError):
    """Error raised when a URL is not allowed.

    Subclass of ``BrowserError``, so handlers that catch ``BrowserError`` also
    catch this error.
    """