Browser Use Heroku

prompts.py•11.5 KiB

import importlib.resources
from datetime import datetime
from typing import TYPE_CHECKING, Literal, Optional

from browser_use.llm.messages import ContentPartImageParam, ContentPartTextParam, ImageURL, SystemMessage, UserMessage
from browser_use.observability import observe_debug
from browser_use.utils import is_new_tab_page

if TYPE_CHECKING:
    from browser_use.agent.views import AgentStepInfo
    from browser_use.browser.views import BrowserStateSummary
    from browser_use.filesystem.file_system import FileSystem


class SystemPrompt:
    """Build the agent's system message once, at construction time.

    The prompt is either the caller-supplied override or a packaged markdown
    template formatted with ``max_actions``; an optional extension string is
    appended on a new line in both cases.
    """

    def __init__(
        self,
        action_description: str,
        max_actions_per_step: int = 10,
        override_system_message: str | None = None,
        extend_system_message: str | None = None,
        use_thinking: bool = True,
        flash_mode: bool = False,
    ):
        """Assemble and store the system message.

        Args:
            action_description: Stored as ``default_action_description``.
            max_actions_per_step: Substituted for ``{max_actions}`` in the template.
            override_system_message: When truthy, used verbatim instead of the template.
            extend_system_message: When truthy, appended to the prompt after a newline.
            use_thinking: Selects the thinking vs. no-thinking template.
            flash_mode: When true, selects the flash template (takes precedence
                over ``use_thinking``).

        Raises:
            RuntimeError: If the template has to be loaded and loading fails.
        """
        self.default_action_description = action_description
        self.max_actions_per_step = max_actions_per_step
        self.use_thinking = use_thinking
        self.flash_mode = flash_mode

        if override_system_message:
            prompt = override_system_message
        else:
            self._load_prompt_template()
            prompt = self.prompt_template.format(max_actions=self.max_actions_per_step)

        if extend_system_message:
            prompt += f'\n{extend_system_message}'

        self.system_message = SystemMessage(content=prompt, cache=True)

    def _load_prompt_template(self) -> None:
        """Load the prompt template from the markdown file.

        Sets ``self.prompt_template`` to the file's text.

        Raises:
            RuntimeError: If the template cannot be read; the original
                exception is attached as ``__cause__``.
        """
        try:
            # Choose the appropriate template based on flash_mode and use_thinking settings
            if self.flash_mode:
                template_filename = 'system_prompt_flash.md'
            elif self.use_thinking:
                template_filename = 'system_prompt.md'
            else:
                template_filename = 'system_prompt_no_thinking.md'

            # This works both in development and when installed as a package
            with importlib.resources.files('browser_use.agent').joinpath(template_filename).open('r', encoding='utf-8') as f:
                self.prompt_template = f.read()
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible in tracebacks.
            raise RuntimeError(f'Failed to load system prompt template: {e}') from e

    def get_system_message(self) -> SystemMessage:
        """
        Get the system prompt for the agent.

        Returns:
            SystemMessage: Formatted system prompt
        """
        return self.system_message

    # Functions:
    # {self.default_action_description}

    # Example:
    # {self.example_response()}
    # Your AVAILABLE ACTIONS:
    # {self.default_action_description}


class AgentMessagePrompt:
    """Render the per-step user message from agent, file-system and browser state."""

    vision_detail_level: Literal['auto', 'low', 'high']

    def __init__(
        self,
        browser_state_summary: 'BrowserStateSummary',
        file_system: 'FileSystem',
        agent_history_description: str | None = None,
        read_state_description: str | None = None,
        task: str | None = None,
        include_attributes: list[str] | None = None,
        step_info: Optional['AgentStepInfo'] = None,
        page_filtered_actions: str | None = None,
        max_clickable_elements_length: int = 40000,
        sensitive_data: str | None = None,
        available_file_paths: list[str] | None = None,
        screenshots: list[str] | None = None,
        vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
        include_recent_events: bool = False,
    ):
        """Store the inputs used later to render the message.

        Args:
            browser_state_summary: Browser snapshot; must be truthy.
            file_system: Queried for todo contents and a description when truthy.
            agent_history_description: Text placed inside ``<agent_history>``.
            read_state_description: Text placed inside ``<read_state>`` when non-blank.
            task: Text placed inside ``<user_request>``.
            include_attributes: Forwarded to ``dom_state.llm_representation``.
            step_info: Current step counters; omitted from the output when ``None``.
            page_filtered_actions: Text placed inside ``<page_specific_actions>``.
            max_clickable_elements_length: Character cap applied to the element listing.
            sensitive_data: Text placed inside ``<sensitive_data>``.
            available_file_paths: Paths listed one per line in ``<available_file_paths>``.
            screenshots: Strings embedded as ``data:image/png;base64,...`` URLs;
                ``None`` becomes an empty list.
            vision_detail_level: ``detail`` value passed to each ``ImageURL``.
            include_recent_events: Whether to include ``recent_events`` in the browser state.
        """
        self.browser_state: 'BrowserStateSummary' = browser_state_summary
        self.file_system: 'FileSystem | None' = file_system
        self.agent_history_description: str | None = agent_history_description
        self.read_state_description: str | None = read_state_description
        self.task: str | None = task
        self.include_attributes = include_attributes
        self.step_info = step_info
        self.page_filtered_actions: str | None = page_filtered_actions
        self.max_clickable_elements_length: int = max_clickable_elements_length
        self.sensitive_data: str | None = sensitive_data
        self.available_file_paths: list[str] | None = available_file_paths
        self.screenshots = screenshots or []
        self.vision_detail_level = vision_detail_level
        self.include_recent_events = include_recent_events
        # NOTE(review): assert is stripped under `python -O`; kept as-is so the
        # exception type seen by existing callers does not change.
        assert self.browser_state

    @observe_debug(ignore_input=True, ignore_output=True, name='_get_browser_state_description')
    def _get_browser_state_description(self) -> str:
        """Return the text placed inside ``<browser_state>``.

        Contains the current-tab marker (only when exactly one tab matches the
        current URL and title), the tab list, optional page metrics, optional
        recent events, an optional PDF notice, and the (possibly truncated)
        element listing framed by scroll hints or start/end-of-page markers.
        """
        state = self.browser_state

        elements_text = state.dom_state.llm_representation(include_attributes=self.include_attributes)
        if len(elements_text) > self.max_clickable_elements_length:
            elements_text = elements_text[: self.max_clickable_elements_length]
            truncated_text = f' (truncated to {self.max_clickable_elements_length} characters)'
        else:
            truncated_text = ''

        has_content_above = (state.pixels_above or 0) > 0
        has_content_below = (state.pixels_below or 0) > 0

        # Enhanced page information for the model.
        # Page statistics are computed once here and reused for the scroll hints below.
        pi = state.page_info
        page_info_text = ''
        above_pages_text = ''
        below_pages_text = ''
        if pi:
            # A non-positive viewport height would make the ratios meaningless (or divide by zero).
            has_viewport = pi.viewport_height > 0
            pages_above = pi.pixels_above / pi.viewport_height if has_viewport else 0
            pages_below = pi.pixels_below / pi.viewport_height if has_viewport else 0
            total_pages = pi.page_height / pi.viewport_height if has_viewport else 0
            # max(..., 1) guards the division when the page is not taller than the viewport.
            current_page_position = pi.scroll_y / max(pi.page_height - pi.viewport_height, 1)
            page_info_text = f'Page info: {pi.viewport_width}x{pi.viewport_height}px viewport, {pi.page_width}x{pi.page_height}px total page size, {pages_above:.1f} pages above, {pages_below:.1f} pages below, {total_pages:.1f} total pages, at {current_page_position:.0%} of page'
            above_pages_text = f' ({pages_above:.1f} pages)'
            below_pages_text = f' ({pages_below:.1f} pages)'

        scroll_hint = ' - scroll to see more or extract structured data if you are looking for specific information ...'

        if elements_text != '':
            if has_content_above:
                elements_text = f'... {state.pixels_above} pixels above{above_pages_text}{scroll_hint}\n{elements_text}'
            else:
                elements_text = f'[Start of page]\n{elements_text}'

            if has_content_below:
                elements_text = f'{elements_text}\n... {state.pixels_below} pixels below{below_pages_text}{scroll_hint}'
            else:
                elements_text = f'{elements_text}\n[End of page]'
        else:
            elements_text = 'empty page'

        # Find tabs that match both URL and title to identify current tab more reliably
        current_tab_candidates = [tab.page_id for tab in state.tabs if tab.url == state.url and tab.title == state.title]

        # If we have exactly one match, mark it as current
        # Otherwise, don't mark any tab as current to avoid confusion
        current_tab_index = current_tab_candidates[0] if len(current_tab_candidates) == 1 else None

        # Titles are clipped to 30 characters in the listing.
        tabs_text = ''.join(f'Tab {tab.page_id}: {tab.url} - {tab.title[:30]}\n' for tab in state.tabs)

        current_tab_text = f'Current tab: {current_tab_index}' if current_tab_index is not None else ''

        # Check if current page is a PDF viewer and add appropriate message
        pdf_message = ''
        if state.is_pdf_viewer:
            pdf_message = (
                'PDF viewer cannot be rendered. In this page, DO NOT use the extract_structured_data action as PDF content cannot be rendered. '
                'Use the read_file action on the downloaded PDF in available_file_paths to read the full content.\n\n'
            )

        # Add recent events if available and requested
        recent_events_text = ''
        if self.include_recent_events and state.recent_events:
            recent_events_text = f'Recent browser events: {state.recent_events}\n'

        browser_state = (
            f'{current_tab_text}\n'
            'Available tabs:\n'
            f'{tabs_text}\n'
            f'{page_info_text}\n'
            f'{recent_events_text}{pdf_message}Interactive elements from top layer of the current page inside the viewport{truncated_text}:\n'
            f'{elements_text}\n'
        )
        return browser_state

    def _get_agent_state_description(self) -> str:
        """Return the text placed inside ``<agent_state>``.

        Always contains the user request, file-system description, todo
        contents and step info (step counter when available, plus the current
        local date and time). Sensitive data and available file paths are
        appended only when set.
        """
        if self.step_info:
            # step_number is shown 1-based.
            step_info_description = f'Step {self.step_info.step_number + 1} of {self.step_info.max_steps} max possible steps\n'
        else:
            step_info_description = ''

        time_str = datetime.now().strftime('%Y-%m-%d %H:%M')
        step_info_description += f'Current date and time: {time_str}'

        _todo_contents = self.file_system.get_todo_contents() if self.file_system else ''
        # Truthiness check also covers a None return, which len() would reject.
        if not _todo_contents:
            _todo_contents = '[Current todo.md is empty, fill it with your plan when applicable]'

        file_system_description = self.file_system.describe() if self.file_system else 'No file system available'

        agent_state = (
            '\n'
            '<user_request>\n'
            f'{self.task}\n'
            '</user_request>\n'
            '<file_system>\n'
            f'{file_system_description}\n'
            '</file_system>\n'
            '<todo_contents>\n'
            f'{_todo_contents}\n'
            '</todo_contents>\n'
        )
        if self.sensitive_data:
            agent_state += f'<sensitive_data>\n{self.sensitive_data}\n</sensitive_data>\n'

        agent_state += f'<step_info>\n{step_info_description}\n</step_info>\n'
        if self.available_file_paths:
            agent_state += '<available_file_paths>\n' + '\n'.join(self.available_file_paths) + '\n</available_file_paths>\n'
        return agent_state

    @observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
    def get_user_message(self, use_vision: bool = True) -> UserMessage:
        """Get complete state as a single cached message.

        Args:
            use_vision: When ``True`` and screenshots are present, the message
                content is a list of text and image parts; otherwise it is the
                plain state description string.

        Returns:
            UserMessage: Built with ``cache=True``.
        """
        # Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab
        if (
            is_new_tab_page(self.browser_state.url)
            and self.step_info is not None
            and self.step_info.step_number == 0
            and len(self.browser_state.tabs) == 1
        ):
            use_vision = False

        # Build complete state description
        state_description = (
            '<agent_history>\n'
            + (self.agent_history_description.strip('\n') if self.agent_history_description else '')
            + '\n</agent_history>\n'
        )
        state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
        state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'

        # Only add read_state if it has content
        read_state_description = self.read_state_description.strip('\n').strip() if self.read_state_description else ''
        if read_state_description:
            state_description += '<read_state>\n' + read_state_description + '\n</read_state>\n'

        if self.page_filtered_actions:
            state_description += '<page_specific_actions>\n'
            state_description += self.page_filtered_actions + '\n'
            state_description += '</page_specific_actions>\n'

        if use_vision is True and self.screenshots:
            # Start with text description
            content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=state_description)]

            # Add screenshots with labels; only the last one is labelled as current.
            last_index = len(self.screenshots) - 1
            for i, screenshot in enumerate(self.screenshots):
                if i == last_index:
                    label = 'Current screenshot:'
                else:
                    # Use simple, accurate labeling since we don't have actual step timing info
                    label = 'Previous screenshot:'

                # Add label as text content
                content_parts.append(ContentPartTextParam(text=label))

                # Add the screenshot
                content_parts.append(
                    ContentPartImageParam(
                        image_url=ImageURL(
                            url=f'data:image/png;base64,{screenshot}',
                            media_type='image/png',
                            detail=self.vision_detail_level,
                        ),
                    )
                )

            return UserMessage(content=content_parts, cache=True)

        return UserMessage(content=state_description, cache=True)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dsouza-anush/browser-use-heroku'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

prompts.py•11.5 KiB