Browser Use Heroku

gif.py•11.8 KiB

from __future__ import annotations import base64 import io import logging import os import platform from typing import TYPE_CHECKING from browser_use.agent.views import AgentHistoryList from browser_use.browser.views import PLACEHOLDER_4PX_SCREENSHOT from browser_use.config import CONFIG if TYPE_CHECKING: from PIL import Image, ImageFont logger = logging.getLogger(__name__) def decode_unicode_escapes_to_utf8(text: str) -> str: """Handle decoding any unicode escape sequences embedded in a string (needed to render non-ASCII languages like chinese or arabic in the GIF overlay text)""" if r'\u' not in text: # doesn't have any escape sequences that need to be decoded return text try: # Try to decode Unicode escape sequences return text.encode('latin1').decode('unicode_escape') except (UnicodeEncodeError, UnicodeDecodeError): # logger.debug(f"Failed to decode unicode escape sequences while generating gif text: {text}") return text def create_history_gif( task: str, history: AgentHistoryList, # output_path: str = 'agent_history.gif', duration: int = 3000, show_goals: bool = True, show_task: bool = True, show_logo: bool = False, font_size: int = 40, title_font_size: int = 56, goal_font_size: int = 44, margin: int = 40, line_spacing: float = 1.5, ) -> None: """Create a GIF from the agent's history with overlaid task and goal text.""" if not history.history: logger.warning('No history to create GIF from') return from PIL import Image, ImageFont images = [] # if history is empty, we can't create a gif if not history.history: logger.warning('No history to create GIF from') return # Get all screenshots from history (including None placeholders) screenshots = history.screenshots(return_none_if_not_screenshot=True) if not screenshots: logger.warning('No screenshots found in history') return # Find the first non-placeholder screenshot # A screenshot is considered a placeholder if: # 1. It's the exact 4px placeholder for about:blank pages, OR # 2. 
It comes from a new tab page (chrome://newtab/, about:blank, etc.) first_real_screenshot = None for screenshot in screenshots: if screenshot and screenshot != PLACEHOLDER_4PX_SCREENSHOT: first_real_screenshot = screenshot break if not first_real_screenshot: logger.warning('No valid screenshots found (all are placeholders or from new tab pages)') return # Try to load nicer fonts try: # Try different font options in order of preference # ArialUni is a font that comes with Office and can render most non-alphabet characters font_options = [ 'Microsoft YaHei', # 微软雅黑 'SimHei', # 黑体 'SimSun', # 宋体 'Noto Sans CJK SC', # 思源黑体 'WenQuanYi Micro Hei', # 文泉驿微米黑 'Helvetica', 'Arial', 'DejaVuSans', 'Verdana', ] font_loaded = False for font_name in font_options: try: if platform.system() == 'Windows': # Need to specify the abs font path on Windows font_name = os.path.join(CONFIG.WIN_FONT_DIR, font_name + '.ttf') regular_font = ImageFont.truetype(font_name, font_size) title_font = ImageFont.truetype(font_name, title_font_size) goal_font = ImageFont.truetype(font_name, goal_font_size) font_loaded = True break except OSError: continue if not font_loaded: raise OSError('No preferred fonts found') except OSError: regular_font = ImageFont.load_default() title_font = ImageFont.load_default() goal_font = regular_font # Load logo if requested logo = None if show_logo: try: logo = Image.open('./static/browser-use.png') # Resize logo to be small (e.g., 40px height) logo_height = 150 aspect_ratio = logo.width / logo.height logo_width = int(logo_height * aspect_ratio) logo = logo.resize((logo_width, logo_height), Image.Resampling.LANCZOS) except Exception as e: logger.warning(f'Could not load logo: {e}') # Create task frame if requested if show_task and task: # Find the first non-placeholder screenshot for the task frame first_real_screenshot = None for item in history.history: screenshot_b64 = item.state.get_screenshot() if screenshot_b64 and screenshot_b64 != PLACEHOLDER_4PX_SCREENSHOT: 
first_real_screenshot = screenshot_b64 break if first_real_screenshot: task_frame = _create_task_frame( task, first_real_screenshot, title_font, # type: ignore regular_font, # type: ignore logo, line_spacing, ) images.append(task_frame) else: logger.warning('No real screenshots found for task frame, skipping task frame') # Process each history item with its corresponding screenshot for i, (item, screenshot) in enumerate(zip(history.history, screenshots), 1): if not screenshot: continue # Skip placeholder screenshots from about:blank pages # These are 4x4 white PNGs encoded as a specific base64 string if screenshot == PLACEHOLDER_4PX_SCREENSHOT: logger.debug(f'Skipping placeholder screenshot from about:blank page at step {i}') continue # Skip screenshots from new tab pages from browser_use.utils import is_new_tab_page if is_new_tab_page(item.state.url): logger.debug(f'Skipping screenshot from new tab page ({item.state.url}) at step {i}') continue # Convert base64 screenshot to PIL Image img_data = base64.b64decode(screenshot) image = Image.open(io.BytesIO(img_data)) if show_goals and item.model_output: image = _add_overlay_to_image( image=image, step_number=i, goal_text=item.model_output.current_state.next_goal, regular_font=regular_font, # type: ignore title_font=title_font, # type: ignore margin=margin, logo=logo, ) images.append(image) if images: # Save the GIF images[0].save( output_path, save_all=True, append_images=images[1:], duration=duration, loop=0, optimize=False, ) logger.info(f'Created GIF at {output_path}') else: logger.warning('No images found in history to create GIF') def _create_task_frame( task: str, first_screenshot: str, title_font: ImageFont.FreeTypeFont, regular_font: ImageFont.FreeTypeFont, logo: Image.Image | None = None, line_spacing: float = 1.5, ) -> Image.Image: """Create initial frame showing the task.""" from PIL import Image, ImageDraw, ImageFont img_data = base64.b64decode(first_screenshot) template = Image.open(io.BytesIO(img_data)) 
image = Image.new('RGB', template.size, (0, 0, 0)) draw = ImageDraw.Draw(image) # Calculate vertical center of image center_y = image.height // 2 # Draw task text with dynamic font size based on task length margin = 140 # Increased margin max_width = image.width - (2 * margin) # Dynamic font size calculation based on task length # Start with base font size (regular + 16) base_font_size = regular_font.size + 16 min_font_size = max(regular_font.size - 10, 16) # Don't go below 16pt max_font_size = base_font_size # Cap at the base font size # Calculate dynamic font size based on text length and complexity # Longer texts get progressively smaller fonts text_length = len(task) if text_length > 200: # For very long text, reduce font size logarithmically font_size = max(base_font_size - int(10 * (text_length / 200)), min_font_size) else: font_size = base_font_size larger_font = ImageFont.truetype(regular_font.path, font_size) # type: ignore # Generate wrapped text with the calculated font size wrapped_text = _wrap_text(task, larger_font, max_width) # Calculate line height with spacing line_height = larger_font.size * line_spacing # Split text into lines and draw with custom spacing lines = wrapped_text.split('\n') total_height = line_height * len(lines) # Start position for first line text_y = center_y - (total_height / 2) + 50 # Shifted down slightly for line in lines: # Get line width for centering line_bbox = draw.textbbox((0, 0), line, font=larger_font) text_x = (image.width - (line_bbox[2] - line_bbox[0])) // 2 draw.text( (text_x, text_y), line, font=larger_font, fill=(255, 255, 255), ) text_y += line_height # Add logo if provided (top right corner) if logo: logo_margin = 20 logo_x = image.width - logo.width - logo_margin image.paste(logo, (logo_x, logo_margin), logo if logo.mode == 'RGBA' else None) return image def _add_overlay_to_image( image: Image.Image, step_number: int, goal_text: str, regular_font: ImageFont.FreeTypeFont, title_font: ImageFont.FreeTypeFont, 
margin: int, logo: Image.Image | None = None, display_step: bool = True, text_color: tuple[int, int, int, int] = (255, 255, 255, 255), text_box_color: tuple[int, int, int, int] = (0, 0, 0, 255), ) -> Image.Image: """Add step number and goal overlay to an image.""" from PIL import Image, ImageDraw goal_text = decode_unicode_escapes_to_utf8(goal_text) image = image.convert('RGBA') txt_layer = Image.new('RGBA', image.size, (0, 0, 0, 0)) draw = ImageDraw.Draw(txt_layer) if display_step: # Add step number (bottom left) step_text = str(step_number) step_bbox = draw.textbbox((0, 0), step_text, font=title_font) step_width = step_bbox[2] - step_bbox[0] step_height = step_bbox[3] - step_bbox[1] # Position step number in bottom left x_step = margin + 10 # Slight additional offset from edge y_step = image.height - margin - step_height - 10 # Slight offset from bottom # Draw rounded rectangle background for step number padding = 20 # Increased padding step_bg_bbox = ( x_step - padding, y_step - padding, x_step + step_width + padding, y_step + step_height + padding, ) draw.rounded_rectangle( step_bg_bbox, radius=15, # Add rounded corners fill=text_box_color, ) # Draw step number draw.text( (x_step, y_step), step_text, font=title_font, fill=text_color, ) # Draw goal text (centered, bottom) max_width = image.width - (4 * margin) wrapped_goal = _wrap_text(goal_text, title_font, max_width) goal_bbox = draw.multiline_textbbox((0, 0), wrapped_goal, font=title_font) goal_width = goal_bbox[2] - goal_bbox[0] goal_height = goal_bbox[3] - goal_bbox[1] # Center goal text horizontally, place above step number x_goal = (image.width - goal_width) // 2 y_goal = y_step - goal_height - padding * 4 # More space between step and goal # Draw rounded rectangle background for goal padding_goal = 25 # Increased padding for goal goal_bg_bbox = ( x_goal - padding_goal, # Remove extra space for logo y_goal - padding_goal, x_goal + goal_width + padding_goal, y_goal + goal_height + padding_goal, ) 
draw.rounded_rectangle( goal_bg_bbox, radius=15, # Add rounded corners fill=text_box_color, ) # Draw goal text draw.multiline_text( (x_goal, y_goal), wrapped_goal, font=title_font, fill=text_color, align='center', ) # Add logo if provided (top right corner) if logo: logo_layer = Image.new('RGBA', image.size, (0, 0, 0, 0)) logo_margin = 20 logo_x = image.width - logo.width - logo_margin logo_layer.paste(logo, (logo_x, logo_margin), logo if logo.mode == 'RGBA' else None) txt_layer = Image.alpha_composite(logo_layer, txt_layer) # Composite and convert result = Image.alpha_composite(image, txt_layer) return result.convert('RGB') def _wrap_text(text: str, font: ImageFont.FreeTypeFont, max_width: int) -> str: """ Wrap text to fit within a given width. Args: text: Text to wrap font: Font to use for text max_width: Maximum width in pixels Returns: Wrapped text with newlines """ text = decode_unicode_escapes_to_utf8(text) words = text.split() lines = [] current_line = [] for word in words: current_line.append(word) line = ' '.join(current_line) bbox = font.getbbox(line) if bbox[2] > max_width: if len(current_line) == 1: lines.append(current_line.pop()) else: current_line.pop() lines.append(' '.join(current_line)) current_line = [word] if current_line: lines.append(' '.join(current_line)) return '\n'.join(lines)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dsouza-anush/browser-use-heroku'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

gif.py•11.8 KiB