Browser Use Heroku

browser-use-heroku
examples
custom-functions

drag_and_drop.py•10.3 KiB

"""
Drag and Drop Custom Action Example

This example demonstrates how to implement drag and drop functionality as a custom action.
The drag and drop action supports both element-based and coordinate-based operations,
making it useful for canvas drawing, sortable lists, sliders, file uploads, and UI rearrangement.
"""

import asyncio
from typing import cast

from pydantic import BaseModel, Field

from browser_use import ActionResult, Agent, Controller
from browser_use.browser.types import ElementHandle, Page
from browser_use.llm import ChatOpenAI


class Position(BaseModel):
    """Represents a position with x and y coordinates."""

    x: int = Field(..., description='X coordinate')
    y: int = Field(..., description='Y coordinate')


class DragDropAction(BaseModel):
    """Parameters for drag and drop operations.

    Exactly one of two addressing modes is expected:

    * element-based: ``element_source`` and ``element_target`` selectors, each
      optionally shifted by an offset that is added to the element's center;
    * coordinate-based: all four ``coord_*`` page coordinates.
    """

    # Element-based approach
    element_source: str | None = Field(None, description='CSS selector or XPath for the source element to drag')
    element_target: str | None = Field(None, description='CSS selector or XPath for the target element to drop on')
    element_source_offset: Position | None = Field(None, description='Optional offset from source element center (x, y)')
    element_target_offset: Position | None = Field(None, description='Optional offset from target element center (x, y)')

    # Coordinate-based approach
    coord_source_x: int | None = Field(None, description='Source X coordinate for drag start')
    coord_source_y: int | None = Field(None, description='Source Y coordinate for drag start')
    coord_target_x: int | None = Field(None, description='Target X coordinate for drag end')
    coord_target_y: int | None = Field(None, description='Target Y coordinate for drag end')

    # Operation parameters
    steps: int | None = Field(10, description='Number of intermediate steps during drag (default: 10)')
    delay_ms: int | None = Field(5, description='Delay in milliseconds between steps (default: 5)')


async def create_drag_drop_controller() -> Controller:
    """Create a controller with drag and drop functionality.

    Returns:
        A ``Controller`` whose registry contains the ``drag_drop`` action.
    """
    controller = Controller()

    @controller.registry.action(
        'Drag and drop elements or between coordinates on the page - useful for canvas drawing, sortable lists, sliders, file uploads, and UI rearrangement',
        param_model=DragDropAction,
    )
    async def drag_drop(params: DragDropAction, page: Page) -> ActionResult:
        """
        Performs a precise drag and drop operation between elements or coordinates.

        Args:
            params: Validated action parameters (selectors or coordinates, plus
                step count and per-step delay).
            page: The page whose mouse is driven.

        Returns:
            An ``ActionResult`` carrying either a success message or an error;
            exceptions are converted into error results rather than raised.
        """

        async def get_drag_elements(
            page: Page,
            source_selector: str,
            target_selector: str,
        ) -> tuple[ElementHandle | None, ElementHandle | None]:
            """Get source and target elements with appropriate error handling.

            Returns a ``(source, target)`` pair; an entry is ``None`` when the
            selector matched nothing or the lookup raised.
            """
            source_element = None
            target_element = None

            try:
                # page.locator() auto-detects CSS and XPath
                source_locator = page.locator(source_selector)
                target_locator = page.locator(target_selector)

                # Check if elements exist
                source_count = await source_locator.count()
                target_count = await target_locator.count()

                if source_count > 0:
                    source_element = await source_locator.first.element_handle()
                    print(f'Found source element with selector: {source_selector}')
                else:
                    print(f'Source element not found: {source_selector}')

                if target_count > 0:
                    target_element = await target_locator.first.element_handle()
                    print(f'Found target element with selector: {target_selector}')
                else:
                    print(f'Target element not found: {target_selector}')

            except Exception as e:
                print(f'Error finding elements: {str(e)}')

            return source_element, target_element

        async def get_point_on_element(
            element: ElementHandle,
            offset: Position | None,
        ) -> tuple[int, int] | None:
            """Return the element's center, shifted by ``offset`` when given.

            Returns ``None`` when the element has no bounding box.
            """
            box = await element.bounding_box()
            if not box:
                return None

            point_x = box['x'] + box['width'] / 2
            point_y = box['y'] + box['height'] / 2

            # The offset is documented to the model as relative to the element
            # center, so it is added to the center rather than used as an
            # absolute page position.
            if offset is not None:
                point_x += offset.x
                point_y += offset.y

            return int(point_x), int(point_y)

        async def get_element_coordinates(
            source_element: ElementHandle,
            target_element: ElementHandle,
            source_position: Position | None,
            target_position: Position | None,
        ) -> tuple[tuple[int, int] | None, tuple[int, int] | None]:
            """Get coordinates from elements with appropriate error handling.

            Returns a ``(source, target)`` pair of ``(x, y)`` tuples; an entry
            is ``None`` when it could not be determined.
            """
            source_coords = None
            target_coords = None

            try:
                source_coords = await get_point_on_element(source_element, source_position)
                target_coords = await get_point_on_element(target_element, target_position)
            except Exception as e:
                print(f'Error getting element coordinates: {str(e)}')

            return source_coords, target_coords

        async def execute_drag_operation(
            page: Page,
            source_x: int,
            source_y: int,
            target_x: int,
            target_y: int,
            steps: int,
            delay_ms: int,
        ) -> tuple[bool, str]:
            """Execute the drag operation with comprehensive error handling.

            Returns a ``(success, message)`` pair and never raises.
            """
            # Tracks whether the button is held so it can be released on failure.
            button_down = False

            try:
                # Try to move to source position
                try:
                    await page.mouse.move(source_x, source_y)
                    print(f'Moved to source position ({source_x}, {source_y})')
                except Exception as e:
                    print(f'Failed to move to source position: {str(e)}')
                    return False, f'Failed to move to source position: {str(e)}'

                # Press mouse button down
                await page.mouse.down()
                button_down = True

                # Move to target position with intermediate steps
                for i in range(1, steps + 1):
                    ratio = i / steps
                    intermediate_x = int(source_x + (target_x - source_x) * ratio)
                    intermediate_y = int(source_y + (target_y - source_y) * ratio)

                    await page.mouse.move(intermediate_x, intermediate_y)

                    if delay_ms > 0:
                        await asyncio.sleep(delay_ms / 1000)

                # Move to final target position
                await page.mouse.move(target_x, target_y)

                # Move again to ensure dragover events are properly triggered
                await page.mouse.move(target_x, target_y)

                # Release mouse button
                await page.mouse.up()
                button_down = False

                return True, 'Drag operation completed successfully'

            except Exception as e:
                # Do not leave the button pressed for subsequent actions; the
                # release is best-effort so the original error is still reported.
                if button_down:
                    try:
                        await page.mouse.up()
                    except Exception as release_error:
                        print(f'Failed to release mouse button after error: {str(release_error)}')
                return False, f'Error during drag operation: {str(e)}'

        try:
            # Initialize variables
            source_x: int | None = None
            source_y: int | None = None
            target_x: int | None = None
            target_y: int | None = None

            # Normalize parameters. Compare against None (not truthiness) so an
            # explicit 0 is honoured: delay_ms=0 disables the delay and steps=0
            # is clamped to a single step.
            steps = max(1, params.steps if params.steps is not None else 10)
            delay_ms = max(0, params.delay_ms if params.delay_ms is not None else 5)

            # Case 1: Element selectors provided
            if params.element_source and params.element_target:
                print('Using element-based approach with selectors')

                source_element, target_element = await get_drag_elements(
                    page,
                    params.element_source,
                    params.element_target,
                )

                if not source_element or not target_element:
                    error_msg = f'Failed to find {"source" if not source_element else "target"} element'
                    return ActionResult(error=error_msg, include_in_memory=True)

                source_coords, target_coords = await get_element_coordinates(
                    source_element, target_element, params.element_source_offset, params.element_target_offset
                )

                if not source_coords or not target_coords:
                    error_msg = f'Failed to determine {"source" if not source_coords else "target"} coordinates'
                    return ActionResult(error=error_msg, include_in_memory=True)

                source_x, source_y = source_coords
                target_x, target_y = target_coords

            # Case 2: Coordinates provided directly
            elif all(
                coord is not None
                for coord in [params.coord_source_x, params.coord_source_y, params.coord_target_x, params.coord_target_y]
            ):
                print('Using coordinate-based approach')
                source_x = params.coord_source_x
                source_y = params.coord_source_y
                target_x = params.coord_target_x
                target_y = params.coord_target_y
            else:
                error_msg = 'Must provide either source/target selectors or source/target coordinates'
                return ActionResult(error=error_msg, include_in_memory=True)

            # Validate coordinates
            if any(coord is None for coord in [source_x, source_y, target_x, target_y]):
                error_msg = 'Failed to determine source or target coordinates'
                return ActionResult(error=error_msg, include_in_memory=True)

            # Perform the drag operation
            success, message = await execute_drag_operation(
                page,
                cast(int, source_x),
                cast(int, source_y),
                cast(int, target_x),
                cast(int, target_y),
                steps,
                delay_ms,
            )

            if not success:
                print(f'Drag operation failed: {message}')
                return ActionResult(error=message, include_in_memory=True)

            # Create descriptive message
            if params.element_source and params.element_target:
                msg = f"🖱️ Dragged element '{params.element_source}' to '{params.element_target}'"
            else:
                msg = f'🖱️ Dragged from ({source_x}, {source_y}) to ({target_x}, {target_y})'

            print(msg)
            return ActionResult(extracted_content=msg, include_in_memory=True, long_term_memory=msg)

        except Exception as e:
            error_msg = f'Failed to perform drag and drop: {str(e)}'
            print(error_msg)
            return ActionResult(error=error_msg, include_in_memory=True)

    return controller


async def example_drag_drop_sortable_list():
    """Example: Drag and drop to reorder items in a sortable list."""
    controller = await create_drag_drop_controller()

    # Initialize LLM (replace with your preferred model)
    llm = ChatOpenAI(model='gpt-4.1')

    # Create the agent
    agent = Agent(
        task='Go to a drag and drop demo website and reorder some list items using drag and drop',
        llm=llm,
        controller=controller,
    )

    # Run the agent
    print('🚀 Starting drag and drop example...')
    history = await agent.run()

    return history


async def example_drag_drop_coordinates():
    """Example: Direct coordinate-based drag and drop."""
    controller = await create_drag_drop_controller()

    llm = ChatOpenAI(model='gpt-4.1')

    agent = Agent(
        task='Go to a canvas drawing website and draw a simple line using drag and drop from coordinates (100, 100) to (300, 200)',
        llm=llm,
        controller=controller,
    )

    print('🎨 Starting coordinate-based drag and drop example...')
    history = await agent.run()

    return history


if __name__ == '__main__':
    # Run different examples
    print('Choose an example:')
    print('1. Sortable list drag and drop')
    print('2. Coordinate-based drawing')

    # Only two examples exist, so the prompt advertises the range 1-2.
    choice = input('Enter choice (1-2): ').strip()

    if choice == '1':
        asyncio.run(example_drag_drop_sortable_list())
    elif choice == '2':
        asyncio.run(example_drag_drop_coordinates())
    else:
        print('Invalid choice, running sortable list example...')
        asyncio.run(example_drag_drop_sortable_list())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/dsouza-anush/browser-use-heroku'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

drag_and_drop.py•10.3 KiB