run_browser_agent
Execute browser automation tasks from natural-language commands, enabling web navigation, form filling, and visual interaction through the browser-use MCP server.
Instructions
Handle run-browser-agent tool calls.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
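| add_infos | No | Additional information passed to the agent alongside the task. | `""` (empty string) |
| task | Yes | Natural-language description of the browser task to run. | |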
Input Schema (JSON Schema)
{
"properties": {
"add_infos": {
"default": "",
"title": "Add Infos",
"type": "string"
},
"task": {
"title": "Task",
"type": "string"
}
},
"required": [
"task"
],
"title": "run_browser_agentArguments",
"type": "object"
}
Implementation Reference
- The core handler function for the 'run_browser_agent' tool. It is registered via the @app.tool() decorator (line 75). Configures and initializes a CustomBrowser, CustomAgent, and executes the browser agent task based on input parameters, handling errors and cleanup.@app.tool() async def run_browser_agent(task: str, add_infos: str = "") -> str: """Handle run-browser-agent tool calls.""" global _global_agent, _global_browser, _global_browser_context, _global_agent_state try: # Clear any previous agent stop signals _global_agent_state.clear_stop() # Get browser configuration headless = get_env_bool("BROWSER_HEADLESS", True) disable_security = get_env_bool("BROWSER_DISABLE_SECURITY", False) window_w = int(os.getenv("BROWSER_WINDOW_WIDTH", "1280")) window_h = int(os.getenv("BROWSER_WINDOW_HEIGHT", "720")) # Get agent configuration model_provider = os.getenv("MCP_MODEL_PROVIDER", "openrouter") model_name = os.getenv("MCP_MODEL_NAME", "openai/o3-mini-high") temperature = float(os.getenv("MCP_TEMPERATURE", "0.7")) max_steps = int(os.getenv("MCP_MAX_STEPS", "100")) use_vision = get_env_bool("MCP_USE_VISION", True) max_actions_per_step = int(os.getenv("MCP_MAX_ACTIONS_PER_STEP", "5")) tool_calling_method = os.getenv("MCP_TOOL_CALLING_METHOD", "auto") # Configure browser window size extra_chromium_args = [f"--window-size={window_w},{window_h}"] # Initialize browser if needed if not _global_browser: _global_browser = CustomBrowser( config=BrowserConfig( headless=headless, disable_security=disable_security, extra_chromium_args=extra_chromium_args, ) ) # Initialize browser context if needed if not _global_browser_context: _global_browser_context = await _global_browser.new_context( config=BrowserContextConfig( trace_path=os.getenv("BROWSER_TRACE_PATH"), save_recording_path=os.getenv("BROWSER_RECORDING_PATH"), no_viewport=False, browser_window_size=BrowserContextWindowSize( width=window_w, height=window_h ), ) ) # Prepare LLM llm = utils.get_llm_model( provider=model_provider, 
model_name=model_name, temperature=temperature ) # Create controller and agent controller = CustomController() _global_agent = CustomAgent( task=task, add_infos=add_infos, use_vision=use_vision, llm=llm, browser=_global_browser, browser_context=_global_browser_context, controller=controller, system_prompt_class=CustomSystemPrompt, agent_prompt_class=CustomAgentMessagePrompt, max_actions_per_step=max_actions_per_step, agent_state=_global_agent_state, tool_calling_method=tool_calling_method, ) # Run agent with improved error handling try: history = await _global_agent.run(max_steps=max_steps) final_result = ( history.final_result() or f"No final result. Possibly incomplete. {history}" ) return final_result except asyncio.CancelledError: return "Task was cancelled" except Exception as e: logging.error(f"Agent run error: {str(e)}\n{traceback.format_exc()}") return f"Error during task execution: {str(e)}" except Exception as e: logging.error(f"run-browser-agent error: {str(e)}\n{traceback.format_exc()}") return f"Error during task execution: {str(e)}" finally: asyncio.create_task(_safe_cleanup())