import pytest
from pydantic import BaseModel
from browser_use.agent.service import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser import BrowserSession
from browser_use.llm import ChatAzureOpenAI
@pytest.fixture
def llm():
    """Provide the chat model used to drive the agent in these tests."""
    # Disabled alternatives:
    # ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
    # ChatOpenAI(model='gpt-4.1-mini')
    model = ChatAzureOpenAI(model='gpt-4.1')
    return model
@pytest.fixture(scope='module')
async def browser_session():
    """Yield a started headless BrowserSession, then kill it on teardown.

    The fixture is module-scoped, so one session is created per test module.
    """
    from browser_use.browser.profile import BrowserProfile

    session = BrowserSession(
        browser_profile=BrowserProfile(headless=True, user_data_dir=None),
    )
    await session.start()
    yield session
    # Teardown: runs once the tests using this fixture have finished.
    await session.kill()
# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
# @pytest.mark.asyncio
@pytest.mark.skip(reason='Kinda expensive to run')
async def test_ecommerce_interaction(llm, browser_session):
    """Run an Amazon search task and check the kinds of actions the agent took."""
    agent = Agent(
        task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
        llm=llm,
        browser_session=browser_session,
        save_conversation_path='tmp/test_ecommerce_interaction/conversation',
    )
    history: AgentHistoryList = await agent.run(max_steps=20)

    # Reduce every model action to one or two coarse labels so the assertions
    # below do not depend on the exact order or number of steps.
    labels = []
    for step in history.model_actions():
        name = list(step.keys())[0]
        if name == 'go_to_url':
            labels.append('navigate')
        elif name == 'click_element':
            labels.append('click')
        elif name == 'input_text':
            labels.append('input')
            # Grade how closely the typed text matches the search term.
            typed = step['input_text']['text'].lower()  # type: ignore
            if typed == 'laptop':
                grade = 'input_exact_correct'
            elif 'laptop' in typed:
                grade = 'correct_in_input'
            else:
                grade = 'incorrect_input'
            labels.append(grade)

    # Essential steps: navigation, typing the search term, and a click.
    assert 'navigate' in labels
    assert 'input' in labels
    assert 'click' in labels
    # The typed text must at least contain the search term.
    assert any(grade in labels for grade in ('input_exact_correct', 'correct_in_input'))
async def test_error_recovery(llm, browser_session):
    """Test the agent's ability to recover from a failed navigation.

    The task sends the agent to a nonexistent site first and then to
    google.com, so the first ``go_to_url`` action is expected to target the
    bad site. The test therefore passes when *any* ``go_to_url`` action
    targets google.com, rather than checking only the first one.
    """
    agent = Agent(
        task='Navigate to nonexistent-site.com and then recover by going to google.com ',
        llm=llm,
        browser_session=browser_session,
    )
    history: AgentHistoryList = await agent.run(max_steps=10)

    actions_names = history.action_names()
    assert 'go_to_url' in actions_names, f'{actions_names} does not contain go_to_url'

    # Collect the target of every navigation, not just the first one.
    visited_urls = []
    for action in history.model_actions():
        if 'go_to_url' not in action:
            continue
        assert 'url' in action['go_to_url'], 'url is not in go_to_url'
        visited_urls.append(action['go_to_url']['url'])

    # Strip a trailing slash so 'https://www.google.com/' is accepted too.
    assert any(url.rstrip('/').endswith('google.com') for url in visited_urls), (
        f'url does not end with google.com: {visited_urls}'
    )
async def test_find_contact_email(llm, browser_session):
    """Check that the agent's extracted content includes the contact email."""
    agent = Agent(
        task='Go to https://browser-use.com/ and find out the contact email',
        llm=llm,
        browser_session=browser_session,
    )
    history: AgentHistoryList = await agent.run(max_steps=10)

    expected_email = 'info@browser-use.com'
    extracted_content = history.extracted_content()
    # Pass as soon as any extracted chunk mentions the expected address.
    found = any(expected_email in chunk for chunk in extracted_content)
    if not found:
        pytest.fail(f'{extracted_content} does not contain {expected_email}')
async def test_agent_finds_installation_command(llm, browser_session):
    """Check that the agent's extracted content includes the pip install command."""
    agent = Agent(
        task='Find the pip installation command for the browser-use repo',
        llm=llm,
        browser_session=browser_session,
    )
    history: AgentHistoryList = await agent.run(max_steps=10)

    expected_command = 'pip install browser-use'
    extracted_content = history.extracted_content()
    # Pass as soon as any extracted chunk contains the exact command.
    if not any(expected_command in chunk for chunk in extracted_content):
        pytest.fail(f'{extracted_content} does not contain {expected_command}')
class CaptchaTest(BaseModel):
    """One parametrized captcha scenario for ``test_captcha_solver``."""

    # Human-readable label, used in the assertion failure message.
    name: str
    # Page the agent is told to open.
    url: str
    # Text searched for in the page content to decide the captcha was solved.
    success_text: str
    # Optional extra instructions for the agent's task prompt.
    additional_text: str | None = None
# Run the captcha solver tests: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
@pytest.mark.parametrize(
    'captcha',
    [
        CaptchaTest(
            name='Text Captcha',
            url='https://2captcha.com/demo/text',
            success_text='Captcha is passed successfully!',
        ),
        CaptchaTest(
            name='Basic Captcha',
            url='https://captcha.com/demos/features/captcha-demo.aspx',
            success_text='Correct!',
        ),
        CaptchaTest(
            name='Rotate Captcha',
            url='https://2captcha.com/demo/rotatecaptcha',
            success_text='Captcha is passed successfully',
            additional_text='Use multiple clicks at once. click done when image is exact correct position.',
        ),
        CaptchaTest(
            name='MT Captcha',
            url='https://2captcha.com/demo/mtcaptcha',
            success_text='Verified Successfully',
            additional_text='Stop when you solved it successfully.',
        ),
    ],
)
async def test_captcha_solver(llm, browser_session, captcha: CaptchaTest):
    """Test the agent's ability to solve different types of captchas.

    Args:
        llm: Chat model fixture driving the agent.
        browser_session: Browser session fixture the agent operates in.
        captcha: Scenario describing the page to open, the text that signals
            success, and optional extra instructions for the agent.
    """
    # Append the extra instructions only when present; formatting None
    # directly would put the literal text "None" into the prompt.
    task = f'Go to {captcha.url} and solve the captcha.'
    if captcha.additional_text:
        task = f'{task} {captcha.additional_text}'

    agent = Agent(
        task=task,
        llm=llm,
        browser_session=browser_session,
    )
    await agent.run(max_steps=7)

    # Read the final page content and look for the success marker.
    page = await browser_session.get_current_page()
    all_text = await page.content()
    if not all_text:
        all_text = ''
    if not isinstance(all_text, str):
        all_text = str(all_text)

    solved = captcha.success_text in all_text
    assert solved, f'Failed to solve {captcha.name}'
# python -m pytest tests/test_agent_actions.py -v --capture=no
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO