test_action_generation.py•7.68 kB
#!/usr/bin/env python3
"""
Comprehensive test script for action_agent and task_agent, and for their
integration with browser_agent and scoring_agent.
Logs all output to test_results.log and echoes it to the console.
"""
import asyncio
import json
import logging
from datetime import datetime
from agents.action_generation import action_agent, task_agent
from agents.browser_agent import browser_agent, scoring_agent
# Configure logging: every record goes to a freshly truncated log file and is
# also echoed to the console.
# The test helpers log non-ASCII status markers ("✓" / "✗"), so the file is
# opened as UTF-8 explicitly instead of relying on the platform's locale
# encoding, which cannot represent those characters on some systems.
file_handler = logging.FileHandler('test_results.log', mode='w', encoding='utf-8')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        file_handler,
        logging.StreamHandler(),  # Also print to console for progress
    ],
)
logger = logging.getLogger(__name__)
def log_separator(title=""):
    """Write a heavy banner to the log.

    A rule of 80 ``=`` characters is always logged. When ``title`` is
    non-empty, the title line and a closing rule follow it.
    """
    rule = "=" * 80
    logger.info(rule)
    if not title:
        return
    logger.info(f" {title}")
    logger.info(rule)
def log_subsection(title):
    """Write a light banner to the log: ``title`` framed by two 80-character ``-`` rules."""
    rule = "-" * 80
    for line in (rule, f" {title}", rule):
        logger.info(line)
async def test_action_agent_only(test_name, action, website):
    """Run ``action_agent`` for one scenario and log the metadata it generates.

    Args:
        test_name: Label shown in the log header for this scenario.
        action: Action description passed to ``action_agent``.
        website: Website URL passed to ``action_agent``.

    Returns:
        The object returned by ``action_agent``, or ``None`` if calling the
        agent or logging its result raised an exception.
    """
    log_subsection(f"TEST: {test_name} - Action Agent Only")
    logger.info(f"Action: {action}")
    logger.info(f"Website: {website}")
    logger.info("")
    try:
        result = await action_agent(action, website)
        logger.info("✓ Action agent succeeded!")
        logger.info("")
        logger.info("Generated Function Metadata:")
        logger.info(f" Function Name: {result.function_name}")
        logger.info(f" Description: {result.description}")
        logger.info("")
        logger.info(" Parameters:")
        for param in result.parameters:
            logger.info(f" - {param.name} ({param.type}): {param.description}")
        logger.info("")
        logger.info(" Returns:")
        for ret in result.returns:
            logger.info(f" - {ret.name} ({ret.type}): {ret.description}")
        logger.info("")
        return result
    except Exception as e:
        # Deliberately broad: one failing scenario must not abort the rest of
        # the suite. logger.exception logs at ERROR level and appends the
        # active traceback, so no manual traceback formatting is needed.
        logger.exception(f"✗ Test failed with error: {e}")
        return None
async def test_task_agent_only(test_name, action, website):
    """Run the ``action_agent`` -> ``task_agent`` pipeline for one scenario.

    Args:
        test_name: Label shown in the log header for this scenario.
        action: Action description passed to ``action_agent``.
        website: Website URL passed to both agents.

    Returns:
        A ``(function_metadata, task_description)`` tuple on success, or
        ``(None, None)`` if either step raised an exception.
    """
    log_subsection(f"TEST: {test_name} - Task Agent Pipeline")
    logger.info(f"Action: {action}")
    logger.info(f"Website: {website}")
    logger.info("")
    try:
        # First get function metadata
        logger.info("Step 1: Running action_agent...")
        function_metadata = await action_agent(action, website)
        logger.info("✓ Action agent succeeded!")
        logger.info(f" Generated function: {function_metadata.function_name}")
        logger.info("")
        # Then convert to task description
        logger.info("Step 2: Running task_agent...")
        task_description = await task_agent(function_metadata, website)
        logger.info("✓ Task agent succeeded!")
        logger.info("")
        logger.info("Generated Task Description:")
        logger.info(f"{task_description}")
        logger.info("")
        return function_metadata, task_description
    except Exception as e:
        # Deliberately broad: one failing scenario must not abort the rest of
        # the suite. logger.exception logs at ERROR level and appends the
        # active traceback, so no manual traceback formatting is needed.
        logger.exception(f"✗ Test failed with error: {e}")
        return None, None
async def test_full_pipeline(test_name, action, website):
    """Run action_agent -> task_agent -> browser_agent -> scoring_agent for one scenario.

    Each step's output feeds the next: the function metadata becomes a task
    description, the browser agent is run on the website with that
    description, and the scoring agent receives the task description together
    with the stringified browser result.

    Args:
        test_name: Label shown in the log header for this scenario.
        action: Action description passed to ``action_agent``.
        website: Website URL passed to the action, task and browser agents.

    Returns:
        A dict with keys ``"function_metadata"``, ``"task_description"``,
        ``"browser_result"`` and ``"evaluation"`` on success, or ``None`` if
        any step raised an exception.
    """
    log_subsection(f"TEST: {test_name} - Full Pipeline with Browser")
    logger.info(f"Action: {action}")
    logger.info(f"Website: {website}")
    logger.info("")
    try:
        # Step 1: Action Agent
        logger.info("Step 1: Running action_agent...")
        function_metadata = await action_agent(action, website)
        logger.info(f"✓ Generated function: {function_metadata.function_name}")
        logger.info("")
        # Step 2: Task Agent
        logger.info("Step 2: Running task_agent...")
        task_description = await task_agent(function_metadata, website)
        logger.info("✓ Generated task description")
        logger.info(f"Task: {task_description}")
        logger.info("")
        # Step 3: Browser Agent
        logger.info("Step 3: Running browser_agent...")
        browser_result = await browser_agent(website, task_description)
        logger.info("✓ Browser agent completed")
        logger.info(f"Result: {browser_result}")
        logger.info("")
        # Step 4: Scoring Agent
        logger.info("Step 4: Running scoring_agent...")
        evaluation = await scoring_agent(task_description, str(browser_result))
        logger.info("✓ Scoring agent completed")
        logger.info(f"Score: {evaluation.score}/10")
        logger.info(f"Reasoning: {evaluation.reasoning}")
        logger.info("")
        return {
            "function_metadata": function_metadata,
            "task_description": task_description,
            "browser_result": browser_result,
            "evaluation": evaluation,
        }
    except Exception as e:
        # Deliberately broad: one failing scenario must not abort the rest of
        # the suite. logger.exception logs at ERROR level and appends the
        # active traceback, so no manual traceback formatting is needed.
        logger.exception(f"✗ Test failed with error: {e}")
        return None
def _test_passed(outcome):
    """Return False when ``outcome`` is a failure sentinel of the test helpers."""
    if outcome is None:
        return False
    if isinstance(outcome, tuple):
        # test_task_agent_only reports failure as (None, None).
        return any(part is not None for part in outcome)
    return True


async def run_all_tests():
    """Run every scenario in order, log a pass/fail summary, and report failures.

    Scenarios run sequentially, grouped into three sections (action agent
    only, task agent pipeline, full pipeline with browser). A scenario counts
    as failed when its helper returns its failure sentinel (``None`` or a
    tuple of ``None`` values), i.e. when the helper caught an exception. The
    score produced by the scoring agent does not affect pass/fail.

    Returns:
        The number of failed scenarios (``0`` when all of them passed).
    """
    start_time = datetime.now()
    log_separator("STARTING COMPREHENSIVE TEST SUITE")
    logger.info(f"Test started at: {start_time}")
    logger.info("")

    # (section heading, helper coroutine, ((test_name, action, website), ...))
    sections = (
        (
            "SECTION 1: ACTION AGENT ONLY TESTS",
            test_action_agent_only,
            (
                (
                    "Simple Login",
                    "login to the website with username and password",
                    "https://example.com",
                ),
                (
                    "GitHub Search",
                    "search for repositories on GitHub",
                    "https://github.com",
                ),
                (
                    "Amazon Product Search",
                    "search for products and get their prices",
                    "https://amazon.com",
                ),
                (
                    "Twitter Post",
                    "post a tweet with text and optional image",
                    "https://twitter.com",
                ),
            ),
        ),
        (
            "SECTION 2: TASK AGENT PIPELINE TESTS",
            test_task_agent_only,
            (
                (
                    "HackerNews Top Stories",
                    "get the top 5 stories from the homepage",
                    "https://news.ycombinator.com",
                ),
                (
                    "Reddit Subreddit Browse",
                    "browse a subreddit and get the top posts",
                    "https://reddit.com/r/python",
                ),
                (
                    "Wikipedia Article Summary",
                    "extract the introduction and key facts from an article",
                    "https://en.wikipedia.org/wiki/Python_(programming_language)",
                ),
            ),
        ),
        (
            "SECTION 3: FULL PIPELINE WITH BROWSER TESTS",
            test_full_pipeline,
            (
                (
                    "Google Search",
                    "search for a query and get the top results",
                    "https://google.com",
                ),
                (
                    "Product Hunt Browse",
                    "get today's top products",
                    "https://www.producthunt.com",
                ),
                (
                    "Stack Overflow Question",
                    "search for questions about Python async",
                    "https://stackoverflow.com",
                ),
            ),
        ),
    )

    total = 0
    failed = []
    for heading, runner, cases in sections:
        log_separator(heading)
        for test_name, action, website in cases:
            total += 1
            outcome = await runner(test_name, action, website)
            if not _test_passed(outcome):
                failed.append(test_name)

    # Summary
    end_time = datetime.now()
    duration = end_time - start_time
    log_separator("TEST SUITE COMPLETED")
    logger.info(f"Test completed at: {end_time}")
    logger.info(f"Total duration: {duration}")
    logger.info(f"Passed: {total - len(failed)}/{total}")
    for test_name in failed:
        logger.error(f"Failed: {test_name}")
    logger.info("")
    logger.info("All results have been logged to: test_results.log")
    logger.info("=" * 80)
    return len(failed)


if __name__ == "__main__":
    import sys

    # Exit non-zero when any scenario failed so shells and CI can detect it.
    sys.exit(1 if asyncio.run(run_all_tests()) else 0)