# test_action_generation.py (7.68 kB)
#!/usr/bin/env python3
"""
Comprehensive test script for action_agent, task_agent, and their integration
Logs all outputs to test_results.log
"""
import asyncio
import json
import logging
from datetime import datetime
from agents.action_generation import action_agent, task_agent
from agents.browser_agent import browser_agent, scoring_agent
# Configure logging: every record goes to test_results.log (truncated on each
# run by mode='w') and is echoed to the console so progress is visible live.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8: the log messages contain "✓" and "✗", which a
        # non-UTF-8 platform default encoding (e.g. cp1252) cannot encode.
        logging.FileHandler('test_results.log', mode='w', encoding='utf-8'),
        logging.StreamHandler()  # Also print to console for progress
    ]
)
logger = logging.getLogger(__name__)
def log_separator(title=""):
    """Log an 80-character '=' rule; a non-empty title is logged below it and closed by a second rule."""
    rule = "=" * 80
    logger.info(rule)
    if not title:
        # No title: a single rule is all that is emitted.
        return
    logger.info(f"  {title}")
    logger.info(rule)
def log_subsection(title):
    """Log the title, indented by two spaces, between two 80-character '-' rules."""
    rule = "-" * 80
    for line in (rule, f"  {title}", rule):
        logger.info(line)
async def test_action_agent_only(test_name, action, website):
    """Run action_agent once and log the function metadata it returns.

    Args:
        test_name: Label shown in the subsection header.
        action: Action description passed to action_agent.
        website: Website passed to action_agent.

    Returns:
        The value returned by action_agent, or None if the call or the
        logging of its fields raised an exception.
    """
    log_subsection(f"TEST: {test_name} - Action Agent Only")
    logger.info(f"Action: {action}")
    logger.info(f"Website: {website}")
    logger.info("")
    try:
        result = await action_agent(action, website)
        logger.info("✓ Action agent succeeded!")
        logger.info("")
        logger.info("Generated Function Metadata:")
        logger.info(f"  Function Name: {result.function_name}")
        logger.info(f"  Description: {result.description}")
        logger.info("")
        logger.info("  Parameters:")
        for param in result.parameters:
            logger.info(f"    - {param.name} ({param.type}): {param.description}")
        logger.info("")
        logger.info("  Returns:")
        for ret in result.returns:
            logger.info(f"    - {ret.name} ({ret.type}): {ret.description}")
        logger.info("")
        return result
    except Exception as e:
        # Deliberate catch-all: one failing case must not abort the suite.
        # logger.exception logs at ERROR level and appends the traceback,
        # so no manual traceback formatting is needed.
        logger.exception(f"✗ Test failed with error: {e}")
        return None
async def test_task_agent_only(test_name, action, website):
    """Run the action_agent -> task_agent pipeline and log the task description.

    Args:
        test_name: Label shown in the subsection header.
        action: Action description passed to action_agent.
        website: Website passed to both action_agent and task_agent.

    Returns:
        A ``(function_metadata, task_description)`` tuple on success, or
        ``(None, None)`` if any step raised an exception.
    """
    log_subsection(f"TEST: {test_name} - Task Agent Pipeline")
    logger.info(f"Action: {action}")
    logger.info(f"Website: {website}")
    logger.info("")
    try:
        # First get function metadata
        logger.info("Step 1: Running action_agent...")
        function_metadata = await action_agent(action, website)
        logger.info("✓ Action agent succeeded!")
        logger.info(f"  Generated function: {function_metadata.function_name}")
        logger.info("")
        # Then convert to task description
        logger.info("Step 2: Running task_agent...")
        task_description = await task_agent(function_metadata, website)
        logger.info("✓ Task agent succeeded!")
        logger.info("")
        logger.info("Generated Task Description:")
        logger.info(f"{task_description}")
        logger.info("")
        return function_metadata, task_description
    except Exception as e:
        # Deliberate catch-all: one failing case must not abort the suite.
        # logger.exception logs at ERROR level and appends the traceback,
        # so no manual traceback formatting is needed.
        logger.exception(f"✗ Test failed with error: {e}")
        return None, None
async def test_full_pipeline(test_name, action, website):
    """Run action_agent -> task_agent -> browser_agent -> scoring_agent and log each step.

    Args:
        test_name: Label shown in the subsection header.
        action: Action description passed to action_agent.
        website: Website passed to action_agent, task_agent and browser_agent.

    Returns:
        A dict with the keys ``function_metadata``, ``task_description``,
        ``browser_result`` and ``evaluation`` on success, or None if any
        step raised an exception.
    """
    log_subsection(f"TEST: {test_name} - Full Pipeline with Browser")
    logger.info(f"Action: {action}")
    logger.info(f"Website: {website}")
    logger.info("")
    try:
        # Step 1: Action Agent
        logger.info("Step 1: Running action_agent...")
        function_metadata = await action_agent(action, website)
        logger.info(f"✓ Generated function: {function_metadata.function_name}")
        logger.info("")
        # Step 2: Task Agent
        logger.info("Step 2: Running task_agent...")
        task_description = await task_agent(function_metadata, website)
        logger.info("✓ Generated task description")
        logger.info(f"Task: {task_description}")
        logger.info("")
        # Step 3: Browser Agent
        logger.info("Step 3: Running browser_agent...")
        browser_result = await browser_agent(website, task_description)
        logger.info("✓ Browser agent completed")
        logger.info(f"Result: {browser_result}")
        logger.info("")
        # Step 4: Scoring Agent (receives the browser result as a string)
        logger.info("Step 4: Running scoring_agent...")
        evaluation = await scoring_agent(task_description, str(browser_result))
        logger.info("✓ Scoring agent completed")
        logger.info(f"Score: {evaluation.score}/10")
        logger.info(f"Reasoning: {evaluation.reasoning}")
        logger.info("")
        return {
            "function_metadata": function_metadata,
            "task_description": task_description,
            "browser_result": browser_result,
            "evaluation": evaluation
        }
    except Exception as e:
        # Deliberate catch-all: one failing case must not abort the suite.
        # logger.exception logs at ERROR level and appends the traceback,
        # so no manual traceback formatting is needed.
        logger.exception(f"✗ Test failed with error: {e}")
        return None
def _test_succeeded(outcome):
    """Return True when a test helper's return value signals success.

    The test helpers in this file return None, or a tuple made only of
    None values, after logging a failure; anything else counts as success.
    """
    if isinstance(outcome, tuple):
        return any(part is not None for part in outcome)
    return outcome is not None


async def run_all_tests():
    """Run every test case sequentially and log a timed pass/fail summary.

    The cases are grouped in three sections: action agent only, the task
    agent pipeline, and the full pipeline with the browser. Each case's
    return value is inspected so that the closing summary reports how many
    cases passed and which ones failed, instead of discarding the results.
    """
    start_time = datetime.now()
    log_separator("STARTING COMPREHENSIVE TEST SUITE")
    logger.info(f"Test started at: {start_time}")
    logger.info("")

    # (section heading, test coroutine, ((test_name, action, website), ...))
    sections = (
        (
            "SECTION 1: ACTION AGENT ONLY TESTS",
            test_action_agent_only,
            (
                (
                    "Simple Login",
                    "login to the website with username and password",
                    "https://example.com",
                ),
                (
                    "GitHub Search",
                    "search for repositories on GitHub",
                    "https://github.com",
                ),
                (
                    "Amazon Product Search",
                    "search for products and get their prices",
                    "https://amazon.com",
                ),
                (
                    "Twitter Post",
                    "post a tweet with text and optional image",
                    "https://twitter.com",
                ),
            ),
        ),
        (
            "SECTION 2: TASK AGENT PIPELINE TESTS",
            test_task_agent_only,
            (
                (
                    "HackerNews Top Stories",
                    "get the top 5 stories from the homepage",
                    "https://news.ycombinator.com",
                ),
                (
                    "Reddit Subreddit Browse",
                    "browse a subreddit and get the top posts",
                    "https://reddit.com/r/python",
                ),
                (
                    "Wikipedia Article Summary",
                    "extract the introduction and key facts from an article",
                    "https://en.wikipedia.org/wiki/Python_(programming_language)",
                ),
            ),
        ),
        (
            "SECTION 3: FULL PIPELINE WITH BROWSER TESTS",
            test_full_pipeline,
            (
                (
                    "Google Search",
                    "search for a query and get the top results",
                    "https://google.com",
                ),
                (
                    "Product Hunt Browse",
                    "get today's top products",
                    "https://www.producthunt.com",
                ),
                (
                    "Stack Overflow Question",
                    "search for questions about Python async",
                    "https://stackoverflow.com",
                ),
            ),
        ),
    )

    passed = []
    failed = []
    for heading, run_case, cases in sections:
        log_separator(heading)
        for test_name, action, website in cases:
            # Cases run one at a time, in declaration order.
            outcome = await run_case(test_name, action, website)
            bucket = passed if _test_succeeded(outcome) else failed
            bucket.append(test_name)

    # Summary
    end_time = datetime.now()
    duration = end_time - start_time
    log_separator("TEST SUITE COMPLETED")
    logger.info(f"Test completed at: {end_time}")
    logger.info(f"Total duration: {duration}")
    logger.info(f"Passed: {len(passed)}/{len(passed) + len(failed)}")
    if failed:
        logger.error(f"Failed: {', '.join(failed)}")
    logger.info("")
    logger.info("All results have been logged to: test_results.log")
    logger.info("=" * 80)
if __name__ == "__main__":
    # Script entry point: build the suite coroutine and run it to completion
    # on a fresh asyncio event loop.
    suite = run_all_tests()
    asyncio.run(suite)