# MCP Operator
# by willer
# Verified
#!/usr/bin/env python3
"""
Utility functions for the OpenAI Computer Use Agent (CUA)
"""
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Tuple, Any, Optional
from enum import Enum, auto
# Version tag for this utils module.  Increment on significant changes so
# cached/stale imports can be identified at runtime.
UTILS_VERSION = "1.0.0"

# Emit a load trace on stderr when import debugging is requested, which
# helps diagnose import/caching problems.
if "DEBUG_IMPORTS" in os.environ:
    sys.stderr.write(f"Loading lib.cua.utils module version {UTILS_VERSION}\n")
class TestStatus(Enum):
    """Enum for test statuses."""
    PASS = auto()
    FAIL = auto()
    ERROR = auto()
    SKIP = auto()
    UNCERTAIN = auto()


# Shared explanation for tests that cannot run without credentials that the
# automated test environment does not have.
_AUTH_SKIP_MESSAGE = (
    "This test requires special auth that's not available in the test environment."
)

# Test IDs (ClassName.test_name) that are auto-skipped because of known
# authentication constraints.  Hoisted to module level so the mapping is
# built once instead of on every call.
_AUTH_ISSUE_TESTS = {
    test_id: _AUTH_SKIP_MESSAGE
    for test_id in (
        # CMS tests with auth issues
        "TestCMS.cms_payroll_and_benefits",
        "TestCMS.cms_payroll_and_benefits_canada",
        "TestCMS.cms_payroll_and_benefits_usa",
        "TestCMS.cms_office_tech",
        "TestCMS.cms_travel",
        "TestCMS.cms_travel_navan_vs_genome",
        "TestCMS.cms_travel_contact_us",
        "TestCMS.cms_travel_cities",
        "TestCMS.cms_travel_navan",
        # Hub tests with auth issues
        "HubMain.verify_people_link",
        "HubMain.verify_launcher_link",
        "HubMain.verify_pulse_link",
        "HubMain.verify_news_link",
        "HubMain.verify_chatter_link",
        "HubMain.verify_ai_tools_link",
        "HubMain.verify_chat_link",
        "HubMain.verify_agents_link",
        # Other known test issues
        "HomepageNavigation.verify_navigation_sidebar_elements",
        "HomepageNavigation.verify_sidebar_elements",
        "HomepageNavigation.verify_create_task_functionality",
        "TasksSection.verify_task_list_display",
        "TasksSection.verify_task_search_functionality",
        "TasksSection.verify_task_settings_functionality",
    )
}


def _format_conversation(result, message):
    """Render the agent conversation history as a readable transcript.

    Falls back to ``Result: <message>`` when no history is available.

    Args:
        result: Agent result object (``conversation_history`` is optional).
        message: Final result message used for the fallback line.

    Returns:
        str: Newline-joined transcript text.
    """
    lines = []
    history = getattr(result, 'conversation_history', None)
    if history:
        for item in history:
            role = item.get('role', 'unknown')
            content = item.get('content', '')
            item_type = item.get('type', 'message')  # Get the type of content
            if role == 'user':
                # Truncate long user prompts to keep logs readable.
                if len(content) > 100:
                    content = content[:100] + "..."
                lines.append(f"User: {content}")
            elif role == 'assistant':
                if item_type == 'reasoning':
                    # Format reasoning differently
                    lines.append(f"Reasoning: {content}")
                elif item_type == 'action':
                    # Format actions
                    lines.append(f"Action: {content}")
                else:
                    # Regular assistant messages
                    lines.append(f"Assistant: {content}")
            # Separator between conversation items
            lines.append("---")
    else:
        # Fallback if no conversation history
        lines.append(f"Result: {message}")
    return "\n".join(lines)


def process_agent_output(result, class_name, test_name):
    """Process agent output to determine status and message.

    Args:
        result: Agent result object; may expose ``message``, ``success`` and
            ``conversation_history`` attributes.
        class_name: Name of the test class.
        test_name: Name of the test method.

    Returns:
        tuple: (success, result_message, full_output_text, uncertain, status)
            success: Boolean indicating if test passed
            result_message: Human-readable result message
            full_output_text: Full output for logging
            uncertain: Boolean indicating if result is uncertain/ambiguous
            status: TestStatus member describing the outcome

    Note:
        Every return path now yields the same 5-tuple shape; previously the
        early-return paths (auth skip, auth message, API error) returned
        4-tuples and dropped the ``status`` they had just computed.
    """
    # Initialize status to ERROR as a default until classified below.
    status = TestStatus.ERROR

    # Check if we should skip this test due to known auth issues.
    test_id = f"{class_name}.{test_name}"
    if test_id in _AUTH_ISSUE_TESTS:
        print(f"NOTICE: Treating {test_id} as SKIPPED due to known authentication constraints")
        return (
            True,
            f"SKIPPED: {_AUTH_ISSUE_TESTS[test_id]}",
            "This test requires special authentication and is auto-skipped.",
            True,
            TestStatus.SKIP,
        )

    # Extract message and set default success to False.
    message = result.message if hasattr(result, 'message') else "Test completed"
    success = result.success if hasattr(result, 'success') else False

    # Authentication requirements detected in the agent's own message also
    # cause a skip rather than a failure.
    if hasattr(result, 'message') and any(phrase in message.lower() for phrase in (
        "security key",
        "google login",
        "google authentication",
        "requires authentication",
        "requires login",
        "login required",
    )):
        print("NOTICE: Detected authentication requirement in test message. Marking as SKIPPED.")
        return (
            True,
            "SKIPPED: Test requires authentication that is unavailable in automated testing",
            message,
            True,
            TestStatus.SKIP,
        )

    # API errors / timeouts are infrastructure errors, not test failures.
    if "API Error" in message or "timeout" in message.lower() or "rate limit" in message.lower():
        print("NOTICE: Detected API error or timeout in test message. Marking as ERROR.")
        return False, f"ERROR: {message}", message, False, TestStatus.ERROR

    # Strip an INCONCLUSIVE/UNCERTAIN prefix and decide whether the agent
    # actually reported a pass underneath it.
    uncertain = False
    if message.startswith("INCONCLUSIVE:") or "UNCERTAIN" in message.upper():
        clean_message = message.replace("INCONCLUSIVE:", "").strip()
        uncertain = True
        message = clean_message
        status = TestStatus.UNCERTAIN
        upper = clean_message.upper()
        # Check if it's actually a PASS despite being uncertain.
        if clean_message.startswith("PASSED") or upper.startswith("PASS"):
            success = True
        elif "PASSED" in upper and not any(x in upper for x in ("NOT PASSED", "FAILED", "FAIL")):
            success = True

    # message is final from here on; lower it once for all pattern checks.
    lowered = message.lower()

    # Patterns that indicate success.
    if ("test passed" in lowered or
            lowered.startswith("passed") or
            "passed:" in lowered or
            "successfully completed" in lowered or
            "all requirements are met" in lowered or
            "requirements have been met" in lowered):
        success = True
        status = TestStatus.PASS
        # Qualifiers like "partially" or "most" downgrade a pass to uncertain.
        if any(qualifier in lowered for qualifier in (
                "partially", "most", "some", "unclear", "uncertain", "might", "may", "not sure")):
            uncertain = True
            status = TestStatus.UNCERTAIN

    # Patterns that indicate failure.
    if ("test failed" in lowered or
            lowered.startswith("failed") or
            "failed:" in lowered or
            "not all requirements are met" in lowered or
            "requirements have not been met" in lowered):
        success = False
        status = TestStatus.FAIL

    # Patterns that indicate error (not failure).
    if ("error:" in lowered or
            "exception" in lowered or
            "crashed" in lowered or
            "timeout" in lowered):
        success = False
        status = TestStatus.ERROR

    # Detect uncertain answers.
    if ("could not determine" in lowered or
            "unclear if" in lowered or
            "couldn't verify" in lowered or
            "not able to verify" in lowered or
            "uncertain" in lowered or
            "inconclusive" in lowered or
            "partial" in lowered or
            "maybe" in lowered or
            "possibly" in lowered):
        uncertain = True
        status = TestStatus.UNCERTAIN

    # Build full output text from conversation history (or fallback line).
    full_output_text = _format_conversation(result, message)

    # Prepend the status to the message for clarity (only if not already there).
    prefix = {
        TestStatus.PASS: "PASS: ",
        TestStatus.FAIL: "FAIL: ",
        TestStatus.ERROR: "ERROR: ",
        TestStatus.SKIP: "SKIP: ",
        TestStatus.UNCERTAIN: "UNCERTAIN: ",
    }.get(status, "")
    if not message.startswith(prefix):
        message = f"{prefix}{message}"

    return (success, message, full_output_text, uncertain, status)
def format_task_from_csv_row(row: Dict[str, Any], host: Optional[str] = None) -> str:
    """Format a task from a CSV row.

    Args:
        row: Dictionary containing test data from CSV; must provide the
            ``class``, ``test``, ``base_url`` and ``task`` keys.
        host: Host URL to use for base_url.  When None, ``base_url`` is used
            as-is.  (Annotation fixed from the invalid implicit-Optional
            ``host: str = None``.)

    Returns:
        str: Formatted task string.

    Raises:
        KeyError: If a required column is missing from ``row``.
    """
    class_name = row['class']
    test_name = row['test']
    base_url = row['base_url']
    task = row['task']

    # Resolve against the host only when one was supplied.
    full_url = get_full_url(base_url, host=host) if host else base_url

    # Create a task that clearly separates the requirements with better formatting.
    task_with_setup = f"""
Test: {class_name}.{test_name}
URL: {full_url}
TEST REQUIREMENTS:
-----------------
{task.strip()}
-----------------
Please follow all steps in the test requirements in order, and determine if the test passes or fails.
VERY IMPORTANT: For each action you take, please always provide your reasoning. Format your actions like this:
[REASONING] I'm clicking this button because it appears to be the login button that will take me to the dashboard.
[ACTION] *click on login button*
RESPONSE FORMAT:
1. First, write your thought process for determining if this test passes or fails
2. Consider each requirement and whether it was successfully completed
3. End your response with exactly "Test PASSED." or "Test FAILED." as appropriate
4. After this statement, add a brief explanation summarizing the key results
Example of a good response:
"I completed the following steps:
1. Navigated to the dashboard page
2. Verified the news card was present with 3 items
3. Confirmed the View All button existed and was clickable
Test PASSED. The dashboard showed all required components including the news card with items and functioning View All button."
"""
    return task_with_setup
def get_full_url(base_url: str, host: str) -> str:
    """Convert a relative path to a full URL using the configured host.

    Args:
        base_url: The URL or path to convert.
        host: Host URL to use; trailing slashes are tolerated.

    Returns:
        str: The full URL including host if needed.
    """
    from urllib.parse import urlparse

    # If it's already a full URL (has a scheme like http/https), return as is.
    if urlparse(base_url).scheme:
        return base_url

    # Normalize both sides so joining never produces a double slash
    # (previously `host` ending in '/' yielded e.g. "http://h//path").
    return f"{host.rstrip('/')}/{base_url.lstrip('/')}"
def load_auth_state(session_name: str, auth_dir: str = '.auth') -> Optional[Dict]:
    """Load authentication state from file.

    Args:
        session_name: Name of the auth session.
        auth_dir: Directory to load auth state from.

    Returns:
        Optional[Dict]: Authentication state, or None if the file is
        missing, unreadable, or not valid JSON.
    """
    auth_file = Path(auth_dir) / f'{session_name}.json'
    if not auth_file.exists():
        return None
    try:
        with auth_file.open(encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        # Narrowed from a bare `except Exception` so programming errors are
        # not silently swallowed; unreadable or corrupt state still yields None.
        print(f"Error loading auth state: {e}")
        return None