# MCP Operator
# by willer
# Verified
#!/usr/bin/env python3
"""
Utility functions for the OpenAI Computer Use Agent (CUA)
"""
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Tuple, Any, Optional
from enum import Enum, auto
# Version tag for this utils module.  Increment on significant changes so
# cached/stale imports can be identified at runtime.
UTILS_VERSION = "1.0.0"

# Emit a load trace on stderr when import debugging is requested, which
# helps diagnose import/caching problems.
if "DEBUG_IMPORTS" in os.environ:
    sys.stderr.write(f"Loading lib.cua.utils module version {UTILS_VERSION}\n")
class TestStatus(Enum):
    """Enum for test statuses."""
    PASS = auto()
    FAIL = auto()
    ERROR = auto()
    SKIP = auto()
    UNCERTAIN = auto()


# Shared explanation for tests that cannot run without credentials that the
# automated test environment does not have.
_AUTH_SKIP_MESSAGE = (
    "This test requires special auth that's not available in the test environment."
)

# Test IDs (ClassName.test_name) that are auto-skipped because of known
# authentication constraints.  Hoisted to module level so the mapping is
# built once instead of on every call.
_AUTH_ISSUE_TESTS = {
    test_id: _AUTH_SKIP_MESSAGE
    for test_id in (
        # CMS tests with auth issues
        "TestCMS.cms_payroll_and_benefits",
        "TestCMS.cms_payroll_and_benefits_canada",
        "TestCMS.cms_payroll_and_benefits_usa",
        "TestCMS.cms_office_tech",
        "TestCMS.cms_travel",
        "TestCMS.cms_travel_navan_vs_genome",
        "TestCMS.cms_travel_contact_us",
        "TestCMS.cms_travel_cities",
        "TestCMS.cms_travel_navan",
        # Hub tests with auth issues
        "HubMain.verify_people_link",
        "HubMain.verify_launcher_link",
        "HubMain.verify_pulse_link",
        "HubMain.verify_news_link",
        "HubMain.verify_chatter_link",
        "HubMain.verify_ai_tools_link",
        "HubMain.verify_chat_link",
        "HubMain.verify_agents_link",
        # Other known test issues
        "HomepageNavigation.verify_navigation_sidebar_elements",
        "HomepageNavigation.verify_sidebar_elements",
        "HomepageNavigation.verify_create_task_functionality",
        "TasksSection.verify_task_list_display",
        "TasksSection.verify_task_search_functionality",
        "TasksSection.verify_task_settings_functionality",
    )
}


def _format_conversation(result, message):
    """Render the agent conversation history as a readable transcript.

    Falls back to ``Result: <message>`` when no history is available.

    Args:
        result: Agent result object (``conversation_history`` is optional).
        message: Final result message used for the fallback line.

    Returns:
        str: Newline-joined transcript text.
    """
    lines = []
    history = getattr(result, 'conversation_history', None)
    if history:
        for item in history:
            role = item.get('role', 'unknown')
            content = item.get('content', '')
            item_type = item.get('type', 'message')  # Get the type of content
            if role == 'user':
                # Truncate long user prompts to keep logs readable.
                if len(content) > 100:
                    content = content[:100] + "..."
                lines.append(f"User: {content}")
            elif role == 'assistant':
                if item_type == 'reasoning':
                    # Format reasoning differently
                    lines.append(f"Reasoning: {content}")
                elif item_type == 'action':
                    # Format actions
                    lines.append(f"Action: {content}")
                else:
                    # Regular assistant messages
                    lines.append(f"Assistant: {content}")
            # Separator between conversation items
            lines.append("---")
    else:
        # Fallback if no conversation history
        lines.append(f"Result: {message}")
    return "\n".join(lines)


def process_agent_output(result, class_name, test_name):
    """Process agent output to determine status and message.

    Args:
        result: Agent result object; may expose ``message``, ``success`` and
            ``conversation_history`` attributes.
        class_name: Name of the test class.
        test_name: Name of the test method.

    Returns:
        tuple: (success, result_message, full_output_text, uncertain, status)
            success: Boolean indicating if test passed
            result_message: Human-readable result message
            full_output_text: Full output for logging
            uncertain: Boolean indicating if result is uncertain/ambiguous
            status: TestStatus member describing the outcome

    Note:
        Every return path now yields the same 5-tuple shape; previously the
        early-return paths (auth skip, auth message, API error) returned
        4-tuples and dropped the ``status`` they had just computed.
    """
    # Initialize status to ERROR as a default until classified below.
    status = TestStatus.ERROR

    # Check if we should skip this test due to known auth issues.
    test_id = f"{class_name}.{test_name}"
    if test_id in _AUTH_ISSUE_TESTS:
        print(f"NOTICE: Treating {test_id} as SKIPPED due to known authentication constraints")
        return (
            True,
            f"SKIPPED: {_AUTH_ISSUE_TESTS[test_id]}",
            "This test requires special authentication and is auto-skipped.",
            True,
            TestStatus.SKIP,
        )

    # Extract message and set default success to False.
    message = result.message if hasattr(result, 'message') else "Test completed"
    success = result.success if hasattr(result, 'success') else False

    # Authentication requirements detected in the agent's own message also
    # cause a skip rather than a failure.
    if hasattr(result, 'message') and any(phrase in message.lower() for phrase in (
        "security key",
        "google login",
        "google authentication",
        "requires authentication",
        "requires login",
        "login required",
    )):
        print("NOTICE: Detected authentication requirement in test message. Marking as SKIPPED.")
        return (
            True,
            "SKIPPED: Test requires authentication that is unavailable in automated testing",
            message,
            True,
            TestStatus.SKIP,
        )

    # API errors / timeouts are infrastructure errors, not test failures.
    if "API Error" in message or "timeout" in message.lower() or "rate limit" in message.lower():
        print("NOTICE: Detected API error or timeout in test message. Marking as ERROR.")
        return False, f"ERROR: {message}", message, False, TestStatus.ERROR

    # Strip an INCONCLUSIVE/UNCERTAIN prefix and decide whether the agent
    # actually reported a pass underneath it.
    uncertain = False
    if message.startswith("INCONCLUSIVE:") or "UNCERTAIN" in message.upper():
        clean_message = message.replace("INCONCLUSIVE:", "").strip()
        uncertain = True
        message = clean_message
        status = TestStatus.UNCERTAIN
        upper = clean_message.upper()
        # Check if it's actually a PASS despite being uncertain.
        if clean_message.startswith("PASSED") or upper.startswith("PASS"):
            success = True
        elif "PASSED" in upper and not any(x in upper for x in ("NOT PASSED", "FAILED", "FAIL")):
            success = True

    # message is final from here on; lower it once for all pattern checks.
    lowered = message.lower()

    # Patterns that indicate success.
    if ("test passed" in lowered or
            lowered.startswith("passed") or
            "passed:" in lowered or
            "successfully completed" in lowered or
            "all requirements are met" in lowered or
            "requirements have been met" in lowered):
        success = True
        status = TestStatus.PASS
        # Qualifiers like "partially" or "most" downgrade a pass to uncertain.
        if any(qualifier in lowered for qualifier in (
                "partially", "most", "some", "unclear", "uncertain", "might", "may", "not sure")):
            uncertain = True
            status = TestStatus.UNCERTAIN

    # Patterns that indicate failure.
    if ("test failed" in lowered or
            lowered.startswith("failed") or
            "failed:" in lowered or
            "not all requirements are met" in lowered or
            "requirements have not been met" in lowered):
        success = False
        status = TestStatus.FAIL

    # Patterns that indicate error (not failure).
    if ("error:" in lowered or
            "exception" in lowered or
            "crashed" in lowered or
            "timeout" in lowered):
        success = False
        status = TestStatus.ERROR

    # Detect uncertain answers.
    if ("could not determine" in lowered or
            "unclear if" in lowered or
            "couldn't verify" in lowered or
            "not able to verify" in lowered or
            "uncertain" in lowered or
            "inconclusive" in lowered or
            "partial" in lowered or
            "maybe" in lowered or
            "possibly" in lowered):
        uncertain = True
        status = TestStatus.UNCERTAIN

    # Build full output text from conversation history (or fallback line).
    full_output_text = _format_conversation(result, message)

    # Prepend the status to the message for clarity (only if not already there).
    prefix = {
        TestStatus.PASS: "PASS: ",
        TestStatus.FAIL: "FAIL: ",
        TestStatus.ERROR: "ERROR: ",
        TestStatus.SKIP: "SKIP: ",
        TestStatus.UNCERTAIN: "UNCERTAIN: ",
    }.get(status, "")
    if not message.startswith(prefix):
        message = f"{prefix}{message}"

    return (success, message, full_output_text, uncertain, status)
def format_task_from_csv_row(row: Dict[str, Any], host: Optional[str] = None) -> str:
    """Format a task from a CSV row.

    Args:
        row: Dictionary containing test data from CSV; must provide the
            ``class``, ``test``, ``base_url`` and ``task`` keys.
        host: Host URL to use for base_url.  When None, ``base_url`` is used
            as-is.  (Annotation fixed from the invalid implicit-Optional
            ``host: str = None``.)

    Returns:
        str: Formatted task string.

    Raises:
        KeyError: If a required column is missing from ``row``.
    """
    class_name = row['class']
    test_name = row['test']
    base_url = row['base_url']
    task = row['task']

    # Resolve against the host only when one was supplied.
    full_url = get_full_url(base_url, host=host) if host else base_url

    # Create a task that clearly separates the requirements with better formatting.
    task_with_setup = f"""
Test: {class_name}.{test_name}
URL: {full_url}
TEST REQUIREMENTS:
-----------------
{task.strip()}
-----------------
Please follow all steps in the test requirements in order, and determine if the test passes or fails.
VERY IMPORTANT: For each action you take, please always provide your reasoning. Format your actions like this:
[REASONING] I'm clicking this button because it appears to be the login button that will take me to the dashboard.
[ACTION] *click on login button*
RESPONSE FORMAT:
1. First, write your thought process for determining if this test passes or fails
2. Consider each requirement and whether it was successfully completed
3. End your response with exactly "Test PASSED." or "Test FAILED." as appropriate
4. After this statement, add a brief explanation summarizing the key results
Example of a good response:
"I completed the following steps:
1. Navigated to the dashboard page
2. Verified the news card was present with 3 items
3. Confirmed the View All button existed and was clickable
Test PASSED. The dashboard showed all required components including the news card with items and functioning View All button."
"""
    return task_with_setup
def get_full_url(base_url: str, host: str) -> str:
    """Convert a relative path to a full URL using the configured host.

    Args:
        base_url: The URL or path to convert.
        host: Host URL to use; trailing slashes are tolerated.

    Returns:
        str: The full URL including host if needed.
    """
    from urllib.parse import urlparse

    # If it's already a full URL (has a scheme like http/https), return as is.
    if urlparse(base_url).scheme:
        return base_url

    # Normalize both sides so joining never produces a double slash
    # (previously `host` ending in '/' yielded e.g. "http://h//path").
    return f"{host.rstrip('/')}/{base_url.lstrip('/')}"
def load_auth_state(session_name: str, auth_dir: str = '.auth') -> Optional[Dict]:
    """Load authentication state from file.

    Args:
        session_name: Name of the auth session.
        auth_dir: Directory to load auth state from.

    Returns:
        Optional[Dict]: Authentication state, or None if the file is
        missing, unreadable, or not valid JSON.
    """
    auth_file = Path(auth_dir) / f'{session_name}.json'
    if not auth_file.exists():
        return None
    try:
        with auth_file.open(encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        # Narrowed from a bare `except Exception` so programming errors are
        # not silently swallowed; unreadable or corrupt state still yields None.
        print(f"Error loading auth state: {e}")
        return None