OmniMCP

OmniMCP
tests

# tests/test_omnimcp_core.py

import pytest
from unittest.mock import patch
from PIL import Image

# Corrected imports based on file moves
from omnimcp.visual_state import VisualState

# Removed: from omnimcp.mcp_server import OmniMCP (no longer used in this file)
from omnimcp.synthetic_ui import generate_login_screen


# --- Fixtures ---


# Mock OmniParserClient for testing VisualState without real API calls
class MockOmniParserClient:
    """Stand-in parser client that replays a canned response.

    Used so VisualState can be tested without making real parser API calls.
    """

    def __init__(self, mock_response: dict):
        # Placeholder URL; this mock never issues a request to it.
        self.server_url = "http://mock-parser.test"
        # Payload handed back verbatim by every parse_image() call.
        self.mock_response = mock_response

    def parse_image(self, image: Image.Image) -> dict:
        """Return the stored response; ``image`` is accepted but never inspected."""
        return self.mock_response


@pytest.fixture
def synthetic_ui_data():
    """
    Build a synthetic login screen and a parser-style response describing it.

    Returns: (PIL.Image, dict_parser_response, list_of_expected_parser_dicts)
    """
    screen_image, ui_elements = generate_login_screen()

    def _as_parser_entry(element):
        # bounds unpack as (x, y, w, h); the parser-style "bbox" is a list of
        # corner coordinates [x_min, y_min, x_max, y_max].
        left, top, width, height = element.bounds
        return {
            "bbox": [left, top, left + width, top + height],
            "content": element.content,
            "type": element.type,
            "confidence": element.confidence,
            "attributes": element.attributes,
        }

    parser_entries = [_as_parser_entry(element) for element in ui_elements]

    # Wrap the list under the key VisualState reads the parsed elements from.
    parser_response = {"parsed_content_list": parser_entries}

    return screen_image, parser_response, parser_entries


@pytest.fixture
def mock_parser_client(synthetic_ui_data):
    """Return a MockOmniParserClient preloaded with the synthetic parser response."""
    # Only the middle item (the parser-style response dict) is needed here.
    _image, canned_response, _entries = synthetic_ui_data
    return MockOmniParserClient(canned_response)


# --- Tests ---

# TODO: Add test for VisualState initialization failure if parser client is None/invalid.
# def test_visual_state_initialization_fails_without_client():
#     with pytest.raises(ValueError):
#         VisualState(parser_client=None)


def test_visual_state_parsing(synthetic_ui_data, mock_parser_client):
    """VisualState.update should ingest the elements returned by the mocked parser."""
    screenshot, _, expected_dicts = synthetic_ui_data

    # Patch take_screenshot where visual_state looks it up, so update() receives
    # the synthetic image instead of capturing the real screen.
    with patch("omnimcp.visual_state.take_screenshot", return_value=screenshot):
        vs = VisualState(parser_client=mock_parser_client)
        vs.update()

    # The screenshot and its size are recorded, and no element is lost or added.
    assert vs._last_screenshot == screenshot
    assert vs.screen_dimensions == screenshot.size
    assert len(vs.elements) == len(expected_dicts)

    # Convert the resulting UIElements back to dicts for comparison.
    actual_dicts = [element.to_dict() for element in vs.elements]

    # IDs must run 0, 1, 2, ... in element order.
    assert all(entry["id"] == position for position, entry in enumerate(actual_dicts))

    def _type_content_pairs(dicts):
        # Set of (type, content) pairs; IDs are deliberately left out.
        return {(entry["type"], entry["content"]) for entry in dicts}

    assert _type_content_pairs(actual_dicts) == _type_content_pairs(expected_dicts)


def test_element_finding(synthetic_ui_data, mock_parser_client):
    """VisualState.find_element should resolve simple textual descriptions."""
    screenshot, _, _ = synthetic_ui_data

    # Feed the synthetic image to update() in place of a real screen capture.
    with patch("omnimcp.visual_state.take_screenshot", return_value=screenshot):
        vs = VisualState(parser_client=mock_parser_client)
        vs.update()

    # TODO: Improve find_element logic and add more robust tests here.
    # Current matching is very basic keyword search.

    # A description naming an existing button resolves to that button.
    found_login = vs.find_element("Login button")
    assert found_login is not None
    assert found_login.type == "button"
    assert found_login.content == "Login"

    # The username field is found and starts out with empty content.
    found_username = vs.find_element("username text field")
    assert found_username is not None
    assert found_username.type == "text_field"
    assert found_username.content == ""

    # A description matching nothing yields None.
    missing = vs.find_element("non_existent element foobar")
    assert missing is None

    # A bare type query may be ambiguous, but must still return *a* button.
    any_button = vs.find_element("button")
    assert any_button is not None

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OpenAdaptAI/OmniMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.