Skip to main content
Glama

OmniMCP

by OpenAdaptAI
# tests/test_omnimcp_core.py import pytest from unittest.mock import patch from PIL import Image # Corrected imports based on file moves from omnimcp.visual_state import VisualState # Removed: from omnimcp.mcp_server import OmniMCP (no longer used in this file) from omnimcp.synthetic_ui import generate_login_screen # --- Fixtures --- # Mock OmniParserClient for testing VisualState without real API calls class MockOmniParserClient: def __init__(self, mock_response: dict): self.mock_response = mock_response self.server_url = "http://mock-parser.test" def parse_image(self, image: Image.Image) -> dict: # Simulate returning the mock response regardless of image input return self.mock_response @pytest.fixture def synthetic_ui_data(): """ Generates synthetic UI data and formats it like the expected parser response. Returns: (PIL.Image, dict_parser_response, list_of_expected_parser_dicts) """ img, elements_obj_list = generate_login_screen() # Convert UIElement objects to dicts matching expected OmniParser JSON structure mock_parser_list = [] for el in elements_obj_list: x, y, w, h = el.bounds x_min, y_min, x_max, y_max = x, y, x + w, y + h # Create dict matching expected parser structure key "bbox" and list format parser_dict = { "bbox": [x_min, y_min, x_max, y_max], # Use "bbox" key with list "content": el.content, "type": el.type, "confidence": el.confidence, # Ensure confidence is included "attributes": el.attributes, } mock_parser_list.append(parser_dict) # The final structure expected by VisualState._update_elements_from_parser mock_parser_response = {"parsed_content_list": mock_parser_list} return img, mock_parser_response, mock_parser_list @pytest.fixture def mock_parser_client(synthetic_ui_data): """Provides a MockOmniParserClient instance with synthetic data.""" _, mock_parser_response, _ = synthetic_ui_data return MockOmniParserClient(mock_parser_response) # --- Tests --- # TODO: Add test for VisualState initialization failure if parser client is None/invalid. # def test_visual_state_initialization_fails_without_client(): # with pytest.raises(ValueError): # VisualState(parser_client=None) def test_visual_state_parsing(synthetic_ui_data, mock_parser_client): """Test VisualState.update processes elements from the (mocked) parser client.""" test_img, _, expected_elements_list_of_dicts = synthetic_ui_data # Patch take_screenshot used within visual_state.update # Target the function where it's looked up (in the visual_state module) with patch("omnimcp.visual_state.take_screenshot", return_value=test_img): vs = VisualState(parser_client=mock_parser_client) vs.update() # Trigger screenshot mock and parsing mock # Assertions assert vs._last_screenshot == test_img assert vs.screen_dimensions == test_img.size assert len(vs.elements) == len(expected_elements_list_of_dicts) # Compare element details (convert actual UIElements back to dicts for comparison) actual_elements_dicts = [elem.to_dict() for elem in vs.elements] # Basic check: Ensure IDs are sequential starting from 0 assert all( actual_elements_dicts[i]["id"] == i for i in range(len(actual_elements_dicts)) ) # Compare content based on expected list (ignoring ID for comparison) expected_contents = { (d["type"], d["content"]) for d in expected_elements_list_of_dicts } actual_contents = {(d["type"], d["content"]) for d in actual_elements_dicts} assert actual_contents == expected_contents def test_element_finding(synthetic_ui_data, mock_parser_client): """Test VisualState.find_element locates elements using basic matching.""" test_img, _, _ = synthetic_ui_data # Patch take_screenshot used within visual_state.update with patch("omnimcp.visual_state.take_screenshot", return_value=test_img): vs = VisualState(parser_client=mock_parser_client) vs.update() # Populate elements # TODO: Improve find_element logic and add more robust tests here. # Current matching is very basic keyword search. # Test finding existing elements login_button = vs.find_element("Login button") assert login_button is not None assert login_button.type == "button" assert login_button.content == "Login" username_field = vs.find_element("username text field") assert username_field is not None assert username_field.type == "text_field" # Expect empty string for initial content of the username field assert username_field.content == "" # Test finding non-existent element non_existent = vs.find_element("non_existent element foobar") assert non_existent is None # Test finding based only on type (might be ambiguous) a_button = vs.find_element("button") assert a_button is not None # Should find *a* button

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OpenAdaptAI/OmniMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server