OmniMCP

OmniMCP
omnimcp
omniparser

# omnimcp/omniparser/mapper.py

from typing import List, Dict, Any  # Added Any

from loguru import logger

# Assuming types are imported correctly
from omnimcp.types import UIElement, Bounds  # Assuming Bounds is tuple (x,y,w,h)


def map_omniparser_to_uielements(
    parser_json: Dict, img_width: int, img_height: int
) -> List[UIElement]:
    """Convert raw OmniParser JSON output into a list of UIElement objects.

    Each entry of ``parser_json["parsed_content_list"]`` is expected to be a
    dict carrying a ``bbox`` of four relative coordinates
    ``[x_min, y_min, x_max, y_max]``. Entries that are malformed, fall outside
    the unit square, or are smaller than 3 pixels in either dimension are
    skipped (and logged) rather than aborting the whole mapping.

    Args:
        parser_json: Parsed OmniParser response.
        img_width: Width of the source image in pixels; used only to filter
            out elements that would be tiny on screen.
        img_height: Height of the source image in pixels; used only to filter
            out elements that would be tiny on screen.

    Returns:
        The successfully mapped elements, with sequential ``id`` values
        starting at 0 and ``bounds`` given as relative ``(x, y, w, h)``.
        Returns an empty list when ``parsed_content_list`` is not a list.
    """
    elements: List[UIElement] = []
    # Adjust key if needed based on actual OmniParser output schema
    raw_elements: List[Dict[str, Any]] = parser_json.get("parsed_content_list", [])

    if not isinstance(raw_elements, list):
        logger.error(
            f"Expected 'parsed_content_list' to be a list, got: {type(raw_elements)}"
        )
        return elements  # Return empty list

    logger.info(f"Processing {len(raw_elements)} raw elements from OmniParser.")

    # Loop-invariant: minimum width or height, in pixels, for an element to be kept.
    min_pixel_size = 3

    for item in raw_elements:
        try:
            if not isinstance(item, dict):
                logger.warning(f"Skipping non-dict item in parsed_content_list: {item}")
                continue

            # 1. Extract and validate bbox
            bbox_rel = item.get("bbox")
            if not isinstance(bbox_rel, (list, tuple)) or len(bbox_rel) != 4:
                logger.debug(
                    f"Skipping element due to invalid/missing bbox: {item.get('content')}"
                )
                continue  # Skip elements without a valid 4-item bbox

            # 2. Convert bbox to normalized (x, y, width, height) format.
            # Coerce every coordinate to float *before* doing arithmetic so
            # numeric strings (e.g. "0.25") are handled uniformly instead of
            # failing on the subtraction.
            x_min, y_min, x_max, y_max = (float(coord) for coord in bbox_rel)
            x = x_min
            y = y_min
            w = x_max - x_min
            h = y_max - y_min

            # Check bounds validity (relative coords, positive w/h).
            # Zero coordinates are allowed, but width/height must be positive.
            # The 0.001 slack tolerates floating point inaccuracies near edges.
            if not (
                0.0 <= x <= 1.0
                and 0.0 <= y <= 1.0
                and w > 0.0
                and h > 0.0
                and (x + w) <= 1.001
                and (y + h) <= 1.001
            ):
                logger.warning(
                    f"Skipping element due to invalid relative bounds values (x={x:.3f}, y={y:.3f}, w={w:.3f}, h={h:.3f}): {item.get('content')}"
                )
                continue

            # Filter tiny elements based on absolute (pixel) size
            if (w * img_width < min_pixel_size) or (h * img_height < min_pixel_size):
                logger.debug(
                    f"Skipping potentially tiny element (w={w * img_width:.1f}, h={h * img_height:.1f} px): {item.get('content')}"
                )
                continue

            bounds: Bounds = (x, y, w, h)

            # 3. Extract and normalize type string
            element_type = str(item.get("type", "unknown")).lower().replace(" ", "_")

            # 4. Extract content
            content = str(item.get("content", ""))

            # 5. Extract confidence; treat a missing or explicit null value
            # as 0.0, mirroring how a null "attributes" falls back to {}.
            raw_confidence = item.get("confidence")
            confidence = 0.0 if raw_confidence is None else float(raw_confidence)

            # 6. Create UIElement; ids are sequential over the kept elements.
            elements.append(
                UIElement(
                    id=len(elements),
                    type=element_type,
                    content=content,
                    bounds=bounds,
                    confidence=confidence,
                    attributes=item.get("attributes", {}) or {},
                )
            )

        except (ValueError, TypeError, KeyError) as e:
            logger.warning(
                f"Skipping element due to mapping error: {item.get('content')} - Error: {e}"
            )
        except Exception as unexpected_e:
            # Catch any other unexpected errors during item processing.
            # loguru has no `exc_info` keyword (extra kwargs are fed to
            # str.format on the message); `logger.exception` logs at ERROR
            # level and attaches the active traceback.
            logger.exception(
                f"Unexpected error mapping element: {item.get('content')} - {unexpected_e}"
            )

    logger.info(
        f"Successfully mapped {len(elements)} UIElements from OmniParser response."
    )
    return elements

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OpenAdaptAI/OmniMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.