OmniMCP

by OpenAdaptAI
Verified
# omnimcp/omniparser/mapper.py
from typing import List, Dict, Any

from loguru import logger

# Bounds is used below as an (x, y, width, height) tuple of relative values.
from omnimcp.types import UIElement, Bounds


def map_omniparser_to_uielements(
    parser_json: Dict, img_width: int, img_height: int
) -> List[UIElement]:
    """Convert raw OmniParser JSON output to a list of UIElement objects.

    Each entry of ``parser_json["parsed_content_list"]`` is expected to be a
    dict carrying a ``bbox`` list ``[x_min, y_min, x_max, y_max]`` in relative
    (0..1) coordinates. Entries that are malformed, out of range, or smaller
    than 3 pixels in either dimension are skipped (and logged) rather than
    aborting the whole mapping.

    Args:
        parser_json: Parsed OmniParser response.
        img_width: Width of the source image in pixels; used only to filter
            out tiny elements.
        img_height: Height of the source image in pixels; used only to filter
            out tiny elements.

    Returns:
        The successfully mapped elements, with ids assigned sequentially from
        0 in input order (skipped entries do not consume an id). Empty if
        ``parsed_content_list`` is missing or is not a list.
    """
    elements: List[UIElement] = []
    element_id_counter = 0

    # Adjust key if needed based on actual OmniParser output schema
    raw_elements: List[Dict[str, Any]] = parser_json.get("parsed_content_list", [])
    if not isinstance(raw_elements, list):
        logger.error(
            f"Expected 'parsed_content_list' to be a list, got: {type(raw_elements)}"
        )
        return elements  # Return empty list

    logger.info(f"Processing {len(raw_elements)} raw elements from OmniParser.")

    # Minimum width or height in pixels; loop-invariant, so defined once.
    min_pixel_size = 3

    for item in raw_elements:
        try:
            if not isinstance(item, dict):
                logger.warning(f"Skipping non-dict item in parsed_content_list: {item}")
                continue

            # 1. Extract and validate bbox
            bbox_rel = item.get("bbox")
            if not isinstance(bbox_rel, list) or len(bbox_rel) != 4:
                logger.debug(
                    f"Skipping element due to invalid/missing bbox: {item.get('content')}"
                )
                continue  # Skip elements without a valid bbox list

            # 2. Convert bbox to normalized (x, y, width, height) format.
            # Each coordinate is converted before subtracting so numeric
            # strings are accepted; non-numeric values raise and are handled
            # by the except clause below.
            x_min, y_min, x_max, y_max = (float(value) for value in bbox_rel)
            x = x_min
            y = y_min
            w = x_max - x_min
            h = y_max - y_min

            # Relative coords must lie in [0, 1] with positive width/height.
            # A small tolerance (0.001) absorbs floating point inaccuracies
            # near the right/bottom edges.
            if not (
                0.0 <= x <= 1.0
                and 0.0 <= y <= 1.0
                and w > 0.0
                and h > 0.0
                and (x + w) <= 1.001
                and (y + h) <= 1.001
            ):
                logger.warning(
                    f"Skipping element due to invalid relative bounds values "
                    f"(x={x:.3f}, y={y:.3f}, w={w:.3f}, h={h:.3f}): "
                    f"{item.get('content')}"
                )
                continue

            # Filter tiny elements based on absolute size
            if (w * img_width < min_pixel_size) or (h * img_height < min_pixel_size):
                logger.debug(
                    f"Skipping potentially tiny element "
                    f"(w={w * img_width:.1f}, h={h * img_height:.1f} px): "
                    f"{item.get('content')}"
                )
                continue

            bounds: Bounds = (x, y, w, h)

            # 3. Extract and normalize type string
            element_type = str(item.get("type", "unknown")).lower().replace(" ", "_")

            # 4. Extract content
            content = str(item.get("content", ""))

            # An explicit null confidence is treated like a missing one, the
            # same way a null "attributes" value falls back to {} below.
            raw_confidence = item.get("confidence")
            confidence = 0.0 if raw_confidence is None else float(raw_confidence)

            # 5. Create UIElement
            elements.append(
                UIElement(
                    id=element_id_counter,
                    type=element_type,
                    content=content,
                    bounds=bounds,
                    confidence=confidence,
                    attributes=item.get("attributes", {}) or {},  # Ensure it's a dict
                )
            )
            element_id_counter += 1

        except (ValueError, TypeError, KeyError) as e:
            logger.warning(
                f"Skipping element due to mapping error: {item.get('content')} - Error: {e}"
            )
        except Exception as unexpected_e:
            # Catch any other unexpected errors during item processing.
            # logger.exception() logs at ERROR level with the traceback.
            # loguru treats keyword arguments as str.format() fields rather
            # than stdlib logging options, so exc_info=True would not attach
            # a traceback and could fail on messages containing braces.
            logger.exception(
                f"Unexpected error mapping element: {item.get('content')} - {unexpected_e}"
            )

    logger.info(
        f"Successfully mapped {len(elements)} UIElements from OmniParser response."
    )
    return elements