OmniMCP

OmniMCP
omnimcp

synthetic_ui.py•20.3 kB

# omnimcp/synthetic_ui.py
"""Synthetic login UI: rendering, action simulation and plan visualization.

Each generator draws a screen with PIL and returns the image together with
the list of ``UIElement`` records describing the regions that were drawn.
``simulate_action`` applies a planned action to that state, and
``draw_highlight`` annotates an image with the next planned action.

Element bounds are stored normalized (fractions of ``IMG_WIDTH`` /
``IMG_HEIGHT``) as ``(x, y, w, h)``.
"""

import copy  # For deep copying element list
import os
from typing import List, Tuple, Any

from PIL import Image, ImageDraw, ImageFont, ImageEnhance

from .types import UIElement, Bounds
from .utils import logger

# --- Constants and Font ---
IMG_WIDTH, IMG_HEIGHT = 800, 600

# IDs given to the credential fields by generate_login_screen(); the login
# click simulation finds the fields again through these IDs.
_USERNAME_FIELD_ID = 0
_PASSWORD_FIELD_ID = 1

try:
    FONT = ImageFont.truetype("arial.ttf", 15)
    FONT_BOLD = ImageFont.truetype("arialbd.ttf", 20)  # Added bold font
except IOError:
    logger.warning("Arial fonts not found. Using default PIL font.")
    FONT = ImageFont.load_default()
    FONT_BOLD = ImageFont.load_default()


# --- Coordinate Conversion ---
def _bounds_to_abs(bounds: Bounds) -> Tuple[int, int, int, int]:
    """Convert normalized ``(x, y, w, h)`` bounds to absolute pixel values.

    Values are rounded to the nearest pixel rather than truncated, so a pixel
    coordinate that went through ``_abs_to_bounds`` comes back unchanged even
    when the float product lands a hair below the integer.
    """
    x, y, w, h = bounds
    return (
        int(round(x * IMG_WIDTH)),
        int(round(y * IMG_HEIGHT)),
        int(round(w * IMG_WIDTH)),
        int(round(h * IMG_HEIGHT)),
    )


def _abs_to_bounds(abs_coords: Tuple[int, int, int, int]) -> Bounds:
    """Convert absolute pixel ``(x, y, w, h)`` values to normalized bounds."""
    abs_x, abs_y, abs_w, abs_h = abs_coords
    return (
        abs_x / IMG_WIDTH,
        abs_y / IMG_HEIGHT,
        abs_w / IMG_WIDTH,
        abs_h / IMG_HEIGHT,
    )


# --- Drawing helpers ---
def _text_size(draw: ImageDraw.ImageDraw, text: str, font: Any) -> Tuple[Any, Any]:
    """Return ``(width, height)`` of *text* as measured by ``draw.textbbox``."""
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top


def _draw_button(
    draw: ImageDraw.ImageDraw,
    x: float,
    y: float,
    w: int,
    h: int,
    label: str,
    fill: str,
    text_fill: str,
) -> None:
    """Draw a black-outlined filled rectangle with *label* centered inside."""
    draw.rectangle([(x, y), (x + w, y + h)], fill=fill, outline="black")
    label_w, label_h = _text_size(draw, label, FONT)
    draw.text(
        (x + (w - label_w) / 2, y + (h - label_h) / 2),
        label,
        fill=text_fill,
        font=FONT,
    )


def _save_image(img: Image.Image, save_path: str, description: str) -> None:
    """Write *img* to *save_path*, creating the parent directory if needed."""
    os.makedirs(os.path.dirname(os.path.abspath(save_path)), exist_ok=True)
    img.save(save_path)
    logger.info(f"Saved {description} to {save_path}")


# --- UI Generation ---
def generate_login_screen(
    save_path: str | None = None,
) -> Tuple[Image.Image, List[UIElement]]:
    """Generate the initial synthetic login screen image and element data.

    Args:
        save_path: If given, the rendered image is also written to this path.

    Returns:
        The rendered image and its elements, in creation order: username
        field, password field, "Remember Me" checkbox, "Forgot Password?"
        link and "Login" button, with IDs 0 through 4.
    """
    img = Image.new("RGB", (IMG_WIDTH, IMG_HEIGHT), color=(230, 230, 230))
    draw = ImageDraw.Draw(img)
    elements: List[UIElement] = []

    # Title (horizontally centered; not recorded as an element)
    title_text = "Welcome Back!"
    title_w, _title_h = _text_size(draw, title_text, FONT)
    draw.text(((IMG_WIDTH - title_w) / 2, 80), title_text, fill="black", font=FONT)

    # Username Field
    uname_label_y, uname_x, uname_w, uname_h = 150, 200, 400, 40
    uname_field_y = uname_label_y + 25
    draw.text((uname_x, uname_label_y), "Username:", fill="black", font=FONT)
    draw.rectangle(
        [(uname_x, uname_field_y), (uname_x + uname_w, uname_field_y + uname_h)],
        fill="white",
        outline="black",
    )
    elements.append(
        UIElement(
            id=_USERNAME_FIELD_ID,
            type="text_field",
            content="",
            bounds=_abs_to_bounds((uname_x, uname_field_y, uname_w, uname_h)),
            attributes={"label": "Username:"},  # Store label for potential use
        )
    )

    # Password Field
    pw_label_y = uname_field_y + uname_h + 20
    pw_x, pw_w, pw_h = 200, 400, 40
    pw_field_y = pw_label_y + 25
    draw.text((pw_x, pw_label_y), "Password:", fill="black", font=FONT)
    draw.rectangle(
        [(pw_x, pw_field_y), (pw_x + pw_w, pw_field_y + pw_h)],
        fill="white",
        outline="black",
    )
    elements.append(
        UIElement(
            id=_PASSWORD_FIELD_ID,
            type="text_field",
            content="",
            bounds=_abs_to_bounds((pw_x, pw_field_y, pw_w, pw_h)),
            attributes={"is_password": True, "label": "Password:"},
        )
    )

    # Remember Me Checkbox (only the box itself is the element's bounds)
    cb_y = pw_field_y + pw_h + 30
    cb_x, cb_size = 200, 20
    cb_text_x = cb_x + cb_size + 10
    draw.rectangle(
        [(cb_x, cb_y), (cb_x + cb_size, cb_y + cb_size)], fill="white", outline="black"
    )
    draw.text((cb_text_x, cb_y + 2), "Remember Me", fill="black", font=FONT)
    elements.append(
        UIElement(
            id=len(elements),  # next sequential ID
            type="checkbox",
            content="Remember Me",
            bounds=_abs_to_bounds((cb_x, cb_y, cb_size, cb_size)),
            attributes={"checked": False},
        )
    )

    # Forgot Password Link (right-aligned with the password field)
    fp_text = "Forgot Password?"
    fp_w, fp_h = _text_size(draw, fp_text, FONT)
    fp_x, fp_y = pw_x + pw_w - fp_w, cb_y + 5
    draw.text((fp_x, fp_y), fp_text, fill="blue", font=FONT)
    elements.append(
        UIElement(
            id=len(elements),
            type="link",
            content="Forgot Password?",
            bounds=_abs_to_bounds((fp_x, fp_y, fp_w, fp_h)),
        )
    )

    # Login Button (horizontally centered)
    btn_y = cb_y + cb_size + 40
    btn_w, btn_h = 120, 45
    btn_x = (IMG_WIDTH - btn_w) / 2
    _draw_button(draw, btn_x, btn_y, btn_w, btn_h, "Login", "green", "white")
    elements.append(
        UIElement(
            id=len(elements),
            type="button",
            content="Login",
            bounds=_abs_to_bounds((btn_x, btn_y, btn_w, btn_h)),
        )
    )

    if save_path:
        _save_image(img, save_path, "synthetic UI")
    return img, elements


def generate_logged_in_screen(
    username: str, save_path: str | None = None
) -> Tuple[Image.Image, List[UIElement]]:
    """Generate a simple 'logged in' screen.

    Args:
        username: Name interpolated into the welcome heading.
        save_path: If given, the rendered image is also written to this path.

    Returns:
        The rendered image and its elements: the welcome heading (ID 0) and
        the "Logout" button (ID 1). IDs restart at 0 for this screen.
    """
    img = Image.new(
        "RGB", (IMG_WIDTH, IMG_HEIGHT), color=(210, 230, 210)
    )  # Light green background
    draw = ImageDraw.Draw(img)
    elements: List[UIElement] = []

    # Welcome Message
    welcome_text = f"Welcome, {username}!"
    welcome_w, welcome_h = _text_size(draw, welcome_text, FONT_BOLD)
    welcome_x, welcome_y = (IMG_WIDTH - welcome_w) / 2, 200
    draw.text((welcome_x, welcome_y), welcome_text, fill="darkgreen", font=FONT_BOLD)
    elements.append(
        UIElement(
            id=len(elements),
            type="text",
            content=welcome_text,
            bounds=_abs_to_bounds(
                (int(welcome_x), int(welcome_y), welcome_w, welcome_h)
            ),
            attributes={"is_heading": True},
        )
    )

    # Logout Button
    btn_y = welcome_y + welcome_h + 50
    btn_w, btn_h = 120, 45
    btn_x = (IMG_WIDTH - btn_w) / 2
    _draw_button(draw, btn_x, btn_y, btn_w, btn_h, "Logout", "orange", "black")
    elements.append(
        UIElement(
            id=len(elements),
            type="button",
            content="Logout",
            bounds=_abs_to_bounds((int(btn_x), int(btn_y), btn_w, btn_h)),
        )
    )

    if save_path:
        _save_image(img, save_path, "'Logged In' screen")
    return img, elements


# --- Simulation Logic ---
def _render_typed_text(draw: ImageDraw.ImageDraw, field: UIElement, text: str) -> None:
    """Store *text* on *field* and redraw the field box with it.

    The element keeps the real text; only the drawn text is masked with
    asterisks when the field carries a truthy ``is_password`` attribute.
    """
    field.content = text  # Update element data
    abs_x, abs_y, abs_w, abs_h = _bounds_to_abs(field.bounds)

    # `attributes` may be unset on elements built without it; treat as empty.
    is_password = (field.attributes or {}).get("is_password")
    shown = "*" * len(text) if is_password else text

    # Erase previous content by repainting the empty field
    draw.rectangle(
        [(abs_x, abs_y), (abs_x + abs_w, abs_y + abs_h)],
        fill="white",
        outline="black",
    )
    # Draw new text (vertically centered)
    _shown_w, shown_h = _text_size(draw, shown, FONT)
    draw.text(
        (abs_x + 5, abs_y + (abs_h - shown_h) / 2),
        shown,
        fill="black",
        font=FONT,
    )


def _toggle_checkbox(draw: ImageDraw.ImageDraw, checkbox: UIElement) -> bool:
    """Flip the ``checked`` attribute of *checkbox*, redraw it, return new state."""
    now_checked = not checkbox.attributes.get("checked", False)
    checkbox.attributes["checked"] = now_checked
    abs_x, abs_y, abs_w, abs_h = _bounds_to_abs(checkbox.bounds)

    # Re-draw the empty box, then the two strokes of the checkmark if checked
    draw.rectangle(
        [(abs_x, abs_y), (abs_x + abs_w, abs_y + abs_h)],
        fill="white",
        outline="black",
    )
    if now_checked:
        valley = (abs_x + abs_w // 2, abs_y + abs_h - 2)
        draw.line([(abs_x + 2, abs_y + abs_h // 2), valley], fill="black", width=2)
        draw.line([valley, (abs_x + abs_w - 2, abs_y + 2)], fill="black", width=2)
    return now_checked


def simulate_action(
    image: Image.Image,
    elements: List[UIElement],
    plan: Any,  # Using Any to avoid circular import with core.py/LLMActionPlan
    username_for_login: str = "User",  # Default username for welcome screen
) -> Tuple[Image.Image, List[UIElement]]:
    """Simulate the effect of a planned action on the synthetic UI state.

    Supported actions: ``type`` into a ``text_field``, and ``click`` on the
    "Login" button (transitions to the logged-in screen when both credential
    fields have content) or on a checkbox (toggles it). Everything else is
    logged and leaves the state untouched.

    Args:
        image: The current PIL Image.
        elements: The current list of UIElements.
        plan: The LLMActionPlan object for the action to simulate. This
            function reads ``is_goal_complete``, ``action``, ``element_id``
            and ``text_to_type`` from it.
        username_for_login: Fallback name for the welcome screen. A successful
            login requires a non-empty username field, whose content is used
            instead, so the fallback is not normally reached.

    Returns:
        A tuple containing the new (PIL Image, List[UIElement]) after
        simulation. Returns the original state if the action cannot be
        simulated. The inputs are never modified; changes are applied to
        copies.
    """
    if plan.is_goal_complete:
        logger.info("Goal is marked complete, no simulation needed for this step.")
        # Current state, with elements copied so callers cannot mutate history
        return image, copy.deepcopy(elements)

    logger.debug(f"Simulating action: {plan.action} on element {plan.element_id}")

    new_image = image.copy()
    # IMPORTANT: Deep copy elements to avoid modifying previous steps' state
    new_elements = copy.deepcopy(elements)
    draw = ImageDraw.Draw(new_image)

    target_element = next(
        (el for el in new_elements if el.id == plan.element_id), None
    )
    if target_element is None:
        logger.warning(f"Simulation failed: Element ID {plan.element_id} not found.")
        return image, elements  # Return original state

    action = plan.action
    element_type = target_element.type

    try:
        # --- Simulate TYPE action ---
        if action == "type":
            if element_type == "text_field" and plan.text_to_type is not None:
                _render_typed_text(draw, target_element, plan.text_to_type)
                logger.info(
                    f"Simulated typing '{plan.text_to_type}' into element {target_element.id}"
                )
                return new_image, new_elements
            logger.warning(
                f"Cannot simulate 'type' on element type '{element_type}' or missing text."
            )
            return image, elements

        # --- Simulate CLICK action ---
        if action == "click":
            # Click on Login Button
            if element_type == "button" and target_element.content == "Login":
                # Basic check: assume login succeeds if both fields have content
                username = next(
                    (
                        el.content
                        for el in new_elements
                        if el.id == _USERNAME_FIELD_ID and el.content
                    ),
                    None,
                )
                password_filled = any(
                    el.id == _PASSWORD_FIELD_ID and el.content for el in new_elements
                )
                if username and password_filled:
                    logger.info("Simulating successful login transition.")
                    # Transition to a brand-new logged-in screen state
                    return generate_logged_in_screen(
                        username=username or username_for_login
                    )
                logger.warning(
                    "Simulating login click, but fields not filled. No state change."
                )
                return image, elements  # No state change if fields empty

            # Click on Checkbox
            if element_type == "checkbox":
                now_checked = _toggle_checkbox(draw, target_element)
                logger.info(
                    f"Simulated clicking checkbox {target_element.id}. New state: checked={now_checked}"
                )
                return new_image, new_elements

            # Click on Link / Other Buttons (add more simulation logic here if needed)
            if element_type == "link" and target_element.content == "Forgot Password?":
                logger.info(
                    "Simulated clicking 'Forgot Password?' link. (No visual state change implemented)."
                )
                return image, elements  # No state change for now

            logger.warning(
                f"Simulation for clicking element type '{element_type}' with content '{target_element.content}' not fully implemented."
            )
            return image, elements  # No change if click simulation not defined

        logger.warning(f"Action type '{action}' simulation not implemented.")
        return image, elements  # Return original state if action unknown

    except Exception as e:
        # Best-effort simulation: never let a drawing error break the caller.
        logger.error(f"Error during simulation: {e}", exc_info=True)
        return image, elements  # Return original state on error


# --- Visualization ---
def draw_highlight(
    image: Image.Image,
    element: UIElement,
    plan: Any,  # LLMActionPlan; typed as Any to avoid importing it here
    color: str = "lime",
    width: int = 3,
    dim_factor: float = 0.5,
    text_color: str = "black",  # Color for annotation text
    text_bg_color: Tuple[int, int, int, int] = (
        255,
        255,
        255,
        200,
    ),  # Semi-transparent white bg for text
) -> Image.Image:
    """Draw a highlight box, dim the background and annotate the planned action.

    Args:
        image: The source PIL Image. It is not modified.
        element: The UIElement to highlight.
        plan: The LLMActionPlan object containing the planned action details
            (``action`` and, for typing, ``text_to_type`` are read).
        color: The color of the highlight box.
        width: The line width of the highlight box.
        dim_factor: Brightness factor applied outside the element; dimming is
            applied only when ``0.0 <= dim_factor < 1.0``.
        text_color: Color for the annotation text.
        text_bg_color: Background color for the annotation text. Its alpha
            channel is honored (blended) when the image is in RGB mode.

    Returns:
        A new PIL Image with the effects, or an unmodified copy of *image*
        when the inputs are invalid or drawing fails.
    """
    if not element or not hasattr(element, "bounds") or not plan:
        logger.warning(
            "Attempted to draw highlight/text for invalid element or missing plan."
        )
        return image.copy()

    final_image = image.copy()

    try:
        abs_x, abs_y, abs_w, abs_h = _bounds_to_abs(element.bounds)
        element_box = (abs_x, abs_y, abs_x + abs_w, abs_y + abs_h)

        # --- Apply Dimming ---
        if 0.0 <= dim_factor < 1.0:
            dimmed_image = ImageEnhance.Brightness(final_image).enhance(dim_factor)
            # Clamp the element box to the image before cropping
            crop_box = (
                max(0, element_box[0]),
                max(0, element_box[1]),
                min(image.width, element_box[2]),
                min(image.height, element_box[3]),
            )
            if crop_box[0] < crop_box[2] and crop_box[1] < crop_box[3]:
                # Restore the element area at full brightness
                dimmed_image.paste(image.crop(crop_box), (crop_box[0], crop_box[1]))
            else:
                logger.warning(
                    f"Invalid crop box {crop_box} for element {element.id}. Skipping paste."
                )
            final_image = dimmed_image

        # --- Draw Highlight Box ---
        # On RGB images an "RGBA" draw mode blends fills by their alpha, which
        # is what makes the annotation background actually translucent. Other
        # image modes must use their own mode, so fall back to the default.
        draw_mode = "RGBA" if final_image.mode == "RGB" else None
        draw = ImageDraw.Draw(final_image, draw_mode)
        draw.rectangle(
            [(element_box[0], element_box[1]), (element_box[2], element_box[3])],
            outline=color,
            width=width,
        )

        # --- Add Text Annotation ---
        try:
            # Construct text based on plan
            action_text = str(plan.action).capitalize()
            if plan.action == "type" and plan.text_to_type is not None:
                # Truncate long text for display (result is at most 23 chars)
                typed = plan.text_to_type
                text_preview = (typed[:20] + "...") if len(typed) > 23 else typed
                annotation_text = f"Next: {action_text} '{text_preview}'"
            else:
                annotation_text = f"Next: {action_text}"

            # Calculate text position (prefer placing above the box)
            margin = 5
            text_width, text_height = _text_size(draw, annotation_text, FONT)

            # Center horizontally above box, clamp to image bounds
            text_x = max(
                margin,
                min(
                    abs_x + (abs_w - text_width) // 2,
                    image.width - text_width - margin,  # Ensure right edge fits
                ),
            )
            # Position above box, clamp to image bounds (top edge)
            text_y = max(margin, abs_y - text_height - margin)

            # Background rectangle for text readability, kept inside the image
            pad = margin // 2
            bg_x0 = max(0, text_x - pad)
            bg_y0 = max(0, text_y - pad)
            bg_x1 = min(final_image.width, text_x + text_width + pad)
            bg_y1 = min(final_image.height, text_y + text_height + pad)
            if bg_x0 < bg_x1 and bg_y0 < bg_y1:  # Draw only if valid rect
                draw.rectangle([(bg_x0, bg_y0), (bg_x1, bg_y1)], fill=text_bg_color)

            # Draw the text
            draw.text((text_x, text_y), annotation_text, fill=text_color, font=FONT)

        except Exception as text_e:
            # The annotation is optional; keep the highlight even if it fails.
            logger.warning(f"Failed to draw text annotation: {text_e}")
        # --- End Text Annotation ---

    except Exception as e:
        logger.error(
            f"Failed during drawing highlight/dimming/text for element {getattr(element, 'id', 'N/A')}: {e}",
            exc_info=True,
        )
        return image.copy()

    return final_image

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OpenAdaptAI/OmniMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.