en es ja ko zh

OmniMCP

# demo_synthetic.py
"""
OmniMCP Demo: Synthetic Perception -> LLM Planner -> Synthetic Action Validation.
Generates UI images and simulates the loop without real screen interaction.
"""

import os
import time
from typing import List, Optional

# Import necessary components from the project
from omnimcp.synthetic_ui import (
    generate_login_screen,
    simulate_action,
    draw_highlight,  # Use the original draw_highlight from synthetic_ui
)
from omnimcp.core import plan_action_for_ui, LLMActionPlan
from omnimcp.utils import logger
from omnimcp.types import UIElement

# NOTE ON REFACTORING:
# The main loop structure in this script (run_synthetic_planner_demo) is similar
# to the core logic now encapsulated in `omnimcp.agent_executor.AgentExecutor`.
# In the future, this synthetic demo could be refactored to:
# 1. Create synthetic implementations of the PerceptionInterface and ExecutionInterface.
# 2. Instantiate AgentExecutor with these synthetic components.
# 3. Call `agent_executor.run(...)`.
# This would further consolidate the core loop logic and allow testing the
# AgentExecutor orchestration with controlled, synthetic inputs/outputs.
# For now, this script remains separate to demonstrate the synthetic setup
# independently.


# --- Configuration ---
# Directory that receives every image written by the demo. It is created
# (if missing) at the start of run_synthetic_planner_demo.
OUTPUT_DIR = "demo_output_multistep"
# Gates the per-step state, highlight/pre-action and final-state image writes.
# The initial screen is not gated by this flag: its path is always passed to
# generate_login_screen via save_path.
SAVE_IMAGES = True
# Upper bound on plan -> simulate iterations of the main loop.
MAX_STEPS = 6


def _save_image_best_effort(
    image, path: str, success_message: str, failure_label: str
) -> None:
    """Save *image* to *path* if SAVE_IMAGES is enabled; never raise.

    A failed save is logged as a warning so that a disk problem does not
    abort the demo loop.
    """
    if not SAVE_IMAGES:
        return
    try:
        image.save(path)
        logger.info(success_message)
    except Exception as save_e:
        logger.warning(f"Could not save {failure_label}: {save_e}")


def _log_plan(llm_plan: "LLMActionPlan") -> None:
    """Log the planner's reasoning, proposed action and completion flag."""
    logger.info(f"LLM Reasoning: {llm_plan.reasoning}")
    logger.info(
        f"LLM Proposed Action: {llm_plan.action} on Element ID: {llm_plan.element_id}"
    )
    if llm_plan.text_to_type:
        logger.info(f"Text to Type: '{llm_plan.text_to_type}'")
    if llm_plan.key_info:
        logger.info(f"Key Info: '{llm_plan.key_info}'")
    logger.info(f"LLM Goal Complete Assessment: {llm_plan.is_goal_complete}")


def _describe_action(
    llm_plan: "LLMActionPlan", target_element: Optional["UIElement"]
) -> str:
    """Build the one-line history entry for a planned action."""
    parts = [f"Action: {llm_plan.action}"]
    if llm_plan.text_to_type:
        parts.append(f" '{llm_plan.text_to_type}'")
    if llm_plan.key_info:
        parts.append(f" Key='{llm_plan.key_info}'")
    if target_element:
        parts.append(
            f" on Element ID {target_element.id} ('{target_element.content}')"
        )
    return "".join(parts)


def _state_changed(old_image, old_elements, new_image, new_elements) -> bool:
    """Return True if the simulated state differs from the previous one.

    A change is detected when the image is a different object, the element
    count differs, or any positionally paired elements have different
    ``to_dict()`` output.
    """
    return (
        new_image is not old_image
        or len(old_elements) != len(new_elements)
        or any(
            before.to_dict() != after.to_dict()
            for before, after in zip(old_elements, new_elements)
        )
    )


def run_synthetic_planner_demo() -> None:
    """Run the multi-step OmniMCP demo using synthetic UI and LLM planning.

    Generates a synthetic login screen, then for up to MAX_STEPS iterations:
    saves the current state image, asks ``plan_action_for_ui`` for the next
    action, saves a highlight (or plain pre-action) image, appends the action
    to the history passed to the planner, and applies the action with
    ``simulate_action``. The loop ends when the planner sets
    ``is_goal_complete`` (after that action has been simulated), when a
    'click' is planned without a resolved target element, when any step
    raises, or when MAX_STEPS is reached. A summary is logged and the final
    image is saved.

    Returns:
        None. Results are communicated through log output and image files
        written under OUTPUT_DIR.
    """
    logger.info("--- Starting OmniMCP Multi-Step Synthetic Demo ---")
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # 1. Initial State & Goal
    logger.info("Generating initial login screen...")
    try:
        # save_path is passed so the initial image is written regardless of
        # the SAVE_IMAGES flag.
        image, elements = generate_login_screen(
            save_path=os.path.join(OUTPUT_DIR, "step_0_state_initial.png")
        )
    except Exception as e:
        logger.error(f"Failed to generate initial screen: {e}", exc_info=True)
        return

    user_goal = "Log in using username 'testuser' and password 'password123'"
    logger.info(f"User Goal: '{user_goal}'")

    action_history: List[str] = []
    goal_achieved_flag = False
    # Index of the last step whose simulation finished; -1 means none did.
    last_step_completed = -1

    # --- Main Loop ---
    for step in range(MAX_STEPS):
        logger.info(f"\n--- Step {step + 1}/{MAX_STEPS} ---")
        step_img_prefix = f"step_{step + 1}"

        # Save current state *before* planning/highlighting.
        current_state_img_path = os.path.join(
            OUTPUT_DIR, f"{step_img_prefix}_state.png"
        )
        _save_image_best_effort(
            image,
            current_state_img_path,
            f"Saved current state to {current_state_img_path}",
            "step state image",
        )

        # 2. Plan Next Action
        logger.info("Planning action with LLM...")
        try:
            llm_plan, target_element = plan_action_for_ui(
                elements=elements,
                user_goal=user_goal,
                action_history=action_history,
                step=step,
            )
            _log_plan(llm_plan)

            # 3. Remember the completion flag; the loop is only left after
            # the action has been simulated (see step 6).
            if llm_plan.is_goal_complete:
                logger.info(
                    "LLM flag indicates goal should be complete after this action."
                )
                goal_achieved_flag = True

            # A 'click' needs a resolved element. The check is skipped once
            # the goal flag is set, matching the original validation rule.
            if (
                not goal_achieved_flag
                and llm_plan.action == "click"
                and not target_element
            ):
                logger.error(
                    f"LLM planned 'click' on invalid element ID ({llm_plan.element_id}). Stopping."
                )
                break

            # 4. Visualize Planned Action (uses synthetic_ui.draw_highlight)
            if target_element:
                highlight_img_path = os.path.join(
                    OUTPUT_DIR, f"{step_img_prefix}_highlight.png"
                )
                try:
                    highlighted_image = draw_highlight(
                        image,
                        target_element,
                        plan=llm_plan,
                        color="lime",
                        width=4,
                    )
                    if SAVE_IMAGES:
                        highlighted_image.save(highlight_img_path)
                        logger.info(
                            f"Saved highlighted action with text to {highlight_img_path}"
                        )
                except Exception as draw_e:
                    # Covers both drawing and saving; visualisation failures
                    # must not stop the demo.
                    logger.warning(f"Could not save highlight image: {draw_e}")
            else:
                # Non-element actions (e.g. press_key): keep a record of the
                # state the action was planned against.
                _save_image_best_effort(
                    image,
                    os.path.join(
                        OUTPUT_DIR,
                        f"{step_img_prefix}_state_before_no_highlight.png",
                    ),
                    "No target element, saved pre-action state.",
                    "pre-action state image",
                )

            # Record action for history *before* simulation changes state.
            action_desc = _describe_action(llm_plan, target_element)
            action_history.append(action_desc)
            logger.debug(f"Added to history: {action_desc}")

            # 5. Simulate Action -> Get New State
            logger.info("Simulating action...")
            # Content of the text field with id 0 is forwarded as the
            # username; "User" is the fallback when no such field exists.
            username = next(
                (
                    el.content
                    for el in elements
                    if el.id == 0 and el.type == "text_field"
                ),
                "User",
            )

            new_image, new_elements = simulate_action(
                image, elements, llm_plan, username_for_login=username
            )
            changed = _state_changed(image, elements, new_image, new_elements)
            image, elements = new_image, new_elements

            if changed:
                logger.info(
                    f"State updated for next step. New element count: {len(elements)}"
                )
            else:
                logger.warning(
                    "Simulation did not result in a detectable state change."
                )

            last_step_completed = step

            # 6. NOW check the flag to break *after* simulation
            if goal_achieved_flag:
                logger.success(
                    "Goal completion flag was set, ending loop after simulation."
                )
                break

            time.sleep(1)

        except Exception as e:
            logger.error(f"Error during step {step + 1}: {e}", exc_info=True)
            break

    # --- End of Loop ---
    logger.info("\n--- Multi-Step Synthetic Demo Finished ---")
    if goal_achieved_flag:
        logger.success("Overall goal marked as achieved by LLM during execution.")
    elif last_step_completed == MAX_STEPS - 1:
        logger.warning(
            f"Reached maximum steps ({MAX_STEPS}) without goal completion flag being set."
        )
    else:
        logger.error(
            f"Execution stopped prematurely after Step {last_step_completed + 1} (check logs)."
        )

    # Save final state
    final_state_img_path = os.path.join(OUTPUT_DIR, "final_state.png")
    _save_image_best_effort(
        image,
        final_state_img_path,
        f"Saved final state to {final_state_img_path}",
        "final state image",
    )


if __name__ == "__main__":
    # Script entry point: the demo runs only when this file is executed
    # directly, not when it is imported.
    #
    # No API-key pre-flight check is performed. Whether one is needed depends
    # on whether core.plan_action_for_ui *requires* the LLM call or could use
    # non-LLM logic someday. A possible check, left disabled:
    # from omnimcp.config import config
    # if not config.ANTHROPIC_API_KEY:
    #    logger.warning("ANTHROPIC_API_KEY not found. LLM planning might fail.")
    run_synthetic_planner_demo()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OpenAdaptAI/OmniMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.