en es ja ko zh

OmniMCP

#!/usr/bin/env python
# cli.py

"""
Command-line interface for running OmniMCP agent tasks using AgentExecutor.
"""

import platform
import sys
import time

import fire

from omnimcp.utils import logger

# Default configuration: fallback values used by `run` when the corresponding
# command-line argument is omitted.
DEFAULT_OUTPUT_DIR = "runs"  # default for `output_dir`
DEFAULT_MAX_STEPS = 10  # default for `max_steps`
DEFAULT_GOAL = "Open calculator and compute 5 * 9"  # default for `goal`


def run(
    goal: str = DEFAULT_GOAL,
    max_steps: int = DEFAULT_MAX_STEPS,
    output_dir: str = DEFAULT_OUTPUT_DIR,
    ci_mode: bool = False,
) -> int:
    """
    Runs the OmniMCP agent to achieve a specified goal.

    The function performs prerequisite checks (API key, pynput, and on macOS
    the AppKit import), builds the perception/execution components, wires
    them into an AgentExecutor, shows a 5-second warning countdown and then
    runs the agent.

    Args:
        goal: The natural language goal for the agent.
        max_steps: Maximum number of steps to attempt.
        output_dir: Base directory to save run artifacts (timestamped subdirs).
        ci_mode: Run in CI mode (skips API validation and actual execution).

    Returns:
        0 when the agent reports success (or immediately in CI mode);
        1 on a missing dependency, a failed prerequisite check, an
        initialization error, a user interrupt, a runtime error, or when the
        agent reports that the goal was not achieved.
    """
    # --- Initial Checks ---
    logger.info("--- OmniMCP CLI ---")

    # Skip import-time checks if we're in CI mode
    if ci_mode:
        logger.info("Running in CI mode - skipping credential checks and execution")
        return 0

    # Delay imports to avoid credential checks at import time
    try:
        # Import necessary components from the project
        from omnimcp.config import config
        from omnimcp.input import InputController, _pynput_error
        from omnimcp.agent_executor import AgentExecutor
        from omnimcp.core import plan_action_for_ui
        from omnimcp.omniparser.client import OmniParserClient
        from omnimcp.visual_state import VisualState
        from omnimcp.utils import (
            draw_bounding_boxes,
            draw_action_highlight,
            NSScreen,  # Check for AppKit on macOS
        )
    except ImportError as e:
        logger.critical(f"Required dependency not found: {e}")
        return 1

    logger.info("Performing initial checks...")
    # Run every check before bailing out so all problems are reported at once.
    success = True

    # 1. API Key Check
    if not config.ANTHROPIC_API_KEY:
        logger.critical(
            "❌ ANTHROPIC_API_KEY not found in config or .env file. LLM planning requires this."
        )
        success = False
    else:
        logger.info("✅ ANTHROPIC_API_KEY found.")

    # 2. pynput Check
    if _pynput_error:
        logger.critical(
            f"❌ Input control library (pynput) failed to load: {_pynput_error}"
        )
        logger.critical(
            "   Real action execution will not work. Is it installed and prerequisites met (e.g., display server)?"
        )
        success = False
    else:
        logger.info("✅ Input control library (pynput) loaded.")

    # 3. macOS Scaling Check
    # platform.system() reports macOS as "Darwin" (capitalized); comparing
    # against lowercase "darwin" would never match and skip this check.
    if platform.system() == "Darwin":
        if not NSScreen:
            # Only a warning: a missing AppKit does not fail the prerequisites.
            logger.warning(
                "⚠️ AppKit (pyobjc-framework-Cocoa) not found or failed to import."
            )
            logger.warning(
                "   Coordinate scaling for Retina displays may be incorrect. Install with 'uv pip install pyobjc-framework-Cocoa'."
            )
        else:
            logger.info("✅ AppKit found for macOS scaling.")

    if not success:
        logger.error("Prerequisite checks failed. Exiting.")
        return 1

    # --- Component Initialization ---
    logger.info("\nInitializing components...")
    try:
        # OmniParser Client (handles deployment if URL not set)
        parser_client = OmniParserClient(
            server_url=config.OMNIPARSER_URL, auto_deploy=(not config.OMNIPARSER_URL)
        )
        logger.info(f"   - OmniParserClient ready (URL: {parser_client.server_url})")

        # Perception Component
        visual_state = VisualState(parser_client=parser_client)
        logger.info("   - VisualState (Perception) ready.")

        # Execution Component
        controller = InputController()
        logger.info("   - InputController (Execution) ready.")

        # Planner Function (already imported)
        logger.info("   - LLM Planner function ready.")

        # Visualization Functions (already imported)
        logger.info("   - Visualization functions ready.")

    except ImportError as e:
        logger.critical(
            f"❌ Component initialization failed due to missing dependency: {e}"
        )
        logger.critical(
            "   Ensure all requirements are installed (`uv pip install -e .`)"
        )
        return 1
    except Exception as e:
        # NOTE(review): `exc_info=True` is the stdlib-logging convention for
        # attaching a traceback; confirm the project logger honours it.
        logger.critical(f"❌ Component initialization failed: {e}", exc_info=True)
        return 1

    # --- Agent Executor Initialization ---
    logger.info("\nInitializing Agent Executor...")
    try:
        agent_executor = AgentExecutor(
            perception=visual_state,
            planner=plan_action_for_ui,
            execution=controller,
            box_drawer=draw_bounding_boxes,
            highlighter=draw_action_highlight,
        )
        logger.success("✅ Agent Executor initialized successfully.")
    except Exception as e:
        logger.critical(f"❌ Agent Executor initialization failed: {e}", exc_info=True)
        return 1

    # --- User Confirmation & Start ---
    print("\n" + "=" * 60)
    print(" WARNING: This script WILL take control of your mouse and keyboard!")
    print(f"          TARGET OS: {platform.system()}")
    print(" Please ensure no sensitive information is visible on screen.")
    print(" To stop execution manually: Move mouse RAPIDLY to a screen corner")
    print("                           OR press Ctrl+C in the terminal.")
    print("=" * 60 + "\n")
    for i in range(5, 0, -1):
        # "\r" rewrites the same terminal line; flush explicitly because no
        # newline is emitted and buffered stdout would otherwise hide the
        # countdown until the final message.
        print(f"Starting in {i}...", end="\r", flush=True)
        time.sleep(1)
    # Trailing spaces overwrite what is left of the countdown text.
    print("Starting agent run now!             ")

    # --- Run the Agent ---
    overall_success = False
    try:
        overall_success = agent_executor.run(
            goal=goal,
            max_steps=max_steps,
            output_base_dir=output_dir,
        )
    except KeyboardInterrupt:
        logger.warning("\nExecution interrupted by user (Ctrl+C).")
        return 1
    except Exception as run_e:
        logger.critical(
            f"\nAn unexpected error occurred during the agent run: {run_e}",
            exc_info=True,
        )
        return 1
    finally:
        # Optional: Add cleanup here if needed (e.g., stopping parser server)
        # Logged on every exit path from the run, including the early returns above.
        logger.info(
            "Reminder: If using auto-deploy, stop the parser server with "
            "'python -m omnimcp.omniparser.server stop' when finished."
        )

    # --- Exit ---
    if overall_success:
        logger.success("\nAgent run finished successfully (goal achieved).")
        return 0
    else:
        logger.error(
            "\nAgent run finished unsuccessfully (goal not achieved or error occurred)."
        )
        return 1


def main():
    """Script entry point: dispatch to ``run`` via Fire and exit with its integer result.

    When Fire hands back something that is not an ``int``, the function
    simply returns without calling ``sys.exit``.
    """
    exit_code = fire.Fire(run)
    if not isinstance(exit_code, int):
        return
    sys.exit(exit_code)

# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/OpenAdaptAI/OmniMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.