mcp-autogui-multinode

"""Register computer control tools with FastMCP server"""
from pydantic import Field
from typing import Any
from src.computer.client import get_computer_use_mcp_client
from src.computer.base import handle_error
from mcp import types
from fastmcp import FastMCP
from loguru import logger
from middleware.auth import get_mcp_api_key
from src.computer.client import ComputerUseMCPClient

def get_computer_use_mcp_client_with_api_key(endpoint: str) -> ComputerUseMCPClient:
    """Return a computer-use client for ``endpoint`` that carries the MCP API key.

    The key is read once through ``get_mcp_api_key()`` and forwarded to
    ``get_computer_use_mcp_client`` as ``api_key``.

    Args:
        endpoint: Endpoint of the Computer Use Tool Server, passed through
            unchanged to ``get_computer_use_mcp_client``.

    Returns:
        Whatever ``get_computer_use_mcp_client`` returns for this endpoint/key.
    """
    # The API key is a credential: it must never be printed or logged, so the
    # former debug ``print`` of the key has been removed on purpose.
    api_key = get_mcp_api_key()
    return get_computer_use_mcp_client(endpoint, api_key=api_key)

def register_computer_tools_with_client(mcp: FastMCP):
    """Register all computer control tools with the MCP server.
    For remote usage with client
    """
    # ============================================================================
    # Mouse Control Tools
    # ============================================================================
    
    @mcp.tool(
        name="move_mouse",
        description="Move the mouse cursor to the specified coordinates",
    )
    async def move_mouse(
        x: int = Field(description="X coordinate (horizontal position)"),
        y: int = Field(description="Y coordinate (vertical position)"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
    ) -> dict:
        """Ask the client obtained for ``endpoint`` to move the cursor to ``(x, y)``.

        A truthy client result yields a text content reading
        "Operation successful"; a falsy result or any raised exception is
        converted into the value produced by ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.move_mouse(x, y)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("move_mouse", "Failed to move mouse")
        except Exception as exc:
            logger.error("Error in move_mouse: {}", exc)
            return handle_error("move_mouse", exc)
    
    @mcp.tool(
        name="click_mouse",
        description="Click the mouse button at the specified coordinates",
    )
    async def click_mouse(
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        button: str = Field(default="left", description="Mouse button: left, right, middle, double_click, double_left"),
        press: bool = Field(default=False, description="Only press without releasing"),
        release: bool = Field(default=False, description="Only release without pressing"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a click request to the client obtained for ``endpoint``.

        ``x``, ``y``, ``button``, ``press`` and ``release`` are passed to the
        client's ``click_mouse`` positionally, in that order. A truthy result
        yields "Operation successful"; a falsy result or an exception is
        converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.click_mouse(x, y, button, press, release)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("click_mouse", "Failed to click mouse")
        except Exception as exc:
            logger.error("Error in click_mouse: {}", exc)
            return handle_error("click_mouse", exc)
    
    @mcp.tool(
        name="press_mouse",
        description="Press down a mouse button at the specified coordinates",
    )
    async def press_mouse(
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        button: str = Field(default="left", description="Mouse button: left, right, middle"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a button-press request to the client obtained for ``endpoint``.

        A truthy client result yields "Operation successful"; a falsy result
        or an exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.press_mouse(x, y, button)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("press_mouse", "Failed to press mouse")
        except Exception as exc:
            logger.error("Error in press_mouse: {}", exc)
            return handle_error("press_mouse", exc)
    
    @mcp.tool(
        name="release_mouse",
        description="Release a mouse button at the specified coordinates",
    )
    async def release_mouse(
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        button: str = Field(default="left", description="Mouse button: left, right, middle")
    ) -> dict:
        """Forward a button-release request to the client obtained for ``endpoint``.

        A truthy client result yields "Operation successful"; a falsy result
        or an exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.release_mouse(x, y, button)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("release_mouse", "Failed to release mouse")
        except Exception as exc:
            logger.error("Error in release_mouse: {}", exc)
            return handle_error("release_mouse", exc)
    
    @mcp.tool(
        name="drag_mouse",
        description="Drag the mouse from source coordinates to target coordinates",
    )
    async def drag_mouse(
        source_x: int = Field(description="Source X coordinate"),
        source_y: int = Field(description="Source Y coordinate"),
        target_x: int = Field(description="Target X coordinate"),
        target_y: int = Field(description="Target Y coordinate"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a drag request to the client obtained for ``endpoint``.

        The source and target coordinates are passed to the client's
        ``drag_mouse`` in the order source_x, source_y, target_x, target_y.
        A truthy result yields "Operation successful"; a falsy result or an
        exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.drag_mouse(source_x, source_y, target_x, target_y)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("drag_mouse", "Failed to drag mouse")
        except Exception as exc:
            logger.error("Error in drag_mouse: {}", exc)
            return handle_error("drag_mouse", exc)
    
    @mcp.tool(
        name="scroll",
        description="Scroll the mouse wheel at the specified coordinates",
    )
    async def scroll(
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        scroll_direction: str = Field(default="up", description="Scroll direction: up, down, left, right"),
        scroll_amount: int = Field(default=1, description="Amount to scroll"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a scroll request to the client obtained for ``endpoint``.

        A truthy client result yields "Operation successful"; a falsy result
        or an exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.scroll(x, y, scroll_direction, scroll_amount)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("scroll", "Failed to scroll")
        except Exception as exc:
            logger.error("Error in scroll: {}", exc)
            return handle_error("scroll", exc)
    
    # ============================================================================
    # Keyboard Control Tools
    # ============================================================================
    
    @mcp.tool(
        name="press_key",
        description="Press a keyboard key or key combination",
    )
    async def press_key(
        key: str = Field(description="Key name or key combination (e.g., 'enter', 'ctrl c', 'alt tab')"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
    ) -> dict:
        """Forward a key press to the client obtained for ``endpoint``.

        ``key`` is handed to the client's ``press_key`` unchanged. A truthy
        result yields "Operation successful"; a falsy result or an exception
        is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.press_key(key)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("press_key", "Failed to press key")
        except Exception as exc:
            logger.error("Error in press_key: {}", exc)
            return handle_error("press_key", exc)
    
    @mcp.tool(
        name="type_text",
        description="Type the specified text using clipboard paste",
    )
    async def type_text(
        text: str = Field(description="Text to type"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
    ) -> dict:
        """Forward a text-entry request to the client obtained for ``endpoint``.

        ``text`` is handed to the client's ``type_text`` unchanged. A truthy
        result yields "Operation successful"; a falsy result or an exception
        is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.type_text(text)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("type_text", "Failed to type text")
        except Exception as exc:
            logger.error("Error in type_text: {}", exc)
            return handle_error("type_text", exc)
    
    # ============================================================================
    # Utility Tools
    # ============================================================================
    
    @mcp.tool(
        name="wait",
        description="Wait for a specified duration in milliseconds",
    )
    async def wait(
        duration: int = Field(description="Duration to wait in milliseconds"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a wait request to the client obtained for ``endpoint``.

        ``duration`` is handed to the client's ``wait`` unchanged. A truthy
        result yields "Operation successful"; a falsy result or an exception
        is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            # NOTE(review): the client call is not awaited, so it presumably
            # runs synchronously inside this coroutine - confirm that a long
            # wait cannot stall other requests.
            outcome = node.wait(duration)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("wait", "Failed to wait")
        except Exception as exc:
            logger.error("Error in wait: {}", exc)
            return handle_error("wait", exc)
    
    @mcp.tool(
        name="take_screenshot",
        description="Take a screenshot of the current screen",
    )
    async def take_screenshot(
       endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> list[dict[str, Any]]:
        """Capture a screenshot and the screen size via the client for ``endpoint``.

        On success returns a two-item list: a text content holding the
        stringified ``{"width": ..., "height": ...}`` dict, followed by an
        image content (mime type "image/png") carrying the ``screenshot``
        value from the client response. Any missing/invalid response, or any
        raised exception, is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)

            # The client methods are called synchronously (not awaited).
            # NOTE(review): confirm this does not stall other requests.
            shot_response = node.take_screenshot()
            if not (shot_response and hasattr(shot_response, "Result")):
                return handle_error("take_screenshot", "Failed to take screenshot")

            # Pull the payload out of the dumped response model.
            shot_payload = shot_response.model_dump().get("Result")
            if not shot_payload or "screenshot" not in shot_payload:
                return handle_error("take_screenshot", "Invalid screenshot response")
            image = shot_payload["screenshot"]

            # The screen size is reported alongside the image.
            size_response = node.get_screen_size()
            if not (size_response and hasattr(size_response, "Result")):
                return handle_error("get_screen_size", "Failed to get screen size")

            size_payload = size_response.model_dump().get("Result")
            if not size_payload:
                return handle_error("get_screen_size", "Invalid screen size response")

            # Accept either lower-case or capitalised keys for the dimensions.
            width = size_payload.get("width") or size_payload.get("Width")
            height = size_payload.get("height") or size_payload.get("Height")
            logger.info(f"Get screen size, width: {width}, height: {height}")

            dimensions = {"width": width, "height": height}
            size_item = types.TextContent(type="text", text=str(dimensions))
            image_item = types.ImageContent(type="image", data=image, mimeType="image/png")
            return [size_item, image_item]
        except Exception as exc:
            logger.error("Error in take_screenshot: {}", exc)
            return handle_error("take_screenshot", exc)
    
    @mcp.tool(
        name="get_cursor_position",
        description="Get the current mouse cursor position",
    )
    async def get_cursor_position(
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Query the cursor position from the client obtained for ``endpoint``.

        On a truthy response returns a text content holding the stringified
        ``{"x": ..., "y": ...}`` dict built from ``response.Result``. A falsy
        response or any raised exception is converted through
        ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.get_cursor_position()
            if outcome:
                position = {"x": outcome.Result.x, "y": outcome.Result.y}
                return types.TextContent(type="text", text=str(position))
            return handle_error("get_cursor_position", "Failed to get cursor position")
        except Exception as exc:
            logger.error("Error in get_cursor_position: {}", exc)
            return handle_error("get_cursor_position", exc)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/stonehill-2345/mcp-autogui-multinode'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

"""Register computer control tools with FastMCP server"""
from pydantic import Field
from typing import Any
from src.computer.client import get_computer_use_mcp_client
from src.computer.base import handle_error
from mcp import types
from fastmcp import FastMCP
from loguru import logger
from middleware.auth import get_mcp_api_key
from src.computer.client import ComputerUseMCPClient

def get_computer_use_mcp_client_with_api_key(endpoint: str) -> ComputerUseMCPClient:
    """Return a computer-use client for ``endpoint`` that carries the MCP API key.

    The key is read once through ``get_mcp_api_key()`` and forwarded to
    ``get_computer_use_mcp_client`` as ``api_key``.

    Args:
        endpoint: Endpoint of the Computer Use Tool Server, passed through
            unchanged to ``get_computer_use_mcp_client``.

    Returns:
        Whatever ``get_computer_use_mcp_client`` returns for this endpoint/key.
    """
    # The API key is a credential: it must never be printed or logged, so the
    # former debug ``print`` of the key has been removed on purpose.
    api_key = get_mcp_api_key()
    return get_computer_use_mcp_client(endpoint, api_key=api_key)

def register_computer_tools_with_client(mcp: FastMCP):
    """Register all computer control tools with the MCP server.
    For remote usage with client
    """
    # ============================================================================
    # Mouse Control Tools
    # ============================================================================
    
    @mcp.tool(
        name="move_mouse",
        description="Move the mouse cursor to the specified coordinates",
    )
    async def move_mouse(
        x: int = Field(description="X coordinate (horizontal position)"),
        y: int = Field(description="Y coordinate (vertical position)"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
    ) -> dict:
        """Ask the client obtained for ``endpoint`` to move the cursor to ``(x, y)``.

        A truthy client result yields a text content reading
        "Operation successful"; a falsy result or any raised exception is
        converted into the value produced by ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.move_mouse(x, y)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("move_mouse", "Failed to move mouse")
        except Exception as exc:
            logger.error("Error in move_mouse: {}", exc)
            return handle_error("move_mouse", exc)
    
    @mcp.tool(
        name="click_mouse",
        description="Click the mouse button at the specified coordinates",
    )
    async def click_mouse(
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        button: str = Field(default="left", description="Mouse button: left, right, middle, double_click, double_left"),
        press: bool = Field(default=False, description="Only press without releasing"),
        release: bool = Field(default=False, description="Only release without pressing"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a click request to the client obtained for ``endpoint``.

        ``x``, ``y``, ``button``, ``press`` and ``release`` are passed to the
        client's ``click_mouse`` positionally, in that order. A truthy result
        yields "Operation successful"; a falsy result or an exception is
        converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.click_mouse(x, y, button, press, release)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("click_mouse", "Failed to click mouse")
        except Exception as exc:
            logger.error("Error in click_mouse: {}", exc)
            return handle_error("click_mouse", exc)
    
    @mcp.tool(
        name="press_mouse",
        description="Press down a mouse button at the specified coordinates",
    )
    async def press_mouse(
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        button: str = Field(default="left", description="Mouse button: left, right, middle"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a button-press request to the client obtained for ``endpoint``.

        A truthy client result yields "Operation successful"; a falsy result
        or an exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.press_mouse(x, y, button)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("press_mouse", "Failed to press mouse")
        except Exception as exc:
            logger.error("Error in press_mouse: {}", exc)
            return handle_error("press_mouse", exc)
    
    @mcp.tool(
        name="release_mouse",
        description="Release a mouse button at the specified coordinates",
    )
    async def release_mouse(
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        button: str = Field(default="left", description="Mouse button: left, right, middle")
    ) -> dict:
        """Forward a button-release request to the client obtained for ``endpoint``.

        A truthy client result yields "Operation successful"; a falsy result
        or an exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.release_mouse(x, y, button)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("release_mouse", "Failed to release mouse")
        except Exception as exc:
            logger.error("Error in release_mouse: {}", exc)
            return handle_error("release_mouse", exc)
    
    @mcp.tool(
        name="drag_mouse",
        description="Drag the mouse from source coordinates to target coordinates",
    )
    async def drag_mouse(
        source_x: int = Field(description="Source X coordinate"),
        source_y: int = Field(description="Source Y coordinate"),
        target_x: int = Field(description="Target X coordinate"),
        target_y: int = Field(description="Target Y coordinate"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a drag request to the client obtained for ``endpoint``.

        The source and target coordinates are passed to the client's
        ``drag_mouse`` in the order source_x, source_y, target_x, target_y.
        A truthy result yields "Operation successful"; a falsy result or an
        exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.drag_mouse(source_x, source_y, target_x, target_y)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("drag_mouse", "Failed to drag mouse")
        except Exception as exc:
            logger.error("Error in drag_mouse: {}", exc)
            return handle_error("drag_mouse", exc)
    
    @mcp.tool(
        name="scroll",
        description="Scroll the mouse wheel at the specified coordinates",
    )
    async def scroll(
        x: int = Field(default=0, description="X coordinate"),
        y: int = Field(default=0, description="Y coordinate"),
        scroll_direction: str = Field(default="up", description="Scroll direction: up, down, left, right"),
        scroll_amount: int = Field(default=1, description="Amount to scroll"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a scroll request to the client obtained for ``endpoint``.

        A truthy client result yields "Operation successful"; a falsy result
        or an exception is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.scroll(x, y, scroll_direction, scroll_amount)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("scroll", "Failed to scroll")
        except Exception as exc:
            logger.error("Error in scroll: {}", exc)
            return handle_error("scroll", exc)
    
    # ============================================================================
    # Keyboard Control Tools
    # ============================================================================
    
    @mcp.tool(
        name="press_key",
        description="Press a keyboard key or key combination",
    )
    async def press_key(
        key: str = Field(description="Key name or key combination (e.g., 'enter', 'ctrl c', 'alt tab')"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
    ) -> dict:
        """Forward a key press to the client obtained for ``endpoint``.

        ``key`` is handed to the client's ``press_key`` unchanged. A truthy
        result yields "Operation successful"; a falsy result or an exception
        is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.press_key(key)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("press_key", "Failed to press key")
        except Exception as exc:
            logger.error("Error in press_key: {}", exc)
            return handle_error("press_key", exc)
    
    @mcp.tool(
        name="type_text",
        description="Type the specified text using clipboard paste",
    )
    async def type_text(
        text: str = Field(description="Text to type"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server"),
    ) -> dict:
        """Forward a text-entry request to the client obtained for ``endpoint``.

        ``text`` is handed to the client's ``type_text`` unchanged. A truthy
        result yields "Operation successful"; a falsy result or an exception
        is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.type_text(text)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("type_text", "Failed to type text")
        except Exception as exc:
            logger.error("Error in type_text: {}", exc)
            return handle_error("type_text", exc)
    
    # ============================================================================
    # Utility Tools
    # ============================================================================
    
    @mcp.tool(
        name="wait",
        description="Wait for a specified duration in milliseconds",
    )
    async def wait(
        duration: int = Field(description="Duration to wait in milliseconds"),
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Forward a wait request to the client obtained for ``endpoint``.

        ``duration`` is handed to the client's ``wait`` unchanged. A truthy
        result yields "Operation successful"; a falsy result or an exception
        is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            # NOTE(review): the client call is not awaited, so it presumably
            # runs synchronously inside this coroutine - confirm that a long
            # wait cannot stall other requests.
            outcome = node.wait(duration)
            if outcome:
                return types.TextContent(type="text", text="Operation successful")
            return handle_error("wait", "Failed to wait")
        except Exception as exc:
            logger.error("Error in wait: {}", exc)
            return handle_error("wait", exc)
    
    @mcp.tool(
        name="take_screenshot",
        description="Take a screenshot of the current screen",
    )
    async def take_screenshot(
       endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> list[dict[str, Any]]:
        """Capture a screenshot and the screen size via the client for ``endpoint``.

        On success returns a two-item list: a text content holding the
        stringified ``{"width": ..., "height": ...}`` dict, followed by an
        image content (mime type "image/png") carrying the ``screenshot``
        value from the client response. Any missing/invalid response, or any
        raised exception, is converted through ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)

            # The client methods are called synchronously (not awaited).
            # NOTE(review): confirm this does not stall other requests.
            shot_response = node.take_screenshot()
            if not (shot_response and hasattr(shot_response, "Result")):
                return handle_error("take_screenshot", "Failed to take screenshot")

            # Pull the payload out of the dumped response model.
            shot_payload = shot_response.model_dump().get("Result")
            if not shot_payload or "screenshot" not in shot_payload:
                return handle_error("take_screenshot", "Invalid screenshot response")
            image = shot_payload["screenshot"]

            # The screen size is reported alongside the image.
            size_response = node.get_screen_size()
            if not (size_response and hasattr(size_response, "Result")):
                return handle_error("get_screen_size", "Failed to get screen size")

            size_payload = size_response.model_dump().get("Result")
            if not size_payload:
                return handle_error("get_screen_size", "Invalid screen size response")

            # Accept either lower-case or capitalised keys for the dimensions.
            width = size_payload.get("width") or size_payload.get("Width")
            height = size_payload.get("height") or size_payload.get("Height")
            logger.info(f"Get screen size, width: {width}, height: {height}")

            dimensions = {"width": width, "height": height}
            size_item = types.TextContent(type="text", text=str(dimensions))
            image_item = types.ImageContent(type="image", data=image, mimeType="image/png")
            return [size_item, image_item]
        except Exception as exc:
            logger.error("Error in take_screenshot: {}", exc)
            return handle_error("take_screenshot", exc)
    
    @mcp.tool(
        name="get_cursor_position",
        description="Get the current mouse cursor position",
    )
    async def get_cursor_position(
        endpoint: str = Field(default=None, description="Endpoint of the Computer Use Tool Server")
    ) -> dict:
        """Query the cursor position from the client obtained for ``endpoint``.

        On a truthy response returns a text content holding the stringified
        ``{"x": ..., "y": ...}`` dict built from ``response.Result``. A falsy
        response or any raised exception is converted through
        ``handle_error``.
        """
        try:
            node = get_computer_use_mcp_client_with_api_key(endpoint)
            outcome = node.get_cursor_position()
            if outcome:
                position = {"x": outcome.Result.x, "y": outcome.Result.y}
                return types.TextContent(type="text", text=str(position))
            return handle_error("get_cursor_position", "Failed to get cursor position")
        except Exception as exc:
            logger.error("Error in get_cursor_position: {}", exc)
            return handle_error("get_cursor_position", exc)