mcp-autogui-multinode

tools.py•16.9 KiB

from typing import Dict, Any from fastmcp import FastMCP from mcp import types from src.computer.computer_pyautogui import PyAutoGUIComputerTool from src.common import BaseResult from core.constants import REQUEST_MODELS from core.logger import logger from src.computer.computer_pyautogui import PyAutoGUIComputerTool from src.computer.base import IComputerTool from src.common import handle_error from src.computer.schema import ( TakeScreenshotRequest, GetCursorPositionRequest, ) def new_computer_tool() -> IComputerTool: return PyAutoGUIComputerTool() async def execute_computer_action(method_name: str, request_data: Dict[str, Any]) -> dict: """ Execute computer control action (similar to action_route in HTTP API) """ # Create computer tool instance computer_tool = new_computer_tool() try: # Convert camelCase action to snake_case method name # method_name = camel_to_snake(action) request_model = REQUEST_MODELS.get(method_name) logger.info(f"Request model: {request_model}") if not request_model: return {"Result": None, "error": f"Action '{method_name}' not found"} # Create validated request object validated_request = request_model(**request_data) method = getattr(computer_tool, method_name) # Execute the method result = await method(validated_request) # Handle different return types if isinstance(result, BaseResult): return result.model_dump() elif isinstance(result, dict): return {"output": result, "error": None} elif result is None: return {"output": None, "error": None} else: return {"output": result if result else None, "error": None} except Exception as e: raise e def register_computer_tools(mcp: FastMCP): """Register all computer control tools with the MCP server""" # ============================================================================ # Mouse Control Tools # ============================================================================ @mcp.tool() async def move_mouse(x: int, y: int) -> dict: """ Move the mouse cursor to the specified coordinates. Args: x: The x-coordinate (horizontal position) on the screen y: The y-coordinate (vertical position) on the screen Returns: dict: Result with output and error fields - output: Success message or None - error: Error message if any, None otherwise Example: move_mouse(x=100, y=200) # Move mouse to position (100, 200) """ request_data = {"x": x, "y": y} try: result = await execute_computer_action("move_mouse", request_data) if not result: return handle_error("move_mouse", "Failed to move mouse") output = result.get("output", "Operation successful") return types.TextContent( type="text", text=output ) except Exception as e: return handle_error("move_mouse", e) @mcp.tool() async def click_mouse( x: int = 0, y: int = 0, button: str = "left", press: bool = False, release: bool = False ) -> dict: """ Click the mouse button at the specified coordinates. Args: x: The x-coordinate where to click (default: 0, uses current position) y: The y-coordinate where to click (default: 0, uses current position) button: Mouse button to click - "left", "right", "middle", "double_click", or "double_left" (default: "left") press: If True, only press the button without releasing (default: False) release: If True, only release the button without pressing (default: False) Returns: dict: Result with output and error fields Example: click_mouse(x=100, y=200, button="left") # Left click at (100, 200) click_mouse(x=100, y=200, button="right") # Right click at (100, 200) click_mouse(x=100, y=200, button="double_left") # Double click at (100, 200) """ request_data = {"x": x, "y": y, "button": button, "press": press, "release": release} try: result = await execute_computer_action("click_mouse", request_data) if not result: return handle_error("click_mouse", "Failed to click mouse") output = result.get("output", "") if result.get("output", "") != "" else "Operation successful" return types.TextContent( type="text", text= output ) except Exception as e: return handle_error("click_mouse", e) @mcp.tool() async def press_mouse(x: int = 0, y: int = 0, button: str = "left") -> dict: """ Press down a mouse button at the specified coordinates without releasing it. Useful for drag operations. Args: x: The x-coordinate where to press (default: 0, uses current position) y: The y-coordinate where to press (default: 0, uses current position) button: Mouse button to press - "left", "right", or "middle" (default: "left") Returns: dict: Result with output and error fields Example: press_mouse(x=100, y=200, button="left") # Press left button at (100, 200) """ request_data = {"x": x, "y": y, "button": button} try: result = await execute_computer_action("press_mouse", request_data) if not result: return handle_error("press_mouse", "Failed to press mouse") output = result.get("output", "Operation successful") if output == "": output = "Operation successful" return types.TextContent( type="text", text= output ) except Exception as e: return handle_error("press_mouse", e) @mcp.tool() async def release_mouse(x: int = 0, y: int = 0, button: str = "left") -> dict: """ Release a mouse button at the specified coordinates. Usually called after press_mouse to complete a drag operation. Args: x: The x-coordinate where to release (default: 0, uses current position) y: The y-coordinate where to release (default: 0, uses current position) button: Mouse button to release - "left", "right", or "middle" (default: "left") Returns: dict: Result with output and error fields Example: release_mouse(x=300, y=400, button="left") # Release left button at (300, 400) """ request_data = {"x": x, "y": y, "button": button} try: result = await execute_computer_action("release_mouse", request_data) if not result: return handle_error("release_mouse", "Failed to release mouse") output = result.get("output", "Operation successful") if output == "": output = "Operation successful" return types.TextContent( type="text", text=output ) except Exception as e: return handle_error("release_mouse", e) return result @mcp.tool() async def drag_mouse(source_x: int, source_y: int, target_x: int, target_y: int) -> dict: """ Drag the mouse from source coordinates to target coordinates. This performs a complete drag operation: press at source, move to target, then release. Args: source_x: The starting x-coordinate source_y: The starting y-coordinate target_x: The ending x-coordinate target_y: The ending y-coordinate Returns: dict: Result with output and error fields Example: drag_mouse(source_x=100, source_y=200, target_x=300, target_y=400) # Drags mouse from (100, 200) to (300, 400) """ request_data = { "source_x": source_x, "source_y": source_y, "target_x": target_x, "target_y": target_y } try: result = await execute_computer_action("drag_mouse", request_data) if not result: return handle_error("drag_mouse", "Failed to drag mouse") output = result.get("output", "Operation successful") if output == "": output = "Operation successful" return types.TextContent( type="text", text=output ) except Exception as e: return handle_error("drag_mouse", e) return result @mcp.tool() async def scroll( scroll_direction: str = "up", scroll_amount: int = 0, x: int = 0, y: int = 0 ) -> dict: """ Scroll the mouse wheel at the specified coordinates. Args: scroll_direction: Direction to scroll - "up", "down", "left", or "right" (default: "up") scroll_amount: Amount to scroll (positive number, will be scaled automatically) (default: 0) x: The x-coordinate where to scroll (default: 0, uses current position) y: The y-coordinate where to scroll (default: 0, uses current position) Returns: dict: Result with output and error fields Example: scroll(scroll_direction="up", scroll_amount=3, x=100, y=200) # Scroll up 3 units at (100, 200) scroll(scroll_direction="down", scroll_amount=5) # Scroll down 5 units at current position """ request_data = { "scroll_direction": scroll_direction, "scroll_amount": scroll_amount, "x": x, "y": y } try: result = await execute_computer_action("scroll", request_data) if not result: return handle_error("scroll", "Failed to scroll") output = result.get("output", "Operation successful") if output == "": output = "Operation successful" return types.TextContent( type="text", text=output ) except Exception as e: return handle_error("scroll", e) # ============================================================================ # Keyboard Control Tools # ============================================================================ @mcp.tool() async def press_key(key: str) -> dict: """ Press a keyboard key or key combination. Args: key: Single key name (e.g., "enter", "space", "tab") or key combination separated by spaces (e.g., "ctrl c", "alt tab", "shift f10") Returns: dict: Result with output and error fields Example: press_key("enter") # Press Enter key press_key("ctrl c") # Press Ctrl+C (copy) press_key("alt tab") # Press Alt+Tab (switch windows) """ request_data = {"key": key} try: result = await execute_computer_action("press_key", request_data) if not result: return handle_error("press_key", "Failed to press key") output = result.get("output", "Operation successful") if output == "": output = "Operation successful" return types.TextContent( type="text", text=output ) except Exception as e: return handle_error("press_key", e) @mcp.tool() async def type_text(text: str) -> dict: """ Type the specified text. The text will be pasted using clipboard (Ctrl+V). This is more reliable than typing character by character for special characters. Args: text: The text string to type Returns: dict: Result with output and error fields Example: type_text("Hello, World!") # Types "Hello, World!" type_text("username@example.com") # Types email address """ request_data = {"text": text} try: result = await execute_computer_action("type_text", request_data) if not result: return handle_error("type_text", "Failed to type text") output = result.get("output", "Operation successful") if output == "": output = "Operation successful" return types.TextContent( type="text", text=output ) except Exception as e: return handle_error("type_text", e) return result # ============================================================================ # Utility Tools # ============================================================================ @mcp.tool() async def wait(duration: int) -> dict: """ Wait for a specified duration in milliseconds. Args: duration: Duration to wait in milliseconds (e.g., 1000 = 1 second) Returns: dict: Result with output and error fields Example: wait(1000) # Wait for 1 second wait(500) # Wait for 0.5 seconds """ request_data = {"duration": duration} try: result = await execute_computer_action("wait", request_data) if not result: return handle_error("wait", "Failed to wait") return types.TextContent( type="text", text="Operation successful" ) except Exception as e: return handle_error("wait", e) @mcp.tool() async def take_screenshot() -> list[types.Content]: """ Take a screenshot of the entire screen and return it as a base64-encoded PNG image. Returns: dict: Result with output and error fields - output: Base64-encoded PNG image string if successful, None otherwise - error: Error message if any, None otherwise Example: result = take_screenshot() # result["output"] contains the base64-encoded PNG image # You can decode it to display or save the screenshot """ # get screen size screen_size = await execute_computer_action("get_screen_size", {}) if not screen_size: return handle_error("take_screenshot", "Failed to get screen size") output = screen_size.get("output", {}) width = output.get("Width", 0) height = output.get("Height", 0) if width <= 0 or height <= 0: return handle_error("take_screenshot", "Invalid screen size") # take screenshot request = TakeScreenshotRequest() screenshot = await execute_computer_action("take_screenshot", request.model_dump()) if not screenshot: return handle_error("take_screenshot", "Failed to take screenshot") screenshot_output = screenshot.get("output", None) if not screenshot_output: return handle_error("take_screenshot", "Failed to take screenshot") return [ types.TextContent(type="text", text=str({"width": width, "height": height})), types.ImageContent(type="image", data=screenshot_output.screenshot, mimeType="image/png") ] @mcp.tool() async def get_cursor_position() -> dict: """ Get the current mouse cursor position on the screen. Returns: dict: Result with output and error fields - output: Dictionary with "PositionX" and "PositionY" keys containing coordinates - error: Error message if any, None otherwise Example: result = get_cursor_position() # result["output"] = {"PositionX": 100, "PositionY": 200} """ request = GetCursorPositionRequest() try: result = await execute_computer_action("get_cursor_position", request.model_dump()) if not result: return handle_error("get_cursor_position", "Failed to get cursor position") output = result.get("output", {}) return types.TextContent( type="text", text=str({ "x": output.get("PositionX", output.get("x", 0)), "y": output.get("PositionY", output.get("y", 0)) }) ) except Exception as e: logger.error("Error in get_cursor_position: {}", e) return handle_error("get_cursor_position", e)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/stonehill-2345/mcp-autogui-multinode'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

tools.py•16.9 KiB