Skip to main content
Glama
action_handlers.py (24.5 kB)
"""Tool handlers that drive a remote macOS desktop over VNC.

Each ``handle_remote_macos_*`` function receives the tool ``arguments``
dictionary, opens a fresh VNC connection configured from environment
variables, performs one action and returns a list of MCP content objects.
Pointer coordinates are given relative to a caller-supplied "source"
resolution and are scaled to the resolution reported by the VNC client.
"""

import logging
from typing import Any, Dict, List, Optional, Tuple
import base64
import os
import sys
import subprocess
import time

import mcp.types as types

# Import vnc_client from the current directory
from vnc_client import VNCClient, capture_vnc_screen

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('action_handlers')
logger.setLevel(logging.DEBUG)

# Load environment variables for VNC connection
MACOS_HOST = os.environ.get('MACOS_HOST', '')
MACOS_PORT = int(os.environ.get('MACOS_PORT', '5900'))
MACOS_USERNAME = os.environ.get('MACOS_USERNAME', '')
MACOS_PASSWORD = os.environ.get('MACOS_PASSWORD', '')
VNC_ENCRYPTION = os.environ.get('VNC_ENCRYPTION', 'prefer_on')

# Log environment variable status (without exposing actual values)
logger.info(f"MACOS_HOST from environment: {'Set' if MACOS_HOST else 'Not set'}")
logger.info(f"MACOS_PORT from environment: {MACOS_PORT}")
logger.info(f"MACOS_USERNAME from environment: {'Set' if MACOS_USERNAME else 'Not set'}")
logger.info(f"MACOS_PASSWORD from environment: {'Set' if MACOS_PASSWORD else 'Not set (Required)'}")
logger.info(f"VNC_ENCRYPTION from environment: {VNC_ENCRYPTION}")

# Check for required environment variables - use strict checking only in server.py, not when importing
if not MACOS_HOST:
    logger.warning("MACOS_HOST environment variable is not set")

if not MACOS_PASSWORD:
    logger.warning("MACOS_PASSWORD environment variable is not set")

# Defaults used when the caller does not say which resolution its
# coordinates refer to.
_DEFAULT_SOURCE_WIDTH = 1366
_DEFAULT_SOURCE_HEIGHT = 768

# Seconds to wait for Spotlight to open and, later, to list its matches.
_SPOTLIGHT_DELAY_S = 0.5

# Special key names mapped to X11 keysyms.
_SPECIAL_KEYS: Dict[str, int] = {
    "enter": 0xff0d,
    "return": 0xff0d,
    "backspace": 0xff08,
    "tab": 0xff09,
    "escape": 0xff1b,
    "esc": 0xff1b,
    "delete": 0xffff,
    "del": 0xffff,
    "home": 0xff50,
    "end": 0xff57,
    "page_up": 0xff55,
    "page_down": 0xff56,
    "left": 0xff51,
    "up": 0xff52,
    "right": 0xff53,
    "down": 0xff54,
    "f1": 0xffbe,
    "f2": 0xffbf,
    "f3": 0xffc0,
    "f4": 0xffc1,
    "f5": 0xffc2,
    "f6": 0xffc3,
    "f7": 0xffc4,
    "f8": 0xffc5,
    "f9": 0xffc6,
    "f10": 0xffc7,
    "f11": 0xffc8,
    "f12": 0xffc9,
    "space": 0x20,
}

# Modifier key names mapped to X11 keysyms.
_MODIFIER_KEYS: Dict[str, int] = {
    "ctrl": 0xffe3,     # Control_L
    "control": 0xffe3,  # Control_L
    "shift": 0xffe1,    # Shift_L
    "alt": 0xffe9,      # Alt_L
    "option": 0xffe9,   # Alt_L (Mac convention)
    "cmd": 0xffeb,      # Super_L, used for the Mac Command key
    "command": 0xffeb,  # Super_L, used for the Mac Command key
    "win": 0xffeb,      # Super_L
    "super": 0xffeb,    # Super_L
    "fn": 0xffed,       # NOTE(review): 0xffed is Hyper_L in X11; confirm it acts as Fn
    "meta": 0xffeb,     # Super_L, used for the Mac Command key
}

# Return type shared by most handlers.
_ToolResult = list[types.TextContent | types.ImageContent | types.EmbeddedResource]


def _text_response(message: Optional[str]) -> List[types.TextContent]:
    """Wrap *message* in the single-element text response handlers return."""
    return [types.TextContent(type="text", text=message)]


def _status(result: Any) -> str:
    """Return the word used in reports for a truthy/falsy send result."""
    return "succeeded" if result else "failed"


def _int_arg(arguments: dict[str, Any], name: str, default: int) -> int:
    """Read an integer argument, treating a missing or ``None`` value as *default*."""
    value = arguments.get(name)
    return default if value is None else int(value)


def _required_point(arguments: dict[str, Any]) -> Tuple[Any, Any]:
    """Return the ``x``/``y`` arguments, raising ``ValueError`` if either is missing."""
    x = arguments.get("x")
    y = arguments.get("y")
    if x is None or y is None:
        raise ValueError("x and y coordinates are required")
    return x, y


def _source_dimensions(arguments: dict[str, Any]) -> Tuple[int, int]:
    """Return the validated ``source_width``/``source_height`` arguments.

    Raises:
        ValueError: If either dimension is zero or negative.
    """
    source_width = _int_arg(arguments, "source_width", _DEFAULT_SOURCE_WIDTH)
    source_height = _int_arg(arguments, "source_height", _DEFAULT_SOURCE_HEIGHT)
    if source_width <= 0 or source_height <= 0:
        raise ValueError("Source dimensions must be positive values")
    return source_width, source_height


def _connect_vnc() -> Tuple[Optional[VNCClient], Optional[str]]:
    """Open a VNC connection using the environment-provided settings.

    Returns:
        ``(client, None)`` on success, or ``(None, message)`` when the
        connection attempt reports failure.
    """
    vnc = VNCClient(
        host=MACOS_HOST,
        port=MACOS_PORT,
        password=MACOS_PASSWORD,
        username=MACOS_USERNAME,
        encryption=VNC_ENCRYPTION,
    )
    success, error_message = vnc.connect()
    if not success:
        return None, (
            f"Failed to connect to remote MacOs machine at {MACOS_HOST}:{MACOS_PORT}. "
            f"{error_message}"
        )
    return vnc, None


def _scale_to_target(
    x: Any,
    y: Any,
    source_width: int,
    source_height: int,
    target_width: int,
    target_height: int,
) -> Tuple[int, int]:
    """Map a source-resolution point onto the target screen, clamped to its bounds."""
    scaled_x = int((x / source_width) * target_width)
    scaled_y = int((y / source_height) * target_height)
    return (
        max(0, min(scaled_x, target_width - 1)),
        max(0, min(scaled_y, target_height - 1)),
    )


def _scale_report(
    source_width: int,
    source_height: int,
    target_width: int,
    target_height: int,
) -> str:
    """Return the three report lines describing the coordinate scaling."""
    return (
        f"Source dimensions: {source_width}x{source_height}\n"
        f"Target dimensions: {target_width}x{target_height}\n"
        f"Scale factors: {target_width / source_width:.4f}x, "
        f"{target_height / source_height:.4f}y"
    )


def _parse_key_combination(key_combination: str) -> Tuple[List[int], Optional[str]]:
    """Translate a ``"cmd+shift+a"`` style string into keysyms.

    Returns:
        ``(keysyms, None)`` when every part was recognised, otherwise
        ``(partial_keysyms, unknown_part)``. The unknown part may be an
        empty string (e.g. for ``"cmd+"``), so test it against ``None``.
    """
    keys: List[int] = []
    for raw_part in key_combination.lower().split('+'):
        part = raw_part.strip()
        if part in _MODIFIER_KEYS:
            keys.append(_MODIFIER_KEYS[part])
        elif part in _SPECIAL_KEYS:
            keys.append(_SPECIAL_KEYS[part])
        elif len(part) == 1:
            # Letters, digits and any other single character map to their code point.
            keys.append(ord(part))
        else:
            return keys, part
    return keys, None


def _handle_click(arguments: dict[str, Any], double_click: bool) -> _ToolResult:
    """Shared implementation of the single- and double-click handlers."""
    x, y = _required_point(arguments)
    source_width, source_height = _source_dimensions(arguments)
    button = _int_arg(arguments, "button", 1)

    vnc, error = _connect_vnc()
    if vnc is None:
        return _text_response(error)

    try:
        target_width = vnc.width
        target_height = vnc.height
        scaled_x, scaled_y = _scale_to_target(
            x, y, source_width, source_height, target_width, target_height
        )

        result = vnc.send_mouse_click(scaled_x, scaled_y, button, double_click)

        action = "double-click" if double_click else "click"
        report = _scale_report(source_width, source_height, target_width, target_height)
        return _text_response(
            f"Mouse {action} (button {button}) from source ({x}, {y}) "
            f"to target ({scaled_x}, {scaled_y}) {_status(result)}\n"
            f"{report}"
        )
    finally:
        # Close VNC connection
        vnc.close()


async def handle_remote_macos_get_screen(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Connect to a remote MacOs machine and get a screenshot of the remote desktop.

    Args:
        arguments: Tool arguments; none are read by this handler.

    Returns:
        On success, a PNG ``ImageContent`` (base64-encoded) followed by a
        ``TextContent`` stating the image dimensions. On failure, a single
        ``TextContent`` carrying the error message from the capture helper.
    """
    success, screen_data, error_message, dimensions = await capture_vnc_screen(
        host=MACOS_HOST,
        port=MACOS_PORT,
        password=MACOS_PASSWORD,
        username=MACOS_USERNAME,
        encryption=VNC_ENCRYPTION,
    )

    if not success:
        return _text_response(error_message)

    # Encode image in base64
    base64_data = base64.b64encode(screen_data).decode('utf-8')

    # Return image content with dimensions
    width, height = dimensions
    return [
        types.ImageContent(
            type="image",
            data=base64_data,
            mimeType="image/png",
            alt_text=f"Screenshot from remote MacOs machine at {MACOS_HOST}:{MACOS_PORT}"
        ),
        types.TextContent(
            type="text",
            text=f"Image dimensions: {width}x{height}"
        )
    ]


def handle_remote_macos_mouse_scroll(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Perform a mouse scroll action on a remote MacOs machine.

    The scroll is emulated: the pointer is moved to the target position and
    a Page Up or Page Down key press is sent.

    Args:
        arguments: Dictionary containing:
            - x, y: Pointer position in source coordinates (required)
            - source_width, source_height: Source resolution (default 1366x768)
            - direction: "up" for Page Up; anything else sends Page Down

    Returns:
        A single ``TextContent`` describing the outcome.

    Raises:
        ValueError: If x or y is missing or a source dimension is not positive.
    """
    x, y = _required_point(arguments)
    source_width, source_height = _source_dimensions(arguments)
    # A missing or explicit None direction falls back to "down".
    direction = arguments.get("direction") or "down"
    page_direction = "up" if str(direction).lower() == "up" else "down"

    vnc, error = _connect_vnc()
    if vnc is None:
        return _text_response(error)

    try:
        target_width = vnc.width
        target_height = vnc.height
        scaled_x, scaled_y = _scale_to_target(
            x, y, source_width, source_height, target_width, target_height
        )

        # First move the mouse to the target location without clicking
        move_result = vnc.send_pointer_event(scaled_x, scaled_y, 0)

        # Send the appropriate page key based on direction
        key = _SPECIAL_KEYS["page_" + page_direction]
        key_result = vnc.send_key_event(key, True) and vnc.send_key_event(key, False)

        report = _scale_report(source_width, source_height, target_width, target_height)
        # Report the key that was actually sent rather than echoing the raw argument.
        return _text_response(
            f"Mouse move to ({scaled_x}, {scaled_y}) {_status(move_result)}\n"
            f"Page {page_direction} key press {_status(key_result)}\n"
            f"{report}"
        )
    finally:
        # Close VNC connection
        vnc.close()


def handle_remote_macos_mouse_click(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Perform a mouse click action on a remote MacOs machine.

    Args:
        arguments: Dictionary containing:
            - x, y: Click position in source coordinates (required)
            - source_width, source_height: Source resolution (default 1366x768)
            - button: Mouse button number (default 1)

    Returns:
        A single ``TextContent`` describing the outcome.

    Raises:
        ValueError: If x or y is missing or a source dimension is not positive.
    """
    return _handle_click(arguments, False)


def handle_remote_macos_send_keys(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Send keyboard input to a remote MacOs machine.

    When several inputs are supplied they are processed in this order:
    ``special_key``, then ``text``, then ``key_combination``.

    Args:
        arguments: Dictionary containing any of:
            - text: Text to type
            - special_key: Name of a single special key (e.g. "enter")
            - key_combination: "+"-separated keys (e.g. "cmd+c")

    Returns:
        A single ``TextContent`` with one line per processed input.

    Raises:
        ValueError: If none of the three inputs is provided.
    """
    text = arguments.get("text")
    special_key = arguments.get("special_key")
    key_combination = arguments.get("key_combination")

    if not text and not special_key and not key_combination:
        raise ValueError("Either text, special_key, or key_combination must be provided")

    vnc, error = _connect_vnc()
    if vnc is None:
        return _text_response(error)

    try:
        result_message: List[str] = []

        # Process special key
        if special_key:
            key = _SPECIAL_KEYS.get(special_key.lower())
            if key is None:
                result_message.append(f"Unknown special key: {special_key}")
                result_message.append(f"Supported special keys: {', '.join(_SPECIAL_KEYS)}")
            elif vnc.send_key_event(key, True) and vnc.send_key_event(key, False):
                result_message.append(f"Sent special key: {special_key}")
            else:
                result_message.append(f"Failed to send special key: {special_key}")

        # Process text
        if text:
            if vnc.send_text(text):
                result_message.append(f"Sent text: '{text}'")
            else:
                result_message.append(f"Failed to send text: '{text}'")

        # Process key combination
        if key_combination:
            keys, unknown_part = _parse_key_combination(key_combination)
            if unknown_part is not None:
                # Nothing is sent when any part of the combination is unknown.
                result_message.append(f"Unknown key in combination: {unknown_part}")
            elif vnc.send_key_combination(keys):
                result_message.append(f"Sent key combination: {key_combination}")
            else:
                result_message.append(f"Failed to send key combination: {key_combination}")

        return _text_response("\n".join(result_message))
    finally:
        vnc.close()


def handle_remote_macos_mouse_double_click(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Perform a mouse double-click action on a remote MacOs machine.

    Args:
        arguments: Dictionary containing:
            - x, y: Click position in source coordinates (required)
            - source_width, source_height: Source resolution (default 1366x768)
            - button: Mouse button number (default 1)

    Returns:
        A single ``TextContent`` describing the outcome.

    Raises:
        ValueError: If x or y is missing or a source dimension is not positive.
    """
    return _handle_click(arguments, True)


def handle_remote_macos_mouse_move(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Move the mouse cursor on a remote MacOs machine.

    Args:
        arguments: Dictionary containing:
            - x, y: Destination in source coordinates (required)
            - source_width, source_height: Source resolution (default 1366x768)

    Returns:
        A single ``TextContent`` describing the outcome.

    Raises:
        ValueError: If x or y is missing or a source dimension is not positive.
    """
    x, y = _required_point(arguments)
    source_width, source_height = _source_dimensions(arguments)

    vnc, error = _connect_vnc()
    if vnc is None:
        return _text_response(error)

    try:
        target_width = vnc.width
        target_height = vnc.height
        scaled_x, scaled_y = _scale_to_target(
            x, y, source_width, source_height, target_width, target_height
        )

        # Move mouse pointer (button_mask=0 means no buttons are pressed)
        result = vnc.send_pointer_event(scaled_x, scaled_y, 0)

        report = _scale_report(source_width, source_height, target_width, target_height)
        return _text_response(
            f"Mouse move from source ({x}, {y}) to target ({scaled_x}, {scaled_y}) "
            f"{_status(result)}\n"
            f"{report}"
        )
    finally:
        # Close VNC connection
        vnc.close()


def handle_remote_macos_open_application(arguments: dict[str, Any]) -> List[types.TextContent]:
    """
    Opens or activates an application on the remote MacOS machine using VNC.

    The application is launched through Spotlight: Command+Space, type the
    identifier, then Enter.

    Args:
        arguments: Dictionary containing:
            - identifier: App name, path, or bundle ID

    Returns:
        List containing a TextContent with the result

    Raises:
        ValueError: If identifier is missing or empty.
    """
    identifier = arguments.get("identifier")
    if not identifier:
        raise ValueError("identifier is required")

    start_time = time.time()

    vnc, error = _connect_vnc()
    if vnc is None:
        return _text_response(error)

    try:
        cmd_key = _MODIFIER_KEYS["cmd"]
        space_key = _SPECIAL_KEYS["space"]
        enter_key = _SPECIAL_KEYS["enter"]

        # Every step is always attempted; the results are only used to
        # decide which message to report.
        results = [
            # Press Command+Space to open Spotlight
            vnc.send_key_event(cmd_key, True),
            vnc.send_key_event(space_key, True),
            # Release Command+Space
            vnc.send_key_event(space_key, False),
            vnc.send_key_event(cmd_key, False),
        ]

        # Small delay to let Spotlight open
        time.sleep(_SPOTLIGHT_DELAY_S)

        # Type the application name
        results.append(vnc.send_text(identifier))

        # Small delay to let Spotlight find the app
        time.sleep(_SPOTLIGHT_DELAY_S)

        # Press Enter to launch
        results.append(vnc.send_key_event(enter_key, True))
        results.append(vnc.send_key_event(enter_key, False))

        processing_time = round(time.time() - start_time, 3)
        outcome = "Launched application" if all(results) else "Failed to launch application"
        return _text_response(
            f"{outcome}: {identifier}\nProcessing time: {processing_time}s"
        )
    finally:
        # Close VNC connection
        vnc.close()


def handle_remote_macos_mouse_drag_n_drop(arguments: dict[str, Any]) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Perform a mouse drag operation on a remote MacOs machine.

    The pointer moves to the start point, the button is pressed, the pointer
    travels to the end point in ``steps`` equal increments, and the button is
    released at the end point.

    Args:
        arguments: Dictionary containing:
            - start_x, start_y, end_x, end_y: Source coordinates (required)
            - source_width, source_height: Source resolution (default 1366x768)
            - button: Mouse button number, 1 or greater (default 1)
            - steps: Number of intermediate moves, 1 or greater (default 10)
            - delay_ms: Pause after each move in milliseconds (default 10)

    Returns:
        A single ``TextContent`` describing the outcome, or naming the stage
        at which sending a pointer event failed.

    Raises:
        ValueError: If a coordinate is missing, a source dimension is not
            positive, ``button`` or ``steps`` is below 1, or ``delay_ms`` is
            negative.
    """
    start_x = arguments.get("start_x")
    start_y = arguments.get("start_y")
    end_x = arguments.get("end_x")
    end_y = arguments.get("end_y")

    # Validate required parameters
    if any(value is None for value in (start_x, start_y, end_x, end_y)):
        raise ValueError("start_x, start_y, end_x, and end_y coordinates are required")

    source_width, source_height = _source_dimensions(arguments)
    button = _int_arg(arguments, "button", 1)
    steps = _int_arg(arguments, "steps", 10)
    delay_ms = _int_arg(arguments, "delay_ms", 10)

    # Reject values that would otherwise fail part-way through the drag:
    # a negative shift for the button mask, a division by zero for the step
    # size, or a negative sleep while the button is held down.
    if button < 1:
        raise ValueError("button must be a positive integer")
    if steps < 1:
        raise ValueError("steps must be a positive integer")
    if delay_ms < 0:
        raise ValueError("delay_ms must not be negative")

    vnc, error = _connect_vnc()
    if vnc is None:
        return _text_response(error)

    try:
        target_width = vnc.width
        target_height = vnc.height

        scaled_start_x, scaled_start_y = _scale_to_target(
            start_x, start_y, source_width, source_height, target_width, target_height
        )
        scaled_end_x, scaled_end_y = _scale_to_target(
            end_x, end_y, source_width, source_height, target_width, target_height
        )

        # Calculate step sizes
        dx = (scaled_end_x - scaled_start_x) / steps
        dy = (scaled_end_y - scaled_start_y) / steps

        # Move to start position
        if not vnc.send_pointer_event(scaled_start_x, scaled_start_y, 0):
            return _text_response("Failed to move to start position")

        # Press button (bit N-1 of the mask represents button N)
        button_mask = 1 << (button - 1)
        if not vnc.send_pointer_event(scaled_start_x, scaled_start_y, button_mask):
            return _text_response("Failed to press mouse button")

        # Perform drag
        delay_s = delay_ms / 1000.0  # Convert ms to seconds
        for step in range(1, steps + 1):
            current_x = int(scaled_start_x + dx * step)
            current_y = int(scaled_start_y + dy * step)
            if not vnc.send_pointer_event(current_x, current_y, button_mask):
                return _text_response(f"Failed during drag at step {step}")
            time.sleep(delay_s)

        # Release button at final position
        if not vnc.send_pointer_event(scaled_end_x, scaled_end_y, 0):
            return _text_response("Failed to release mouse button")

        report = _scale_report(source_width, source_height, target_width, target_height)
        return _text_response(
            f"Mouse drag (button {button}) completed:\n"
            f"From source ({start_x}, {start_y}) to ({end_x}, {end_y})\n"
            f"From target ({scaled_start_x}, {scaled_start_y}) "
            f"to ({scaled_end_x}, {scaled_end_y})\n"
            f"{report}\n"
            f"Steps: {steps}\n"
            f"Delay: {delay_ms}ms"
        )
    finally:
        # Close VNC connection
        vnc.close()

Implementation Reference

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/baryhuang/mcp-remote-macos-use'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.