AiDD MCP Server

import json import os import platform import subprocess import tempfile from datetime import datetime from typing import Any, Dict, List, Optional, Tuple from mcp import types from .state import state # Import the required libraries for improved screenshot functionality try: import mss import mss.tools MSS_AVAILABLE = True except ImportError: MSS_AVAILABLE = False try: import pygetwindow as gw PYGETWINDOW_AVAILABLE = True except ImportError: PYGETWINDOW_AVAILABLE = False # Import macOS-specific libraries if available try: import Quartz from Quartz import ( CGWindowListCopyWindowInfo, kCGNullWindowID, kCGWindowListOptionOnScreenOnly, ) QUARTZ_AVAILABLE = True except ImportError: QUARTZ_AVAILABLE = False # Define platform-specific permission error messages PERMISSION_ERROR_MESSAGES = { "darwin": "Permission denied to capture the screen. Please grant screen recording permission in System Settings > Privacy & Security > Screen Recording." } def _check_macos_screen_recording_permission() -> Dict[str, Any]: """ Check if the application has screen recording permission on macOS. For macOS 11+, this function uses the official Apple API: - CGPreflightScreenCaptureAccess() to check if permission is already granted - CGRequestScreenCaptureAccess() to request permission if needed Requesting access will present the system prompt and automatically add your app in the list so the user just needs to enable access. The system prompt will only appear once per app session. Returns: Dict with keys: - has_permission (bool): Whether permission is granted - error (str or None): Error message if permission is denied - details (dict): Additional context about the permission check """ result = {"has_permission": False, "error": None, "details": {}} # Check if Quartz is available if not QUARTZ_AVAILABLE: result["error"] = "Quartz framework not available. Cannot check screen recording permission." result["details"] = {"error": "Quartz not available"} return result # Check if the API is available (macOS 11+) if not hasattr(Quartz, 'CGPreflightScreenCaptureAccess'): result["error"] = "CGPreflightScreenCaptureAccess not available. Your macOS version may be too old (requires macOS 11+)." result["details"] = {"error": "API not available"} return result try: # Check if we already have permission has_permission = Quartz.CGPreflightScreenCaptureAccess() result["details"]["preflight_result"] = has_permission if has_permission: # We already have permission result["has_permission"] = True return result else: # We don't have permission, request it # This will show the system prompt to the user permission_granted = Quartz.CGRequestScreenCaptureAccess() result["details"]["request_result"] = permission_granted if permission_granted: result["has_permission"] = True return result else: # User denied permission result["error"] = PERMISSION_ERROR_MESSAGES["darwin"] return result except Exception as e: result["details"]["exception"] = str(e) result["error"] = f"Error checking screen recording permission: {str(e)}" return result def capture_screenshot_tool(): """Define the capture_screenshot tool.""" return { "name": "capture_screenshot", "description": "Capture a screenshot of the current screen and save it to a file. " "This tool allows capturing the entire screen, the active window, or a specific named window. " "The screenshot will be saved to the specified output path or to a default location if not provided. " "Windows can be captured in the background without bringing them to the front. " "Returns the path to the saved screenshot file.", "inputSchema": { "type": "object", "properties": { "output_path": { "type": "string", "description": "Optional path where the screenshot should be saved. If not provided, a default path will be used." }, "capture_mode": { "type": "object", "description": "Specifies what to capture in the screenshot.", "properties": { "type": { "type": "string", "description": "The type of screenshot to capture: 'full' for entire screen, 'active_window' for the currently active window, or 'named_window' for a specific window by name.", # noqa: E501 "enum": ["full", "active_window", "named_window"] }, "window_name": { "type": "string", "description": "Name of the specific application or window to capture. Required when type is 'named_window'. Windows can be captured in the background without bringing them to the front." } }, "required": ["type"] }, "debug": { "type": "boolean", "description": "Whether to include detailed debug information in the response when the operation fails. Default is False.", } }, "required": ["capture_mode"] }, } def _get_default_screenshot_path() -> str: """Generate a default path for saving screenshots.""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"screenshot_{timestamp}.png" # Use the allowed directory from state if available, otherwise use temp directory if hasattr(state, 'allowed_directory') and state.allowed_directory: base_dir = os.path.join(state.allowed_directory, "screenshots") # Create screenshots directory if it doesn't exist os.makedirs(base_dir, exist_ok=True) else: base_dir = tempfile.gettempdir() return os.path.join(base_dir, filename) def _capture_with_mss(output_path: str, region: Optional[Dict[str, int]] = None) -> bool: """ Capture screenshot using MSS library. Args: output_path: Path where to save the screenshot region: Optional dictionary with top, left, width, height for specific region Returns: bool: True if successful, False otherwise """ try: with mss.mss() as sct: if region: # Capture specific region monitor = region else: # Capture entire primary monitor monitor = sct.monitors[1] # monitors[0] is all monitors combined, monitors[1] is the primary # Grab the picture sct_img = sct.grab(monitor) # Save it to the output path mss.tools.to_png(sct_img.rgb, sct_img.size, output=output_path) return os.path.exists(output_path) and os.path.getsize(output_path) > 0 except Exception as e: print(f"MSS screenshot error: {str(e)}") return False def _find_window_by_name(window_name: str) -> Tuple[Optional[Dict[str, int]], Dict[str, Any]]: """ Find a window by name and return its position and size along with debug info. Args: window_name: Name of the window to find Returns: Tuple containing: - Window region dict with top, left, width, height (or None if not found) - Debug info dictionary with search results and details """ # Check if we're on macOS if platform.system().lower() in ["darwin", "macos"]: # Use the macOS-specific function window_region, detailed_debug_info = find_macos_window_by_name(window_name) if window_region: return window_region, { "search_term": window_name, "found_window": True, "match_type": "quartz_window_search", "detailed_info": detailed_debug_info } else: # Get active apps for better error message active_apps = _get_active_apps_macos() return None, { "search_term": window_name, "reason": "No matching window title", "active_apps": active_apps, "quartz_available": QUARTZ_AVAILABLE, "detailed_info": detailed_debug_info } # For non-macOS platforms, use PyGetWindow if not PYGETWINDOW_AVAILABLE: print("PyGetWindow is not available") return None, {"error": "PyGetWindow is not available"} try: # Get all available windows all_windows = gw.getAllWindows() # Collect window titles for debugging window_titles = [] for w in all_windows: if w.title: window_titles.append(f"'{w.title}' ({w.width}x{w.height})") print(f" - '{w.title}' ({w.width}x{w.height})") # Standard window matching (case-insensitive) matching_windows = [] for window in all_windows: if window.title and window_name.lower() in window.title.lower(): matching_windows.append(window) if not matching_windows: print(f"No window found with title containing '{window_name}'") return None, { "search_term": window_name, "reason": "No matching window title", "matching_method": "case_insensitive_substring", "all_windows": window_titles } # Get the first matching window window = matching_windows[0] print(f"Found matching window: '{window.title}'") # Check if window dimensions are valid if window.width <= 0 or window.height <= 0: print(f"Window has invalid dimensions: {window.width}x{window.height}") return None, { "search_term": window_name, "found_window": window.title, "reason": f"Invalid dimensions: {window.width}x{window.height}", "all_windows": window_titles } # Return the window position and size return { "top": window.top, "left": window.left, "width": window.width, "height": window.height }, { "search_term": window_name, "found_window": window.title, "match_type": "standard_case_insensitive" } except Exception as e: print(f"Error finding window: {str(e)}") return None, { "search_term": window_name, "error": str(e) } def _get_active_apps_macos() -> List[str]: """Get a list of currently active applications on macOS.""" try: script = ''' tell application "System Events" set appList to {} set allProcesses to application processes repeat with proc in allProcesses if windows of proc is not {} then set end of appList to name of proc end if end repeat return appList end tell ''' result = subprocess.run(["osascript", "-e", script], capture_output=True, text=True) if result.returncode == 0: # Parse the comma-separated list from AppleScript apps = result.stdout.strip() if apps: return [app.strip() for app in apps.split(",")] return [] except Exception as e: print(f"Error getting active apps: {str(e)}") return [] def _format_error_with_available_windows(window_name: str, debug_info: Dict[str, Any], result: Dict[str, Any]) -> None: """Format error message with available windows list and store debug info for later use.""" # Check for debug_info from macOS specific format if debug_info and "available_windows" in debug_info: available_windows = [] for window in debug_info["available_windows"]: window_desc = f"'{window['owner']}'" if window['name']: window_desc += f" - '{window['name']}'" available_windows.append(window_desc) # Create a formatted list of available windows for the error message windows_list = ", ".join(available_windows) if available_windows else "No windows found" result["error"] = f"Window '{window_name}' not found. Available windows: {windows_list}" result["_debug_info"] = debug_info # Store with underscore prefix for later use # Check for debug_info from PyGetWindow format elif debug_info and "all_windows" in debug_info: window_titles = debug_info["all_windows"] windows_list = ", ".join(window_titles) if window_titles else "No windows found" result["error"] = f"Window '{window_name}' not found. Available windows: {windows_list}" result["_debug_info"] = debug_info # Store with underscore prefix for later use else: result["error"] = f"Window '{window_name}' not found" if debug_info: result["_debug_info"] = debug_info # Store with underscore prefix for later use def _verify_screenshot_success(output_path: str) -> bool: """Verify if a screenshot was successfully saved to the output path.""" return os.path.exists(output_path) and os.path.getsize(output_path) > 0 def _try_mss_capture(output_path: str, window_region: Optional[Dict[str, int]], result: Dict[str, Any], window_name: Optional[str] = None, debug_info: Optional[Dict[str, Any]] = None) -> bool: """ Try to capture a screenshot using MSS library. Args: output_path: Path where the screenshot should be saved window_region: Region to capture (with top, left, width, height keys) or None for full screen result: Dictionary to store error information if capture fails window_name: Optional name of the window being captured, for error messages debug_info: Optional debug information to include in result on failure Returns: bool: True if capture was successful, False otherwise Note: - When window_region is None, captures the full primary screen. - Updates the result dictionary with success=True on success. - On failure, updates result with error message and debug_info if provided. """ if MSS_AVAILABLE: try: if _capture_with_mss(output_path, window_region): # Simply check if the file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True # Debug info will be added by the caller if debug mode is enabled return True else: result["error"] = "Failed to save screenshot (file is empty or not created)" else: if window_name: result["error"] = f"Failed to capture window '{window_name}' using MSS" else: result["error"] = "MSS failed to capture full screen" except Exception as e: result["error"] = f"MSS error: {str(e)}" return False def _capture_screenshot_macos(output_path: str, capture_area: str = "full", window_name: Optional[str] = None) -> Dict[str, Any]: """ Capture screenshot on macOS. Returns: Dict with success status and error message if failed """ result = {"success": False, "error": None} internal_debug_info = None # Store debug info internally but don't add to result yet # Check for screen recording permission first perm_check = _check_macos_screen_recording_permission() if not perm_check["has_permission"]: result["error"] = perm_check["error"] result["_debug_info"] = perm_check["details"] # Store with underscore prefix for later use return result # If window_name is specified, try to capture that specific window if window_name: # Try to find the window using our macOS-specific function window_region, debug_info = _find_window_by_name(window_name) # Store debug info internally but don't add to result yet internal_debug_info = debug_info if window_region: # If we have a window ID from Quartz, use it directly without activating the window if 'id' in window_region: try: # Capture using the window ID without activating the window cmd = ["screencapture", "-l", str(window_region['id']), output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True # Debug info will be added by the caller if debug mode is enabled result["_debug_info"] = internal_debug_info # Store for later use but with underscore prefix return result else: result["error"] = f"Native screencapture failed with return code {process.returncode}" except Exception as e: result["error"] = f"Screenshot error: {str(e)}" # If direct window ID capture failed or no ID available, try using MSS if _try_mss_capture(output_path, window_region, result, window_name): # If successful, store debug info for later use result["_debug_info"] = internal_debug_info # Store for later use but with underscore prefix return result else: # Window not found - create a more detailed error message with available windows _format_error_with_available_windows(window_name, internal_debug_info, result) # No fallback to capturing the active window - return the result return result elif capture_area == "window": # Capture active window try: cmd = ["screencapture", "-w", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"Active window capture failed with return code {process.returncode}" except Exception as e: result["error"] = f"Active window screenshot error: {str(e)}" # No fallback to full screen here either return result # For full screen capture if _try_mss_capture(output_path, None, result): return result # Fall back to native macOS screencapture for full screen only try: cmd = ["screencapture", "-x", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"Native screencapture failed with return code {process.returncode}" except Exception as e: result["error"] = f"Screenshot error: {str(e)}" return result def _capture_screenshot_linux(output_path: str, capture_area: str = "full", window_name: Optional[str] = None) -> Dict[str, Any]: """ Capture screenshot on Linux. Returns: Dict with success status and error message if failed """ result = {"success": False, "error": None} # If window_name is specified, try to capture that specific window if window_name: # Try to use MSS first if available if MSS_AVAILABLE and PYGETWINDOW_AVAILABLE: try: window_region, debug_info = _find_window_by_name(window_name) if window_region: if _try_mss_capture(output_path, window_region, result, window_name): # Store debug info for later use result["_debug_info"] = debug_info return result else: # Window not found - create a more detailed error message with available windows _format_error_with_available_windows(window_name, debug_info, result) return result except Exception as e: result["error"] = f"PyGetWindow error: {str(e)}" return result # Try native Linux methods only if MSS is not available if not MSS_AVAILABLE or not PYGETWINDOW_AVAILABLE: try: # Try to find the window using xdotool if subprocess.run(["which", "xdotool"], capture_output=True).returncode == 0: # Search for the window find_cmd = ["xdotool", "search", "--name", window_name] result_cmd = subprocess.run(find_cmd, capture_output=True, text=True) if result_cmd.returncode == 0 and result_cmd.stdout.strip(): # Get the first window ID window_id = result_cmd.stdout.strip().split('\n')[0] # Now capture the window if subprocess.run(["which", "gnome-screenshot"], capture_output=True).returncode == 0: cmd = ["gnome-screenshot", "-w", "-f", output_path, "-w", window_id] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"gnome-screenshot failed with return code {process.returncode}" elif subprocess.run(["which", "scrot"], capture_output=True).returncode == 0: cmd = ["scrot", "-u", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"scrot failed with return code {process.returncode}" else: result["error"] = "No screenshot tool found (gnome-screenshot or scrot)" else: result["error"] = f"Window '{window_name}' not found using xdotool" # Store debug info result["_debug_info"] = {"error": "Window not found using xdotool"} else: result["error"] = "xdotool not available for window capture" except Exception as e: result["error"] = f"Screenshot error: {str(e)}" # No fallback to full screen - just return the error return result elif capture_area == "window": # Capture active window try: if subprocess.run(["which", "gnome-screenshot"], capture_output=True).returncode == 0: cmd = ["gnome-screenshot", "-w", "-f", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"Active window capture failed with return code {process.returncode}" elif subprocess.run(["which", "scrot"], capture_output=True).returncode == 0: cmd = ["scrot", "-u", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"scrot failed with return code {process.returncode}" else: result["error"] = "No screenshot tool found (gnome-screenshot or scrot)" except Exception as e: result["error"] = f"Active window screenshot error: {str(e)}" # No fallback to full screen here either return result # For full screen capture if _try_mss_capture(output_path, None, result): return result # Fall back to native Linux methods for full screen only try: if subprocess.run(["which", "gnome-screenshot"], capture_output=True).returncode == 0: cmd = ["gnome-screenshot", "-f", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"gnome-screenshot failed with return code {process.returncode}" elif subprocess.run(["which", "scrot"], capture_output=True).returncode == 0: cmd = ["scrot", output_path] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"scrot failed with return code {process.returncode}" else: result["error"] = "No screenshot tool found (gnome-screenshot or scrot)" except Exception as e: result["error"] = f"Screenshot error: {str(e)}" return result def _capture_screenshot_windows(output_path: str, capture_area: str = "full", window_name: Optional[str] = None) -> Dict[str, Any]: """ Capture screenshot on Windows. Returns: Dict with success status and error message if failed """ result = {"success": False, "error": None} # If window_name is specified, try to capture that specific window if window_name: # Try to use MSS first if available if MSS_AVAILABLE and PYGETWINDOW_AVAILABLE: try: window_region, debug_info = _find_window_by_name(window_name) if window_region: if _try_mss_capture(output_path, window_region, result, window_name): # Store debug info for later use result["_debug_info"] = debug_info return result else: # Window not found - create a more detailed error message with available windows _format_error_with_available_windows(window_name, debug_info, result) return result except Exception as e: result["error"] = f"PyGetWindow error: {str(e)}" return result # Try native Windows methods only if MSS is not available if not MSS_AVAILABLE or not PYGETWINDOW_AVAILABLE: try: script = f""" Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing # Function to find window by title function Find-Window($title) {{ $processes = Get-Process | Where-Object {{$_.MainWindowTitle -like "*$title*"}} return $processes }} $targetProcess = Find-Window("{window_name}") if ($targetProcess -and $targetProcess.Count -gt 0) {{ # Use the first matching process $process = $targetProcess[0] # Get window bounds $hwnd = $process.MainWindowHandle $rect = New-Object System.Drawing.Rectangle [void][System.Runtime.InteropServices.Marshal]::GetWindowRect($hwnd, [ref]$rect) # Capture the window $bitmap = New-Object System.Drawing.Bitmap ($rect.Width - $rect.X), ($rect.Height - $rect.Y) $graphics = [System.Drawing.Graphics]::FromImage($bitmap) $graphics.CopyFromScreen($rect.X, $rect.Y, 0, 0, $bitmap.Size) $bitmap.Save('{output_path}') return $true }} else {{ # List all windows for diagnostics $allWindows = Get-Process | Where-Object {{$_.MainWindowTitle}} | Select-Object MainWindowTitle, ProcessName | Format-List | Out-String Write-Output "WINDOWS_LIST:$allWindows" return $false }} """ cmd = ["powershell", "-Command", script] process = subprocess.run(cmd, capture_output=True, text=True) output = process.stdout.strip() if output.startswith("True"): # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = "Failed to save screenshot of window" else: # Check if we got a list of windows in the output if "WINDOWS_LIST:" in output: windows_list = output.split("WINDOWS_LIST:")[1].strip() result["error"] = f"Window '{window_name}' not found. Available windows: {windows_list}" # Store windows list as debug info result["_debug_info"] = {"available_windows": windows_list} else: result["error"] = f"Window '{window_name}' not found or could not be captured" except Exception as e: result["error"] = f"Screenshot error: {str(e)}" # No fallback to full screen - just return the error return result elif capture_area == "window": # Capture active window using Windows methods try: script = f""" Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing function Get-ActiveWindow {{ $foregroundWindowHandle = [System.Windows.Forms.Form]::ActiveForm.Handle if (-not $foregroundWindowHandle) {{ # If no active form, try to get the foreground window $foregroundWindowHandle = [System.Runtime.InteropServices.Marshal]::GetForegroundWindow() }} if ($foregroundWindowHandle) {{ $rect = New-Object System.Drawing.Rectangle [void][System.Runtime.InteropServices.Marshal]::GetWindowRect($foregroundWindowHandle, [ref]$rect) $bitmap = New-Object System.Drawing.Bitmap ($rect.Width - $rect.X), ($rect.Height - $rect.Y) $graphics = [System.Drawing.Graphics]::FromImage($bitmap) $graphics.CopyFromScreen($rect.X, $rect.Y, 0, 0, $bitmap.Size) $bitmap.Save('{output_path}') return $true }} return $false }} Get-ActiveWindow """ cmd = ["powershell", "-Command", script] process = subprocess.run(cmd, capture_output=True, text=True) if process.stdout.strip() == "True": # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = "Failed to capture active window" else: result["error"] = "Failed to capture active window" except Exception as e: result["error"] = f"Active window screenshot error: {str(e)}" # No fallback to full screen here either return result else: # For full screen capture if _try_mss_capture(output_path, None, result): return result # Fall back to native Windows methods for full screen only try: script = f""" Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds $bitmap = New-Object System.Drawing.Bitmap $screen.Width, $screen.Height $graphics = [System.Drawing.Graphics]::FromImage($bitmap) $graphics.CopyFromScreen($screen.X, $screen.Y, 0, 0, $screen.Size) $bitmap.Save('{output_path}') """ cmd = ["powershell", "-Command", script] process = subprocess.run(cmd, capture_output=True) # Check if file exists and has non-zero size if _verify_screenshot_success(output_path): result["success"] = True return result else: result["error"] = f"PowerShell screenshot failed with return code {process.returncode}" except Exception as e: result["error"] = f"Screenshot error: {str(e)}" return result def find_macos_window_by_name(window_name): """Find a window by name on macOS using Quartz.""" try: if not QUARTZ_AVAILABLE: return None, {"error": "Quartz not available"} window_list = CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID) # Collect debug info instead of printing debug_info = { "search_term": window_name, "available_windows": [] } all_windows = [] for window in window_list: name = window.get('kCGWindowName', '') owner = window.get('kCGWindowOwnerName', '') layer = window.get('kCGWindowLayer', 0) window_id = window.get('kCGWindowNumber', 0) # Skip windows with layer > 0 (typically system UI elements) if layer > 0: continue window_info = { "id": window_id, "name": name, "owner": owner, "layer": layer } debug_info["available_windows"].append(window_info) all_windows.append({ 'id': window_id, 'name': name, 'owner': owner, 'layer': layer, 'bounds': window.get('kCGWindowBounds', {}) }) # Define matching categories with different priorities exact_app_matches = [] # Exact match on application name exact_window_matches = [] # Exact match on window title app_contains_matches = [] # Application name contains search term window_contains_matches = [] # Window title contains search term # Normalize the search term for comparison search_term_lower = window_name.lower() # First pass: categorize windows by match quality for window in all_windows: name = window['name'] or '' owner = window['owner'] or '' # Skip empty windows if not name and not owner: continue name_lower = name.lower() owner_lower = owner.lower() # Check for exact matches first (case-insensitive) if owner_lower == search_term_lower: exact_app_matches.append(window) elif name_lower == search_term_lower: exact_window_matches.append(window) # Then check for contains matches elif search_term_lower in owner_lower: app_contains_matches.append(window) elif search_term_lower in name_lower: window_contains_matches.append(window) # Process matches in priority order for match_list, reason in [ (exact_app_matches, "Exact match on application name"), (exact_window_matches, "Exact match on window title"), (app_contains_matches, "Application name contains search term"), (window_contains_matches, "Window title contains search term") ]: if match_list: # Sort by layer (lower layer = more in front) match_list.sort(key=lambda w: w['layer']) selected_window = match_list[0] debug_info["selected_window"] = { "id": selected_window['id'], "name": selected_window['name'], "owner": selected_window['owner'], "layer": selected_window['layer'], "selection_reason": reason } bounds = selected_window['bounds'] return { 'id': selected_window['id'], 'top': bounds.get('Y', 0), 'left': bounds.get('X', 0), 'width': bounds.get('Width', 0), 'height': bounds.get('Height', 0) }, debug_info debug_info["error"] = f"No matching window found for '{window_name}'" return None, debug_info except Exception as e: return None, {"error": f"Error finding macOS window: {str(e)}"} def capture_screenshot(output_path: Optional[str] = None, capture_mode: Optional[Dict[str, str]] = None, debug: bool = False) -> Dict[str, Any]: """ Capture a screenshot and save it to the specified path. Args: output_path: Path where the screenshot should be saved. If None, a default path will be used. capture_mode: Dictionary specifying what to capture: - type: 'full' for entire screen, 'active_window' for current window, 'named_window' for specific window - window_name: Name of window to capture (required when type is 'named_window') Windows can be captured in the background without bringing them to the front. debug: Whether to include debug information in the response on failure Returns: Dictionary with success status and path to the saved screenshot. """ # Set defaults if capture_mode is not provided if not capture_mode: capture_mode = {"type": "full"} # Extract capture type and window name capture_type = capture_mode.get("type", "full") window_name = capture_mode.get("window_name") if capture_type == "named_window" else None if debug: print(f"Capture mode: {capture_type}") if window_name: print(f"Window name: {window_name}") # Use default path if none provided if not output_path: output_path = _get_default_screenshot_path() # Ensure the output directory exists os.makedirs(os.path.dirname(output_path), exist_ok=True) # Convert to old parameters for compatibility with existing functions capture_area = "window" if capture_type in ["active_window", "named_window"] else "full" # Capture screenshot based on platform system_name = platform.system().lower() if debug: print(f"Detected platform: {system_name}") if system_name == "darwin" or system_name == "macos": result = _capture_screenshot_macos(output_path, capture_area, window_name) elif system_name == "linux": result = _capture_screenshot_linux(output_path, capture_area, window_name) elif system_name == "windows": result = _capture_screenshot_windows(output_path, capture_area, window_name) else: result = {"success": False, "error": f"Unsupported platform: {system_name}"} # Check if the error might be related to permission issues if not result["success"] and result.get("error"): # If the error already mentions permission, highlight it if "permission" in result["error"].lower(): # Make the error message more prominent for permission issues modified_message = f"PERMISSION ERROR: {result['error']}" result["error"] = modified_message # Add additional hints for macOS if system_name == "darwin": result["error"] += " To fix this: Open System Settings > Privacy & Security > Screen Recording, and enable permission for this application." # Extract debug info if present debug_info = result.pop("_debug_info", None) if "_debug_info" in result else None # Format the final result response = { "success": result["success"], "path": output_path if result["success"] else None, "message": "Screenshot captured successfully" if result["success"] else result.get("error", "Failed to capture screenshot") } # Add warning if present if "warning" in result: response["warning"] = result["warning"] # Only include debug info if debug mode is enabled AND the operation failed if debug and not result["success"] and debug_info: response["debug_info"] = debug_info return response async def handle_capture_screenshot(arguments: dict) -> List[types.TextContent]: """Handle capturing a screenshot.""" output_path = arguments.get("output_path") debug = arguments.get("debug", False) # Handle legacy platform parameter (ignore it) if "platform" in arguments: print("Note: 'platform' parameter is deprecated and will be auto-detected") # Enforce new parameter format requiring capture_mode capture_mode = arguments.get("capture_mode") if not capture_mode: result = { "success": False, "error": "Missing required parameter 'capture_mode'. Please provide a capture_mode object with 'type' field." } else: result = capture_screenshot(output_path, capture_mode, debug) return [types.TextContent(type="text", text=json.dumps(result, indent=2))]