AiDD MCP Server

import json import platform import subprocess from typing import Any, Dict, List import importlib.util from mcp import types # Use importlib.util.find_spec to check for availability of optional packages def is_package_available(package_name): """Check if a package is available using importlib.util.find_spec.""" return importlib.util.find_spec(package_name) is not None # Check for PyGetWindow PYGETWINDOW_AVAILABLE = is_package_available("pygetwindow") # Check for macOS-specific Quartz framework QUARTZ_AVAILABLE = False if platform.system().lower() == "darwin": QUARTZ_AVAILABLE = is_package_available("Quartz") if QUARTZ_AVAILABLE: from Quartz import ( CGWindowListCopyWindowInfo, kCGNullWindowID, kCGWindowListOptionOnScreenOnly, ) def get_available_windows_tool(): """Define the get_available_windows tool.""" return { "name": "get_available_windows", "description": "Get detailed information about all available windows currently displayed on the user's screen. " "This tool provides comprehensive window data including window titles and their owner applications. " "It can help with context-aware assistance, understanding what's visible on the user's " "screen, or when you need to interact with specific applications or windows.", "inputSchema": { "type": "object", "properties": {}, "required": [] }, } def _get_windows_macos() -> List[Dict[str, Any]]: """ Get information about all available windows on macOS. Returns: List of dictionaries containing window information """ windows = [] if not QUARTZ_AVAILABLE: print("Quartz framework not available. Unable to list windows on macOS.") return windows try: # Get the list of windows from Quartz window_list = CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID) for window in window_list: window_id = window.get('kCGWindowNumber', 0) owner = window.get('kCGWindowOwnerName', '') name = window.get('kCGWindowName', '') layer = window.get('kCGWindowLayer', 0) alpha = window.get('kCGWindowAlpha', 1.0) # Create the window info dictionary window_info = { "id": window_id, "title": name, "app": owner, "visible": layer <= 0 and alpha > 0.1, } windows.append(window_info) # Sort windows by application name and then by window title windows.sort(key=lambda w: (w.get("app", "").lower(), w.get("title", "").lower())) except Exception as e: print(f"Error getting windows on macOS: {str(e)}") return windows def _get_windows_windows() -> List[Dict[str, Any]]: """ Get information about all available windows on Windows. Returns: List of dictionaries containing window information """ windows = [] # Try using PyGetWindow if available if PYGETWINDOW_AVAILABLE: try: import pygetwindow as gw all_windows = gw.getAllWindows() for window in all_windows: # Skip windows with empty titles if not window.title: continue # Try to determine the application name from the window title # This is an approximation and may not be accurate for all applications app_name = "" title_parts = window.title.split(' - ') if len(title_parts) > 1: app_name = title_parts[-1] # Create the window info dictionary window_info = { "title": window.title, "visible": window.visible, "active": window.isActive } # Add app name if we were able to determine it if app_name: window_info["app"] = app_name windows.append(window_info) # Sort windows by application name and then by window title windows.sort(key=lambda w: (w.get("app", "").lower() if "app" in w else "", w.get("title", "").lower())) except Exception as e: print(f"Error getting windows with PyGetWindow: {str(e)}") # If PyGetWindow failed or isn't available, try using PowerShell if not windows: try: script = ''' Add-Type @" using System; using System.Runtime.InteropServices; using System.Text; public class Window { [DllImport("user32.dll")] [return: MarshalAs(UnmanagedType.Bool)] public static extern bool EnumWindows(EnumWindowsProc enumProc, IntPtr lParam); [DllImport("user32.dll")] public static extern int GetWindowText(IntPtr hWnd, StringBuilder lpString, int nMaxCount); [DllImport("user32.dll")] public static extern bool IsWindowVisible(IntPtr hWnd); [DllImport("user32.dll", SetLastError=true)] public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId); public delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam); } "@ $windows = @() $enumWindowsCallback = { param($hwnd, $lParam) # Get the window title $sb = New-Object System.Text.StringBuilder(256) [void][Window]::GetWindowText($hwnd, $sb, $sb.Capacity) $title = $sb.ToString() # Only process windows with titles if($title -and $title -ne "") { # Check if the window is visible $visible = [Window]::IsWindowVisible($hwnd) # Get process ID and name $processId = 0 [void][Window]::GetWindowThreadProcessId($hwnd, [ref]$processId) $process = Get-Process -Id $processId -ErrorAction SilentlyContinue $processName = if($process) { $process.ProcessName } else { "Unknown" } # Create the window object $window = @{ title = $title app = $processName visible = $visible } $windows += $window } # Continue enumeration return $true } # Enumerate all windows [void][Window]::EnumWindows($enumWindowsCallback, [IntPtr]::Zero) # Sort the windows $windows = $windows | Sort-Object -Property @{Expression="app"}, @{Expression="title"} # Convert to JSON $windows | ConvertTo-Json -Depth 3 ''' cmd = ["powershell", "-Command", script] process = subprocess.run(cmd, capture_output=True, text=True) if process.returncode == 0 and process.stdout.strip(): try: windows_data = json.loads(process.stdout) # Handle single item (not in a list) if isinstance(windows_data, dict): windows_data = [windows_data] windows = windows_data except json.JSONDecodeError: print("Failed to parse JSON from PowerShell output") except Exception as e: print(f"Error getting windows with PowerShell: {str(e)}") return windows def _get_windows_linux() -> List[Dict[str, Any]]: """ Get information about all available windows on Linux. Returns: List of dictionaries containing window information """ windows = [] # Try using wmctrl if available try: # Check if wmctrl is installed check_process = subprocess.run(["which", "wmctrl"], capture_output=True) if check_process.returncode == 0: # Get the list of windows wmctrl_process = subprocess.run(["wmctrl", "-l"], capture_output=True, text=True) if wmctrl_process.returncode == 0: window_data = wmctrl_process.stdout.strip().split('\n') for line in window_data: if not line: continue parts = line.split(None, 3) if len(parts) < 4: continue window_id, desktop, owner, *title_parts = parts title = title_parts[0] if title_parts else "" # Create the window info dictionary window_info = { "id": window_id, "title": title, "app": owner, "desktop": desktop, "visible": True # wmctrl -l only shows visible windows } windows.append(window_info) # Sort windows by application name and then by window title windows.sort(key=lambda w: (w.get("app", "").lower(), w.get("title", "").lower())) except Exception as e: print(f"Error getting windows with wmctrl: {str(e)}") # If wmctrl failed, try using xwininfo and xprop if not windows: try: # Get the list of window IDs xwininfo_process = subprocess.run(["xwininfo", "-root", "-children"], capture_output=True, text=True) if xwininfo_process.returncode == 0: lines = xwininfo_process.stdout.strip().split('\n') # Parse the output to find window IDs window_ids = [] for line in lines: # Look for lines with window IDs in hexadecimal format if "0x" in line and "child" in line.lower(): parts = line.split() for part in parts: if part.startswith("0x"): window_ids.append(part) break # Get information for each window for window_id in window_ids: # Get window name xprop_name_process = subprocess.run(["xprop", "-id", window_id, "WM_NAME"], capture_output=True, text=True) # Get window class (application) xprop_class_process = subprocess.run(["xprop", "-id", window_id, "WM_CLASS"], capture_output=True, text=True) # Extract the window title title = "" if xprop_name_process.returncode == 0: output = xprop_name_process.stdout.strip() if "=" in output: title = output.split("=", 1)[1].strip().strip('"') # Extract the application name app_name = "" if xprop_class_process.returncode == 0: output = xprop_class_process.stdout.strip() if "=" in output: classes = output.split("=", 1)[1].strip().strip('"').split('", "') app_name = classes[-1] if classes else "" # Create the window info dictionary window_info = { "id": window_id, "title": title, "app": app_name, "visible": True # Assuming all retrieved windows are visible } windows.append(window_info) # Sort windows by application name and then by window title windows.sort(key=lambda w: (w.get("app", "").lower(), w.get("title", "").lower())) except Exception as e: print(f"Error getting windows with xwininfo/xprop: {str(e)}") return windows def get_available_windows() -> Dict[str, Any]: """ Get detailed information about all available windows currently displayed on screen. Returns: Dictionary with platform, success status, and list of windows """ system_name = platform.system().lower() # Get windows based on platform if system_name == "darwin" or system_name == "macos": windows = _get_windows_macos() elif system_name == "windows": windows = _get_windows_windows() elif system_name == "linux": windows = _get_windows_linux() else: return { "success": False, "platform": system_name, "error": f"Unsupported platform: {system_name}. This tool currently supports macOS, Windows, and Linux.", "windows": [] } # If no windows were found, provide a descriptive error message if not windows: error_message = "No windows could be detected on your screen. " if system_name == "darwin": error_message += "This might be due to missing screen recording permissions. Please check System Settings > Privacy & Security > Screen Recording." elif system_name == "windows": error_message += "This might be due to insufficient permissions or no windows being displayed." elif system_name == "linux": error_message += "This might be due to wmctrl or xwininfo not being installed or no windows being displayed." return { "success": False, "platform": system_name, "error": error_message, "windows": [] } return { "success": True, "platform": system_name, "count": len(windows), "windows": windows } async def handle_get_available_windows(arguments: dict) -> List[types.TextContent]: """Handle getting available windows.""" result = get_available_windows() return [types.TextContent(type="text", text=json.dumps(result, indent=2))]