capture_window
Take a screenshot of a specific application window by providing its window ID, with an option to save the image to a specified file path.
Instructions
Capture screenshot of a specific window.
Args:
window_id: Window ID to capture (from list_windows)
output_path: Optional path to save the screenshot
Returns:
JSON string with capture results and file path.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| window_id | Yes | Window ID to capture (from list_windows) | |
| output_path | No | Optional path to save the screenshot | None |
Implementation Reference
# server.py:104-138 (handler) MCP tool handler and registration for 'capture_window'. Handles input validation via type hints, delegates to CrossPlatformWindowManager, returns JSON results.
@mcp.tool()
async def capture_window(window_id: str, output_path: Optional[str] = None) -> str:
    """
    Capture screenshot of a specific window.

    Args:
        window_id: Window ID to capture (from list_windows)
        output_path: Optional path to save the screenshot

    Returns:
        JSON string with capture results and file path.
    """
    # NOTE: the docstring above is kept verbatim; it appears to be what the
    # tool registration publishes as the tool's instructions.
    try:
        captured_path = get_window_manager().capture_window(window_id, output_path)

        saved_file = Path(captured_path)
        file_exists = saved_file.exists()
        # stat() raises when the file is missing, so a capture that produced
        # no file ends up in the error payload below rather than in "success".
        size_in_bytes = saved_file.stat().st_size

        payload = {
            "status": "success",
            "window_id": window_id,
            "output_path": captured_path,
            "file_exists": file_exists,
            "file_size_mb": round(size_in_bytes / (1024 * 1024), 2),
        }
        return json.dumps(payload, indent=2)
    except Exception as e:
        # Boundary handler: every failure is logged and reported to the caller
        # as a JSON error object instead of propagating.
        logger.error(f"Failed to capture window {window_id}: {e}")
        failure = {
            "status": "error",
            "error": str(e),
            "window_id": window_id,
            "output_path": output_path,
        }
        return json.dumps(failure)
- capture.py:235-459 (helper)WindowsWindowManager.capture_window: Primary implementation for Windows via WSL2/PowerShell. Uses PrintWindow API, handles minimized windows with stealth restoration, full PowerShell automation.def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str: """ Capture screenshot of a Windows application window. """ if not self.powershell_available: raise Exception("PowerShell not available - cannot capture Windows applications") try: # Enhanced PowerShell script with PrintWindow support for minimized windows ps_script = f''' Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing $windowHandle = [IntPtr]{window_id} if ($windowHandle -eq 0) {{ Write-Error "Invalid window handle" exit 1 }} # Define comprehensive Windows API functions Add-Type @" using System; using System.Runtime.InteropServices; public class Win32 {{ [StructLayout(LayoutKind.Sequential)] public struct RECT {{ public int Left, Top, Right, Bottom; }} // Window management APIs [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect); [DllImport("user32.dll")] public static extern bool IsIconic(IntPtr hWnd); [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow); [DllImport("user32.dll")] public static extern bool PrintWindow(IntPtr hWnd, IntPtr hdcBlt, uint nFlags); [DllImport("user32.dll")] public static extern bool SetLayeredWindowAttributes(IntPtr hWnd, uint crKey, byte bAlpha, uint dwFlags); [DllImport("user32.dll")] public static extern int GetWindowLong(IntPtr hWnd, int nIndex); [DllImport("user32.dll")] public static extern int SetWindowLong(IntPtr hWnd, int nIndex, int dwNewLong); [DllImport("user32.dll")] public static extern bool SetForegroundWindow(IntPtr hWnd); [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow(); [DllImport("user32.dll")] public static extern bool EnumChildWindows(IntPtr hWndParent, EnumChildProc 
lpEnumFunc, IntPtr lParam); [DllImport("user32.dll")] public static extern int GetClassName(IntPtr hWnd, System.Text.StringBuilder lpClassName, int nMaxCount); [DllImport("user32.dll")] public static extern IntPtr FindWindowEx(IntPtr hWndParent, IntPtr hWndChildAfter, string lpszClass, string lpszWindow); [DllImport("user32.dll")] public static extern bool PostMessage(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam); [DllImport("user32.dll")] public static extern IntPtr SendMessage(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam); [DllImport("user32.dll")] public static extern bool BringWindowToTop(IntPtr hWnd); public delegate bool EnumChildProc(IntPtr hWnd, IntPtr lParam); // Constants public const int SW_HIDE = 0; public const int SW_RESTORE = 9; public const int SW_MINIMIZE = 6; public const int SW_SHOW = 5; public const int GWL_EXSTYLE = -20; public const int WS_EX_LAYERED = 0x80000; public const int LWA_ALPHA = 0x2; public const uint PW_CLIENTONLY = 0x1; public const uint PW_RENDERFULLCONTENT = 0x2; // Windows Messages public const uint WM_KEYDOWN = 0x0100; public const uint WM_KEYUP = 0x0101; public const uint WM_CHAR = 0x0102; public const uint WM_COMMAND = 0x0111; public const uint WM_VSCROLL = 0x0115; // Virtual Key Codes public const int VK_SPACE = 0x20; public const int VK_PRIOR = 0x21; // Page Up public const int VK_NEXT = 0x22; // Page Down public const int VK_DOWN = 0x28; // Down Arrow public const int VK_UP = 0x26; // Up Arrow public const int VK_LEFT = 0x25; // Left Arrow public const int VK_RIGHT = 0x27; // Right Arrow }} "@ # Check if window is minimized and implement smart capture logic $isMinimized = [Win32]::IsIconic($windowHandle) $wasMinimized = $false $originalExStyle = 0 try {{ # Get window rectangle $rect = New-Object Win32+RECT $success = [Win32]::GetWindowRect($windowHandle, [ref]$rect) if (-not $success) {{ Write-Error "Could not get window rectangle" exit 1 }} $width = $rect.Right - $rect.Left $height = $rect.Bottom - 
$rect.Top if ($width -le 0 -or $height -le 0) {{ Write-Error "Invalid window dimensions: $($width)x$($height)" exit 1 }} # Handle minimized windows with stealth restoration if ($isMinimized) {{ Write-Verbose "Window is minimized, using stealth restoration technique" $wasMinimized = $true # Make window transparent for stealth operation $originalExStyle = [Win32]::GetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE) $newExStyle = $originalExStyle -bor [Win32]::WS_EX_LAYERED [Win32]::SetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE, $newExStyle) | Out-Null [Win32]::SetLayeredWindowAttributes($windowHandle, 0, 1, [Win32]::LWA_ALPHA) | Out-Null # Restore window temporarily [Win32]::ShowWindow($windowHandle, [Win32]::SW_RESTORE) | Out-Null Start-Sleep -Milliseconds 100 # Brief pause for window to render # Update rectangle after restoration $success = [Win32]::GetWindowRect($windowHandle, [ref]$rect) if ($success) {{ $width = $rect.Right - $rect.Left $height = $rect.Bottom - $rect.Top }} }} # Create bitmap for capture $bitmap = New-Object System.Drawing.Bitmap($width, $height) $graphics = [System.Drawing.Graphics]::FromImage($bitmap) # Try PrintWindow first (better for minimized/hidden windows) $hdcBitmap = $graphics.GetHdc() $printSuccess = [Win32]::PrintWindow($windowHandle, $hdcBitmap, [Win32]::PW_RENDERFULLCONTENT) $graphics.ReleaseHdc($hdcBitmap) # Fallback to CopyFromScreen if PrintWindow fails if (-not $printSuccess) {{ Write-Verbose "PrintWindow failed, falling back to CopyFromScreen" if (-not $isMinimized) {{ $graphics.CopyFromScreen($rect.Left, $rect.Top, 0, 0, [System.Drawing.Size]::new($width, $height)) }} else {{ throw "Cannot capture minimized window with CopyFromScreen" }} }} $graphics.Dispose() # Save to temporary file $tempPath = [System.IO.Path]::GetTempFileName() + ".png" $bitmap.Save($tempPath, [System.Drawing.Imaging.ImageFormat]::Png) $bitmap.Dispose() Write-Output $tempPath }} finally {{ # Restore original window state if it was modified if 
($wasMinimized) {{ # Restore original transparency [Win32]::SetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE, $originalExStyle) | Out-Null # Re-minimize the window [Win32]::ShowWindow($windowHandle, [Win32]::SW_MINIMIZE) | Out-Null }} }} ''' result = subprocess.run( ['powershell.exe', '-Command', ps_script], capture_output=True, text=True, check=True, timeout=60, # 60 second timeout for screenshot capture encoding='utf-8', errors='ignore' # Ignore encoding errors to handle special characters ) temp_windows_path = result.stdout.strip() if not temp_windows_path: raise Exception("PowerShell did not return temp file path") # Convert Windows path to WSL path wsl_temp_path = self._windows_path_to_wsl(temp_windows_path) if output_path is None: timestamp = int(time.time()) output_path = f"windows_capture_{window_id}_{timestamp}.png" # Copy from Windows temp to desired location subprocess.run(['cp', wsl_temp_path, output_path], check=True) # Clean up Windows temp file subprocess.run( ['powershell.exe', '-Command', f'Remove-Item "{temp_windows_path}" -Force'], check=False # Don't fail if cleanup fails ) logger.info(f"Windows application captured: {output_path}") return output_path
# capture.py:1546-1576 (helper) WindowCapture.capture_window: Linux/X11 implementation using wmctrl for window focus/geometry and pyscreenshot (PIL) for region capture.
def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
    """
    Grab an image of one window and write it to disk.

    The window is focused first (a failed focus is only logged). When the
    window geometry is available, just that rectangle is grabbed; otherwise
    the whole screen is grabbed instead.

    Args:
        window_id: Identifier of the window to capture.
        output_path: Destination file. When None, a relative file name built
            from the window ID and the current Unix timestamp is used.

    Returns:
        The path the image was saved to.

    Raises:
        Exception: Any failure is logged and then re-raised unchanged.
    """
    try:
        focused = self.focus_window(window_id)
        if not focused:
            logger.warning(f"Could not focus window {window_id}, trying capture anyway")

        geometry = self.get_window_geometry(window_id)
        if not geometry:
            # No geometry: fall back to an unrestricted grab.
            logger.warning(f"Could not get geometry for window {window_id}, capturing full screen")
            screenshot = ImageGrab.grab()
        else:
            left, top, span_w, span_h = geometry
            # bbox is (left, top, right, bottom), hence origin + size.
            screenshot = ImageGrab.grab(bbox=(left, top, left + span_w, top + span_h))

        if output_path is None:
            timestamp = int(time.time())
            output_path = f"window_{window_id}_{timestamp}.png"

        screenshot.save(output_path)
        logger.info(f"Window captured: {output_path}")
        return output_path
    except Exception as e:
        logger.error(f"Failed to capture window {window_id}: {e}")
        raise
# capture.py:1347-1431 (helper) CrossPlatformWindowManager: Dispatches capture_window calls to platform-specific implementations (WindowsWindowManager for WSL/Windows, WindowCapture for Linux). Called by MCP handler.
class CrossPlatformWindowManager:
    """
    Single entry point for window operations.

    On construction the runtime environment is detected and a matching
    backend manager is created; the public methods forward to that backend.
    """

    def __init__(self):
        self.environment = detect_environment()
        self.manager = self._create_manager()

    def _create_manager(self):
        """Build the backend manager that matches ``self.environment``."""
        env = self.environment

        if env == 'windows':
            logger.info("Using Windows application manager")
            return WindowsWindowManager()

        if env != 'wsl':
            # Anything that is neither 'wsl' nor 'windows' is treated as Linux.
            logger.info("Using Linux X11 window manager")
            return WindowCapture()

        # WSL: prefer the Windows backend, but only if PowerShell is usable.
        candidate = WindowsWindowManager()
        if not candidate.powershell_available:
            logger.info("PowerShell unavailable, falling back to Linux X11 manager")
            return WindowCapture()

        logger.info("Using Windows application manager (PowerShell from WSL2)")
        return candidate

    def list_windows(self) -> List[Dict[str, str]]:
        """
        Return the backend's window list, tagging each entry in place.

        Every entry gets an 'environment' key. Entries without a 'type' key
        get 'x11' on Linux and the environment name otherwise. Any failure is
        logged and an empty list is returned.
        """
        try:
            found = self.manager.list_windows()
            for entry in found:
                entry['environment'] = self.environment
                # Only fills in 'type' when the backend did not provide one.
                entry.setdefault(
                    'type',
                    'x11' if self.environment == 'linux' else self.environment,
                )
            return found
        except Exception as e:
            logger.error(f"Failed to list windows: {e}")
            return []

    def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
        """Forward a single-window capture to the backend and return its result."""
        backend = self.manager
        return backend.capture_window(window_id, output_path)

    def capture_full_screen(self, output_path: Optional[str] = None) -> str:
        """
        Capture the whole screen and return the path of the saved image.

        Uses the backend's own ``capture_full_screen`` when it has one;
        otherwise grabs the screen via ImageGrab and saves it to
        ``output_path`` (or a timestamped relative file name when None).
        Failures in the fallback are logged and re-raised.
        """
        if hasattr(self.manager, 'capture_full_screen'):
            return self.manager.capture_full_screen(output_path)

        # Backend has no full-screen support: do the grab here.
        try:
            screenshot = ImageGrab.grab()

            if output_path is None:
                timestamp = int(time.time())
                output_path = f"screenshot_{timestamp}.png"

            screenshot.save(output_path)
            logger.info(f"Full screen captured: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"Failed to capture full screen: {e}")
            raise

    def get_environment_info(self) -> Dict[str, str]:
        """
        Describe the detected environment and the active backend.

        Always reports 'environment' and 'manager_type'. A Windows backend
        adds 'powershell_available'; a Linux backend adds the list of missing
        dependencies and whether that list is empty ('x11_available').
        """
        backend = self.manager
        info = {
            'environment': self.environment,
            'manager_type': type(backend).__name__,
        }

        if isinstance(backend, WindowsWindowManager):
            info['powershell_available'] = backend.powershell_available
        elif isinstance(backend, WindowCapture):
            missing_deps = check_dependencies()
            info['missing_dependencies'] = missing_deps
            info['x11_available'] = len(missing_deps) == 0

        return info