capture_window
Capture screenshots of specific windows using the window ID, save them to a custom path, and retrieve results in JSON format. Integrates with Auto-Snap MCP for streamlined document processing and text extraction.
Instructions
Capture screenshot of a specific window.
Args:
window_id: Window ID to capture (from list_windows)
output_path: Optional path to save the screenshot
Returns:
JSON string with capture results and file path.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| output_path | No | Optional path to save the screenshot | |
| window_id | Yes | Window ID to capture (from list_windows) | |
Implementation Reference
# server.py:104-138 (handler) - MCP tool handler for 'capture_window': registers the tool,
# validates input via type hints, delegates capture to CrossPlatformWindowManager,
# handles errors, and returns JSON results with file info.
@mcp.tool()
async def capture_window(window_id: str, output_path: Optional[str] = None) -> str:
    """
    Capture screenshot of a specific window.

    Args:
        window_id: Window ID to capture (from list_windows)
        output_path: Optional path to save the screenshot

    Returns:
        JSON string with capture results and file path.
    """
    # NOTE(review): the docstring wording is kept as-is because the tool
    # decorator presumably exposes it as the tool description - confirm.
    try:
        saved_to = get_window_manager().capture_window(window_id, output_path)

        # Build the success payload field by field; key order matches the
        # order in which the fields appear in the emitted JSON.
        payload = {
            "status": "success",
            "window_id": window_id,
            "output_path": saved_to,
        }
        payload["file_exists"] = Path(saved_to).exists()
        # stat() raises if the file is missing; that is reported through the
        # error branch below rather than as a success with no file.
        size_in_bytes = Path(saved_to).stat().st_size
        payload["file_size_mb"] = round(size_in_bytes / (1024 * 1024), 2)

        return json.dumps(payload, indent=2)
    except Exception as exc:
        # Top-level tool boundary: every failure is logged and turned into a
        # JSON error document instead of propagating to the caller.
        logger.error(f"Failed to capture window {window_id}: {exc}")
        failure = {
            "status": "error",
            "error": str(exc),
            "window_id": window_id,
            "output_path": output_path,
        }
        return json.dumps(failure)
# capture.py:1391-1394 (helper) - CrossPlatformWindowManager.capture_window: thin wrapper that
# dispatches to the appropriate platform-specific window manager
# (WindowsWindowManager or WindowCapture).
def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
    """Capture screenshot of a specific window.

    Both arguments are forwarded unchanged to ``self.manager.capture_window``
    and its return value is handed back to the caller as-is.
    """
    platform_manager = self.manager
    return platform_manager.capture_window(window_id, output_path)
- capture.py:235-459 (helper)WindowsWindowManager.capture_window: Platform-specific implementation for Windows (via WSL2 PowerShell). Uses advanced PrintWindow API, handles minimized windows stealthily, captures to temp file and copies to output.def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str: """ Capture screenshot of a Windows application window. """ if not self.powershell_available: raise Exception("PowerShell not available - cannot capture Windows applications") try: # Enhanced PowerShell script with PrintWindow support for minimized windows ps_script = f''' Add-Type -AssemblyName System.Windows.Forms Add-Type -AssemblyName System.Drawing $windowHandle = [IntPtr]{window_id} if ($windowHandle -eq 0) {{ Write-Error "Invalid window handle" exit 1 }} # Define comprehensive Windows API functions Add-Type @" using System; using System.Runtime.InteropServices; public class Win32 {{ [StructLayout(LayoutKind.Sequential)] public struct RECT {{ public int Left, Top, Right, Bottom; }} // Window management APIs [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect); [DllImport("user32.dll")] public static extern bool IsIconic(IntPtr hWnd); [DllImport("user32.dll")] public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow); [DllImport("user32.dll")] public static extern bool PrintWindow(IntPtr hWnd, IntPtr hdcBlt, uint nFlags); [DllImport("user32.dll")] public static extern bool SetLayeredWindowAttributes(IntPtr hWnd, uint crKey, byte bAlpha, uint dwFlags); [DllImport("user32.dll")] public static extern int GetWindowLong(IntPtr hWnd, int nIndex); [DllImport("user32.dll")] public static extern int SetWindowLong(IntPtr hWnd, int nIndex, int dwNewLong); [DllImport("user32.dll")] public static extern bool SetForegroundWindow(IntPtr hWnd); [DllImport("user32.dll")] public static extern IntPtr GetForegroundWindow(); [DllImport("user32.dll")] public static extern bool EnumChildWindows(IntPtr 
hWndParent, EnumChildProc lpEnumFunc, IntPtr lParam); [DllImport("user32.dll")] public static extern int GetClassName(IntPtr hWnd, System.Text.StringBuilder lpClassName, int nMaxCount); [DllImport("user32.dll")] public static extern IntPtr FindWindowEx(IntPtr hWndParent, IntPtr hWndChildAfter, string lpszClass, string lpszWindow); [DllImport("user32.dll")] public static extern bool PostMessage(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam); [DllImport("user32.dll")] public static extern IntPtr SendMessage(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam); [DllImport("user32.dll")] public static extern bool BringWindowToTop(IntPtr hWnd); public delegate bool EnumChildProc(IntPtr hWnd, IntPtr lParam); // Constants public const int SW_HIDE = 0; public const int SW_RESTORE = 9; public const int SW_MINIMIZE = 6; public const int SW_SHOW = 5; public const int GWL_EXSTYLE = -20; public const int WS_EX_LAYERED = 0x80000; public const int LWA_ALPHA = 0x2; public const uint PW_CLIENTONLY = 0x1; public const uint PW_RENDERFULLCONTENT = 0x2; // Windows Messages public const uint WM_KEYDOWN = 0x0100; public const uint WM_KEYUP = 0x0101; public const uint WM_CHAR = 0x0102; public const uint WM_COMMAND = 0x0111; public const uint WM_VSCROLL = 0x0115; // Virtual Key Codes public const int VK_SPACE = 0x20; public const int VK_PRIOR = 0x21; // Page Up public const int VK_NEXT = 0x22; // Page Down public const int VK_DOWN = 0x28; // Down Arrow public const int VK_UP = 0x26; // Up Arrow public const int VK_LEFT = 0x25; // Left Arrow public const int VK_RIGHT = 0x27; // Right Arrow }} "@ # Check if window is minimized and implement smart capture logic $isMinimized = [Win32]::IsIconic($windowHandle) $wasMinimized = $false $originalExStyle = 0 try {{ # Get window rectangle $rect = New-Object Win32+RECT $success = [Win32]::GetWindowRect($windowHandle, [ref]$rect) if (-not $success) {{ Write-Error "Could not get window rectangle" exit 1 }} $width = $rect.Right - $rect.Left $height 
= $rect.Bottom - $rect.Top if ($width -le 0 -or $height -le 0) {{ Write-Error "Invalid window dimensions: $($width)x$($height)" exit 1 }} # Handle minimized windows with stealth restoration if ($isMinimized) {{ Write-Verbose "Window is minimized, using stealth restoration technique" $wasMinimized = $true # Make window transparent for stealth operation $originalExStyle = [Win32]::GetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE) $newExStyle = $originalExStyle -bor [Win32]::WS_EX_LAYERED [Win32]::SetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE, $newExStyle) | Out-Null [Win32]::SetLayeredWindowAttributes($windowHandle, 0, 1, [Win32]::LWA_ALPHA) | Out-Null # Restore window temporarily [Win32]::ShowWindow($windowHandle, [Win32]::SW_RESTORE) | Out-Null Start-Sleep -Milliseconds 100 # Brief pause for window to render # Update rectangle after restoration $success = [Win32]::GetWindowRect($windowHandle, [ref]$rect) if ($success) {{ $width = $rect.Right - $rect.Left $height = $rect.Bottom - $rect.Top }} }} # Create bitmap for capture $bitmap = New-Object System.Drawing.Bitmap($width, $height) $graphics = [System.Drawing.Graphics]::FromImage($bitmap) # Try PrintWindow first (better for minimized/hidden windows) $hdcBitmap = $graphics.GetHdc() $printSuccess = [Win32]::PrintWindow($windowHandle, $hdcBitmap, [Win32]::PW_RENDERFULLCONTENT) $graphics.ReleaseHdc($hdcBitmap) # Fallback to CopyFromScreen if PrintWindow fails if (-not $printSuccess) {{ Write-Verbose "PrintWindow failed, falling back to CopyFromScreen" if (-not $isMinimized) {{ $graphics.CopyFromScreen($rect.Left, $rect.Top, 0, 0, [System.Drawing.Size]::new($width, $height)) }} else {{ throw "Cannot capture minimized window with CopyFromScreen" }} }} $graphics.Dispose() # Save to temporary file $tempPath = [System.IO.Path]::GetTempFileName() + ".png" $bitmap.Save($tempPath, [System.Drawing.Imaging.ImageFormat]::Png) $bitmap.Dispose() Write-Output $tempPath }} finally {{ # Restore original window state if it was 
modified if ($wasMinimized) {{ # Restore original transparency [Win32]::SetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE, $originalExStyle) | Out-Null # Re-minimize the window [Win32]::ShowWindow($windowHandle, [Win32]::SW_MINIMIZE) | Out-Null }} }} ''' result = subprocess.run( ['powershell.exe', '-Command', ps_script], capture_output=True, text=True, check=True, timeout=60, # 60 second timeout for screenshot capture encoding='utf-8', errors='ignore' # Ignore encoding errors to handle special characters ) temp_windows_path = result.stdout.strip() if not temp_windows_path: raise Exception("PowerShell did not return temp file path") # Convert Windows path to WSL path wsl_temp_path = self._windows_path_to_wsl(temp_windows_path) if output_path is None: timestamp = int(time.time()) output_path = f"windows_capture_{window_id}_{timestamp}.png" # Copy from Windows temp to desired location subprocess.run(['cp', wsl_temp_path, output_path], check=True) # Clean up Windows temp file subprocess.run( ['powershell.exe', '-Command', f'Remove-Item "{temp_windows_path}" -Force'], check=False # Don't fail if cleanup fails ) logger.info(f"Windows application captured: {output_path}") return output_path
# capture.py:1546-1577 (helper) - WindowCapture.capture_window: Linux/X11 implementation using
# wmctrl for window focus/geometry and pyscreenshot for bbox capture.
def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
    """
    Capture screenshot of a specific window.

    Args:
        window_id: ID of the window to capture.
        output_path: Where to save the image. When None, a name of the form
            "window_<window_id>_<unix timestamp>.png" is generated.

    Returns:
        The path the screenshot was saved to.

    Raises:
        Exception: any failure is logged and then re-raised unchanged.
    """
    try:
        # Focusing is best effort: failure only produces a warning.
        focused = self.focus_window(window_id)
        if not focused:
            logger.warning(f"Could not focus window {window_id}, trying capture anyway")

        # Grab just the window's bounding box when its geometry is known,
        # otherwise fall back to the whole screen.
        bounds = self.get_window_geometry(window_id)
        if not bounds:
            logger.warning(f"Could not get geometry for window {window_id}, capturing full screen")
            image = ImageGrab.grab()
        else:
            left, top, box_width, box_height = bounds
            image = ImageGrab.grab(bbox=(left, top, left + box_width, top + box_height))

        if output_path is None:
            output_path = f"window_{window_id}_{int(time.time())}.png"

        image.save(output_path)
        logger.info(f"Window captured: {output_path}")
        return output_path
    except Exception as exc:
        logger.error(f"Failed to capture window {window_id}: {exc}")
        raise