Skip to main content
Glama

capture_window

Take a screenshot of a specific application window by providing its window ID, with an option to save the image to a specified file path.

Instructions

Capture screenshot of a specific window.

Args:
    window_id: Window ID to capture (from list_windows)
    output_path: Optional path to save the screenshot

Returns:
    JSON string with capture results and file path.

Input Schema

Table | JSON Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| window_id | Yes | | |
| output_path | No | | |

Implementation Reference

  • MCP tool handler and registration for 'capture_window'. Handles input validation via type hints, delegates to CrossPlatformWindowManager, returns JSON results.
    @mcp.tool()
    async def capture_window(window_id: str, output_path: Optional[str] = None) -> str:
        """
        Screenshot a single window and describe the outcome as JSON.

        Args:
            window_id: Identifier of the window to capture (from list_windows)
            output_path: Optional destination for the image; forwarded as-is
                to the window manager

        Returns:
            JSON string. On success it carries the status, the window id, the
            path reported by the window manager, whether that file exists and
            its size in megabytes. On any failure it carries the status, the
            error text and the original arguments.
        """
        try:
            saved_to = get_window_manager().capture_window(window_id, output_path)
            image_file = Path(saved_to)

            # exists() runs before stat(); a missing file makes stat() raise,
            # which is reported through the error branch below.
            exists = image_file.exists()
            size_mb = round(image_file.stat().st_size / (1024 * 1024), 2)

            return json.dumps(
                {
                    "status": "success",
                    "window_id": window_id,
                    "output_path": saved_to,
                    "file_exists": exists,
                    "file_size_mb": size_mb,
                },
                indent=2,
            )
        except Exception as e:
            logger.error(f"Failed to capture window {window_id}: {e}")
            failure = {
                "status": "error",
                "error": str(e),
                "window_id": window_id,
                "output_path": output_path,
            }
            return json.dumps(failure)
  • WindowsWindowManager.capture_window: Primary implementation for Windows via WSL2/PowerShell. Uses PrintWindow API, handles minimized windows with stealth restoration, full PowerShell automation.
        def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
            """
            Capture screenshot of a Windows application window.
            """
            if not self.powershell_available:
                raise Exception("PowerShell not available - cannot capture Windows applications")
            
            try:
                # Enhanced PowerShell script with PrintWindow support for minimized windows
                ps_script = f'''
                Add-Type -AssemblyName System.Windows.Forms
                Add-Type -AssemblyName System.Drawing
                
                $windowHandle = [IntPtr]{window_id}
                if ($windowHandle -eq 0) {{
                    Write-Error "Invalid window handle"
                    exit 1
                }}
                
                # Define comprehensive Windows API functions
                Add-Type @"
                    using System;
                    using System.Runtime.InteropServices;
                    public class Win32 {{
                        [StructLayout(LayoutKind.Sequential)]
                        public struct RECT {{
                            public int Left, Top, Right, Bottom;
                        }}
                        
                        // Window management APIs
                        [DllImport("user32.dll")]
                        public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect);
                        
                        [DllImport("user32.dll")]
                        public static extern bool IsIconic(IntPtr hWnd);
                        
                        [DllImport("user32.dll")]
                        public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
                        
                        [DllImport("user32.dll")]
                        public static extern bool PrintWindow(IntPtr hWnd, IntPtr hdcBlt, uint nFlags);
                        
                        [DllImport("user32.dll")]
                        public static extern bool SetLayeredWindowAttributes(IntPtr hWnd, uint crKey, byte bAlpha, uint dwFlags);
                        
                        [DllImport("user32.dll")]
                        public static extern int GetWindowLong(IntPtr hWnd, int nIndex);
                        
                        [DllImport("user32.dll")]
                        public static extern int SetWindowLong(IntPtr hWnd, int nIndex, int dwNewLong);
                        
                        [DllImport("user32.dll")]
                        public static extern bool SetForegroundWindow(IntPtr hWnd);
                        
                        [DllImport("user32.dll")]
                        public static extern IntPtr GetForegroundWindow();
                        
                        [DllImport("user32.dll")]
                        public static extern bool EnumChildWindows(IntPtr hWndParent, EnumChildProc lpEnumFunc, IntPtr lParam);
                        
                        [DllImport("user32.dll")]
                        public static extern int GetClassName(IntPtr hWnd, System.Text.StringBuilder lpClassName, int nMaxCount);
                        
                        [DllImport("user32.dll")]
                        public static extern IntPtr FindWindowEx(IntPtr hWndParent, IntPtr hWndChildAfter, string lpszClass, string lpszWindow);
                        
                        [DllImport("user32.dll")]
                        public static extern bool PostMessage(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam);
                        
                        [DllImport("user32.dll")]
                        public static extern IntPtr SendMessage(IntPtr hWnd, uint Msg, IntPtr wParam, IntPtr lParam);
                        
                        [DllImport("user32.dll")]
                        public static extern bool BringWindowToTop(IntPtr hWnd);
                        
                        public delegate bool EnumChildProc(IntPtr hWnd, IntPtr lParam);
                        
                        // Constants
                        public const int SW_HIDE = 0;
                        public const int SW_RESTORE = 9;
                        public const int SW_MINIMIZE = 6;
                        public const int SW_SHOW = 5;
                        public const int GWL_EXSTYLE = -20;
                        public const int WS_EX_LAYERED = 0x80000;
                        public const int LWA_ALPHA = 0x2;
                        public const uint PW_CLIENTONLY = 0x1;
                        public const uint PW_RENDERFULLCONTENT = 0x2;
                        
                        // Windows Messages
                        public const uint WM_KEYDOWN = 0x0100;
                        public const uint WM_KEYUP = 0x0101;
                        public const uint WM_CHAR = 0x0102;
                        public const uint WM_COMMAND = 0x0111;
                        public const uint WM_VSCROLL = 0x0115;
                        
                        // Virtual Key Codes
                        public const int VK_SPACE = 0x20;
                        public const int VK_PRIOR = 0x21;  // Page Up
                        public const int VK_NEXT = 0x22;   // Page Down
                        public const int VK_DOWN = 0x28;   // Down Arrow
                        public const int VK_UP = 0x26;     // Up Arrow
                        public const int VK_LEFT = 0x25;   // Left Arrow
                        public const int VK_RIGHT = 0x27;  // Right Arrow
                    }}
    "@
                
                # Check if window is minimized and implement smart capture logic
                $isMinimized = [Win32]::IsIconic($windowHandle)
                $wasMinimized = $false
                $originalExStyle = 0
                
                try {{
                    # Get window rectangle
                    $rect = New-Object Win32+RECT
                    $success = [Win32]::GetWindowRect($windowHandle, [ref]$rect)
                    
                    if (-not $success) {{
                        Write-Error "Could not get window rectangle"
                        exit 1
                    }}
                    
                    $width = $rect.Right - $rect.Left
                    $height = $rect.Bottom - $rect.Top
                    
                    if ($width -le 0 -or $height -le 0) {{
                        Write-Error "Invalid window dimensions: $($width)x$($height)"
                        exit 1
                    }}
                    
                    # Handle minimized windows with stealth restoration
                    if ($isMinimized) {{
                        Write-Verbose "Window is minimized, using stealth restoration technique"
                        $wasMinimized = $true
                        
                        # Make window transparent for stealth operation
                        $originalExStyle = [Win32]::GetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE)
                        $newExStyle = $originalExStyle -bor [Win32]::WS_EX_LAYERED
                        [Win32]::SetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE, $newExStyle) | Out-Null
                        [Win32]::SetLayeredWindowAttributes($windowHandle, 0, 1, [Win32]::LWA_ALPHA) | Out-Null
                        
                        # Restore window temporarily
                        [Win32]::ShowWindow($windowHandle, [Win32]::SW_RESTORE) | Out-Null
                        Start-Sleep -Milliseconds 100  # Brief pause for window to render
                        
                        # Update rectangle after restoration
                        $success = [Win32]::GetWindowRect($windowHandle, [ref]$rect)
                        if ($success) {{
                            $width = $rect.Right - $rect.Left
                            $height = $rect.Bottom - $rect.Top
                        }}
                    }}
                    
                    # Create bitmap for capture
                    $bitmap = New-Object System.Drawing.Bitmap($width, $height)
                    $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
                    
                    # Try PrintWindow first (better for minimized/hidden windows)
                    $hdcBitmap = $graphics.GetHdc()
                    $printSuccess = [Win32]::PrintWindow($windowHandle, $hdcBitmap, [Win32]::PW_RENDERFULLCONTENT)
                    $graphics.ReleaseHdc($hdcBitmap)
                    
                    # Fallback to CopyFromScreen if PrintWindow fails
                    if (-not $printSuccess) {{
                        Write-Verbose "PrintWindow failed, falling back to CopyFromScreen"
                        if (-not $isMinimized) {{
                            $graphics.CopyFromScreen($rect.Left, $rect.Top, 0, 0, [System.Drawing.Size]::new($width, $height))
                        }} else {{
                            throw "Cannot capture minimized window with CopyFromScreen"
                        }}
                    }}
                    
                    $graphics.Dispose()
                    
                    # Save to temporary file
                    $tempPath = [System.IO.Path]::GetTempFileName() + ".png"
                    $bitmap.Save($tempPath, [System.Drawing.Imaging.ImageFormat]::Png)
                    $bitmap.Dispose()
                    
                    Write-Output $tempPath
                    
                }} finally {{
                    # Restore original window state if it was modified
                    if ($wasMinimized) {{
                        # Restore original transparency
                        [Win32]::SetWindowLong($windowHandle, [Win32]::GWL_EXSTYLE, $originalExStyle) | Out-Null
                        
                        # Re-minimize the window
                        [Win32]::ShowWindow($windowHandle, [Win32]::SW_MINIMIZE) | Out-Null
                    }}
                }}
                '''
                
                result = subprocess.run(
                    ['powershell.exe', '-Command', ps_script],
                    capture_output=True,
                    text=True,
                    check=True,
                    timeout=60,  # 60 second timeout for screenshot capture
                    encoding='utf-8',
                    errors='ignore'  # Ignore encoding errors to handle special characters
                )
                
                temp_windows_path = result.stdout.strip()
                if not temp_windows_path:
                    raise Exception("PowerShell did not return temp file path")
                
                # Convert Windows path to WSL path
                wsl_temp_path = self._windows_path_to_wsl(temp_windows_path)
                
                if output_path is None:
                    timestamp = int(time.time())
                    output_path = f"windows_capture_{window_id}_{timestamp}.png"
                
                # Copy from Windows temp to desired location
                subprocess.run(['cp', wsl_temp_path, output_path], check=True)
                
                # Clean up Windows temp file
                subprocess.run(
                    ['powershell.exe', '-Command', f'Remove-Item "{temp_windows_path}" -Force'],
                    check=False  # Don't fail if cleanup fails
                )
                
                logger.info(f"Windows application captured: {output_path}")
                return output_path
  • WindowCapture.capture_window: Linux/X11 implementation using wmctrl for window focus/geometry and pyscreenshot (PIL) for region capture.
    def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
        """
        Grab an image of one window and write it to disk.

        The window is focused first; a focus failure is only logged and the
        capture still proceeds. When the window geometry is available the
        grab is limited to that rectangle, otherwise the whole screen is
        grabbed instead.

        Args:
            window_id: Identifier of the window to capture.
            output_path: Destination file. When None, a name of the form
                ``window_<window_id>_<unix timestamp>.png`` is used.

        Returns:
            The path the image was saved to.

        Raises:
            Exception: Any error from the steps above is logged and re-raised.
        """
        try:
            focused = self.focus_window(window_id)
            if not focused:
                logger.warning(f"Could not focus window {window_id}, trying capture anyway")

            region = self.get_window_geometry(window_id)
            if not region:
                logger.warning(f"Could not get geometry for window {window_id}, capturing full screen")
                image = ImageGrab.grab()
            else:
                left, top, width, height = region
                # bbox is given as (left, top, right, bottom)
                image = ImageGrab.grab(bbox=(left, top, left + width, top + height))

            target = output_path
            if target is None:
                target = f"window_{window_id}_{int(time.time())}.png"

            image.save(target)
            logger.info(f"Window captured: {target}")
            return target

        except Exception as e:
            logger.error(f"Failed to capture window {window_id}: {e}")
            raise
  • CrossPlatformWindowManager: Dispatches capture_window calls to platform-specific implementations (WindowsWindowManager for WSL/Windows, WindowCapture for Linux). Called by MCP handler.
    class CrossPlatformWindowManager:
        """
        Single entry point for window operations across environments.

        On construction the runtime environment is detected and a matching
        platform manager is created; the public methods forward to it.
        """

        def __init__(self):
            self.environment = detect_environment()
            self.manager = self._create_manager()

        def _create_manager(self):
            """Pick the platform manager that matches the detected environment."""
            env = self.environment

            if env == 'windows':
                logger.info("Using Windows application manager")
                return WindowsWindowManager()

            if env == 'wsl':
                # Under WSL the Windows manager is preferred, but only when
                # PowerShell is reachable; otherwise use the X11 manager.
                candidate = WindowsWindowManager()
                if not candidate.powershell_available:
                    logger.info("PowerShell unavailable, falling back to Linux X11 manager")
                    return WindowCapture()
                logger.info("Using Windows application manager (PowerShell from WSL2)")
                return candidate

            # Anything else is treated as plain Linux.
            logger.info("Using Linux X11 window manager")
            return WindowCapture()

        def list_windows(self) -> List[Dict[str, str]]:
            """
            Return the manager's window list, tagged with environment details.

            Every entry gets an 'environment' key. A 'type' key is added only
            when the manager did not supply one. Any failure is logged and an
            empty list is returned.
            """
            try:
                found = self.manager.list_windows()
                default_type = 'x11' if self.environment == 'linux' else self.environment

                for entry in found:
                    entry['environment'] = self.environment
                    entry.setdefault('type', default_type)

                return found
            except Exception as e:
                logger.error(f"Failed to list windows: {e}")
                return []

        def capture_window(self, window_id: str, output_path: Optional[str] = None) -> str:
            """Forward a single-window capture to the active manager."""
            return self.manager.capture_window(window_id, output_path)

        def capture_full_screen(self, output_path: Optional[str] = None) -> str:
            """
            Capture the whole screen.

            Uses the manager's own capture_full_screen when it has one;
            otherwise grabs the screen with ImageGrab and saves it, defaulting
            the file name to ``screenshot_<unix timestamp>.png``. Errors in
            that fallback path are logged and re-raised.
            """
            if hasattr(self.manager, 'capture_full_screen'):
                return self.manager.capture_full_screen(output_path)

            # The manager has no full-screen support: grab the screen directly.
            try:
                image = ImageGrab.grab()

                target = output_path
                if target is None:
                    target = f"screenshot_{int(time.time())}.png"

                image.save(target)
                logger.info(f"Full screen captured: {target}")
                return target

            except Exception as e:
                logger.error(f"Failed to capture full screen: {e}")
                raise

        def get_environment_info(self) -> Dict[str, str]:
            """
            Describe the detected environment and the active manager.

            Always includes 'environment' and 'manager_type'. Adds
            'powershell_available' for the Windows manager, or
            'missing_dependencies' and 'x11_available' for the X11 manager.
            """
            active = self.manager
            details = {
                'environment': self.environment,
                'manager_type': type(active).__name__
            }

            if isinstance(active, WindowsWindowManager):
                details['powershell_available'] = active.powershell_available
            elif isinstance(active, WindowCapture):
                missing = check_dependencies()
                details['missing_dependencies'] = missing
                details['x11_available'] = len(missing) == 0

            return details

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/PovedaAqui/auto-snap-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.