capture_document_pages
Capture multiple document pages automatically by navigating through them with configurable keys and saving screenshots to a specified directory.
Instructions
Capture multiple pages from a document window with automatic navigation.
Args:
window_id: Window ID containing the document
page_count: Number of pages to capture
output_dir: Directory to save captured pages
navigation_key: Key to press for navigation (Page_Down, Right, space)
delay_seconds: Delay between navigation and capture
Returns:
JSON string with capture results and list of captured files.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| window_id | Yes | ||
| page_count | Yes | ||
| output_dir | No | ||
| navigation_key | No | | Page_Down |
| delay_seconds | No | | 1.0 |
Implementation Reference
# server.py:173-256 (handler)
# The handler function for the 'capture_document_pages' tool, registered via
# @mcp.tool(). It captures multiple pages from a document window, using the
# window manager's multi-page capture if available, or a fallback loop with
# individual captures and delays. Returns JSON results with captured file
# paths and metadata.
@mcp.tool()
async def capture_document_pages(
    window_id: str,
    page_count: int,
    output_dir: Optional[str] = None,
    navigation_key: str = "Page_Down",
    delay_seconds: float = 1.0
) -> str:
    """
    Capture multiple pages from a document window with automatic navigation.

    Args:
        window_id: Window ID containing the document
        page_count: Number of pages to capture
        output_dir: Directory to save captured pages
        navigation_key: Key to press for navigation (Page_Down, Right, space)
        delay_seconds: Delay between navigation and capture

    Returns:
        JSON string with capture results and list of captured files.
    """
    # NOTE(review): the docstring wording is kept verbatim because it appears
    # to be surfaced to clients as the tool description - confirm before
    # rewording it.

    # Function-scope imports keep this handler self-contained regardless of
    # what the module imports at file level.
    import asyncio
    import os
    from pathlib import Path

    try:
        # Get configured output directory (with backward compatibility)
        config = get_config()
        if output_dir is None:
            if config.should_use_legacy_mode():
                # Legacy default. The directory object itself must be bound
                # here: previously only `output_dir` was assigned, so the
                # later use of `actual_output_dir` raised UnboundLocalError
                # and legacy mode always ended in the error response.
                actual_output_dir = Path("captures")
            else:
                actual_output_dir = get_output_directory()
        else:
            actual_output_dir = get_output_directory(output_dir)

        # Convert to string for compatibility with existing manager interface
        output_dir_str = str(actual_output_dir)

        # For multi-page capture, use the underlying manager if it supports it
        wm = get_window_manager()
        if hasattr(wm.manager, 'capture_multiple_pages'):
            captured_files = wm.manager.capture_multiple_pages(
                window_id=window_id,
                page_count=page_count,
                output_dir=output_dir_str,
                navigation_key=navigation_key,
                delay_seconds=delay_seconds
            )
        else:
            # Fallback: capture individual pages manually. This path does not
            # send `navigation_key`; it only waits between captures
            # (no navigation for Windows apps yet).
            actual_output_dir.mkdir(parents=True, exist_ok=True)
            captured_files = []

            for page_num in range(1, page_count + 1):
                filename = generate_page_filename(page_num)
                output_path = actual_output_dir / filename

                captured_path = wm.capture_window(window_id, str(output_path))
                captured_files.append(captured_path)

                # Wait between captures, but not after the last one. Use the
                # asyncio sleep so the event loop is not blocked while waiting.
                if page_num < page_count:
                    await asyncio.sleep(delay_seconds)

        # Only files that actually exist on disk contribute to the total.
        total_size = sum(
            os.path.getsize(f) for f in captured_files if os.path.exists(f)
        )

        result = {
            "status": "success",
            "window_id": window_id,
            "pages_captured": len(captured_files),
            "output_directory": output_dir_str,
            "captured_files": captured_files,
            "total_size_mb": round(total_size / (1024 * 1024), 2)
        }

        return json.dumps(result, indent=2)

    except Exception as e:
        # Tool boundary: report every failure to the caller as a JSON error
        # payload instead of letting the exception propagate.
        logger.error(f"Failed to capture document pages: {e}")
        return json.dumps({
            "status": "error",
            "error": str(e),
            "window_id": window_id,
            "page_count": page_count,
            "captured_files": []
        })