full_document_workflow
Capture document pages, enhance images if needed, and convert to PDF. Automates navigation, processing, and conversion for complete document workflow. Outputs results in JSON format.
Instructions
Complete workflow: capture document pages, optionally process them, and convert to PDF.
Args:
window_id: Window ID containing the document
page_count: Number of pages to capture
output_pdf: Path for the final PDF file
capture_dir: Temporary directory for captures
title: Optional PDF title
navigation_key: Key for page navigation
delay_seconds: Delay between navigation and capture
process_images_flag: Whether to enhance images before PDF conversion
Returns:
JSON string with complete workflow results.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| capture_dir | No | ||
| delay_seconds | No | ||
| navigation_key | No | | Page_Down |
| output_pdf | Yes | ||
| page_count | Yes | ||
| process_images_flag | No | ||
| title | No | ||
| window_id | Yes | | |
Implementation Reference
# server.py:415-564 (handler)
# The @mcp.tool() decorated async function that implements the
# full_document_workflow tool. It orchestrates capturing multiple document
# pages from a specified window, optionally processing/enhancing the images,
# converting them to a PDF, and optionally cleaning up temporary files.
# Returns JSON with workflow results.
@mcp.tool()
async def full_document_workflow(
    window_id: str,
    page_count: int,
    output_pdf: str,
    capture_dir: Optional[str] = None,
    title: Optional[str] = None,
    navigation_key: str = "Page_Down",
    delay_seconds: float = 1.0,
    process_images_flag: bool = True
) -> str:
    """
    Complete workflow: capture document pages, optionally process them, and convert to PDF.

    Args:
        window_id: Window ID containing the document
        page_count: Number of pages to capture (must be at least 1)
        output_pdf: Path for the final PDF file
        capture_dir: Temporary directory for captures
        title: Optional PDF title
        navigation_key: Key for page navigation
        delay_seconds: Delay between navigation and capture
        process_images_flag: Whether to enhance images before PDF conversion

    Returns:
        JSON string with complete workflow results. On success the object has
        "status": "success", a "steps" list (capture, optional processing,
        pdf_conversion, optional cleanup) plus "window_id", "pages_captured",
        "final_pdf" and "pdf_info". On any failure the object has
        "status": "error" with "error", "window_id", "page_count" and
        "output_pdf"; exceptions are not propagated to the caller.
    """
    # Function-scope imports keep this handler self-contained regardless of
    # what the module imports at the top.
    import asyncio
    import shutil
    from pathlib import Path

    try:
        if page_count < 1:
            raise ValueError(f"page_count must be at least 1, got {page_count}")

        # Get configured temp directory (with backward compatibility)
        config = get_config()
        use_default_dir = capture_dir is None
        legacy_default = use_default_dir and config.should_use_legacy_mode()

        if not use_default_dir:
            actual_capture_dir = get_temp_directory(capture_dir)
        elif legacy_default:
            # Legacy default. Previously this branch only rebound capture_dir
            # and left actual_capture_dir undefined, so the next statement
            # raised UnboundLocalError and the whole workflow failed.
            actual_capture_dir = Path("temp_captures")
        else:
            actual_capture_dir = get_temp_directory()

        # Convert to string for compatibility with existing manager interface
        capture_dir_str = str(actual_capture_dir)

        workflow_results = {
            "status": "success",
            "steps": []
        }

        # Step 1: Capture pages
        logger.info("Step 1: Capturing document pages")
        wm = get_window_manager()

        if hasattr(wm.manager, 'capture_multiple_pages'):
            captured_files = wm.manager.capture_multiple_pages(
                window_id=window_id,
                page_count=page_count,
                output_dir=capture_dir_str,
                navigation_key=navigation_key,
                delay_seconds=delay_seconds
            )
        else:
            # Fallback for Windows applications
            # NOTE(review): this fallback never sends navigation_key; it only
            # waits between captures. Confirm whether capture_window (or the
            # user) is expected to advance the page.
            actual_capture_dir.mkdir(parents=True, exist_ok=True)
            captured_files = []

            for page_num in range(1, page_count + 1):
                filename = generate_page_filename(page_num)
                output_path = actual_capture_dir / filename

                captured_path = wm.capture_window(window_id, str(output_path))
                captured_files.append(captured_path)

                if page_num < page_count:
                    # Non-blocking wait: time.sleep() here would stall the
                    # event loop for every other coroutine.
                    await asyncio.sleep(delay_seconds)

        workflow_results["steps"].append({
            "step": "capture",
            "status": "success",
            "files_captured": len(captured_files),
            "output_directory": capture_dir_str
        })

        # Step 2: Process images (if requested)
        processed_files = captured_files
        if process_images_flag:
            logger.info("Step 2: Processing captured images")
            ip = get_image_processor()

            processing_results = ip.process_batch(
                capture_dir_str,
                ["enhance"]
            )

            # Tolerate a missing/empty "enhanced_files" entry: fall back to
            # the raw captures instead of failing the workflow on a KeyError.
            enhanced_files = processing_results.get("enhanced_files") or []
            if enhanced_files:
                processed_files = enhanced_files

            workflow_results["steps"].append({
                "step": "processing",
                "status": "success",
                "enhanced_files": len(enhanced_files)
            })

        # Step 3: Convert to PDF
        logger.info("Step 3: Converting to PDF")
        pc = get_pdf_converter()

        pdf_path = pc.images_to_pdf(
            processed_files,
            output_pdf,
            sort_files=True,
            title=title or f"Document captured from window {window_id}"
        )

        pdf_info = pc.get_pdf_info(pdf_path)

        workflow_results["steps"].append({
            "step": "pdf_conversion",
            "status": "success",
            "output_pdf": pdf_path,
            "pdf_info": pdf_info
        })

        # Step 4: Cleanup temporary files (optional). Best-effort: a cleanup
        # failure is logged but does not fail the workflow.
        try:
            if config.get_config_summary().get("auto_cleanup_temp", True):
                # Only cleanup if it's a temp directory or legacy temp pattern
                managed_temp_dir = use_default_dir and not legacy_default
                # NOTE(review): the substring test also matches user-supplied
                # paths such as ".../templates"; behavior kept as-is, but
                # consider tightening before deleting caller-owned directories.
                looks_temporary = (
                    capture_dir_str.startswith("temp_")
                    or "temp" in capture_dir_str.lower()
                )
                if managed_temp_dir or looks_temporary:
                    shutil.rmtree(capture_dir_str)
                    workflow_results["steps"].append({
                        "step": "cleanup",
                        "status": "success",
                        "cleaned_directory": capture_dir_str
                    })
        except Exception as cleanup_error:
            logger.warning(f"Failed to cleanup {capture_dir_str}: {cleanup_error}")

        workflow_results.update({
            "window_id": window_id,
            "pages_captured": len(captured_files),
            "final_pdf": pdf_path,
            "pdf_info": pdf_info
        })

        return json.dumps(workflow_results, indent=2)

    except Exception as e:
        # Top-level tool boundary: report the failure as JSON rather than
        # letting the exception escape to the MCP runtime.
        logger.error(f"Workflow failed: {e}")
        return json.dumps({
            "status": "error",
            "error": str(e),
            "window_id": window_id,
            "page_count": page_count,
            "output_pdf": output_pdf
        })