nutjs-windows-control

by Cheffromspace
Verified
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { ListToolsRequestSchema, CallToolRequestSchema, TextContent } from "@modelcontextprotocol/sdk/types.js";
import { MousePosition, KeyboardInput, KeyCombination, ClipboardInput, KeyHoldOperation, ScreenshotOptions } from "../types/common.js";
import { WindowsControlResponse } from "../types/responses.js";
// All tool functions now come from the provider
// Provider is now passed from the main server instance
import { AutomationProvider } from "../interfaces/provider.js";

/** Mouse buttons accepted by the mouse tools. */
const MOUSE_BUTTONS = ['left', 'right', 'middle'] as const;

/** Resize fit modes accepted by the `get_screenshot` tool. */
const RESIZE_FIT_VALUES = ['contain', 'cover', 'fill', 'inside', 'outside'] as const;

/** Union of the literal values in {@link RESIZE_FIT_VALUES}. */
type ResizeFit = (typeof RESIZE_FIT_VALUES)[number];

/** Rectangle shape expected for the `region` argument of `get_screenshot`. */
interface ScreenRegion {
  x: number;
  y: number;
  width: number;
  height: number;
}

/**
 * Validates the mouse button parameter and returns a valid button value.
 *
 * Anything that is not one of the three known button names (including a
 * missing value or a non-string) falls back to `'left'` instead of throwing.
 *
 * @param button The button parameter to validate
 * @returns A validated mouse button value: 'left', 'right', or 'middle'
 */
function validateButton(button?: unknown): 'left' | 'right' | 'middle' {
  if (typeof button === 'string' && (MOUSE_BUTTONS as readonly string[]).includes(button)) {
    return button as 'left' | 'right' | 'middle';
  }
  return 'left';
}

/**
 * Narrows an unknown value to a non-null object that can be probed by key.
 * Shared by every argument type guard in this module.
 *
 * @param value The value to inspect
 * @returns True when `value` is an object and not `null`
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Checks whether a value carries numeric `x`, `y`, `width` and `height`.
 *
 * @param value The candidate `region` argument
 * @returns True when all four coordinates are numbers
 */
function isScreenRegion(value: unknown): value is ScreenRegion {
  if (!isRecord(value)) return false;
  return (
    typeof value.x === 'number' &&
    typeof value.y === 'number' &&
    typeof value.width === 'number' &&
    typeof value.height === 'number'
  );
}

/**
 * Checks whether a value is one of the supported resize fit modes.
 *
 * @param value The candidate `fit` argument
 * @returns True when `value` is listed in {@link RESIZE_FIT_VALUES}
 */
function isResizeFit(value: unknown): value is ResizeFit {
  return typeof value === 'string' && (RESIZE_FIT_VALUES as readonly string[]).includes(value);
}

/**
 * Builds the options for a `get_screenshot` call from raw tool arguments.
 *
 * Starts from the defaults (JPEG, quality 85, grayscale, 1280px wide with
 * `contain` fit) and overrides a setting only when the caller supplied a value
 * of the expected type. Values of the wrong type are ignored rather than
 * rejected, so a malformed argument never fails the screenshot request here.
 *
 * @param args Raw tool arguments, possibly undefined
 * @returns Screenshot options ready to hand to the provider
 */
function buildScreenshotOptions(args: Record<string, unknown> | undefined): ScreenshotOptions {
  // Held as its own local so it is known to be defined while overrides are applied.
  const resize: NonNullable<ScreenshotOptions['resize']> = {
    width: 1280,
    fit: 'contain'
  };

  // Default values for text-heavy content readability
  const options: ScreenshotOptions = {
    format: 'jpeg',
    quality: 85,
    grayscale: true,
    resize
  };

  const region = args?.region;
  if (isScreenRegion(region)) {
    // Copy only the four known fields; any extra keys on the input are dropped.
    options.region = {
      x: region.x,
      y: region.y,
      width: region.width,
      height: region.height
    };
  }

  const format = args?.format;
  if (format === 'jpeg' || format === 'png') {
    options.format = format;
  }

  const quality = args?.quality;
  if (typeof quality === 'number') {
    options.quality = quality;
  }

  const grayscale = args?.grayscale;
  if (typeof grayscale === 'boolean') {
    options.grayscale = grayscale;
  }

  const compressionLevel = args?.compressionLevel;
  if (typeof compressionLevel === 'number') {
    options.compressionLevel = compressionLevel;
  }

  // Preserve the default resize settings that weren't explicitly overridden
  const requestedResize = args?.resize;
  if (isRecord(requestedResize)) {
    const { width, height, fit } = requestedResize;
    if (typeof width === 'number') {
      resize.width = width;
    }
    if (typeof height === 'number') {
      resize.height = height;
    }
    if (isResizeFit(fit)) {
      resize.fit = fit;
    }
  }

  return options;
}

/**
 * Set up automation tools on the MCP server using the provided automation provider.
 * This function implements the provider pattern for all tool handlers, allowing
 * for dependency injection of automation implementations.
 *
 * The provider pattern offers several benefits:
 * - Testability: Makes unit testing easier by allowing mock providers
 * - Flexibility: Allows changing provider implementations without changing tool handlers
 * - Consistency: Ensures all automation is handled through a single provider interface
 * - Maintainability: Reduces direct dependencies on specific implementation details
 *
 * Two request handlers are registered: one that lists the available tools with
 * their input schemas, and one that validates the arguments of a tool call and
 * forwards it to the provider. Any error thrown while handling a call is caught
 * and returned as a text content item with `isError: true`.
 *
 * @param server The Model Context Protocol server instance
 * @param provider The automation provider implementation that will handle system interactions
 */
export function setupTools(server: Server, provider: AutomationProvider): void {
  // List available tools
  server.setRequestHandler(ListToolsRequestSchema, () => ({
    tools: [
      {
        name: "get_screenshot",
        description: "Take a screenshot optimized for AI readability, especially for text-heavy content. Uses default settings: JPEG format, 85% quality, grayscale enabled, and 1280px width (preserving aspect ratio). Supports region capture, format options, quality adjustment, and custom resize settings.",
        inputSchema: {
          type: "object",
          properties: {
            region: {
              type: "object",
              properties: {
                x: { type: "number", description: "X coordinate of the region" },
                y: { type: "number", description: "Y coordinate of the region" },
                width: { type: "number", description: "Width of the region" },
                height: { type: "number", description: "Height of the region" }
              },
              required: ["x", "y", "width", "height"],
              description: "Specific region to capture (optional)"
            },
            format: {
              type: "string",
              enum: ["png", "jpeg"],
              default: "jpeg",
              description: "Output format of the screenshot"
            },
            quality: {
              type: "number",
              minimum: 1,
              maximum: 100,
              default: 85,
              description: "JPEG quality (1-100, higher = better quality), only used for JPEG format"
            },
            grayscale: {
              type: "boolean",
              default: true,
              description: "Convert to grayscale"
            },
            compressionLevel: {
              type: "number",
              minimum: 0,
              maximum: 9,
              default: 6,
              description: "PNG compression level (0-9, higher = better compression), only used for PNG format"
            },
            resize: {
              type: "object",
              properties: {
                width: { type: "number", default: 1280, description: "Target width" },
                height: { type: "number", description: "Target height" },
                fit: {
                  type: "string",
                  enum: ["contain", "cover", "fill", "inside", "outside"],
                  default: "contain",
                  description: "Resize fit option"
                }
              },
              default: { width: 1280, fit: "contain" },
              description: "Resize options for the screenshot"
            }
          }
        }
      },
      {
        name: "click_at",
        description: "Move mouse to coordinates, click, then return to original position",
        inputSchema: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate" },
            y: { type: "number", description: "Y coordinate" },
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              default: "left",
              description: "Mouse button to click"
            }
          },
          required: ["x", "y"]
        }
      },
      {
        name: "move_mouse",
        description: "Move the mouse cursor to specific coordinates",
        inputSchema: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate" },
            y: { type: "number", description: "Y coordinate" }
          },
          required: ["x", "y"]
        }
      },
      {
        name: "click_mouse",
        description: "Click the mouse at the current position",
        inputSchema: {
          type: "object",
          properties: {
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              default: "left",
              description: "Mouse button to click"
            }
          }
        }
      },
      {
        name: "drag_mouse",
        description: "Drag the mouse from one position to another",
        inputSchema: {
          type: "object",
          properties: {
            fromX: { type: "number", description: "Starting X coordinate" },
            fromY: { type: "number", description: "Starting Y coordinate" },
            toX: { type: "number", description: "Ending X coordinate" },
            toY: { type: "number", description: "Ending Y coordinate" },
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              default: "left",
              description: "Mouse button to use for dragging"
            }
          },
          required: ["fromX", "fromY", "toX", "toY"]
        }
      },
      {
        name: "scroll_mouse",
        description: "Scroll the mouse wheel up or down",
        inputSchema: {
          type: "object",
          properties: {
            amount: { type: "number", description: "Amount to scroll (positive for down, negative for up)" }
          },
          required: ["amount"]
        }
      },
      {
        name: "type_text",
        description: "Type text using the keyboard",
        inputSchema: {
          type: "object",
          properties: {
            text: { type: "string", description: "Text to type" }
          },
          required: ["text"]
        }
      },
      {
        name: "press_key",
        description: "Press a specific keyboard key",
        inputSchema: {
          type: "object",
          properties: {
            key: { type: "string", description: "Key to press (e.g., 'enter', 'tab', 'escape')" }
          },
          required: ["key"]
        }
      },
      {
        name: "hold_key",
        description: "Hold or release a keyboard key with optional duration",
        inputSchema: {
          type: "object",
          properties: {
            key: { type: "string", description: "Key to hold/release (e.g., 'shift', 'control')" },
            duration: { type: "number", description: "Duration to hold the key in milliseconds (only for 'down' state)" },
            state: { type: "string", enum: ["down", "up"], description: "Whether to press down or release the key" }
          },
          required: ["key", "state"]
        }
      },
      {
        name: "press_key_combination",
        description: "Press multiple keys simultaneously (e.g., keyboard shortcuts)",
        inputSchema: {
          type: "object",
          properties: {
            keys: {
              type: "array",
              items: { type: "string" },
              description: "Array of keys to press simultaneously (e.g., ['control', 'c'])"
            }
          },
          required: ["keys"]
        }
      },
      {
        name: "get_screen_size",
        description: "Get the screen dimensions",
        inputSchema: { type: "object", properties: {} }
      },
      {
        name: "get_cursor_position",
        description: "Get the current cursor position",
        inputSchema: { type: "object", properties: {} }
      },
      {
        name: "double_click",
        description: "Double click at current or specified position",
        inputSchema: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate (optional)" },
            y: { type: "number", description: "Y coordinate (optional)" }
          }
        }
      },
      {
        name: "get_active_window",
        description: "Get information about the currently active window",
        inputSchema: { type: "object", properties: {} }
      },
      {
        name: "focus_window",
        description: "Focus a specific window by its title",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to focus" }
          },
          required: ["title"]
        }
      },
      {
        name: "resize_window",
        description: "Resize a specific window by its title",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to resize" },
            width: { type: "number", description: "New width of the window" },
            height: { type: "number", description: "New height of the window" }
          },
          required: ["title", "width", "height"]
        }
      },
      {
        name: "reposition_window",
        description: "Move a specific window to new coordinates",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to move" },
            x: { type: "number", description: "New X coordinate" },
            y: { type: "number", description: "New Y coordinate" }
          },
          required: ["title", "x", "y"]
        }
      },
      {
        name: "minimize_window",
        description: "Minimize a specific window by its title (currently unsupported)",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to minimize" }
          },
          required: ["title"]
        }
      },
      {
        name: "restore_window",
        description: "Restore a minimized window by its title (currently unsupported)",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to restore" }
          },
          required: ["title"]
        }
      },
      {
        name: "get_clipboard_content",
        description: "Get the current text content from the clipboard",
        inputSchema: { type: "object", properties: {} }
      },
      {
        name: "set_clipboard_content",
        description: "Set text content to the clipboard",
        inputSchema: {
          type: "object",
          properties: {
            text: { type: "string", description: "Text to copy to clipboard" }
          },
          required: ["text"]
        }
      },
      {
        name: "has_clipboard_text",
        description: "Check if the clipboard contains text",
        inputSchema: { type: "object", properties: {} }
      },
      {
        name: "clear_clipboard",
        description: "Clear the clipboard content",
        inputSchema: { type: "object", properties: {} }
      }
    ]
  }));

  // Handle tool calls
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    try {
      const { name, arguments: args } = request.params;
      let response;

      // Use the provider passed from the server instance.
      // Each case validates its own arguments before touching the provider, so
      // a malformed request never reaches the automation layer.
      switch (name) {
        case "get_screenshot": {
          // Validate and convert screenshot options with AI-optimized defaults
          const screenshotOptions = buildScreenshotOptions(args);
          response = await provider.screen.getScreenshot(screenshotOptions);
          break;
        }

        case "click_at":
          if (typeof args?.x !== 'number' || typeof args?.y !== 'number') {
            throw new Error("Invalid click_at arguments");
          }
          response = provider.mouse.clickAt(
            args.x,
            args.y,
            validateButton(args?.button)
          );
          break;

        case "move_mouse":
          if (!isMousePosition(args)) {
            throw new Error("Invalid mouse position arguments");
          }
          response = provider.mouse.moveMouse(args);
          break;

        case "click_mouse":
          response = provider.mouse.clickMouse(
            validateButton(args?.button)
          );
          break;

        case "drag_mouse":
          if (typeof args?.fromX !== 'number' ||
              typeof args?.fromY !== 'number' ||
              typeof args?.toX !== 'number' ||
              typeof args?.toY !== 'number') {
            throw new Error("Invalid drag mouse arguments");
          }
          response = provider.mouse.dragMouse(
            { x: args.fromX, y: args.fromY },
            { x: args.toX, y: args.toY },
            validateButton(args?.button)
          );
          break;

        case "scroll_mouse":
          if (typeof args?.amount !== 'number') {
            throw new Error("Invalid scroll amount argument");
          }
          response = provider.mouse.scrollMouse(args.amount);
          break;

        case "type_text":
          if (!isKeyboardInput(args)) {
            throw new Error("Invalid keyboard input arguments");
          }
          response = provider.keyboard.typeText(args);
          break;

        case "press_key":
          if (typeof args?.key !== 'string') {
            throw new Error("Invalid key press arguments");
          }
          response = provider.keyboard.pressKey(args.key);
          break;

        case "hold_key":
          if (!isKeyHoldOperation(args)) {
            throw new Error("Invalid key hold arguments");
          }
          response = await provider.keyboard.holdKey(args);
          break;

        case "press_key_combination":
          if (!isKeyCombination(args)) {
            throw new Error("Invalid key combination arguments");
          }
          response = await provider.keyboard.pressKeyCombination(args);
          break;

        case "get_screen_size":
          response = provider.screen.getScreenSize();
          break;

        case "get_cursor_position":
          response = provider.mouse.getCursorPosition();
          break;

        case "double_click":
          // Coordinates are used only when both are numeric; otherwise the
          // double click is issued without a position.
          if (args && typeof args.x === 'number' && typeof args.y === 'number') {
            response = provider.mouse.doubleClick({ x: args.x, y: args.y });
          } else {
            response = provider.mouse.doubleClick();
          }
          break;

        case "get_active_window":
          response = provider.screen.getActiveWindow();
          break;

        case "focus_window":
          if (typeof args?.title !== 'string') {
            throw new Error("Invalid window title argument");
          }
          response = provider.screen.focusWindow(args.title);
          break;

        case "resize_window":
          if (typeof args?.title !== 'string' ||
              typeof args?.width !== 'number' ||
              typeof args?.height !== 'number') {
            throw new Error("Invalid window resize arguments");
          }
          response = provider.screen.resizeWindow(args.title, args.width, args.height);
          break;

        case "reposition_window":
          if (typeof args?.title !== 'string' ||
              typeof args?.x !== 'number' ||
              typeof args?.y !== 'number') {
            throw new Error("Invalid window reposition arguments");
          }
          response = provider.screen.repositionWindow(args.title, args.x, args.y);
          break;

        case "minimize_window":
          if (typeof args?.title !== 'string') {
            throw new Error("Invalid window title argument");
          }
          // Not forwarded to the provider: reported as an unsuccessful result.
          response = {
            success: false,
            message: "Minimize window operation is not supported"
          };
          break;

        case "restore_window":
          if (typeof args?.title !== 'string') {
            throw new Error("Invalid window title argument");
          }
          // Not forwarded to the provider: reported as an unsuccessful result.
          response = {
            success: false,
            message: "Restore window operation is not supported"
          };
          break;

        case "get_clipboard_content":
          response = await provider.clipboard.getClipboardContent();
          break;

        case "set_clipboard_content":
          if (!isClipboardInput(args)) {
            throw new Error("Invalid clipboard input arguments");
          }
          response = await provider.clipboard.setClipboardContent(args);
          break;

        case "has_clipboard_text":
          response = await provider.clipboard.hasClipboardText();
          break;

        case "clear_clipboard":
          response = await provider.clipboard.clearClipboard();
          break;

        default:
          throw new Error(`Unknown tool: ${name}`);
      }

      // Handle special case for screenshot which returns content with image data:
      // a response whose first content item is an image is passed through as-is.
      const typedResponse = response as WindowsControlResponse;
      if ('content' in typedResponse &&
          typedResponse.content &&
          Array.isArray(typedResponse.content) &&
          typedResponse.content.length > 0 &&
          typedResponse.content[0] &&
          typeof typedResponse.content[0] === 'object' &&
          'type' in typedResponse.content[0] &&
          typedResponse.content[0].type === "image") {
        return {
          content: typedResponse.content
        };
      }

      // For all other responses, return as text
      return {
        content: [{
          type: "text",
          text: JSON.stringify(response, null, 2)
        }]
      };
    } catch (error) {
      // Validation failures and provider errors both end up here and are
      // reported to the client instead of propagating out of the handler.
      const errorContent: TextContent = {
        type: "text",
        text: `Error: ${error instanceof Error ? error.message : String(error)}`
      };

      return {
        content: [errorContent],
        isError: true
      };
    }
  });
}

/**
 * Type guard to validate if an object matches the MousePosition interface
 * @param args The object to validate
 * @returns True if the object has numeric `x` and `y` properties
 */
function isMousePosition(args: unknown): args is MousePosition {
  return isRecord(args) && typeof args.x === 'number' && typeof args.y === 'number';
}

/**
 * Type guard to validate if an object matches the KeyboardInput interface
 * @param args The object to validate
 * @returns True if the object has a string `text` property
 */
function isKeyboardInput(args: unknown): args is KeyboardInput {
  return isRecord(args) && typeof args.text === 'string';
}

/**
 * Type guard to validate if an object matches the KeyCombination interface
 * @param args The object to validate
 * @returns True if the object has a `keys` array containing only strings
 *          (an empty array is accepted)
 */
function isKeyCombination(args: unknown): args is KeyCombination {
  if (!isRecord(args)) return false;
  const keys = args.keys;
  return Array.isArray(keys) && keys.every((key: unknown) => typeof key === 'string');
}

/**
 * Type guard to validate if an object matches the KeyHoldOperation interface
 * @param args The object to validate
 * @returns True if the object has a string `key`, a `state` of 'down' or 'up',
 *          and a `duration` that is either absent or a number
 */
function isKeyHoldOperation(args: unknown): args is KeyHoldOperation {
  if (!isRecord(args)) return false;
  const hasValidState = args.state === 'down' || args.state === 'up';
  const hasValidDuration = args.duration === undefined || typeof args.duration === 'number';
  return typeof args.key === 'string' && hasValidState && hasValidDuration;
}

/**
 * Type guard to validate if an object matches the ClipboardInput interface
 * @param args The object to validate
 * @returns True if the object has a string `text` property
 */
function isClipboardInput(args: unknown): args is ClipboardInput {
  return isRecord(args) && typeof args.text === 'string';
}