nutjs-windows-control

by Cheffromspace
Verified
OS Automation
TypeScript
MIT License
482
Reddit Discord
Overview InspectNew Schema Related Servers Reviews Score
Need Help?View Source Code Report Issue
MCPControl
src
handlers
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { 
  ListToolsRequestSchema,
  CallToolRequestSchema,
  TextContent
} from "@modelcontextprotocol/sdk/types.js";
import { MousePosition, KeyboardInput, KeyCombination, ClipboardInput, KeyHoldOperation, ScreenshotOptions } from "../types/common.js";
import { WindowsControlResponse } from "../types/responses.js";
// All tool functions now come from the provider
// Provider is now passed from the main server instance
import { AutomationProvider } from "../interfaces/provider.js";

/**
 * Normalizes an untrusted mouse-button argument.
 *
 * Only the exact strings 'left', 'right' and 'middle' are accepted; every
 * other value (missing, non-string, or an unrecognized string) falls back
 * to 'left'.
 *
 * @param button The raw button value supplied by the caller
 * @returns The button itself when it is one of the accepted names, otherwise 'left'
 */
function validateButton(button?: unknown): 'left' | 'right' | 'middle' {
  if (button === 'left' || button === 'right' || button === 'middle') {
    return button;
  }
  return 'left';
}

/**
 * Set up automation tools on the MCP server using the provided automation provider.
 * This function implements the provider pattern for all tool handlers, allowing
 * for dependency injection of automation implementations.
 *
 * The provider pattern offers several benefits:
 * - Testability: Makes unit testing easier by allowing mock providers
 * - Flexibility: Allows changing provider implementations without changing tool handlers
 * - Consistency: Ensures all automation is handled through a single provider interface
 * - Maintainability: Reduces direct dependencies on specific implementation details
 *
 * Two request handlers are registered:
 * - ListTools: returns the static catalogue of tools with their JSON input schemas.
 * - CallTool: validates the arguments for the named tool, delegates to the
 *   provider, and wraps the result as MCP content. A response whose first
 *   content item has type "image" is passed through as-is; every other response
 *   is serialised as pretty-printed JSON text. Any error thrown while handling
 *   the call is reported as a text item with `isError: true` instead of being
 *   rethrown.
 *
 * @param server The Model Context Protocol server instance
 * @param provider The automation provider implementation that will handle system interactions
 */
export function setupTools(server: Server, provider: AutomationProvider): void {
  // List available tools
  server.setRequestHandler(ListToolsRequestSchema, () => ({
    tools: [
      {
        name: "get_screenshot",
        description: "Take a screenshot optimized for AI readability, especially for text-heavy content. Uses default settings: JPEG format, 85% quality, grayscale enabled, and 1280px width (preserving aspect ratio). Supports region capture, format options, quality adjustment, and custom resize settings.",
        inputSchema: {
          type: "object",
          properties: {
            region: {
              type: "object",
              properties: {
                x: { type: "number", description: "X coordinate of the region" },
                y: { type: "number", description: "Y coordinate of the region" },
                width: { type: "number", description: "Width of the region" },
                height: { type: "number", description: "Height of the region" }
              },
              required: ["x", "y", "width", "height"],
              description: "Specific region to capture (optional)"
            },
            format: {
              type: "string",
              enum: ["png", "jpeg"],
              default: "jpeg",
              description: "Output format of the screenshot"
            },
            quality: {
              type: "number",
              minimum: 1,
              maximum: 100,
              default: 85,
              description: "JPEG quality (1-100, higher = better quality), only used for JPEG format"
            },
            grayscale: {
              type: "boolean",
              default: true,
              description: "Convert to grayscale"
            },
            compressionLevel: {
              type: "number",
              minimum: 0,
              maximum: 9,
              default: 6,
              description: "PNG compression level (0-9, higher = better compression), only used for PNG format"
            },
            resize: {
              type: "object",
              properties: {
                width: {
                  type: "number",
                  default: 1280,
                  description: "Target width"
                },
                height: { type: "number", description: "Target height" },
                fit: {
                  type: "string",
                  enum: ["contain", "cover", "fill", "inside", "outside"],
                  default: "contain",
                  description: "Resize fit option"
                }
              },
              default: { width: 1280, fit: "contain" },
              description: "Resize options for the screenshot"
            }
          }
        }
      },
      {
        name: "click_at",
        description: "Move mouse to coordinates, click, then return to original position",
        inputSchema: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate" },
            y: { type: "number", description: "Y coordinate" },
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              default: "left",
              description: "Mouse button to click"
            }
          },
          required: ["x", "y"]
        }
      },
      {
        name: "move_mouse",
        description: "Move the mouse cursor to specific coordinates",
        inputSchema: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate" },
            y: { type: "number", description: "Y coordinate" }
          },
          required: ["x", "y"]
        }
      },
      {
        name: "click_mouse",
        description: "Click the mouse at the current position",
        inputSchema: {
          type: "object",
          properties: {
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              default: "left",
              description: "Mouse button to click"
            }
          }
        }
      },
      {
        name: "drag_mouse",
        description: "Drag the mouse from one position to another",
        inputSchema: {
          type: "object",
          properties: {
            fromX: { type: "number", description: "Starting X coordinate" },
            fromY: { type: "number", description: "Starting Y coordinate" },
            toX: { type: "number", description: "Ending X coordinate" },
            toY: { type: "number", description: "Ending Y coordinate" },
            button: {
              type: "string",
              enum: ["left", "right", "middle"],
              default: "left",
              description: "Mouse button to use for dragging"
            }
          },
          required: ["fromX", "fromY", "toX", "toY"]
        }
      },
      {
        name: "scroll_mouse",
        description: "Scroll the mouse wheel up or down",
        inputSchema: {
          type: "object",
          properties: {
            amount: {
              type: "number",
              description: "Amount to scroll (positive for down, negative for up)"
            }
          },
          required: ["amount"]
        }
      },
      {
        name: "type_text",
        description: "Type text using the keyboard",
        inputSchema: {
          type: "object",
          properties: {
            text: { type: "string", description: "Text to type" }
          },
          required: ["text"]
        }
      },
      {
        name: "press_key",
        description: "Press a specific keyboard key",
        inputSchema: {
          type: "object",
          properties: {
            key: {
              type: "string",
              description: "Key to press (e.g., 'enter', 'tab', 'escape')"
            }
          },
          required: ["key"]
        }
      },
      {
        name: "hold_key",
        description: "Hold or release a keyboard key with optional duration",
        inputSchema: {
          type: "object",
          properties: {
            key: {
              type: "string",
              description: "Key to hold/release (e.g., 'shift', 'control')"
            },
            duration: {
              type: "number",
              description: "Duration to hold the key in milliseconds (only for 'down' state)"
            },
            state: {
              type: "string",
              enum: ["down", "up"],
              description: "Whether to press down or release the key"
            }
          },
          required: ["key", "state"]
        }
      },
      {
        name: "press_key_combination",
        description: "Press multiple keys simultaneously (e.g., keyboard shortcuts)",
        inputSchema: {
          type: "object",
          properties: {
            keys: {
              type: "array",
              items: { type: "string" },
              description: "Array of keys to press simultaneously (e.g., ['control', 'c'])"
            }
          },
          required: ["keys"]
        }
      },
      {
        name: "get_screen_size",
        description: "Get the screen dimensions",
        inputSchema: {
          type: "object",
          properties: {}
        }
      },
      {
        name: "get_cursor_position",
        description: "Get the current cursor position",
        inputSchema: {
          type: "object",
          properties: {}
        }
      },
      {
        name: "double_click",
        description: "Double click at current or specified position",
        inputSchema: {
          type: "object",
          properties: {
            x: { type: "number", description: "X coordinate (optional)" },
            y: { type: "number", description: "Y coordinate (optional)" }
          }
        }
      },
      {
        name: "get_active_window",
        description: "Get information about the currently active window",
        inputSchema: {
          type: "object",
          properties: {}
        }
      },
      {
        name: "focus_window",
        description: "Focus a specific window by its title",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to focus" }
          },
          required: ["title"]
        }
      },
      {
        name: "resize_window",
        description: "Resize a specific window by its title",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to resize" },
            width: { type: "number", description: "New width of the window" },
            height: { type: "number", description: "New height of the window" }
          },
          required: ["title", "width", "height"]
        }
      },
      {
        name: "reposition_window",
        description: "Move a specific window to new coordinates",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to move" },
            x: { type: "number", description: "New X coordinate" },
            y: { type: "number", description: "New Y coordinate" }
          },
          required: ["title", "x", "y"]
        }
      },
      {
        name: "minimize_window",
        description: "Minimize a specific window by its title (currently unsupported)",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to minimize" }
          },
          required: ["title"]
        }
      },
      {
        name: "restore_window",
        description: "Restore a minimized window by its title (currently unsupported)",
        inputSchema: {
          type: "object",
          properties: {
            title: { type: "string", description: "Title of the window to restore" }
          },
          required: ["title"]
        }
      },
      {
        name: "get_clipboard_content",
        description: "Get the current text content from the clipboard",
        inputSchema: {
          type: "object",
          properties: {}
        }
      },
      {
        name: "set_clipboard_content",
        description: "Set text content to the clipboard",
        inputSchema: {
          type: "object",
          properties: {
            text: { type: "string", description: "Text to copy to clipboard" }
          },
          required: ["text"]
        }
      },
      {
        name: "has_clipboard_text",
        description: "Check if the clipboard contains text",
        inputSchema: {
          type: "object",
          properties: {}
        }
      },
      {
        name: "clear_clipboard",
        description: "Clear the clipboard content",
        inputSchema: {
          type: "object",
          properties: {}
        }
      }
    ]
  }));

  // Handle tool calls
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    try {
      const { name, arguments: args } = request.params;
      let response: unknown;

      // Every provider call is awaited, including those that may be synchronous.
      // Awaiting a plain value is a no-op, while awaiting a Promise guarantees
      // that (a) the resolved value - not the Promise object - is serialised
      // below and (b) a rejection is caught by the surrounding try/catch.
      switch (name) {
        case "get_screenshot":
          response = await provider.screen.getScreenshot(buildScreenshotOptions(args));
          break;

        case "click_at":
          if (typeof args?.x !== 'number' || typeof args?.y !== 'number') {
            throw new Error("Invalid click_at arguments");
          }
          response = await provider.mouse.clickAt(
            args.x,
            args.y,
            validateButton(args?.button)
          );
          break;

        case "move_mouse":
          if (!isMousePosition(args)) {
            throw new Error("Invalid mouse position arguments");
          }
          response = await provider.mouse.moveMouse(args);
          break;

        case "click_mouse":
          response = await provider.mouse.clickMouse(
            validateButton(args?.button)
          );
          break;

        case "drag_mouse":
          if (typeof args?.fromX !== 'number' ||
              typeof args?.fromY !== 'number' ||
              typeof args?.toX !== 'number' ||
              typeof args?.toY !== 'number') {
            throw new Error("Invalid drag mouse arguments");
          }
          response = await provider.mouse.dragMouse(
            { x: args.fromX, y: args.fromY },
            { x: args.toX, y: args.toY },
            validateButton(args?.button)
          );
          break;

        case "scroll_mouse":
          if (typeof args?.amount !== 'number') {
            throw new Error("Invalid scroll amount argument");
          }
          response = await provider.mouse.scrollMouse(args.amount);
          break;

        case "type_text":
          if (!isKeyboardInput(args)) {
            throw new Error("Invalid keyboard input arguments");
          }
          response = await provider.keyboard.typeText(args);
          break;

        case "press_key":
          if (typeof args?.key !== 'string') {
            throw new Error("Invalid key press arguments");
          }
          response = await provider.keyboard.pressKey(args.key);
          break;

        case "hold_key":
          if (!isKeyHoldOperation(args)) {
            throw new Error("Invalid key hold arguments");
          }
          response = await provider.keyboard.holdKey(args);
          break;

        case "press_key_combination":
          if (!isKeyCombination(args)) {
            throw new Error("Invalid key combination arguments");
          }
          response = await provider.keyboard.pressKeyCombination(args);
          break;

        case "get_screen_size":
          response = await provider.screen.getScreenSize();
          break;

        case "get_cursor_position":
          response = await provider.mouse.getCursorPosition();
          break;

        case "double_click":
          // Coordinates are optional: both must be numbers to be used,
          // otherwise the provider is called without a position.
          if (args && typeof args.x === 'number' && typeof args.y === 'number') {
            response = await provider.mouse.doubleClick({ x: args.x, y: args.y });
          } else {
            response = await provider.mouse.doubleClick();
          }
          break;

        case "get_active_window":
          response = await provider.screen.getActiveWindow();
          break;

        case "focus_window":
          if (typeof args?.title !== 'string') {
            throw new Error("Invalid window title argument");
          }
          response = await provider.screen.focusWindow(args.title);
          break;

        case "resize_window":
          if (typeof args?.title !== 'string' ||
              typeof args?.width !== 'number' ||
              typeof args?.height !== 'number') {
            throw new Error("Invalid window resize arguments");
          }
          response = await provider.screen.resizeWindow(args.title, args.width, args.height);
          break;

        case "reposition_window":
          if (typeof args?.title !== 'string' ||
              typeof args?.x !== 'number' ||
              typeof args?.y !== 'number') {
            throw new Error("Invalid window reposition arguments");
          }
          response = await provider.screen.repositionWindow(args.title, args.x, args.y);
          break;

        case "minimize_window":
          if (typeof args?.title !== 'string') {
            throw new Error("Invalid window title argument");
          }
          // Not delegated to the provider: answered with a fixed "unsupported" result.
          response = { success: false, message: "Minimize window operation is not supported" };
          break;

        case "restore_window":
          if (typeof args?.title !== 'string') {
            throw new Error("Invalid window title argument");
          }
          // Not delegated to the provider: answered with a fixed "unsupported" result.
          response = { success: false, message: "Restore window operation is not supported" };
          break;

        case "get_clipboard_content":
          response = await provider.clipboard.getClipboardContent();
          break;

        case "set_clipboard_content":
          if (!isClipboardInput(args)) {
            throw new Error("Invalid clipboard input arguments");
          }
          response = await provider.clipboard.setClipboardContent(args);
          break;

        case "has_clipboard_text":
          response = await provider.clipboard.hasClipboardText();
          break;

        case "clear_clipboard":
          response = await provider.clipboard.clearClipboard();
          break;

        default:
          throw new Error(`Unknown tool: ${name}`);
      }

      // A non-object response cannot be inspected with the `in` operator (it
      // would raise an opaque TypeError), so reject it with a clear message.
      // The catch block below turns this into an `isError` result.
      if (typeof response !== 'object' || response === null) {
        throw new Error(`Tool "${name}" returned an invalid response`);
      }

      // Handle special case for screenshot which returns content with image data
      const typedResponse = response as WindowsControlResponse;
      const content = typedResponse.content;
      if (Array.isArray(content) && content.length > 0) {
        const first = content[0];
        if (first &&
            typeof first === 'object' &&
            'type' in first &&
            first.type === "image") {
          return { content };
        }
      }

      // For all other responses, return as text
      return {
        content: [{
          type: "text",
          text: JSON.stringify(response, null, 2)
        }]
      };

    } catch (error) {
      const errorContent: TextContent = {
        type: "text",
        text: `Error: ${error instanceof Error ? error.message : String(error)}`
      };

      return {
        content: [errorContent],
        isError: true
      };
    }
  });
}

/**
 * Builds the screenshot options for a `get_screenshot` call.
 *
 * Starts from the defaults (JPEG, quality 85, grayscale, resize to width 1280
 * with fit 'contain') and overrides each setting only when the corresponding
 * argument is present and has the expected type. Arguments of the wrong type
 * are ignored rather than rejected. A region is applied only when all four of
 * x, y, width and height are numbers.
 *
 * @param args Raw tool-call arguments, possibly undefined
 * @returns Screenshot options with defaults applied
 */
function buildScreenshotOptions(args: Record<string, unknown> | undefined): ScreenshotOptions {
  // Kept as its own local so individual resize fields can be overridden
  // without re-checking that the (optional) `resize` property is defined.
  const resize: NonNullable<ScreenshotOptions['resize']> = {
    width: 1280,
    fit: 'contain'
  };

  const options: ScreenshotOptions = {
    format: 'jpeg',
    quality: 85,
    grayscale: true,
    resize
  };

  const region = args?.region;
  if (region &&
      typeof region === 'object' &&
      'x' in region && typeof region.x === 'number' &&
      'y' in region && typeof region.y === 'number' &&
      'width' in region && typeof region.width === 'number' &&
      'height' in region && typeof region.height === 'number') {
    options.region = {
      x: region.x,
      y: region.y,
      width: region.width,
      height: region.height
    };
  }

  if (args?.format === 'jpeg' || args?.format === 'png') {
    options.format = args.format;
  }

  if (typeof args?.quality === 'number') {
    options.quality = args.quality;
  }

  if (typeof args?.grayscale === 'boolean') {
    options.grayscale = args.grayscale;
  }

  if (typeof args?.compressionLevel === 'number') {
    options.compressionLevel = args.compressionLevel;
  }

  const requestedResize = args?.resize;
  if (requestedResize && typeof requestedResize === 'object') {
    if ('width' in requestedResize && typeof requestedResize.width === 'number') {
      resize.width = requestedResize.width;
    }

    if ('height' in requestedResize && typeof requestedResize.height === 'number') {
      resize.height = requestedResize.height;
    }

    if ('fit' in requestedResize && typeof requestedResize.fit === 'string') {
      // Only the known fit modes are accepted; anything else keeps the default.
      const fitValue = requestedResize.fit;
      if (fitValue === 'contain' || fitValue === 'cover' ||
          fitValue === 'fill' || fitValue === 'inside' || fitValue === 'outside') {
        resize.fit = fitValue;
      }
    }
  }

  return options;
}

/**
 * Runtime check that a value has the shape of a MousePosition.
 *
 * The value must be a non-null object whose `x` and `y` properties are both
 * numbers; additional properties are ignored.
 *
 * @param args The value to inspect
 * @returns True when `args` has numeric `x` and `y` properties
 */
function isMousePosition(args: unknown): args is MousePosition {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  if (typeof candidate.x !== 'number') {
    return false;
  }
  return typeof candidate.y === 'number';
}

/**
 * Runtime check that a value has the shape of a KeyboardInput.
 *
 * The value must be a non-null object carrying a string `text` property
 * (an empty string is accepted).
 *
 * @param args The value to inspect
 * @returns True when `args` has a string `text` property
 */
function isKeyboardInput(args: unknown): args is KeyboardInput {
  return (
    typeof args === 'object' &&
    args !== null &&
    typeof (args as Record<string, unknown>).text === 'string'
  );
}

/**
 * Runtime check that a value has the shape of a KeyCombination.
 *
 * The value must be a non-null object whose `keys` property is an array in
 * which every element is a string. An empty array passes the check.
 *
 * @param args The value to inspect
 * @returns True when `args.keys` is an array containing only strings
 */
function isKeyCombination(args: unknown): args is KeyCombination {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  const { keys } = args as Record<string, unknown>;
  // Valid exactly when no element fails the string test.
  return Array.isArray(keys) && !keys.some(entry => typeof entry !== 'string');
}

/**
 * Runtime check that a value has the shape of a KeyHoldOperation.
 *
 * Requirements, checked in order:
 * - the value is a non-null object,
 * - `key` is a string,
 * - `state` is exactly 'down' or 'up',
 * - `duration` is either absent (undefined) or a number.
 *
 * @param args The value to inspect
 * @returns True when all of the requirements above hold
 */
function isKeyHoldOperation(args: unknown): args is KeyHoldOperation {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  if (typeof candidate.key !== 'string') {
    return false;
  }
  if (candidate.state !== 'down' && candidate.state !== 'up') {
    return false;
  }
  return candidate.duration === undefined || typeof candidate.duration === 'number';
}

/**
 * Runtime check that a value has the shape of a ClipboardInput.
 *
 * The value must be a non-null object carrying a string `text` property
 * (an empty string is accepted).
 *
 * @param args The value to inspect
 * @returns True when `args` has a string `text` property
 */
function isClipboardInput(args: unknown): args is ClipboardInput {
  if (typeof args === 'object' && args !== null) {
    const { text } = args as Record<string, unknown>;
    return typeof text === 'string';
  }
  return false;
}