import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import screenshot from "screenshot-desktop";
import sharp from "sharp";
import { compressScreenshot, imageContent } from "../utils/image.js";
import { runPowerShell } from "../utils/powershell.js";
/** Rectangle in screenshot pixel coordinates, in the shape passed to `sharp().extract()`. */
interface PixelRect {
  left: number;
  top: number;
  width: number;
  height: number;
}

/** Matches the single `left,top,width,height` line printed by the window-lookup script. */
const WINDOW_RECT_PATTERN = /^(-?\d+),(-?\d+),(-?\d+),(-?\d+)$/;

/** Turns any thrown value into a readable message without assuming it is an `Error`. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Intersects `rect` with an image of the given size.
 *
 * @returns The visible part of `rect`, or `undefined` when no pixel of it lies inside the image.
 */
function clipToImage(rect: PixelRect, imageWidth: number, imageHeight: number): PixelRect | undefined {
  const left = Math.max(0, rect.left);
  const top = Math.max(0, rect.top);
  // Clip the far edges from the ORIGINAL rectangle so a negative origin shrinks the
  // result instead of shifting the crop onto pixels beyond the window's real edges.
  const right = Math.min(rect.left + rect.width, imageWidth);
  const bottom = Math.min(rect.top + rect.height, imageHeight);
  if (right <= left || bottom <= top) {
    return undefined;
  }
  return { left, top, width: right - left, height: bottom - top };
}

/**
 * Parses the `left,top,width,height` output of the window-lookup script.
 *
 * @throws Error when the output is not exactly four comma-separated integers.
 */
function parseWindowRect(output: string): PixelRect {
  const match = WINDOW_RECT_PATTERN.exec(output.trim());
  if (match === null) {
    throw new Error(`Unexpected window geometry output: ${JSON.stringify(output)}`);
  }
  return {
    left: Number(match[1]),
    top: Number(match[2]),
    width: Number(match[3]),
    height: Number(match[4]),
  };
}

/**
 * Builds the PowerShell script that finds the first process whose main window title
 * matches `title`, brings that window to the foreground, waits 200 ms and prints its
 * rectangle as `left,top,width,height`.
 *
 * The title is embedded as base64 and decoded inside PowerShell, so quotes, `$(...)`
 * or backticks in the title can never be interpreted as script code. The decoded value
 * is still used as a `-like` pattern, so wildcard characters keep their meaning.
 */
function buildWindowRectScript(title: string): string {
  const encodedTitle = Buffer.from(title, "utf8").toString("base64");
  // Joined line by line: the here-string terminator `"@` must start at column 0.
  return [
    'Add-Type @"',
    "using System;",
    "using System.Runtime.InteropServices;",
    "public class Win32 {",
    '  [DllImport("user32.dll")]',
    "  public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect);",
    '  [DllImport("user32.dll")]',
    "  public static extern bool SetForegroundWindow(IntPtr hWnd);",
    "}",
    "public struct RECT {",
    "  public int Left, Top, Right, Bottom;",
    "}",
    '"@',
    `$title = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('${encodedTitle}'))`,
    '$procs = Get-Process | Where-Object { $_.MainWindowTitle -like "*$title*" -and $_.MainWindowHandle -ne 0 } | Select-Object -First 1',
    `if (-not $procs) { throw "No window found matching '$title'" }`,
    "$hwnd = $procs.MainWindowHandle",
    "$rect = New-Object RECT",
    "[Win32]::GetWindowRect($hwnd, [ref]$rect) | Out-Null",
    "[Win32]::SetForegroundWindow($hwnd) | Out-Null",
    "Start-Sleep -Milliseconds 200",
    '"$($rect.Left),$($rect.Top),$($rect.Right - $rect.Left),$($rect.Bottom - $rect.Top)"',
  ].join("\n");
}

/**
 * Reads the pixel dimensions of an encoded image.
 *
 * @throws Error when the image metadata carries no width or height.
 */
async function readImageSize(image: Buffer): Promise<{ width: number; height: number }> {
  const { width, height } = await sharp(image).metadata();
  if (width === undefined || height === undefined) {
    throw new Error("Could not determine screenshot dimensions");
  }
  return { width, height };
}

/**
 * Registers the screenshot tools on the given MCP server:
 * `screenshot_fullscreen`, `screenshot_region` and `screenshot_window`.
 *
 * Every handler returns the image produced by `imageContent(compressScreenshot(...))`
 * on success and a text result with `isError: true` on failure; handlers never throw.
 *
 * @param server MCP server the tools are registered on.
 */
export function registerScreenshotTools(server: McpServer) {
  // screenshot_fullscreen
  server.tool(
    "screenshot_fullscreen",
    "Capture the entire screen or a specific display",
    { display: z.number().optional().describe("Display number (0-indexed). Omit for primary display.") },
    async ({ display }) => {
      try {
        // Only enumerate displays when one was requested. An index that names no
        // display (too large, negative, fractional) falls back to the default
        // capture instead of failing on an undefined list entry.
        const screenId =
          display === undefined ? undefined : (await screenshot.listDisplays())[display]?.id;
        const imgBuffer = (await screenshot({ format: "png", screen: screenId })) as Buffer;
        const base64 = await compressScreenshot(imgBuffer);
        return imageContent(base64);
      } catch (err: unknown) {
        return { content: [{ type: "text", text: `Screenshot failed: ${describeError(err)}` }], isError: true };
      }
    }
  );

  // screenshot_region
  server.tool(
    "screenshot_region",
    "Capture a rectangular region of the screen",
    {
      x: z.number().describe("Left edge X coordinate"),
      y: z.number().describe("Top edge Y coordinate"),
      width: z.number().min(1).describe("Width in pixels"),
      height: z.number().min(1).describe("Height in pixels"),
    },
    async ({ x, y, width, height }) => {
      try {
        const imgBuffer = (await screenshot({ format: "png" })) as Buffer;
        const size = await readImageSize(imgBuffer);
        // The schema accepts any number, but the crop needs whole pixels.
        const region: PixelRect = {
          left: Math.round(x),
          top: Math.round(y),
          width: Math.round(width),
          height: Math.round(height),
        };
        const fitsInside =
          region.left >= 0 &&
          region.top >= 0 &&
          region.left + region.width <= size.width &&
          region.top + region.height <= size.height;
        if (!fitsInside) {
          // Report the real bounds so the caller can correct the request.
          throw new Error(
            `Region (x=${region.left}, y=${region.top}, ${region.width}x${region.height}) ` +
              `extends beyond the ${size.width}x${size.height} screenshot`
          );
        }
        const cropped = await sharp(imgBuffer).extract(region).png().toBuffer();
        const base64 = await compressScreenshot(cropped);
        return imageContent(base64);
      } catch (err: unknown) {
        return { content: [{ type: "text", text: `Region screenshot failed: ${describeError(err)}` }], isError: true };
      }
    }
  );

  // screenshot_window
  server.tool(
    "screenshot_window",
    "Capture a specific window by title (partial match)",
    { title: z.string().describe("Window title (partial match)") },
    async ({ title }) => {
      try {
        // Get window rect via PowerShell (this also brings the window to the foreground).
        const output = await runPowerShell(buildWindowRectScript(title));
        const windowRect = parseWindowRect(output);
        if (windowRect.width <= 0 || windowRect.height <= 0) {
          return { content: [{ type: "text", text: "Window has zero size (minimized?)" }], isError: true };
        }

        // NOTE(review): the capture is taken without a `screen` option, so window
        // coordinates are assumed to be relative to that image — confirm for
        // multi-display and DPI-scaled setups.
        const imgBuffer = (await screenshot({ format: "png" })) as Buffer;
        const size = await readImageSize(imgBuffer);

        // Keep only the part of the window that is actually inside the screenshot.
        const visible = clipToImage(windowRect, size.width, size.height);
        if (visible === undefined) {
          return {
            content: [
              {
                type: "text",
                text: "Window is entirely outside the captured screen (minimized or on another display?)",
              },
            ],
            isError: true,
          };
        }

        const cropped = await sharp(imgBuffer).extract(visible).png().toBuffer();
        const base64 = await compressScreenshot(cropped);
        return imageContent(base64);
      } catch (err: unknown) {
        return { content: [{ type: "text", text: `Window screenshot failed: ${describeError(err)}` }], isError: true };
      }
    }
  );
}