screenshot
Capture a screenshot of the current screen or window to enable visual input for MCP clients, supporting interaction through image-based communication and live webcam integration.
Instructions
Gets a screenshot of the current screen or window
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
// webcam-server-factory.ts:245-307 (handler)
// MCP tool handler for 'screenshot': checks for connected clients, sends a
// screenshot request to the browser client, awaits the data URL response or
// error, parses it, and returns a text message with the embedded image.
//
// Relies on `user`, `getUserClients`, `getUserCallbacks`, `getMcpHost`,
// `Logger` and `parseDataUrl` from the enclosing scope.
async () => {
  const userClients = getUserClients(user);
  if (userClients.size === 0) {
    return {
      isError: true,
      content: [
        {
          type: "text",
          text: `Have you opened your web browser? Direct the human to go to ${getMcpHost()}?user=${user}, switch on their webcam and try again.`,
        },
      ],
    };
  }

  // Only the first registered client is asked for the screenshot.
  const clientId = Array.from(userClients.keys())[0];
  if (!clientId) {
    throw new Error("No clients connected");
  }

  // Resolve the client stream up front: if it is missing, no request could be
  // written and the promise below would never settle.
  const client = userClients.get(clientId);
  if (!client) {
    return {
      isError: true,
      content: [
        {
          type: "text",
          text: `Failed to capture screenshot: client ${clientId} is no longer connected`,
        },
      ],
    };
  }

  const userCallbacks = getUserCallbacks(user);

  // The promise settles when the callback stored under `clientId` is invoked
  // with either a data URL string or an `{ error }` object.
  // NOTE(review): nothing here bounds the wait if the browser never answers;
  // consider a timeout once an acceptable limit is agreed.
  const result = await new Promise<string | { error: string }>((resolve) => {
    Logger.info(`Taking screenshot for ${clientId} (user: ${user})`);
    userCallbacks.set(clientId, resolve);
    client.write(`data: ${JSON.stringify({ type: "screenshot" })}\n\n`);
  });

  // The client reported a failure instead of an image.
  if (typeof result === "object" && "error" in result) {
    return {
      isError: true,
      content: [
        {
          type: "text",
          text: `Failed to capture screenshot: ${result.error}`,
        },
      ],
    };
  }

  const { mimeType, base64Data } = parseDataUrl(result);

  return {
    content: [
      {
        type: "text",
        text: "Here is the requested screenshot",
      },
      {
        type: "image",
        data: base64Data,
        mimeType: mimeType,
      },
    ],
  };
}
// webcam-server-factory.ts:236-308 (registration)
// Registration of the 'screenshot' tool on the MCP server with name,
// description, empty input schema, UI hints, and inline handler.
//
// Relies on `mcpServer`, `user`, `getUserClients`, `getUserCallbacks`,
// `getMcpHost`, `Logger` and `parseDataUrl` from the enclosing scope.
mcpServer.tool(
  "screenshot",
  "Gets a screenshot of the current screen or window",
  {},
  {
    openWorldHint: true,
    readOnlyHint: true,
    title: "Take a Screenshot",
  },
  async () => {
    const userClients = getUserClients(user);
    if (userClients.size === 0) {
      return {
        isError: true,
        content: [
          {
            type: "text",
            text: `Have you opened your web browser? Direct the human to go to ${getMcpHost()}?user=${user}, switch on their webcam and try again.`,
          },
        ],
      };
    }

    // Only the first registered client is asked for the screenshot.
    const clientId = Array.from(userClients.keys())[0];
    if (!clientId) {
      throw new Error("No clients connected");
    }

    // Resolve the client stream up front: if it is missing, no request could
    // be written and the promise below would never settle.
    const client = userClients.get(clientId);
    if (!client) {
      return {
        isError: true,
        content: [
          {
            type: "text",
            text: `Failed to capture screenshot: client ${clientId} is no longer connected`,
          },
        ],
      };
    }

    const userCallbacks = getUserCallbacks(user);

    // The promise settles when the callback stored under `clientId` is
    // invoked with either a data URL string or an `{ error }` object.
    // NOTE(review): nothing here bounds the wait if the browser never
    // answers; consider a timeout once an acceptable limit is agreed.
    const result = await new Promise<string | { error: string }>((resolve) => {
      Logger.info(`Taking screenshot for ${clientId} (user: ${user})`);
      userCallbacks.set(clientId, resolve);
      client.write(`data: ${JSON.stringify({ type: "screenshot" })}\n\n`);
    });

    // The client reported a failure instead of an image.
    if (typeof result === "object" && "error" in result) {
      return {
        isError: true,
        content: [
          {
            type: "text",
            text: `Failed to capture screenshot: ${result.error}`,
          },
        ],
      };
    }

    const { mimeType, base64Data } = parseDataUrl(result);

    return {
      content: [
        {
          type: "text",
          text: "Here is the requested screenshot",
        },
        {
          type: "image",
          data: base64Data,
          mimeType: mimeType,
        },
      ],
    };
  }
);
// src/utils/screenCapture.ts:1-53 (helper)
// Client-side helper used by the browser client upon receiving a 'screenshot'
// request.
/**
 * Captures one frame of the user's screen or window as a PNG data URL.
 *
 * Requests a display-capture stream through
 * `navigator.mediaDevices.getDisplayMedia`, plays it in a detached `<video>`
 * element, and draws the current frame onto a canvas. If either side of the
 * frame exceeds 1568 pixels, the image is scaled down proportionally so the
 * longer side equals that limit.
 *
 * @returns A `data:image/png;base64,...` URL of the captured frame.
 * @throws Re-throws any failure (after logging it), including a rejected
 *   capture request, a video element error, a frame with no dimensions, or an
 *   unavailable 2D canvas context.
 */
export async function captureScreen(): Promise<string> {
  // Longest allowed edge of the returned image, in pixels.
  const MAX_DIMENSION = 1568;

  const video = document.createElement("video");
  // Muting is harmless here (audio is not captured) and avoids autoplay blocks.
  video.muted = true;

  let stream: MediaStream | undefined;
  try {
    const captured = await navigator.mediaDevices.getDisplayMedia({
      video: true,
      audio: false,
    });
    stream = captured;

    await new Promise<void>((resolve, reject) => {
      video.onloadedmetadata = () => resolve();
      video.onerror = () =>
        reject(new Error("Failed to load the screen capture stream"));
      video.srcObject = captured;
    });
    // Wait for playback to begin so a frame is available to draw.
    await video.play();

    const sourceWidth = video.videoWidth;
    const sourceHeight = video.videoHeight;
    if (sourceWidth === 0 || sourceHeight === 0) {
      throw new Error("Captured stream has no video dimensions");
    }

    // Scale down only; frames already within the limit keep their size.
    const scaleFactor = Math.min(
      1,
      MAX_DIMENSION / Math.max(sourceWidth, sourceHeight)
    );

    const canvas = document.createElement("canvas");
    canvas.width = Math.max(1, Math.round(sourceWidth * scaleFactor));
    canvas.height = Math.max(1, Math.round(sourceHeight * scaleFactor));

    const context = canvas.getContext("2d");
    if (!context) {
      // Without a context nothing is drawn and the result would be blank.
      throw new Error("2D canvas context is not available");
    }
    context.drawImage(video, 0, 0, canvas.width, canvas.height);

    return canvas.toDataURL("image/png");
  } catch (error) {
    console.error("Error capturing screenshot:", error);
    throw error;
  } finally {
    // Detach the element and stop every track so the capture ends.
    video.pause();
    video.srcObject = null;
    stream?.getTracks().forEach((track) => track.stop());
  }
}
// webcam-server-factory.ts:22-31 (helper)
/**
 * Splits a base64 data URL received from the client into its media type and
 * base64 payload so they can be embedded in the tool response.
 *
 * Accepts `data:<type>;base64,<payload>` and also tolerates extra parameters
 * between the media type and the `base64` marker (for example
 * `data:text/plain;charset=utf-8;base64,...`); those parameters are dropped
 * from the returned `mimeType`.
 *
 * @param dataUrl - The data URL to parse.
 * @returns The media type and the non-empty base64 payload.
 * @throws Error with message "Invalid data URL format" when the input is not
 *   a base64 data URL or its payload is empty.
 */
function parseDataUrl(dataUrl: string): ParsedDataUrl {
  // Group 1: media type (no ';' or ','). Group 2: non-empty base64 payload.
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/.exec(dataUrl);
  const mimeType = match?.[1];
  const base64Data = match?.[2];

  if (mimeType === undefined || base64Data === undefined) {
    throw new Error("Invalid data URL format");
  }

  return { mimeType, base64Data };
}