capture_screenshot
Take a screenshot of an Android device screen to capture visual content for display or analysis. Returns a base64-encoded PNG image that shows current on-screen activity.
Instructions
Capture a screenshot of the current Android device screen. Returns a base64-encoded PNG image that can be displayed or analyzed visually. Use this to see what is currently on screen.
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| save_to_disk | No | Save the screenshot to disk as well | `false` |
| device_id | No | Device serial number | |
Implementation Reference
- src/vision/screenshot.ts:28-78 (handler) The actual implementation of the screenshot capture logic, using ADB to capture the screen and Sharp for image processing.
/**
 * Captures the current screen of an Android device as a PNG.
 *
 * The raw PNG is fetched with `screencap -p` through `adbExecOut`, re-encoded
 * with sharp (optionally resized), and returned base64-encoded. When
 * `options.save` is set, the processed PNG is also written to the configured
 * screenshot directory and its path is returned as `savedPath`.
 *
 * @param deviceId - Device serial; passed to `deviceManager.resolveDeviceId`,
 *   which decides what happens when it is omitted.
 * @param options - `save`: also write the processed PNG to disk.
 *   `resize`: bounding box the image is fitted inside (`fit: 'inside'`).
 * @returns The base64 PNG, the dimensions of the ORIGINAL capture, a capture
 *   timestamp (ms since epoch) and, if saved, the file path.
 * @throws Error if the capture returns no data or fewer than 100 bytes.
 */
export async function captureScreenshot(
  deviceId?: string,
  options?: { save?: boolean; resize?: { width: number; height: number } }
): Promise<ScreenshotResult> {
  const resolved = await deviceManager.resolveDeviceId(deviceId);

  // Capture raw PNG via exec-out (faster than pull)
  const rawBuffer = await adbExecOut(['screencap', '-p'], resolved, 15000);

  if (!rawBuffer || rawBuffer.length < 100) {
    throw new Error('Screenshot capture returned empty or invalid data');
  }

  // Process with sharp
  let image = sharp(rawBuffer);
  const metadata = await image.metadata();
  // NOTE(review): these are the dimensions of the raw capture, read before any
  // resize is applied. When `options.resize` is used they do not describe the
  // returned image - confirm callers expect device-screen dimensions here.
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;

  // Optionally resize
  if (options?.resize) {
    image = image.resize(options.resize.width, options.resize.height, { fit: 'inside' });
  }

  // NOTE(review): per the sharp docs, the png `quality` option implies
  // palette-based output - confirm the colour quantisation is intended.
  const processedBuffer = await image.png({ quality: 80 }).toBuffer();
  const base64 = processedBuffer.toString('base64');

  // One timestamp for both the result and the filename, so a saved file can
  // be matched to the result that produced it.
  const timestamp = Date.now();

  const result: ScreenshotResult = {
    base64,
    width,
    height,
    timestamp,
  };

  // Optionally save to disk
  if (options?.save) {
    const config = getConfig();
    const dir = config.screenshotDir;

    // `recursive: true` makes this a no-op when the directory already exists,
    // so no separate existence check (and no check-then-create race) is needed.
    mkdirSync(dir, { recursive: true });

    // Network ADB serials look like "192.168.1.5:5555"; ':' (and other
    // punctuation) is not a legal filename character on every platform.
    const safeSerial = String(resolved).replace(/[^A-Za-z0-9._-]/g, '_');
    const filename = `screenshot_${safeSerial}_${timestamp}.png`;
    const filepath = join(dir, filename);
    writeFileSync(filepath, processedBuffer);
    result.savedPath = filepath;
    log.info('Screenshot saved', { filepath, deviceId: resolved });
  }

  log.info('Screenshot captured', {
    width,
    height,
    sizeKb: Math.round(processedBuffer.length / 1024),
    deviceId: resolved,
  });

  return result;
}
// - src/controllers/vision-tools.ts:13-54 (registration) MCP tool registration for 'capture_screenshot' which bridges the MCP request to the screenshot implementation.
// Registers the 'capture_screenshot' MCP tool. The handler delegates to
// captureScreenshot, remembers a buffer per device for later diffing, and
// replies with the PNG as image content plus a JSON text summary.
server.registerTool(
  'capture_screenshot',
  {
    description: 'Capture a screenshot of the current Android device screen. Returns a base64-encoded PNG image that can be displayed or analyzed visually. Use this to see what is currently on screen.',
    inputSchema: {
      save_to_disk: z.boolean().optional().default(false).describe('Save the screenshot to disk as well'),
      device_id: z.string().optional().describe('Device serial number'),
    },
  },
  async ({ save_to_disk, device_id }) => {
    // Key used both for metrics and for the per-device screenshot cache.
    const deviceKey = device_id || 'default';

    return await metrics.measure('capture_screenshot', deviceKey, async () => {
      const shot = await captureScreenshot(device_id, { save: save_to_disk });

      // Store for diffing.
      // NOTE(review): this performs a second capture via captureScreenshotBuffer
      // rather than reusing `shot` - confirm whether that is required by the
      // diffing code or is an avoidable extra device round-trip.
      const diffBuffer = await captureScreenshotBuffer(device_id);
      lastScreenshots.set(deviceKey, diffBuffer);

      // Text metadata that accompanies the image; savedPath is present only
      // when the screenshot was written to disk.
      const summary: Record<string, unknown> = {
        success: true,
        width: shot.width,
        height: shot.height,
        timestamp: shot.timestamp,
      };
      if (shot.savedPath) {
        summary.savedPath = shot.savedPath;
      }

      // Return the image as both image content and text metadata
      const content: Array<
        { type: 'text'; text: string } | { type: 'image'; data: string; mimeType: string }
      > = [
        { type: 'image' as const, data: shot.base64, mimeType: 'image/png' },
        { type: 'text' as const, text: JSON.stringify(summary, null, 2) },
      ];

      return { content };
    });
  }
);