/**
* Simulator UI interaction and screenshot tools
*/
import { writeFile, unlink, stat } from 'fs/promises';
import { tmpdir } from 'os';
import { join } from 'path';
import sharp from 'sharp';
import type { ToolDefinition, ToolResult, ScreenshotOptions } from '../../shared/types.js';
import { simctl, TIMEOUTS } from '../../shared/executor.js';
import { resolveDevice } from '../../shared/simulator.js';
import { SimulatorError } from '../../shared/errors.js';
import {
  ScreenshotSchema,
  TapSchema,
  SwipeSchema,
  LongPressSchema
} from '../../schemas/simulator.js';
/**
 * Tool: simulator_screenshot
 *
 * Captures a PNG from the simulator via `simctl io <device> screenshot` into a
 * temporary file, compresses it to JPEG with `compressScreenshot`, and returns
 * the result as a base64 image plus a JSON text summary (dimensions, size,
 * compression ratio, quality). The temporary file is always removed afterwards.
 *
 * Throws a `SimulatorError` with code `SCREENSHOT_FAILED` when simctl reports
 * failure; errors from device resolution or compression propagate unchanged.
 */
export const screenshotTool: ToolDefinition<typeof ScreenshotSchema> = {
  name: 'simulator_screenshot',
  description: 'Capture a screenshot from the iOS simulator. Automatically compresses to JPEG for optimal LLM processing (typically 80% smaller than PNG).',
  inputSchema: {
    type: 'object',
    properties: {
      device: {
        type: 'string',
        description: 'Device UDID or "booted" for any booted device',
        default: 'booted'
      },
      quality: {
        type: 'number',
        description: 'JPEG quality (1-100)',
        default: 80,
        minimum: 1,
        maximum: 100
      },
      maxWidth: {
        type: 'number',
        description: 'Maximum width in pixels',
        default: 800,
        minimum: 100
      },
      maxHeight: {
        type: 'number',
        description: 'Maximum height in pixels',
        default: 1400,
        minimum: 100
      }
    }
  },
  schema: ScreenshotSchema,
  handler: async (args) => {
    const { device: deviceId, quality, maxWidth, maxHeight } = args;

    const resolvedDevice = await resolveDevice(deviceId);

    // A timestamp alone is not unique: two captures started within the same
    // millisecond would share a path and race on write/unlink. Add a random
    // suffix so concurrent invocations never collide.
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
    const tmpFile = join(tmpdir(), `ios-screenshot-${uniqueSuffix}.png`);

    try {
      const result = await simctl(['io', resolvedDevice, 'screenshot', tmpFile], {
        timeout: TIMEOUTS.SCREENSHOT
      });

      if (!result.success) {
        throw new SimulatorError('Failed to capture screenshot', {
          code: 'SCREENSHOT_FAILED',
          details: {
            device: resolvedDevice,
            stderr: result.stderr
          },
          recovery: 'Ensure the simulator is booted and visible'
        });
      }

      const compressed = await compressScreenshot(tmpFile, {
        quality,
        maxWidth,
        maxHeight
      });

      // Guard the division: an unknown (zero) original size would otherwise
      // produce "-Infinity% smaller" or "NaN% smaller" in the summary.
      const compressionRatio =
        compressed.originalSizeBytes > 0
          ? `${Math.round((1 - compressed.sizeBytes / compressed.originalSizeBytes) * 100)}% smaller`
          : 'unknown';

      return {
        content: [
          {
            type: 'image',
            data: compressed.base64,
            mimeType: 'image/jpeg'
          },
          {
            type: 'text',
            text: JSON.stringify(
              {
                success: true,
                size: `${compressed.width}x${compressed.height}`,
                originalSize: `${compressed.originalWidth}x${compressed.originalHeight}`,
                compressionRatio,
                sizeKB: Math.round(compressed.sizeBytes / 1024),
                quality
              },
              null,
              2
            )
          }
        ]
      };
    } finally {
      // Best-effort cleanup: the file may not exist if the capture failed, and
      // a cleanup failure must never mask the real result or error.
      try {
        await unlink(tmpFile);
      } catch {
        // Intentionally ignored.
      }
    }
  }
};
/**
 * Tool: simulator_tap
 *
 * Resolves the target device, then issues `simctl io <device> tap <x> <y>`.
 * On success returns a JSON text summary of the tap; on failure throws a
 * `SimulatorError` with code `TAP_FAILED` carrying the device, coordinates
 * and simctl stderr.
 *
 * NOTE(review): confirm the simctl build targeted by the `simctl` wrapper
 * actually provides an `io ... tap` operation.
 */
export const tapTool: ToolDefinition<typeof TapSchema> = {
  name: 'simulator_tap',
  description: 'Tap at specific x,y coordinates on the iOS simulator screen. Coordinates are in pixels from top-left corner.',
  inputSchema: {
    type: 'object',
    properties: {
      device: {
        type: 'string',
        description: 'Device UDID or "booted"',
        default: 'booted'
      },
      x: {
        type: 'number',
        description: 'X coordinate in pixels'
      },
      y: {
        type: 'number',
        description: 'Y coordinate in pixels'
      }
    },
    required: ['x', 'y']
  },
  schema: TapSchema,
  handler: async ({ device, x, y }) => {
    const target = await resolveDevice(device);
    const coordinates = { x, y };

    // simctl takes the coordinates as string arguments.
    const [xArg, yArg] = [x, y].map((value) => value.toString());
    const outcome = await simctl(['io', target, 'tap', xArg, yArg], {
      timeout: TIMEOUTS.DEFAULT
    });

    if (outcome.success) {
      const summary = {
        success: true,
        action: 'tap',
        coordinates,
        device: target
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(summary, null, 2) }]
      };
    }

    throw new SimulatorError('Failed to execute tap', {
      code: 'TAP_FAILED',
      details: {
        device: target,
        coordinates,
        stderr: outcome.stderr
      },
      recovery: 'Ensure coordinates are within screen bounds'
    });
  }
};
/**
 * Tool: simulator_swipe
 *
 * Resolves the target device, then issues
 * `simctl io <device> swipe <x1> <y1> <x2> <y2> --duration <seconds>`.
 * On success returns a JSON text summary including the dominant direction and
 * the rounded Euclidean distance of the gesture; on failure throws a
 * `SimulatorError` with code `SWIPE_FAILED`.
 *
 * NOTE(review): confirm the simctl build targeted by the `simctl` wrapper
 * actually provides an `io ... swipe` operation.
 */
export const swipeTool: ToolDefinition<typeof SwipeSchema> = {
  name: 'simulator_swipe',
  description: 'Perform a swipe gesture on the iOS simulator from start coordinates to end coordinates. Useful for scrolling, dismissing, or navigating.',
  inputSchema: {
    type: 'object',
    properties: {
      device: {
        type: 'string',
        description: 'Device UDID or "booted"',
        default: 'booted'
      },
      x1: {
        type: 'number',
        description: 'Start X coordinate in pixels'
      },
      y1: {
        type: 'number',
        description: 'Start Y coordinate in pixels'
      },
      x2: {
        type: 'number',
        description: 'End X coordinate in pixels'
      },
      y2: {
        type: 'number',
        description: 'End Y coordinate in pixels'
      },
      duration: {
        type: 'number',
        description: 'Swipe duration in seconds',
        default: 0.3,
        minimum: 0.1,
        maximum: 10
      }
    },
    required: ['x1', 'y1', 'x2', 'y2']
  },
  schema: SwipeSchema,
  handler: async (args) => {
    const { device: deviceId, x1, y1, x2, y2, duration } = args;

    const resolvedDevice = await resolveDevice(deviceId);

    const result = await simctl(
      [
        'io',
        resolvedDevice,
        'swipe',
        x1.toString(),
        y1.toString(),
        x2.toString(),
        y2.toString(),
        '--duration',
        duration.toString()
      ],
      // The gesture itself lasts `duration` seconds (up to 10), so extend the
      // base timeout by that amount, as the long-press tool does. Otherwise a
      // slow swipe can be killed before it completes.
      { timeout: TIMEOUTS.DEFAULT + duration * 1000 }
    );

    if (!result.success) {
      throw new SimulatorError('Failed to execute swipe', {
        code: 'SWIPE_FAILED',
        details: {
          device: resolvedDevice,
          start: { x: x1, y: y1 },
          end: { x: x2, y: y2 },
          duration,
          stderr: result.stderr
        },
        recovery: 'Ensure coordinates are within screen bounds'
      });
    }

    // Classify by the dominant axis; ties go to the vertical axis. A swipe
    // with no displacement has no direction, so report 'none' rather than 'up'.
    const deltaX = x2 - x1;
    const deltaY = y2 - y1;
    let direction: string;
    if (deltaX === 0 && deltaY === 0) {
      direction = 'none';
    } else if (Math.abs(deltaX) > Math.abs(deltaY)) {
      direction = deltaX > 0 ? 'right' : 'left';
    } else {
      direction = deltaY > 0 ? 'down' : 'up';
    }

    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify(
            {
              success: true,
              action: 'swipe',
              direction,
              start: { x: x1, y: y1 },
              end: { x: x2, y: y2 },
              distance: Math.round(Math.sqrt(deltaX ** 2 + deltaY ** 2)),
              duration,
              device: resolvedDevice
            },
            null,
            2
          )
        }
      ]
    };
  }
};
/**
 * Tool: simulator_long_press
 *
 * Emulates a press-and-hold by issuing a swipe whose start and end points are
 * identical, held for `duration` seconds. The command timeout is the default
 * timeout extended by the press duration. On success returns a JSON text
 * summary; on failure throws a `SimulatorError` with code `LONG_PRESS_FAILED`.
 *
 * NOTE(review): confirm the simctl build targeted by the `simctl` wrapper
 * actually provides an `io ... swipe` operation.
 */
export const longPressTool: ToolDefinition<typeof LongPressSchema> = {
  name: 'simulator_long_press',
  description: 'Perform a long press (tap and hold) at specific coordinates on the iOS simulator. Useful for context menus, drag operations, or special interactions.',
  inputSchema: {
    type: 'object',
    properties: {
      device: {
        type: 'string',
        description: 'Device UDID or "booted"',
        default: 'booted'
      },
      x: {
        type: 'number',
        description: 'X coordinate in pixels'
      },
      y: {
        type: 'number',
        description: 'Y coordinate in pixels'
      },
      duration: {
        type: 'number',
        description: 'Press duration in seconds',
        default: 1.0,
        minimum: 0.5,
        maximum: 10
      }
    },
    required: ['x', 'y']
  },
  schema: LongPressSchema,
  handler: async ({ device, x, y, duration }) => {
    const target = await resolveDevice(device);

    // Same point used as both the start and the end of the swipe.
    const anchor = [x.toString(), y.toString()];
    const commandArgs = ['io', target, 'swipe', ...anchor, ...anchor, '--duration', duration.toString()];

    // Allow the press itself to finish before the command can time out.
    const timeout = TIMEOUTS.DEFAULT + duration * 1000;
    const outcome = await simctl(commandArgs, { timeout });

    if (outcome.success) {
      const summary = {
        success: true,
        action: 'long_press',
        coordinates: { x, y },
        duration,
        device: target
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(summary, null, 2) }]
      };
    }

    throw new SimulatorError('Failed to execute long press', {
      code: 'LONG_PRESS_FAILED',
      details: {
        device: target,
        coordinates: { x, y },
        duration,
        stderr: outcome.stderr
      },
      recovery: 'Ensure coordinates are within screen bounds'
    });
  }
};
/**
 * Compress a screenshot file for LLM consumption.
 *
 * Reads the image at `pngPath`, scales it down (never up) so it fits inside
 * `maxWidth` x `maxHeight` while keeping its aspect ratio, and encodes it as
 * JPEG at the requested quality.
 *
 * @param pngPath - Path of the source image on disk.
 * @param options - Optional `quality` (default 80), `maxWidth` (default 800)
 *   and `maxHeight` (default 1400).
 * @returns The base64-encoded JPEG, the dimensions of the encoded output, the
 *   source dimensions (0 when the metadata does not report them), and the
 *   encoded and source sizes in bytes.
 */
async function compressScreenshot(
  pngPath: string,
  options: ScreenshotOptions
): Promise<{
  base64: string;
  width: number;
  height: number;
  originalWidth: number;
  originalHeight: number;
  sizeBytes: number;
  originalSizeBytes: number;
}> {
  const { quality = 80, maxWidth = 800, maxHeight = 1400 } = options;

  const image = sharp(pngPath);
  const [metadata, fileStats] = await Promise.all([image.metadata(), stat(pngPath)]);

  const originalWidth = metadata.width ?? 0;
  const originalHeight = metadata.height ?? 0;
  // sharp only reports `metadata.size` for Buffer/Stream input; for a file
  // path it is undefined, so take the size from the filesystem instead.
  const originalSizeBytes = metadata.size ?? fileStats.size;

  // sharp requires integer dimensions; floor so the output never exceeds the
  // requested bounds when a fractional limit is supplied.
  const boundWidth = Math.max(1, Math.floor(maxWidth));
  const boundHeight = Math.max(1, Math.floor(maxHeight));

  // Let sharp compute the aspect-preserving fit: 'inside' scales to the
  // largest size that fits both bounds and `withoutEnlargement` leaves images
  // that are already small enough untouched.
  const { data, info } = await image
    .resize({
      width: boundWidth,
      height: boundHeight,
      fit: 'inside',
      withoutEnlargement: true
    })
    .jpeg({ quality })
    .toBuffer({ resolveWithObject: true });

  return {
    base64: data.toString('base64'),
    // Report what was actually encoded rather than a separately computed estimate.
    width: info.width,
    height: info.height,
    originalWidth,
    originalHeight,
    sizeBytes: data.length,
    originalSizeBytes
  };
}