import { z } from 'zod';
import { tap, tapElement, findElement, getElementId } from '../executor/wda.js';
import { getOrCreateSession } from '../utils/wda-session.js';
import { WdaElementNotFoundError } from '../types/wda.js';
// Field builders for the two chains that repeat in the schema below.
const optionalText = (description: string) =>
  z.string().optional().describe(description);
const optionalCoordinate = (description: string) =>
  z.number().min(0).optional().describe(description);

/**
 * Base input schema for the `wda_tap` tool (its shape is used by MCP server
 * registration).
 *
 * Every field is optional at the schema level. The cross-field rule — a tap
 * needs an (x, y) pair, an accessibility id, or an element id — is enforced
 * separately by `validateInput` when the handler runs.
 */
export const wdaTapSchema = z.object({
  x: optionalCoordinate('X coordinate to tap (in points)'),
  y: optionalCoordinate('Y coordinate to tap (in points)'),
  accessibility_id: optionalText('Accessibility identifier of element to tap'),
  element_id: optionalText('Element ID from a previous wda_find call'),
  bundle_id: optionalText('Bundle ID of app to activate for this session'),
  port: z.number().optional().describe('WDA server port (default: 8100)'),
});

/** Parsed input accepted by the `wda_tap` handler, inferred from `wdaTapSchema`. */
export type WdaTapInput = z.infer<typeof wdaTapSchema>;
/**
 * Checks that the input names a usable tap target before any WDA call is made.
 *
 * A target is one of: a complete (x, y) pair, a non-empty `accessibility_id`,
 * or a non-empty `element_id`.
 *
 * @param input - Parsed tool input.
 * @throws Error if exactly one of `x` / `y` is supplied.
 * @throws Error if no usable target is supplied at all.
 */
function validateInput(input: WdaTapInput): void {
  const hasX = input.x !== undefined;
  const hasY = input.y !== undefined;

  // Checked first so a lone coordinate gets the specific message rather than
  // the generic "nothing provided" one.
  if (hasX !== hasY) {
    throw new Error('Both x and y coordinates must be provided together');
  }

  const hasCoordinates = hasX && hasY;
  // Empty strings count as "not provided": the handler selects its branch by
  // truthiness, so an empty id would otherwise pass validation and fall
  // through to a coordinate tap with undefined coordinates.
  const hasAccessibilityId = Boolean(input.accessibility_id);
  const hasElementId = Boolean(input.element_id);

  if (!hasCoordinates && !hasAccessibilityId && !hasElementId) {
    throw new Error(
      'Either (x, y) coordinates, accessibility_id, or element_id must be provided'
    );
  }
}
/** Wraps a message in the single-text-item response shape the handler returns. */
function textResult(text: string) {
  return {
    content: [
      {
        type: 'text' as const,
        text,
      },
    ],
  };
}

/**
 * `wda_tap` tool definition: taps an element or a screen coordinate.
 *
 * When several targets are supplied, the handler uses the first available of
 * `element_id`, then `accessibility_id`, then (x, y).
 *
 * The handler throws `WdaElementNotFoundError` when the accessibility id
 * lookup yields no element, and `Error` when the input has no usable target
 * or the found element carries no id.
 */
export const wdaTapTool = {
  name: 'wda_tap',
  description:
    'Tap at coordinates or on an element. Supports tapping by (x, y) coordinates, accessibility ID, or element ID from wda_find.',
  inputSchema: wdaTapSchema,
  handler: async (input: WdaTapInput) => {
    validateInput(input);

    const options = { port: input.port };
    const sessionId = await getOrCreateSession(input.bundle_id, options);

    // Tap by element ID
    if (input.element_id) {
      await tapElement(sessionId, input.element_id, options);
      return textResult(`Tapped element with ID: ${input.element_id}`);
    }

    // Tap by accessibility ID: resolve it to an element first, then tap that.
    if (input.accessibility_id) {
      const element = await findElement(
        sessionId,
        'accessibility id',
        input.accessibility_id,
        options
      );
      if (!element) {
        throw new WdaElementNotFoundError('accessibility id', input.accessibility_id);
      }

      const elementId = getElementId(element);
      if (!elementId) {
        throw new Error('Invalid element response from WDA');
      }

      await tapElement(sessionId, elementId, options);
      return textResult(
        `Tapped element with accessibility ID: ${input.accessibility_id}`
      );
    }

    // Tap at coordinates. Guard explicitly instead of asserting non-null: an
    // empty-string id skips both branches above, and must not turn into a tap
    // at undefined coordinates.
    const { x, y } = input;
    if (x === undefined || y === undefined) {
      throw new Error(
        'Either (x, y) coordinates, accessibility_id, or element_id must be provided'
      );
    }

    await tap(sessionId, x, y, options);
    return textResult(`Tapped at coordinates (${x}, ${y})`);
  },
};