tap_text
Tap UI elements in iOS simulators by matching visible text labels to automate testing and interaction workflows.
Instructions
Find a UI element by visible text and tap its center.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| text | Yes | Visible text/label of the element to tap | |
| udid | No | Simulator UDID (optional, defaults to booted simulator) | |
Implementation Reference
- src/index.ts:896-940 (handler) The tapText handler function, which finds elements by text, chooses the best match, and performs the tap using idb.
/**
 * Finds a UI element by its visible text and taps it with `idb ui tap`.
 *
 * The UI tree is fetched, elements matching `text` are collected, and the best
 * match is turned into an ordered list of candidate tap points. Candidates are
 * tapped one after another; after every tap except the last, the UI tree is
 * re-read and compared with the pre-tap signature, and the loop stops as soon
 * as the signature changes. The last candidate is never probed, so
 * "(transition-detected)" is only reported when an earlier candidate caused
 * the change.
 *
 * @param text - Visible text/label used to look up the element.
 * @param udid - Optional simulator UDID; handed to `resolveUdid` to determine the target.
 * @returns MCP text content naming the tapped element, the coordinates and
 *   strategy of the last tap performed, and whether a transition was detected.
 * @throws McpError with `InvalidRequest` when no element matches `text`, and
 *   with `InternalError` for any other failure (including an empty candidate list).
 */
private async tapText(text: string, udid?: string) {
  // Delay between a tap and the follow-up UI tree read used to detect a change.
  const transitionProbeDelayMs = 350;

  const target = await resolveUdid(udid);
  try {
    const elements = await this.fetchUiTree(target);
    const matches = findElementsByText(elements, text);
    if (matches.length === 0) {
      throw new McpError(ErrorCode.InvalidRequest, `No element found with text matching "${text}"`);
    }

    const el = pickBestTextMatch(matches, text, elements);
    const screen = getScreenFrame(elements);
    const primary = inferTapPointForText(el, text, screen);
    const candidates = buildTapPointCandidates(el, primary, screen);
    const beforeSignature = this.buildUiSignature(elements);

    // Without candidates nothing would be tapped and `chosen` would be undefined;
    // fail with a clear message instead of an opaque TypeError further down.
    if (candidates.length === 0) {
      throw new McpError(
        ErrorCode.InternalError,
        `Failed to tap text: no tap point could be determined for "${text}"`,
      );
    }

    // The command below is a shell string, so the target is single-quote escaped.
    // NOTE(review): `resolveUdid` may already validate its input — confirm; the
    // quoting is harmless either way.
    const shellTarget = "'" + String(target).replace(/'/g, "'\\''") + "'";

    let chosen = candidates[0];
    let transitioned = false;
    for (let i = 0; i < candidates.length; i++) {
      const point = candidates[i];
      await execAsync(`idb ui tap --udid ${shellTarget} ${point.x} ${point.y}`);
      chosen = point;

      // Nothing left to fall back to after the final candidate, so skip the probe.
      const isLastCandidate = i === candidates.length - 1;
      if (isLastCandidate) break;

      await new Promise((resolve) => setTimeout(resolve, transitionProbeDelayMs));
      const afterProbe = await this.fetchUiTree(target);
      if (this.buildUiSignature(afterProbe) !== beforeSignature) {
        transitioned = true;
        break;
      }
    }

    return {
      content: [{
        type: 'text',
        text: `Tapped element "${el.label ?? el.value ?? text}" at (${chosen.x}, ${chosen.y}) on ${target} using ${chosen.strategy}${transitioned ? ' (transition-detected)' : ''}`,
      }],
    };
  } catch (error: unknown) {
    // Errors that are already MCP errors keep their original code and message.
    if (error instanceof McpError) throw error;
    // Non-Error throwables have no reliable `.message`; stringify them instead.
    const reason = error instanceof Error ? error.message : String(error);
    throw new McpError(ErrorCode.InternalError, `Failed to tap text: ${reason}`);
  }
}
- src/index.ts:460-471 (registration)Tool definition for 'tap_text'.
name: 'tap_text', description: 'Find a UI element by visible text and tap its center.', inputSchema: { type: 'object', properties: { text: { type: 'string', description: 'Visible text/label of the element to tap' }, udid: { type: 'string', description: 'Simulator UDID (optional, defaults to booted simulator)' }, }, required: ['text'], additionalProperties: false, }, },