Skip to main content
Glama
interact-with-ui.ts (11.5 kB)
/**
 * interact_with_ui Tool Handler
 * MCP tool for performing UI interactions (tap, swipe, input text)
 */

import { isPlatform, InteractionType, INTERACTION_TYPES, SwipeDirection } from '../../models/constants.js';
import { InteractionResult, Point, UIElement } from '../../models/ui-context.js';
import { Errors } from '../../models/errors.js';
import { tap, inputText, swipe, dumpUiHierarchy } from '../../platforms/android/adb.js';
import { listDevices as listIOSDevices, getBootedDevice } from '../../platforms/ios/simctl.js';
import { executeShell } from '../../utils/shell.js';
import { parseAndroidHierarchy, findElementInHierarchy } from '../../utils/xml-parser.js';
import { getToolRegistry, createInputSchema } from '../register.js';

/**
 * Input arguments for interact_with_ui tool
 */
export interface InteractWithUIArgs {
  /** Target platform */
  platform: string;
  /** Interaction type */
  action: string;
  /** Target element ID or text (for element-based interactions) */
  element?: string;
  /** X coordinate (for coordinate-based interactions) */
  x?: number;
  /** Y coordinate (for coordinate-based interactions) */
  y?: number;
  /** Text to input (for input_text action) */
  text?: string;
  /** Swipe direction (for swipe action) */
  direction?: string;
  /** Duration in ms (for long_press and swipe) */
  durationMs?: number;
  /** Target device ID or name */
  deviceId?: string;
}

/** Options shared by the per-platform interaction helpers. */
interface InteractionOptions {
  text?: string;
  direction?: SwipeDirection;
  durationMs: number;
  deviceId?: string;
}

/**
 * Interact with UI tool handler.
 *
 * Validates `platform` and `action`, resolves the target coordinates (from
 * `element` if given, otherwise from `x`/`y`), then dispatches to the
 * platform-specific helper.
 *
 * @param args - Tool arguments; `durationMs` defaults to 300.
 * @returns A result with `success: true` when the interaction ran, or
 *   `success: false` plus an `error` message when the platform helper threw.
 * @throws An invalid-arguments error for an unknown platform or action, or
 *   when neither an element nor coordinates are given for an action other
 *   than `input_text` / `clear`; an element-not-found error when `element`
 *   cannot be located. These are thrown rather than returned because they
 *   happen before the interaction is attempted.
 */
export async function interactWithUI(args: InteractWithUIArgs): Promise<InteractionResult> {
  const {
    platform,
    action,
    element,
    x,
    y,
    text,
    direction,
    durationMs = 300,
    deviceId,
  } = args;

  // Validate platform
  if (!isPlatform(platform)) {
    throw Errors.invalidArguments(`Invalid platform: ${platform}. Must be 'android' or 'ios'`);
  }

  // Validate action
  if (!INTERACTION_TYPES.includes(action as InteractionType)) {
    throw Errors.invalidArguments(
      `Invalid action: ${action}. Must be one of: ${INTERACTION_TYPES.join(', ')}`
    );
  }

  const interactionType = action as InteractionType;
  const startTime = Date.now();

  // Determine target coordinates
  let targetCoords: Point;
  let targetElement: UIElement | undefined;

  if (element) {
    // Find element by ID or text
    const foundElement = await findTargetElement(platform, element, deviceId);
    if (!foundElement) {
      throw Errors.elementNotFound(element);
    }
    targetElement = foundElement;
    targetCoords = foundElement.center;
  } else if (x !== undefined && y !== undefined) {
    targetCoords = { x, y };
  } else if (interactionType !== 'input_text' && interactionType !== 'clear') {
    throw Errors.invalidArguments('Either element or coordinates (x, y) must be provided');
  } else {
    // For input_text and clear, we operate on the focused element
    targetCoords = { x: 0, y: 0 };
  }

  // The direction string is only cast here; calculateSwipeCoordinates rejects
  // values it does not recognise, so a bad value surfaces as a failed result.
  const options: InteractionOptions = {
    text,
    direction: direction as SwipeDirection,
    durationMs,
    deviceId,
  };

  // Perform interaction
  try {
    if (platform === 'android') {
      await performAndroidInteraction(interactionType, targetCoords, options);
    } else {
      await performIOSInteraction(interactionType, targetCoords, options);
    }

    return {
      success: true,
      interactionType: action,
      targetElement: targetElement
        ? {
            id: targetElement.id,
            type: targetElement.type,
            bounds: targetElement.bounds,
          }
        : undefined,
      coordinates: targetCoords,
      durationMs: Date.now() - startTime,
    };
  } catch (error) {
    return {
      success: false,
      interactionType: action,
      coordinates: targetCoords,
      durationMs: Date.now() - startTime,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}

/**
 * Find target element on device.
 *
 * On Android this dumps the UI hierarchy and searches the flattened element
 * list. On any other platform it logs a warning and returns `undefined`.
 *
 * @param platform - Already-validated platform name.
 * @param elementQuery - Element ID or text to look for.
 * @param deviceId - Optional target device.
 * @returns The matching element, or `undefined` when none is found.
 */
async function findTargetElement(
  platform: string,
  elementQuery: string,
  deviceId?: string
): Promise<UIElement | undefined> {
  if (platform === 'android') {
    // Dump UI hierarchy and find element
    const hierarchyXml = await dumpUiHierarchy(deviceId);
    const elements = await parseAndroidHierarchy(hierarchyXml, { flatten: true });
    return findElementInHierarchy(elements, elementQuery);
  } else {
    // iOS element finding - simplified implementation
    // Full implementation would require XCTest integration
    console.warn('[interact_with_ui] iOS element finding requires XCTest integration');
    return undefined;
  }
}

/**
 * Perform Android UI interaction.
 *
 * @param action - Interaction to perform.
 * @param coords - Target point; the centre of the gesture for `swipe`.
 * @param options - Text, swipe direction, duration and optional device.
 * @throws An invalid-arguments error when `direction` is missing or
 *   unrecognised for `swipe`, when `text` is empty for `input_text`, or when
 *   the action has no Android implementation.
 */
async function performAndroidInteraction(
  action: InteractionType,
  coords: Point,
  options: InteractionOptions
): Promise<void> {
  const { text, direction, durationMs, deviceId } = options;
  const deviceArgs = deviceId ? ['-s', deviceId] : [];

  switch (action) {
    case 'tap':
      await tap(coords.x, coords.y, deviceId);
      break;

    case 'long_press':
      // Implement as swipe with same start and end
      await swipe(coords.x, coords.y, coords.x, coords.y, durationMs, deviceId);
      break;

    case 'swipe': {
      if (!direction) {
        throw Errors.invalidArguments('direction is required for swipe action');
      }
      const swipeCoords = calculateSwipeCoordinates(coords, direction, 500);
      await swipe(
        swipeCoords.startX,
        swipeCoords.startY,
        swipeCoords.endX,
        swipeCoords.endY,
        durationMs,
        deviceId
      );
      break;
    }

    case 'input_text':
      if (!text) {
        throw Errors.invalidArguments('text is required for input_text action');
      }
      await inputText(text, deviceId);
      break;

    case 'clear':
      // Select all and delete
      // NOTE(review): KEYCODE_CTRL_A does not appear among Android's documented
      // KeyEvent key codes (there are KEYCODE_CTRL_LEFT/RIGHT and KEYCODE_A) —
      // confirm this really selects all; if not, only one character is deleted.
      await executeShell('adb', [...deviceArgs, 'shell', 'input', 'keyevent', 'KEYCODE_CTRL_A']);
      await executeShell('adb', [...deviceArgs, 'shell', 'input', 'keyevent', 'KEYCODE_DEL']);
      break;

    default:
      // Previously an unhandled action fell through and was reported as success.
      throw Errors.invalidArguments(`Unsupported action for android: ${String(action)}`);
  }
}

/**
 * Resolve the UDID of the iOS simulator to act on.
 *
 * @param deviceId - Device ID or name; when omitted the booted device is used.
 * @returns The device's ID.
 * @throws A device-not-found error when `deviceId` matches no listed device,
 *   or an invalid-arguments error when no booted device is reported.
 */
async function resolveIOSDeviceUdid(deviceId?: string): Promise<string> {
  if (deviceId) {
    const devices = await listIOSDevices();
    const found = devices.find((d) => d.id === deviceId || d.name === deviceId);
    if (!found) {
      throw Errors.deviceNotFound(deviceId, devices.map((d) => `${d.id} (${d.name})`));
    }
    return found.id;
  }

  const booted = await getBootedDevice();
  if (!booted) {
    throw Errors.invalidArguments('No running iOS simulator found');
  }
  return booted.id;
}

/**
 * Perform iOS UI interaction
 * Note: iOS simulator doesn't support direct touch input via simctl.
 * For UI automation, use Maestro (run_maestro_flow) instead.
 *
 * @param action - Interaction to perform; only `input_text` and `clear` are
 *   implemented.
 * @param _coords - Unused: simctl has no coordinate-based touch input.
 * @param options - Text and optional device; other fields are ignored.
 * @throws An invalid-arguments error for `tap`, `long_press`, `swipe` and any
 *   unknown action, when `text` is empty for `input_text`, or when no
 *   simulator can be resolved.
 */
async function performIOSInteraction(
  action: InteractionType,
  _coords: Point, // Coords unused - iOS simctl doesn't support coordinate-based touch
  options: InteractionOptions
): Promise<void> {
  const { text, deviceId } = options;

  // iOS simctl doesn't support direct touch interactions (tap, swipe, long_press)
  // For these, use Maestro via run_maestro_flow tool instead.
  // Checked before the device lookup so unsupported actions fail fast.
  if (action === 'tap' || action === 'long_press' || action === 'swipe') {
    throw Errors.invalidArguments(
      `iOS simulator doesn't support direct ${action} via simctl. ` +
        `Use run_maestro_flow tool for iOS UI automation instead.`
    );
  }

  // Get target device UDID
  const udid = await resolveIOSDeviceUdid(deviceId);

  switch (action) {
    case 'input_text':
      if (!text) {
        throw Errors.invalidArguments('text is required for input_text action');
      }
      // Copy the text to the host clipboard. The text is handed to bash as a
      // positional parameter ($1) instead of being spliced into the script, so
      // quotes, `$(...)`, backticks and backslashes in it are never interpreted.
      // Assumes executeShell passes the array through as argv — the original
      // call already relied on that for the '-c' script argument.
      await executeShell('bash', ['-c', 'printf %s "$1" | pbcopy', 'bash', text]);
      // Sync the host pasteboard into the simulator. pbsync takes
      // <source> <destination>; the previous order (udid, host) copied the
      // simulator's pasteboard over the text that had just been placed on the host.
      await executeShell('xcrun', ['simctl', 'pbsync', 'host', udid]);
      // Trigger paste via keyboard shortcut using AppleScript
      await executeShell('osascript', [
        '-e',
        'tell application "Simulator" to activate',
        '-e',
        'tell application "System Events" to keystroke "v" using command down',
      ]);
      break;

    case 'clear':
      // Select all and delete via AppleScript
      await executeShell('osascript', [
        '-e',
        'tell application "Simulator" to activate',
        '-e',
        'tell application "System Events" to keystroke "a" using command down',
        '-e',
        'tell application "System Events" to key code 51', // Delete key
      ]);
      break;

    default:
      // Previously an unhandled action fell through and was reported as success.
      throw Errors.invalidArguments(`Unsupported action for ios: ${String(action)}`);
  }
}

/**
 * Calculate swipe coordinates based on direction.
 *
 * The gesture is centred on `center` and spans `distance` along one axis:
 * `up` moves from larger y to smaller y, `left` from larger x to smaller x,
 * and `down` / `right` the reverse.
 *
 * @param center - Midpoint of the swipe.
 * @param direction - Direction of travel.
 * @param distance - Total length of the swipe.
 * @returns Start and end points of the swipe.
 * @throws An invalid-arguments error when `direction` is not one of the four
 *   handled values (it originates from an unvalidated string cast).
 */
function calculateSwipeCoordinates(
  center: Point,
  direction: SwipeDirection,
  distance: number
): { startX: number; startY: number; endX: number; endY: number } {
  const half = distance / 2;

  switch (direction) {
    case 'up':
      return {
        startX: center.x,
        startY: center.y + half,
        endX: center.x,
        endY: center.y - half,
      };
    case 'down':
      return {
        startX: center.x,
        startY: center.y - half,
        endX: center.x,
        endY: center.y + half,
      };
    case 'left':
      return {
        startX: center.x + half,
        startY: center.y,
        endX: center.x - half,
        endY: center.y,
      };
    case 'right':
      return {
        startX: center.x - half,
        startY: center.y,
        endX: center.x + half,
        endY: center.y,
      };
    default:
      // Without this the function returned undefined for an unknown direction
      // and the caller failed with an opaque TypeError.
      throw Errors.invalidArguments(
        `Invalid swipe direction: ${String(direction)}. Must be one of: up, down, left, right`
      );
  }
}

/**
 * Register the interact_with_ui tool
 *
 * Registers the tool's description, input schema (`platform` and `action`
 * required) and a handler that forwards the arguments to `interactWithUI`.
 */
export function registerInteractWithUITool(): void {
  getToolRegistry().register(
    'interact_with_ui',
    {
      description:
        'Perform UI interactions like tap, swipe, or text input. Can target elements by ID/text or by coordinates.',
      inputSchema: createInputSchema(
        {
          platform: {
            type: 'string',
            enum: ['android', 'ios'],
            description: 'Target platform',
          },
          action: {
            type: 'string',
            enum: ['tap', 'long_press', 'swipe', 'input_text', 'clear'],
            description: 'Type of interaction to perform',
          },
          element: {
            type: 'string',
            description: 'Element ID, resource ID, or text to interact with',
          },
          x: {
            type: 'number',
            description: 'X coordinate for coordinate-based interaction',
          },
          y: {
            type: 'number',
            description: 'Y coordinate for coordinate-based interaction',
          },
          text: {
            type: 'string',
            description: 'Text to input (for input_text action)',
          },
          direction: {
            type: 'string',
            enum: ['up', 'down', 'left', 'right'],
            description: 'Swipe direction (for swipe action)',
          },
          durationMs: {
            type: 'number',
            description: 'Duration in milliseconds (for long_press and swipe, default: 300)',
          },
          deviceId: {
            type: 'string',
            description: 'Device ID or name (optional)',
          },
        },
        ['platform', 'action']
      ),
    },
    // The registry hands over untyped arguments; interactWithUI validates
    // platform and action itself before using them.
    (args) => interactWithUI(args as unknown as InteractWithUIArgs)
  );
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/abd3lraouf/specter-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.