Inject Input Events
inject-input
Simulate user input on Android devices: tap, type text, swipe, send key events, or click UI elements by resource-id or text. Automates interactions for testing and accessibility.
Instructions
Simulate user input interactions (tap, text, swipe, keyevents) or click by UI element.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| command | Yes | Input command type | |
| args | No | Arguments for the command (e.g. [x, y] for tap, ["text"] for text). Optional if elementId/elementText provided. | |
| elementId | No | Find element by resource-id and tap its center (e.g. "com.example:id/button") | |
| elementText | No | Find element by text content and tap its center (e.g. "Login") | |
| timeoutMs | No | Timeout in milliseconds (integer, 1000-20000) | 10000 |
Implementation Reference
- src/tools/deviceTool.js:116-205 (handler) The async handler function that executes the 'inject-input' tool logic. It processes the command (tap, text, swipe, keyevent, back, home), resolves element clicks by parsing the UI hierarchy XML, and runs the corresponding `adb shell input` command.
async (params) => { let { command, args } = params; const { elementId, elementText, timeoutMs } = params; args = args || []; // Logic to resolve element click if (elementId || elementText) { if (command !== 'tap') { throw new Error('elementId/elementText can only be used with command="tap".'); } // 1. Dump UI const devicePath = '/data/local/tmp/mcp_input_dump.xml'; await runAdbCommand(['shell', 'uiautomator', 'dump', devicePath], timeoutMs); const xmlContent = await runAdbCommand(['shell', 'cat', devicePath], timeoutMs); // 2. Find Node // Simple Regex search avoids heavy XML parser deps. // We look for a <node ... resource-id="..." ... bounds="..." /> or text="..." // Note: Attributes order isn't guaranteed, so we scan for the tag. let targetBounds = null; // We split by <node to iterate simpler const nodes = xmlContent.split('<node '); for (const nodeStr of nodes) { // Check if this node matches our criteria let matches = false; if (elementId && nodeStr.includes(`resource-id="${elementId}"`)) matches = true; if (elementText && nodeStr.includes(`text="${elementText}"`)) matches = true; if (matches) { // Extract bounds const boundsMatch = nodeStr.match(/bounds="(\[\d+,\d+\]\[\d+,\d+\])"/); if (boundsMatch) { targetBounds = boundsMatch[1]; break; // Found first match } } } if (!targetBounds) { throw new Error(`Could not find element with id="${elementId}" or text="${elementText}" in current UI.`); } const center = getCenterFromBounds(targetBounds); if (!center) { throw new Error(`Invalid bounds found: ${targetBounds}`); } // 3. 
Update args to be a tap at these coordinates args = [String(center.x), String(center.y)]; } // Check args for standard commands let adbArgs = ['shell', 'input']; switch (command) { case 'tap': if (args.length !== 2) throw new Error('tap requires x and y coordinates (or use elementId/elementText)'); adbArgs.push('tap', args[0], args[1]); break; case 'text': if (args.length !== 1) throw new Error('text requires a single string argument'); let safeText = String(args[0]).replace(/\s/g, '%s'); adbArgs.push('text', safeText); break; case 'swipe': if (args.length < 4) throw new Error('swipe requires at least x1, y1, x2, y2'); adbArgs.push('swipe', ...args); break; case 'keyevent': case 'back': case 'home': // Allow command='back' without args to mean keyevent 4 if (command === 'back') { adbArgs.push('keyevent', '4'); } else if (command === 'home') { adbArgs.push('keyevent', '3'); } else { if (args.length < 1) throw new Error('keyevent requires keycode'); adbArgs.push('keyevent', ...args); } break; default: throw new Error(`Unknown command: ${command}`); } await runAdbCommand(adbArgs, timeoutMs); return { content: [{ type: 'text', text: `Executed input ${command} ${JSON.stringify(args)}` }] }; } ); - src/tools/deviceTool.js:25-31 (schema)Zod schema (injectInputSchema) defining the input validation for the 'inject-input' tool: command (enum of tap/text/swipe/keyevent/back/home), optional args array, optional elementId, optional elementText, and timeoutMs.
const injectInputSchema = z.object({ command: z.enum(['tap', 'text', 'swipe', 'keyevent', 'back', 'home']).describe('Input command type'), args: z.array(z.string().or(z.number())).optional().describe('Arguments for the command (e.g. [x, y] for tap, ["text"] for text). Optional if elementId/elementText provided.'), elementId: z.string().optional().describe('Find element by resource-id and tap its center (e.g. "com.example:id/button")'), elementText: z.string().optional().describe('Find element by text content and tap its center (e.g. "Login")'), timeoutMs: z.number().int().min(1000).max(20000).default(10000).describe('Timeout in milliseconds') }); - src/tools/deviceTool.js:109-205 (registration)Registration of the 'inject-input' tool via server.registerTool() with its title, description, inputSchema, and handler.
server.registerTool( 'inject-input', { title: 'Inject Input Events', description: 'Simulate user input interactions (tap, text, swipe, keyevents) or click by UI element.', inputSchema: injectInputSchema }, async (params) => { let { command, args } = params; const { elementId, elementText, timeoutMs } = params; args = args || []; // Logic to resolve element click if (elementId || elementText) { if (command !== 'tap') { throw new Error('elementId/elementText can only be used with command="tap".'); } // 1. Dump UI const devicePath = '/data/local/tmp/mcp_input_dump.xml'; await runAdbCommand(['shell', 'uiautomator', 'dump', devicePath], timeoutMs); const xmlContent = await runAdbCommand(['shell', 'cat', devicePath], timeoutMs); // 2. Find Node // Simple Regex search avoids heavy XML parser deps. // We look for a <node ... resource-id="..." ... bounds="..." /> or text="..." // Note: Attributes order isn't guaranteed, so we scan for the tag. let targetBounds = null; // We split by <node to iterate simpler const nodes = xmlContent.split('<node '); for (const nodeStr of nodes) { // Check if this node matches our criteria let matches = false; if (elementId && nodeStr.includes(`resource-id="${elementId}"`)) matches = true; if (elementText && nodeStr.includes(`text="${elementText}"`)) matches = true; if (matches) { // Extract bounds const boundsMatch = nodeStr.match(/bounds="(\[\d+,\d+\]\[\d+,\d+\])"/); if (boundsMatch) { targetBounds = boundsMatch[1]; break; // Found first match } } } if (!targetBounds) { throw new Error(`Could not find element with id="${elementId}" or text="${elementText}" in current UI.`); } const center = getCenterFromBounds(targetBounds); if (!center) { throw new Error(`Invalid bounds found: ${targetBounds}`); } // 3. 
Update args to be a tap at these coordinates args = [String(center.x), String(center.y)]; } // Check args for standard commands let adbArgs = ['shell', 'input']; switch (command) { case 'tap': if (args.length !== 2) throw new Error('tap requires x and y coordinates (or use elementId/elementText)'); adbArgs.push('tap', args[0], args[1]); break; case 'text': if (args.length !== 1) throw new Error('text requires a single string argument'); let safeText = String(args[0]).replace(/\s/g, '%s'); adbArgs.push('text', safeText); break; case 'swipe': if (args.length < 4) throw new Error('swipe requires at least x1, y1, x2, y2'); adbArgs.push('swipe', ...args); break; case 'keyevent': case 'back': case 'home': // Allow command='back' without args to mean keyevent 4 if (command === 'back') { adbArgs.push('keyevent', '4'); } else if (command === 'home') { adbArgs.push('keyevent', '3'); } else { if (args.length < 1) throw new Error('keyevent requires keycode'); adbArgs.push('keyevent', ...args); } break; default: throw new Error(`Unknown command: ${command}`); } await runAdbCommand(adbArgs, timeoutMs); return { content: [{ type: 'text', text: `Executed input ${command} ${JSON.stringify(args)}` }] }; } ); - src/tools/deviceTool.js:34-45 (helper)Helper function getCenterFromBounds(bounds) that parses a bounds string like '[x1,y1][x2,y2]' and returns the center point coordinates, used when resolving element clicks.
function getCenterFromBounds(bounds) { const match = bounds.match(/\[(\d+),(\d+)\]\[(\d+),(\d+)\]/); if (!match) return null; const x1 = parseInt(match[1], 10); const y1 = parseInt(match[2], 10); const x2 = parseInt(match[3], 10); const y2 = parseInt(match[4], 10); return { x: Math.round((x1 + x2) / 2), y: Math.round((y1 + y2) / 2) }; } - src/index.js:30-30 (registration)Top-level registration call that wires up the deviceTool module (including inject-input) to the MCP server.
// Wire the deviceTool module (which includes inject-input) to the MCP server.
registerDeviceTool(server);