inject-input
Simulate user interactions on Android devices by injecting taps, text input, swipes, and key events through ADB integration for automated testing and development workflows.
Instructions
Simulate user input interactions (tap, text, swipe, keyevents) or click by UI element.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| command | Yes | Input command type | |
| args | No | Arguments for the command (e.g. [x, y] for tap, ["text"] for text). Optional if elementId/elementText provided. | |
| elementId | No | Find element by resource-id and tap its center (e.g. "com.example:id/button") | |
| elementText | No | Find element by text content and tap its center (e.g. "Login") | |
| timeoutMs | No | Timeout in milliseconds (integer, 1000–20000) | 10000 |
Implementation Reference
- src/tools/deviceTool.js:116-204 (handler) Main handler for 'inject-input' tool. Parses input params, optionally finds UI element coordinates via uiautomator dump and regex search, computes tap center, then executes corresponding ADB 'input' shell command (tap, text, swipe, keyevent).

  ```js
  async (params) => {
    let { command, args } = params;
    const { elementId, elementText, timeoutMs } = params;
    args = args || [];

    // Logic to resolve element click
    if (elementId || elementText) {
      if (command !== 'tap') {
        throw new Error('elementId/elementText can only be used with command="tap".');
      }

      // 1. Dump UI
      const devicePath = '/data/local/tmp/mcp_input_dump.xml';
      await runAdbCommand(['shell', 'uiautomator', 'dump', devicePath], timeoutMs);
      const xmlContent = await runAdbCommand(['shell', 'cat', devicePath], timeoutMs);

      // 2. Find Node
      // Simple Regex search avoids heavy XML parser deps.
      // We look for a <node ... resource-id="..." ... bounds="..." /> or text="..."
      // Note: Attributes order isn't guaranteed, so we scan for the tag.
      let targetBounds = null;

      // We split by <node to iterate simpler
      const nodes = xmlContent.split('<node ');
      for (const nodeStr of nodes) {
        // Check if this node matches our criteria
        let matches = false;
        if (elementId && nodeStr.includes(`resource-id="${elementId}"`)) matches = true;
        if (elementText && nodeStr.includes(`text="${elementText}"`)) matches = true;

        if (matches) {
          // Extract bounds
          const boundsMatch = nodeStr.match(/bounds="(\[\d+,\d+\]\[\d+,\d+\])"/);
          if (boundsMatch) {
            targetBounds = boundsMatch[1];
            break; // Found first match
          }
        }
      }

      if (!targetBounds) {
        throw new Error(`Could not find element with id="${elementId}" or text="${elementText}" in current UI.`);
      }

      const center = getCenterFromBounds(targetBounds);
      if (!center) {
        throw new Error(`Invalid bounds found: ${targetBounds}`);
      }

      // 3. Update args to be a tap at these coordinates
      args = [String(center.x), String(center.y)];
    }

    // Check args for standard commands
    let adbArgs = ['shell', 'input'];

    switch (command) {
      case 'tap':
        if (args.length !== 2) throw new Error('tap requires x and y coordinates (or use elementId/elementText)');
        adbArgs.push('tap', args[0], args[1]);
        break;
      case 'text':
        if (args.length !== 1) throw new Error('text requires a single string argument');
        let safeText = String(args[0]).replace(/\s/g, '%s');
        adbArgs.push('text', safeText);
        break;
      case 'swipe':
        if (args.length < 4) throw new Error('swipe requires at least x1, y1, x2, y2');
        adbArgs.push('swipe', ...args);
        break;
      case 'keyevent':
      case 'back':
      case 'home':
        // Allow command='back' without args to mean keyevent 4
        if (command === 'back') {
          adbArgs.push('keyevent', '4');
        } else if (command === 'home') {
          adbArgs.push('keyevent', '3');
        } else {
          if (args.length < 1) throw new Error('keyevent requires keycode');
          adbArgs.push('keyevent', ...args);
        }
        break;
      default:
        throw new Error(`Unknown command: ${command}`);
    }

    await runAdbCommand(adbArgs, timeoutMs);
    return { content: [{ type: 'text', text: `Executed input ${command} ${JSON.stringify(args)}` }] };
  }
  ```
- src/tools/deviceTool.js:25-31 (schema) Zod input schema defining parameters for inject-input: command type, optional args or element selectors, timeout.

  ```js
  const injectInputSchema = z.object({
    command: z.enum(['tap', 'text', 'swipe', 'keyevent', 'back', 'home']).describe('Input command type'),
    args: z.array(z.string().or(z.number())).optional().describe('Arguments for the command (e.g. [x, y] for tap, ["text"] for text). Optional if elementId/elementText provided.'),
    elementId: z.string().optional().describe('Find element by resource-id and tap its center (e.g. "com.example:id/button")'),
    elementText: z.string().optional().describe('Find element by text content and tap its center (e.g. "Login")'),
    timeoutMs: z.number().int().min(1000).max(20000).default(10000).describe('Timeout in milliseconds')
  });
  ```
- src/tools/deviceTool.js:109-205 (registration) Registers the 'inject-input' tool on the MCP server with title, description, inputSchema, and handler function inside registerDeviceTool.

  ```js
  server.registerTool(
    'inject-input',
    {
      title: 'Inject Input Events',
      description: 'Simulate user input interactions (tap, text, swipe, keyevents) or click by UI element.',
      inputSchema: injectInputSchema
    },
    async (params) => {
      let { command, args } = params;
      const { elementId, elementText, timeoutMs } = params;
      args = args || [];

      // Logic to resolve element click
      if (elementId || elementText) {
        if (command !== 'tap') {
          throw new Error('elementId/elementText can only be used with command="tap".');
        }

        // 1. Dump UI
        const devicePath = '/data/local/tmp/mcp_input_dump.xml';
        await runAdbCommand(['shell', 'uiautomator', 'dump', devicePath], timeoutMs);
        const xmlContent = await runAdbCommand(['shell', 'cat', devicePath], timeoutMs);

        // 2. Find Node
        // Simple Regex search avoids heavy XML parser deps.
        // We look for a <node ... resource-id="..." ... bounds="..." /> or text="..."
        // Note: Attributes order isn't guaranteed, so we scan for the tag.
        let targetBounds = null;

        // We split by <node to iterate simpler
        const nodes = xmlContent.split('<node ');
        for (const nodeStr of nodes) {
          // Check if this node matches our criteria
          let matches = false;
          if (elementId && nodeStr.includes(`resource-id="${elementId}"`)) matches = true;
          if (elementText && nodeStr.includes(`text="${elementText}"`)) matches = true;

          if (matches) {
            // Extract bounds
            const boundsMatch = nodeStr.match(/bounds="(\[\d+,\d+\]\[\d+,\d+\])"/);
            if (boundsMatch) {
              targetBounds = boundsMatch[1];
              break; // Found first match
            }
          }
        }

        if (!targetBounds) {
          throw new Error(`Could not find element with id="${elementId}" or text="${elementText}" in current UI.`);
        }

        const center = getCenterFromBounds(targetBounds);
        if (!center) {
          throw new Error(`Invalid bounds found: ${targetBounds}`);
        }

        // 3. Update args to be a tap at these coordinates
        args = [String(center.x), String(center.y)];
      }

      // Check args for standard commands
      let adbArgs = ['shell', 'input'];

      switch (command) {
        case 'tap':
          if (args.length !== 2) throw new Error('tap requires x and y coordinates (or use elementId/elementText)');
          adbArgs.push('tap', args[0], args[1]);
          break;
        case 'text':
          if (args.length !== 1) throw new Error('text requires a single string argument');
          let safeText = String(args[0]).replace(/\s/g, '%s');
          adbArgs.push('text', safeText);
          break;
        case 'swipe':
          if (args.length < 4) throw new Error('swipe requires at least x1, y1, x2, y2');
          adbArgs.push('swipe', ...args);
          break;
        case 'keyevent':
        case 'back':
        case 'home':
          // Allow command='back' without args to mean keyevent 4
          if (command === 'back') {
            adbArgs.push('keyevent', '4');
          } else if (command === 'home') {
            adbArgs.push('keyevent', '3');
          } else {
            if (args.length < 1) throw new Error('keyevent requires keycode');
            adbArgs.push('keyevent', ...args);
          }
          break;
        default:
          throw new Error(`Unknown command: ${command}`);
      }

      await runAdbCommand(adbArgs, timeoutMs);
      return { content: [{ type: 'text', text: `Executed input ${command} ${JSON.stringify(args)}` }] };
    }
  );
  ```
- src/tools/deviceTool.js:34-45 (helper) Helper function to parse UI node bounds string like '[x1,y1][x2,y2]' and compute center coordinates for tapping.

  ```js
  function getCenterFromBounds(bounds) {
    const match = bounds.match(/\[(\d+),(\d+)\]\[(\d+),(\d+)\]/);
    if (!match) return null;
    const x1 = parseInt(match[1], 10);
    const y1 = parseInt(match[2], 10);
    const x2 = parseInt(match[3], 10);
    const y2 = parseInt(match[4], 10);
    return {
      x: Math.round((x1 + x2) / 2),
      y: Math.round((y1 + y2) / 2)
    };
  }
  ```
- src/index.js:30-30 (registration) Calls registerDeviceTool(server) which internally registers 'inject-input' among other device tools.

  ```js
  registerDeviceTool(server);
  ```