humanizer_click
Click page elements using CSS/XPath selectors, ARIA roles, visible text, form labels, or screen coordinates. Supports left, right, middle, and double clicks with configurable wait time.
Instructions
Click an element. Pass one of: selector (CSS/XPath), role + optional name, text, label, or raw x+y coords as fallback. Locator-based calls auto-wait for visible.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| target_id | Yes | Target ID from interceptor_browser_launch or interceptor_camoufox_launch | |
| selector | No | CSS or XPath selector (e.g. 'button.submit', '//button[@id="go"]') | |
| role | No | ARIA role (e.g. 'button', 'link', 'textbox') | |
| name | No | Accessible name; used with role (e.g. 'Sign in') | |
| text | No | Visible text to match (e.g. 'Accept cookies') | |
| label | No | Form-field label text (e.g. 'Email address') | |
| x | No | X coordinate fallback when no locator is given | |
| y | No | Y coordinate fallback when no locator is given | |
| button | No | Mouse button (default: left) | left |
| click_count | No | Number of clicks (default: 1, use 2 for double-click) | 1 |
| timeout_ms | No | Max ms to wait for locator to be visible + actionable (default: 15000) | 15000 |
Implementation Reference
- src/tools/humanizer.ts:54-107 (registration)Registration of the 'humanizer_click' MCP tool via server.tool(), including its Zod schema for input validation and the async handler that delegates to humanizerEngine.click()
// Registers the `humanizer_click` MCP tool.
//
// The handler forwards the validated arguments to humanizerEngine.click(),
// translating the snake_case tool parameters (click_count, timeout_ms) into the
// engine's camelCase options, and serialises the outcome as a single JSON text
// content item.
//
// On failure the JSON payload keeps its `status: "error"` shape, and the result
// is additionally flagged with `isError: true` so MCP clients can tell a failed
// click from a successful one without parsing the text payload.
server.tool(
  "humanizer_click",
  "Click an element. Pass one of: selector (CSS/XPath), role + optional name, " +
    "text, label, or raw x+y coords as fallback. Locator-based calls auto-wait for visible.",
  {
    target_id: z.string().describe("Target ID from interceptor_browser_launch or interceptor_camoufox_launch"),
    selector: z.string().optional().describe("CSS or XPath selector (e.g. 'button.submit', '//button[@id=\"go\"]')"),
    role: z.string().optional().describe("ARIA role (e.g. 'button', 'link', 'textbox')"),
    name: z.string().optional().describe("Accessible name; used with role (e.g. 'Sign in')"),
    text: z.string().optional().describe("Visible text to match (e.g. 'Accept cookies')"),
    label: z.string().optional().describe("Form-field label text (e.g. 'Email address')"),
    x: z.number().optional().describe("X coordinate fallback when no locator is given"),
    y: z.number().optional().describe("Y coordinate fallback when no locator is given"),
    button: z.enum(["left", "right", "middle"]).optional().default("left")
      .describe("Mouse button (default: left)"),
    click_count: z.number().optional().default(1)
      .describe("Number of clicks (default: 1, use 2 for double-click)"),
    timeout_ms: z.number().optional().default(15000)
      .describe("Max ms to wait for locator to be visible + actionable (default: 15000)"),
  },
  async ({ target_id, selector, role, name, text, label, x, y, button, click_count, timeout_ms }) => {
    try {
      const result = await humanizerEngine.click(target_id, {
        selector,
        role,
        name,
        text,
        label,
        x,
        y,
        button,
        clickCount: click_count,
        timeoutMs: timeout_ms,
      });
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            status: "success",
            target_id,
            action: "click",
            resolved_by: result.resolvedBy,
            clicked_at: result.clickedAt,
            button,
            click_count,
            stats: { total_ms: result.totalMs, events_dispatched: result.eventsDispatched },
          }),
        }],
      };
    } catch (e) {
      return {
        // Mark the result as a tool-level error; the payload itself is unchanged.
        isError: true,
        content: [{
          type: "text",
          text: JSON.stringify({ status: "error", target_id, action: "click", error: errorToString(e) }),
        }],
      };
    }
  },
);
- src/humanizer/engine.ts:93-135 (handler)The core click() method on HumanizerEngine that resolves the element via locator (selector, role, text, label) or coordinate fallback, performs the Playwright click, and returns stats
/**
 * Clicks an element on the page bound to `targetId`.
 *
 * A locator built from `selector`, `role` (+ `name`), `text` or `label` takes
 * precedence; raw `x`/`y` coordinates are only used when no locator option is
 * present.
 *
 * @param targetId - Target id handed to getPageForTarget().
 * @param opts - What to click plus `button` (default "left"), `clickCount`
 *   (default 1) and `timeoutMs` (default 15000; applied to the locator click only).
 * @returns Elapsed milliseconds, the number of click calls issued (always 1),
 *   the point recorded for the click and which option resolved the target.
 * @throws Error when neither a locator option nor both coordinates are given.
 */
async click(
  targetId: string,
  opts: ClickTarget & {
    button?: "left" | "right" | "middle";
    clickCount?: number;
    timeoutMs?: number;
  } = {},
): Promise<{ totalMs: number; eventsDispatched: number; clickedAt: Point; resolvedBy: string }> {
  const page = await getPageForTarget(targetId);
  const mouseButton = opts.button ?? "left";
  const clicks = opts.clickCount ?? 1;
  const waitLimit = opts.timeoutMs ?? 15_000;
  const startedAt = Date.now();
  const resolvedBy = resolvedByLabel(opts);
  const target = resolveLocator(page, opts);

  // Stores the click position in the per-target mouse state and builds the result.
  const finish = (at: Point) => {
    const mouse = getMouseState(targetId);
    mouse.x = at.x;
    mouse.y = at.y;
    return { totalMs: Date.now() - startedAt, eventsDispatched: 1, clickedAt: at, resolvedBy };
  };

  if (target) {
    await target.click({ button: mouseButton, clickCount: clicks, timeout: waitLimit });
    // The box is read after the click; if it cannot be obtained the position
    // is reported as the origin rather than failing the whole call.
    const box = await target.boundingBox({ timeout: 5_000 }).catch(() => null);
    let center: Point = { x: 0, y: 0 };
    if (box) {
      center = { x: box.x + box.width / 2, y: box.y + box.height / 2 };
    }
    return finish(center);
  }

  const { x, y } = opts;
  if (x === undefined || y === undefined) {
    throw new Error("Provide one of: selector, role (+ name), text, label, or x+y coordinates.");
  }

  await page.mouse.click(x, y, { button: mouseButton, clickCount: clicks });
  return finish({ x, y });
}
- src/tools/humanizer.ts:58-73 (schema)Zod input schema for humanizer_click tool: target_id, selector, role, name, text, label, x, y, button, click_count, timeout_ms
// Zod shape used as the input schema of the humanizer_click tool. Only target_id is
// required; every other field is optional, and button, click_count and timeout_ms
// fall back to "left", 1 and 15000 respectively through their .default() calls.
{ target_id: z.string().describe("Target ID from interceptor_browser_launch or interceptor_camoufox_launch"), selector: z.string().optional().describe("CSS or XPath selector (e.g. 'button.submit', '//button[@id=\"go\"]')"), role: z.string().optional().describe("ARIA role (e.g. 'button', 'link', 'textbox')"), name: z.string().optional().describe("Accessible name; used with role (e.g. 'Sign in')"), text: z.string().optional().describe("Visible text to match (e.g. 'Accept cookies')"), label: z.string().optional().describe("Form-field label text (e.g. 'Email address')"), x: z.number().optional().describe("X coordinate fallback when no locator is given"), y: z.number().optional().describe("Y coordinate fallback when no locator is given"), button: z.enum(["left", "right", "middle"]).optional().default("left") .describe("Mouse button (default: left)"), click_count: z.number().optional().default(1) .describe("Number of clicks (default: 1, use 2 for double-click)"), timeout_ms: z.number().optional().default(15000) .describe("Max ms to wait for locator to be visible + actionable (default: 15000)"), }, - src/humanizer/engine.ts:43-70 (helper)ClickTarget interface and resolveLocator() / resolvedByLabel() helper functions that translate the abstract click options into Playwright locator calls
/**
 * Ways of identifying the element to click. When several are supplied the
 * first truthy one in the order selector, role, text, label is used; x/y are
 * only a fallback for when none of those is present.
 */
export interface ClickTarget {
  /** CSS or XPath selector. */
  selector?: string;
  /** ARIA role; may be combined with `name`. */
  role?: string;
  /** Accessible name, only consulted together with `role`. */
  name?: string;
  /** Visible text to match. */
  text?: string;
  /** Form-field label text. */
  label?: string;
  /** X coordinate fallback. */
  x?: number;
  /** Y coordinate fallback. */
  y?: number;
}

/**
 * Builds the locator for the first truthy locator option in `opts`.
 *
 * @returns The locator, or `null` when no locator option is set, leaving the
 *   caller to fall back to coordinates.
 */
function resolveLocator(page: Page, opts: ClickTarget): Locator | null {
  const { selector, role, name, text, label } = opts;

  if (selector) {
    return page.locator(selector);
  }
  if (role) {
    // An empty or missing name means "match on role alone".
    const roleOptions = name ? { name } : undefined;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return page.getByRole(role as any, roleOptions);
  }
  if (text) {
    return page.getByText(text);
  }
  if (label) {
    return page.getByLabel(label);
  }
  return null;
}

/**
 * Names the option that resolveLocator() would pick for `opts`, using the same
 * precedence, or "coords" when no locator option is set.
 */
function resolvedByLabel(opts: ClickTarget): string {
  const precedence = ["selector", "role", "text", "label"] as const;
  const winner = precedence.find((key) => Boolean(opts[key]));
  return winner ?? "coords";
}
- src/browser/session.ts:127-137 (helper)getPageForTarget() — resolves any target_id (cloakbrowser or camoufox) to a Playwright Page, used by humanizerEngine.click() to get the page instance
/**
 * Resolves a target id to the page that should receive input.
 *
 * Camoufox target ids are handed to ensureCamoufoxPage(); any other id is
 * treated as a browser target whose stored page is returned directly.
 *
 * @param targetId - Id of a previously launched target.
 * @returns The page associated with the target.
 * @throws Error when a browser target's page has already been closed.
 */
export async function getPageForTarget(targetId: string): Promise<Page> {
  const entry = getEntry(targetId);

  if (!isCamoufoxTargetId(targetId)) {
    const { page } = entry as BrowserTargetEntry;
    if (page.isClosed()) {
      throw new Error(`Page for browser target '${targetId}' is closed.`);
    }
    return page;
  }

  return ensureCamoufoxPage(entry as CamoufoxEntryWithDriver);
}