YetiBrowser MCP

tools.ts•16.9 KiB

import zodToJsonSchema from "zod-to-json-schema";
import { z } from "zod";
import type { ToolResponse } from "@yetidevworks/shared";
import { ExtensionContext } from "./context.js";

/**
 * A single tool exposed by the server: the schema advertised to clients plus
 * the handler that executes the tool against the browser extension.
 */
export interface Tool {
  schema: {
    name: string;
    description: string;
    /** JSON Schema describing the accepted parameters. */
    inputSchema: unknown;
  };
  /**
   * Runs the tool. `params` is untrusted input and is validated by each
   * handler with the matching zod schema before use.
   */
  handle: (context: ExtensionContext, params: unknown) => Promise<ToolResponse>;
}

// ---------------------------------------------------------------------------
// Input schemas
// ---------------------------------------------------------------------------

/** Shared shape for tools that act on a single element. */
const ElementTargetSchema = z.object({
  selector: z
    .string()
    .min(1)
    .describe("CSS selector that uniquely identifies the target element"),
  description: z
    .string()
    .optional()
    .describe("Optional human-readable description of the element"),
});

const NavigateSchema = z.object({
  url: z.string().describe("The URL to navigate to"),
});

const WaitSchema = z.object({
  seconds: z
    .number()
    .min(0)
    .describe("The time to wait in seconds"),
});

const PressKeySchema = z.object({
  key: z
    .string()
    .min(1)
    .describe("Name of the key to press or a character to generate, such as `ArrowLeft` or `a`"),
});

const TypeSchema = ElementTargetSchema.extend({
  text: z.string().describe("Text to type into the element"),
  submit: z
    .boolean()
    .default(false)
    .describe("Whether to submit entered text (press Enter after)"),
});

const SelectOptionSchema = ElementTargetSchema.extend({
  values: z
    .array(z.string())
    .nonempty()
    .describe(
      "Array of values to select in the dropdown. This can be a single value or multiple values.",
    ),
});

const ScreenshotSchema = z.object({
  fullPage: z
    .boolean()
    .optional()
    .describe("Whether to capture the full page instead of the visible viewport"),
});

const WaitForSelectorSchema = z.object({
  selector: z
    .string()
    .min(1)
    .describe("CSS selector to wait for"),
  timeoutMs: z
    .number()
    .int()
    .min(0)
    .max(120_000)
    .optional()
    .describe("Optional timeout in milliseconds. Defaults to 5000ms."),
  visible: z
    .boolean()
    .optional()
    .describe("When true, wait until the element is visible (non-zero size and not hidden)"),
});

const FormFieldValueSchema = z.union([z.string(), z.number(), z.boolean(), z.null()]);

const FormFieldSchema = z
  .object({
    selector: z
      .string()
      .min(1)
      .describe("CSS selector for the form control to update"),
    value: FormFieldValueSchema.optional().describe("Value to apply to the field (string/number/boolean)"),
    values: z
      .array(z.string())
      .nonempty()
      .optional()
      .describe("List of values to select for multi-select inputs"),
    submit: z
      .boolean()
      .optional()
      .describe("Submit the containing form after setting this field"),
    description: z
      .string()
      .optional()
      .describe("Optional description to help debugging errors"),
    type: z
      .enum(["auto", "text", "textarea", "select", "checkbox", "radio", "contentEditable"])
      .optional()
      .describe("Override automatic element detection"),
  })
  // Each field must carry a payload. `value: null` counts as provided; `values`
  // is non-empty whenever present, so a defined-check is sufficient.
  .refine((data) => data.value !== undefined || data.values !== undefined, {
    message: "Provide either a value or values for each form field",
    path: ["value"],
  });

const FillFormSchema = z.object({
  fields: z.array(FormFieldSchema).min(1),
});

const EvaluateSchema = z.object({
  script: z
    .string()
    .min(1)
    .describe("JavaScript function expression to execute in the page context, e.g. `(el) => el.textContent`"),
  args: z
    .array(z.any())
    .optional()
    .describe("Optional array of arguments passed to the function"),
  timeoutMs: z
    .number()
    .int()
    .min(0)
    .max(120_000)
    .optional()
    .describe("Optional timeout in milliseconds. Defaults to no timeout."),
});

const HandleDialogSchema = z.object({
  action: z
    .enum(["accept", "dismiss"])
    .describe("Whether to accept or dismiss the active JavaScript dialog"),
  promptText: z
    .string()
    .optional()
    .describe("Optional text to enter into prompt dialogs before accepting"),
});

const DragSchema = z.object({
  fromSelector: z
    .string()
    .min(1)
    .describe("CSS selector for the element to start the drag from"),
  toSelector: z
    .string()
    .min(1)
    .describe("CSS selector for the element to drop onto"),
  steps: z
    .number()
    .int()
    .min(1)
    .max(200)
    .optional()
    .describe("Optional number of intermediate drag steps. Defaults to 12."),
  description: z
    .string()
    .optional()
    .describe("Optional human-readable description of the drag target"),
});

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Maximum number of characters of an evaluation result echoed back to the caller. */
const MAX_EVALUATE_OUTPUT = 4_000;

/** JSON Schema for tools that take no parameters. */
function noInputSchema() {
  return zodToJsonSchema(z.object({}));
}

/**
 * Renders the value returned by `browser_evaluate` as text, truncated to
 * `MAX_EVALUATE_OUTPUT` characters.
 *
 * `JSON.stringify` does not throw for `undefined`, functions, or symbols; it
 * returns `undefined`. Both that case and a thrown serialization error fall
 * back to `String(value)` so the result is always a string.
 */
function formatEvaluationResult(value: unknown): string {
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(value, null, 2);
  } catch {
    serialized = undefined;
  }

  const formatted = serialized ?? String(value);
  if (formatted.length > MAX_EVALUATE_OUTPUT) {
    return `${formatted.slice(0, MAX_EVALUATE_OUTPUT)}… (truncated)`;
  }
  return formatted;
}

/**
 * Builds one of the history/navigation tools. Each of them issues the
 * corresponding command and then returns a snapshot captured via
 * `context.captureSnapshot`.
 *
 * @param name Tool name advertised in the schema.
 * @param description Tool description advertised in the schema.
 * @param command Which navigation command the tool issues. Only `"navigate"`
 *   accepts input (a `url`); the other two take no parameters.
 */
function buildSnapshotTool(name: string, description: string, command: "navigate" | "goBack" | "goForward") {
  const isNavigate = command === "navigate";
  const inputSchema = isNavigate ? zodToJsonSchema(NavigateSchema) : noInputSchema();

  return {
    schema: {
      name,
      description,
      inputSchema,
    },
    handle: async (context: ExtensionContext, params: unknown) => {
      if (isNavigate) {
        const { url } = NavigateSchema.parse(params);
        await context.call("navigate", { url });
        return context.captureSnapshot(`Navigated to ${url}`);
      }

      if (command === "goBack") {
        await context.call("goBack", {});
        return context.captureSnapshot("Navigated back");
      }

      await context.call("goForward", {});
      return context.captureSnapshot("Navigated forward");
    },
  } satisfies Tool;
}

// ---------------------------------------------------------------------------
// Tool registry
// ---------------------------------------------------------------------------

/**
 * Creates the full list of browser tools.
 *
 * Every handler validates `params` with its zod schema (throwing a zod error
 * on invalid input) and forwards the command through `context.call`.
 *
 * @returns The tools in the order they are advertised.
 */
export function createTools(): Tool[] {
  const snapshotTool: Tool = {
    schema: {
      name: "browser_snapshot",
      description:
        "Capture accessibility snapshot of the current page. Use this for getting references to elements to interact with.",
      inputSchema: noInputSchema(),
    },
    handle: async (context) => {
      return context.captureSnapshot();
    },
  };

  const snapshotDiffTool: Tool = {
    schema: {
      name: "browser_snapshot_diff",
      description: "Compare the most recent snapshot with the previous one to highlight DOM changes",
      inputSchema: noInputSchema(),
    },
    handle: async (context) => {
      return context.diffLatestSnapshots();
    },
  };

  const waitTool: Tool = {
    schema: {
      name: "browser_wait",
      description: "Wait for a specified time in seconds",
      inputSchema: zodToJsonSchema(WaitSchema),
    },
    handle: async (context, params) => {
      const { seconds } = WaitSchema.parse(params);
      await context.call("wait", { seconds });
      return {
        content: [
          {
            type: "text",
            text: `Waited for ${seconds} seconds`,
          },
        ],
      };
    },
  };

  const waitForTool: Tool = {
    schema: {
      name: "browser_wait_for",
      // The handler returns a text summary only, so the description must not
      // promise a snapshot.
      description:
        "Wait until a selector appears (optionally visible) before continuing. Call `browser_snapshot` afterwards if you need a refreshed DOM listing.",
      inputSchema: zodToJsonSchema(WaitForSelectorSchema),
    },
    handle: async (context, params) => {
      const { selector, timeoutMs, visible } = WaitForSelectorSchema.parse(params);
      await context.call("waitFor", { selector, timeoutMs, visible });

      const parts = [
        `Waited for selector "${selector}".`,
        visible ? "Required element to be visible." : undefined,
        typeof timeoutMs === "number" ? `Timeout: ${timeoutMs}ms.` : undefined,
      ].filter(Boolean);
      parts.push('Call `browser_snapshot` if you need a refreshed DOM listing.');

      return {
        content: [
          {
            type: "text",
            text: parts.join(" "),
          },
        ],
      };
    },
  };

  const pressKeyTool: Tool = {
    schema: {
      name: "browser_press_key",
      description: "Press a key on the keyboard",
      inputSchema: zodToJsonSchema(PressKeySchema),
    },
    handle: async (context, params) => {
      const { key } = PressKeySchema.parse(params);
      await context.call("pressKey", { key });
      return {
        content: [
          {
            type: "text",
            text: `Pressed key ${key}`,
          },
        ],
      };
    },
  };

  const clickTool: Tool = {
    schema: {
      name: "browser_click",
      description: "Perform click on a web page",
      inputSchema: zodToJsonSchema(ElementTargetSchema),
    },
    handle: async (context, params) => {
      const parsed = ElementTargetSchema.parse(params);
      await context.call("click", { selector: parsed.selector, description: parsed.description });
      return context.captureSnapshot(
        `Clicked "${parsed.description ?? parsed.selector}"`,
      );
    },
  };

  const hoverTool: Tool = {
    schema: {
      name: "browser_hover",
      description: "Hover over element on page",
      inputSchema: zodToJsonSchema(ElementTargetSchema),
    },
    handle: async (context, params) => {
      const parsed = ElementTargetSchema.parse(params);
      await context.call("hover", { selector: parsed.selector, description: parsed.description });
      return context.captureSnapshot(
        `Hovered over "${parsed.description ?? parsed.selector}"`,
      );
    },
  };

  const dragTool: Tool = {
    schema: {
      name: "browser_drag",
      description:
        "Drag an element (like cards in a kanban board) onto a target element. Useful for sortable UIs.",
      inputSchema: zodToJsonSchema(DragSchema),
    },
    handle: async (context, params) => {
      const { fromSelector, toSelector, steps, description } = DragSchema.parse(params);
      await context.call("drag", { fromSelector, toSelector, steps, description });

      const summary = [
        `Dragged "${fromSelector}" onto "${toSelector}".`,
        typeof steps === "number" ? `Steps: ${steps}.` : undefined,
        description ? `Context: ${description}.` : undefined,
        'Run `browser_snapshot` if you need a DOM snapshot after the drag.',
      ]
        .filter(Boolean)
        .join(" ");

      return {
        content: [
          {
            type: "text",
            text: summary,
          },
        ],
      };
    },
  };

  const typeTool: Tool = {
    schema: {
      name: "browser_type",
      description: "Type text into editable element",
      inputSchema: zodToJsonSchema(TypeSchema),
    },
    handle: async (context, params) => {
      const { selector, text, submit, description } = TypeSchema.parse(params);
      await context.call("type", { selector, text, submit, description });
      return context.captureSnapshot(
        `Typed "${text}" into "${description ?? selector}"`,
      );
    },
  };

  const fillFormTool: Tool = {
    schema: {
      name: "browser_fill_form",
      description:
        "Fill multiple form fields in a single call. Supports inputs, textareas, selects, checkboxes, and radios.",
      inputSchema: zodToJsonSchema(FillFormSchema),
    },
    handle: async (context, params) => {
      const { fields } = FillFormSchema.parse(params);
      const result = await context.call("fillForm", { fields });

      const summaryLines = [
        `Filled ${result.filled}/${result.attempted} fields.`,
        result.errors.length
          ? `Issues:\n${result.errors.map((err) => `- ${err}`).join("\n")}`
          : "No validation errors reported.",
        'Call `browser_snapshot` if you need to inspect the page state after filling.',
      ];

      return {
        content: [
          {
            type: "text",
            text: summaryLines.join("\n"),
          },
        ],
      };
    },
  };

  const selectOptionTool: Tool = {
    schema: {
      name: "browser_select_option",
      description: "Select an option in a dropdown",
      inputSchema: zodToJsonSchema(SelectOptionSchema),
    },
    handle: async (context, params) => {
      const { selector, values, description } = SelectOptionSchema.parse(params);
      await context.call("selectOption", { selector, values, description });
      return context.captureSnapshot(
        `Selected option in "${description ?? selector}"`,
      );
    },
  };

  const screenshotTool: Tool = {
    schema: {
      name: "browser_screenshot",
      description: "Take a screenshot of the current page",
      inputSchema: zodToJsonSchema(ScreenshotSchema),
    },
    handle: async (context, params) => {
      const { fullPage } = ScreenshotSchema.parse(params);
      const result = await context.call("screenshot", { fullPage });
      return {
        content: [
          {
            type: "image",
            data: result.data,
            mimeType: result.mimeType,
          },
        ],
      };
    },
  };

  const consoleLogsTool: Tool = {
    schema: {
      name: "browser_get_console_logs",
      description: "Get the console logs from the browser",
      inputSchema: noInputSchema(),
    },
    handle: async (context) => {
      const logs = await context.call("getConsoleLogs", {});

      // One "[time] [level] message" entry per log, followed by its stack
      // when present; entries are separated by a blank line.
      const text = logs
        .map((log) => {
          const time = new Date(log.timestamp).toISOString();
          const lines = [`[${time}] [${log.level}] ${log.message}`];
          if (log.stack) {
            lines.push(log.stack);
          }
          return lines.join("\n");
        })
        .join("\n\n");

      return {
        content: [
          {
            type: "text",
            text: text || "No console output captured",
          },
        ],
      };
    },
  };

  const pageStateTool: Tool = {
    schema: {
      name: "browser_page_state",
      description: "Extract form data, storage values, and cookies from the active page",
      inputSchema: noInputSchema(),
    },
    handle: async (context) => {
      const state = await context.call("pageState", {});

      // Short human-readable header followed by the full state as fenced JSON.
      const summary = [
        `Page state captured ${state.capturedAt}`,
        `- Forms inspected: ${state.forms.length}`,
        `- localStorage keys: ${state.localStorage.length}`,
        `- sessionStorage keys: ${state.sessionStorage.length}`,
        `- Cookies: ${state.cookies.length}`,
        "",
        "```json",
        JSON.stringify(state, null, 2),
        "```",
      ].join("\n");

      return {
        content: [
          {
            type: "text",
            text: summary,
          },
        ],
      };
    },
  };

  const evaluateTool: Tool = {
    schema: {
      name: "browser_evaluate",
      description: "Run custom JavaScript inside the page context and return the JSON-serializable result.",
      inputSchema: zodToJsonSchema(EvaluateSchema),
    },
    handle: async (context, params) => {
      const { script, args, timeoutMs } = EvaluateSchema.parse(params);
      const { value } = await context.call("evaluate", { script, args, timeoutMs });
      const formatted = formatEvaluationResult(value);

      return {
        content: [
          {
            type: "text",
            text: `Evaluation result:\n\`\`\`json\n${formatted}\n\`\`\``,
          },
        ],
      };
    },
  };

  const handleDialogTool: Tool = {
    schema: {
      name: "browser_handle_dialog",
      description: "Accept or dismiss the currently open alert/confirm/prompt dialog in the active tab.",
      inputSchema: zodToJsonSchema(HandleDialogSchema),
    },
    handle: async (context, params) => {
      const { action, promptText } = HandleDialogSchema.parse(params);
      await context.call("handleDialog", { action, promptText });

      const summary = `Dialog ${action === "accept" ? "accepted" : "dismissed"}${
        promptText ? ` with prompt text "${promptText}"` : ""
      }.`;

      return {
        content: [
          {
            type: "text",
            text: summary,
          },
        ],
      };
    },
  };

  const connectionInfoTool: Tool = {
    schema: {
      name: "browser_connection_info",
      description: "Show the MCP bridge WebSocket port, connection state, and extension info",
      inputSchema: noInputSchema(),
    },
    handle: async (context) => {
      const info = context.getConnectionInfo();
      const lines = [
        `WebSocket port: ${info.wsPort}`,
        `Extension connected: ${info.connected ? "yes" : "no"}`,
      ];

      if (info.extension) {
        const versionSuffix = info.extension.version ? ` v${info.extension.version}` : "";
        lines.push(`Extension hello: ${info.extension.client}${versionSuffix}`);
      }

      return {
        content: [
          {
            type: "text",
            text: lines.join("\n"),
          },
        ],
      };
    },
  };

  return [
    snapshotTool,
    snapshotDiffTool,
    buildSnapshotTool("browser_navigate", "Navigate to a URL", "navigate"),
    buildSnapshotTool("browser_go_back", "Go back to the previous page", "goBack"),
    buildSnapshotTool("browser_go_forward", "Go forward to the next page", "goForward"),
    waitTool,
    waitForTool,
    pressKeyTool,
    clickTool,
    hoverTool,
    dragTool,
    typeTool,
    fillFormTool,
    selectOptionTool,
    screenshotTool,
    consoleLogsTool,
    pageStateTool,
    evaluateTool,
    handleDialogTool,
    connectionInfoTool,
  ];
}

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yetidevworks/yetibrowser-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

tools.ts•16.9 KiB