Skip to main content
Glama
tools.ts17.3 kB
import zodToJsonSchema from "zod-to-json-schema"; import { z } from "zod"; import type { ToolResponse } from "@yetidevworks/shared"; import { ExtensionContext } from "./context.js"; export interface Tool { schema: { name: string; description: string; inputSchema: unknown; }; handle: (context: ExtensionContext, params: unknown) => Promise<ToolResponse>; } const ElementTargetSchema = z.object({ selector: z .string() .min(1) .describe("CSS selector that uniquely identifies the target element"), description: z .string() .optional() .describe("Optional human-readable description of the element"), }); const NavigateSchema = z.object({ url: z.string().describe("The URL to navigate to"), }); const WaitSchema = z.object({ seconds: z .number() .min(0) .describe("The time to wait in seconds"), }); const PressKeySchema = z.object({ key: z .string() .min(1) .describe("Name of the key to press or a character to generate, such as `ArrowLeft` or `a`"), }); const TypeSchema = ElementTargetSchema.extend({ text: z.string().describe("Text to type into the element"), submit: z .boolean() .default(false) .describe("Whether to submit entered text (press Enter after)"), }); const SelectOptionSchema = ElementTargetSchema.extend({ values: z .array(z.string()) .nonempty() .describe( "Array of values to select in the dropdown. This can be a single value or multiple values.", ), }); const ScreenshotSchema = z.object({ fullPage: z .boolean() .optional() .describe("Whether to capture the full page instead of the visible viewport"), }); const WaitForSelectorSchema = z.object({ selector: z .string() .min(1) .describe("CSS selector to wait for"), timeoutMs: z .number() .int() .min(0) .max(120_000) .optional() .describe("Optional timeout in milliseconds. Defaults to 5000ms."), visible: z .boolean() .optional() .describe("When true, wait until the element is visible (non-zero size and not hidden)"), }); const FormFieldValueSchema = z.union([z.string(), z.number(), z.boolean(), z.null()]); const FormFieldSchema = z .object({ selector: z .string() .min(1) .describe("CSS selector for the form control to update"), value: FormFieldValueSchema.optional().describe("Value to apply to the field (string/number/boolean)"), values: z .array(z.string()) .nonempty() .optional() .describe("List of values to select for multi-select inputs"), submit: z .boolean() .optional() .describe("Submit the containing form after setting this field"), description: z .string() .optional() .describe("Optional description to help debugging errors"), type: z .enum(["auto", "text", "textarea", "select", "checkbox", "radio", "contentEditable"]) .optional() .describe("Override automatic element detection"), }) .refine((data) => typeof data.value !== "undefined" || data.values, { message: "Provide either a value or values for each form field", path: ["value"], }); const FillFormSchema = z.object({ fields: z.array(FormFieldSchema).min(1), }); const EvaluateSchema = z.object({ script: z .string() .min(1) .describe("JavaScript function expression to execute in the page context, e.g. `(el) => el.textContent`"), args: z .array(z.any()) .optional() .describe("Optional array of arguments passed to the function"), timeoutMs: z .number() .int() .min(0) .max(120_000) .optional() .describe("Optional timeout in milliseconds. Defaults to no timeout."), }); const HandleDialogSchema = z.object({ action: z .enum(["accept", "dismiss"]) .describe("Whether to accept or dismiss the active JavaScript dialog"), promptText: z .string() .optional() .describe("Optional text to enter into prompt dialogs before accepting"), }); const DragSchema = z.object({ fromSelector: z .string() .min(1) .describe("CSS selector for the element to start the drag from"), toSelector: z .string() .min(1) .describe("CSS selector for the element to drop onto"), steps: z .number() .int() .min(1) .max(200) .optional() .describe("Optional number of intermediate drag steps. Defaults to 12."), description: z .string() .optional() .describe("Optional human-readable description of the drag target"), }); function noInputSchema() { return zodToJsonSchema(z.object({})); } function buildSnapshotTool(name: string, description: string, command: "navigate" | "goBack" | "goForward") { const isNavigate = command === "navigate"; const inputSchema = isNavigate ? zodToJsonSchema(NavigateSchema) : noInputSchema(); return { schema: { name, description, inputSchema, }, handle: async (context: ExtensionContext, params: unknown) => { if (isNavigate) { const { url } = NavigateSchema.parse(params); await context.call("navigate", { url }); return context.captureSnapshot(`Navigated to ${url}`); } if (command === "goBack") { await context.call("goBack", {}); return context.captureSnapshot("Navigated back"); } await context.call("goForward", {}); return context.captureSnapshot("Navigated forward"); }, } satisfies Tool; } export function createTools(): Tool[] { const snapshotTool: Tool = { schema: { name: "browser_snapshot", description: "Capture accessibility snapshot of the current page. Use this for getting references to elements to interact with.", inputSchema: noInputSchema(), }, handle: async (context) => { return context.captureSnapshot(); }, }; const snapshotDiffTool: Tool = { schema: { name: "browser_snapshot_diff", description: "Compare the most recent snapshot with the previous one to highlight DOM changes", inputSchema: noInputSchema(), }, handle: async (context) => { return context.diffLatestSnapshots(); }, }; const waitTool: Tool = { schema: { name: "browser_wait", description: "Wait for a specified time in seconds", inputSchema: zodToJsonSchema(WaitSchema), }, handle: async (context, params) => { const { seconds } = WaitSchema.parse(params); await context.call("wait", { seconds }); return { content: [ { type: "text", text: `Waited for ${seconds} seconds`, }, ], }; }, }; const waitForTool: Tool = { schema: { name: "browser_wait_for", description: "Wait until a selector appears (optionally visible) before continuing. Returns a fresh snapshot after the element is detected.", inputSchema: zodToJsonSchema(WaitForSelectorSchema), }, handle: async (context, params) => { const { selector, timeoutMs, visible } = WaitForSelectorSchema.parse(params); await context.call("waitFor", { selector, timeoutMs, visible }); const parts = [ `Waited for selector "${selector}".`, visible ? "Required element to be visible." : undefined, typeof timeoutMs === "number" ? `Timeout: ${timeoutMs}ms.` : undefined, ].filter(Boolean); parts.push('Call `browser_snapshot` if you need a refreshed DOM listing.'); return { content: [ { type: "text", text: parts.join(" "), }, ], }; }, }; const pressKeyTool: Tool = { schema: { name: "browser_press_key", description: "Press a key on the keyboard", inputSchema: zodToJsonSchema(PressKeySchema), }, handle: async (context, params) => { const { key } = PressKeySchema.parse(params); await context.call("pressKey", { key }); return { content: [ { type: "text", text: `Pressed key ${key}`, }, ], }; }, }; const clickTool: Tool = { schema: { name: "browser_click", description: "Perform click on a web page", inputSchema: zodToJsonSchema(ElementTargetSchema), }, handle: async (context, params) => { const parsed = ElementTargetSchema.parse(params); await context.call("click", { selector: parsed.selector, description: parsed.description }); return context.captureSnapshot( `Clicked "${parsed.description ?? parsed.selector}"`, ); }, }; const hoverTool: Tool = { schema: { name: "browser_hover", description: "Hover over element on page", inputSchema: zodToJsonSchema(ElementTargetSchema), }, handle: async (context, params) => { const parsed = ElementTargetSchema.parse(params); await context.call("hover", { selector: parsed.selector, description: parsed.description }); return context.captureSnapshot( `Hovered over "${parsed.description ?? parsed.selector}"`, ); }, }; const dragTool: Tool = { schema: { name: "browser_drag", description: "Drag an element (like cards in a kanban board) onto a target element. Useful for sortable UIs.", inputSchema: zodToJsonSchema(DragSchema), }, handle: async (context, params) => { const { fromSelector, toSelector, steps, description } = DragSchema.parse(params); await context.call("drag", { fromSelector, toSelector, steps, description }); const summary = [ `Dragged "${fromSelector}" onto "${toSelector}".`, typeof steps === "number" ? `Steps: ${steps}.` : undefined, description ? `Context: ${description}.` : undefined, 'Run `browser_snapshot` if you need a DOM snapshot after the drag.', ] .filter(Boolean) .join(" "); return { content: [ { type: "text", text: summary, }, ], }; }, }; const typeTool: Tool = { schema: { name: "browser_type", description: "Type text into editable element", inputSchema: zodToJsonSchema(TypeSchema), }, handle: async (context, params) => { const { selector, text, submit, description } = TypeSchema.parse(params); await context.call("type", { selector, text, submit, description }); return context.captureSnapshot( `Typed "${text}" into "${description ?? selector}"`, ); }, }; const fillFormTool: Tool = { schema: { name: "browser_fill_form", description: "Fill multiple form fields in a single call. Supports inputs, textareas, selects, checkboxes, and radios.", inputSchema: zodToJsonSchema(FillFormSchema), }, handle: async (context, params) => { const { fields } = FillFormSchema.parse(params); const result = await context.call("fillForm", { fields }); const summaryLines = [ `Filled ${result.filled}/${result.attempted} fields.`, result.errors.length ? `Issues:\n${result.errors.map((err) => `- ${err}`).join("\n")}` : "No validation errors reported.", 'Call `browser_snapshot` if you need to inspect the page state after filling.', ]; return { content: [ { type: "text", text: summaryLines.join("\n"), }, ], }; }, }; const selectOptionTool: Tool = { schema: { name: "browser_select_option", description: "Select an option in a dropdown", inputSchema: zodToJsonSchema(SelectOptionSchema), }, handle: async (context, params) => { const { selector, values, description } = SelectOptionSchema.parse(params); await context.call("selectOption", { selector, values, description }); return context.captureSnapshot( `Selected option in "${description ?? selector}"`, ); }, }; const screenshotTool: Tool = { schema: { name: "browser_screenshot", description: "Take a screenshot of the current page", inputSchema: zodToJsonSchema(ScreenshotSchema), }, handle: async (context, params) => { const { fullPage } = ScreenshotSchema.parse(params); const result = await context.call("screenshot", { fullPage }); return { content: [ { type: "image", data: result.data, mimeType: result.mimeType, }, ], }; }, }; const consoleLogsTool: Tool = { schema: { name: "browser_get_console_logs", description: "Get the console logs from the browser", inputSchema: noInputSchema(), }, handle: async (context) => { const logs = await context.call("getConsoleLogs", {}); const text = logs .map((log) => { const time = new Date(log.timestamp).toISOString(); const lines = [`[${time}] [${log.level}] ${log.message}`]; if (log.stack) { lines.push(log.stack); } return lines.join("\n"); }) .join("\n\n"); return { content: [ { type: "text", text: text || "No console output captured", }, ], }; }, }; const pageStateTool: Tool = { schema: { name: "browser_page_state", description: "Extract form data, storage values, and cookies from the active page", inputSchema: noInputSchema(), }, handle: async (context) => { const state = await context.call("pageState", {}); const summary = [ `Page state captured ${state.capturedAt}`, `- Forms inspected: ${state.forms.length}`, `- localStorage keys: ${state.localStorage.length}`, `- sessionStorage keys: ${state.sessionStorage.length}`, `- Cookies: ${state.cookies.length}`, "", "```json", JSON.stringify(state, null, 2), "```", ].join("\n"); return { content: [ { type: "text", text: summary, }, ], }; }, }; const evaluateTool: Tool = { schema: { name: "browser_evaluate", description: "Run custom JavaScript inside the page context and return the JSON-serializable result.", inputSchema: zodToJsonSchema(EvaluateSchema), }, handle: async (context, params) => { const { script, args, timeoutMs } = EvaluateSchema.parse(params); const { value } = await context.call("evaluate", { script, args, timeoutMs }); let formatted: string; try { formatted = JSON.stringify(value, null, 2); } catch (error) { formatted = String(value); } const MAX_OUTPUT = 4_000; if (formatted.length > MAX_OUTPUT) { formatted = `${formatted.slice(0, MAX_OUTPUT)}… (truncated)`; } return { content: [ { type: "text", text: `Evaluation result:\n\`\`\`json\n${formatted}\n\`\`\``, }, ], }; }, }; const handleDialogTool: Tool = { schema: { name: "browser_handle_dialog", description: "Accept or dismiss the currently open alert/confirm/prompt dialog in the active tab.", inputSchema: zodToJsonSchema(HandleDialogSchema), }, handle: async (context, params) => { const { action, promptText } = HandleDialogSchema.parse(params); await context.call("handleDialog", { action, promptText }); const summary = `Dialog ${action === "accept" ? "accepted" : "dismissed"}${ promptText ? ` with prompt text "${promptText}"` : "" }.`; return { content: [ { type: "text", text: summary, }, ], }; }, }; const connectionInfoTool: Tool = { schema: { name: "browser_connection_info", description: "Show the MCP bridge WebSocket port, connection state, and extension info", inputSchema: noInputSchema(), }, handle: async (context) => { const info = context.getConnectionInfo(); const lines = [ `WebSocket port: ${info.wsPort}`, `Extension connected: ${info.connected ? "yes" : "no"}`, ]; if (info.extension) { const versionSuffix = info.extension.version ? ` v${info.extension.version}` : ""; lines.push(`Extension hello: ${info.extension.client}${versionSuffix}`); } return { content: [ { type: "text", text: lines.join("\n"), }, ], }; }, }; return [ snapshotTool, snapshotDiffTool, buildSnapshotTool("browser_navigate", "Navigate to a URL", "navigate"), buildSnapshotTool("browser_go_back", "Go back to the previous page", "goBack"), buildSnapshotTool("browser_go_forward", "Go forward to the next page", "goForward"), waitTool, waitForTool, pressKeyTool, clickTool, hoverTool, dragTool, typeTool, fillFormTool, selectOptionTool, screenshotTool, consoleLogsTool, pageStateTool, evaluateTool, handleDialogTool, connectionInfoTool, ]; }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yetidevworks/yetibrowser-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server