Playwright Browserbase MCP Server

2,054

Overview InspectNew Endpoints Schema Related Servers Reviews Score

snapshot.ts•15.5 kB

import { z } from "zod";
import type {
  TextContent,
  ImageContent,
} from "@modelcontextprotocol/sdk/types.js";
import type { Locator, PageScreenshotOptions } from "playwright-core";
import { defineTool, type ToolResult } from "./tool.js";
import type { Context, ToolActionResult } from "../context.js";
import { PageSnapshot } from "../pageSnapshot.js";
import { outputFile } from "../config.js";

// --- Private helpers ---

/**
 * Timeout (ms) shared by the drag, hover, fill, press and select actions.
 * Click and screenshot deliberately use their own, longer values.
 */
const ACTION_TIMEOUT_MS = 5000;

/** Extracts a printable message from anything that was thrown. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Renders `text` as a single-quoted JavaScript string literal for the
 * generated code log.
 *
 * Backslashes, single quotes and line breaks are escaped so that the literal
 * stays on one line (every generated statement is emitted as a `//` comment)
 * and would round-trip to the same text if the statement were executed.
 */
function quoteForCode(text: string): string {
  const escaped = text
    .replace(/\\/g, "\\\\")
    .replace(/'/g, "\\'")
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r");
  return `'${escaped}'`;
}

/** A Playwright locator together with the private hook used by `generateLocator`. */
type LocatorWithInternals = Locator & {
  _generateLocatorString?: () => Promise<string> | string;
};

// --- Tool: Snapshot ---

const SnapshotInputSchema = z.object({});
type SnapshotInput = z.infer<typeof SnapshotInputSchema>;

/**
 * `browserbase_snapshot`: requests a fresh accessibility snapshot.
 *
 * The action itself only returns a confirmation message; the snapshot is
 * requested through the `captureSnapshot: true` flag on the tool result.
 */
const snapshot = defineTool<typeof SnapshotInputSchema>({
  capability: "core",
  schema: {
    name: "browserbase_snapshot",
    description:
      "Capture a new accessibility snapshot of the current page state. Use this if the page has changed to ensure subsequent actions use an up-to-date page representation.",
    inputSchema: SnapshotInputSchema,
  },
  handle: async (
    context: Context,
    params: SnapshotInput
  ): Promise<ToolResult> => {
    const action = async (): Promise<ToolActionResult> => {
      const content: (TextContent | ImageContent)[] = [
        { type: "text", text: "Accessibility snapshot captured." },
      ];
      return { content };
    };

    return {
      action,
      code: [`// Request accessibility snapshot`],
      captureSnapshot: true,
      waitForNetwork: false,
      resultOverride: {
        content: [{ type: "text", text: "Accessibility snapshot initiated." }],
      },
    };
  },
});

// --- Element Schema & Types ---

/** Input shared by every tool that targets a single element of the snapshot. */
const elementSchema = z.object({
  element: z.string().describe("Human-readable element description"),
  ref: z
    .string()
    .describe("Exact target element reference from the page snapshot"),
});
type ElementInput = z.infer<typeof elementSchema>;

// --- Tool: Click ---

/**
 * `browserbase_click`: clicks the element identified by `ref`.
 *
 * Throws from `handle` if no snapshot is available (`snapshotOrDie`), and from
 * the action if the click fails.
 */
const click = defineTool<typeof elementSchema>({
  capability: "core",
  schema: {
    name: "browserbase_click",
    description: "Perform click on a web page using ref",
    inputSchema: elementSchema,
  },
  handle: async (
    context: Context,
    params: ElementInput
  ): Promise<ToolResult> => {
    // Resolve the locator from the current snapshot.
    const locator = context.snapshotOrDie().refLocator(params.ref);

    const code = [
      `// Click ${params.element}`,
      `// await page.${await generateLocator(locator)}.click();`,
    ];

    const action = async (): Promise<ToolActionResult> => {
      try {
        // Forced click with a 30 s timeout, longer than the 5 s used by the
        // other element actions in this file.
        await locator.click({ force: true, timeout: 30000 });
      } catch (actionError) {
        throw new Error(
          `Failed to click element '${params.element}'. Error: ${describeError(
            actionError
          )}`
        );
      }
      return {
        content: [{ type: "text", text: `Clicked ${params.element}` }],
      };
    };

    return { code, action, captureSnapshot: true, waitForNetwork: true };
  },
});

// --- Tool: Drag ---

const dragInputSchema = z.object({
  startElement: z.string().describe("Source element description"),
  startRef: z
    .string()
    .describe("Exact source element reference from the page snapshot"),
  endElement: z.string().describe("Target element description"),
  endRef: z
    .string()
    .describe("Exact target element reference from the page snapshot"),
});
type DragInput = z.infer<typeof dragInputSchema>;

/**
 * `browserbase_drag`: drags the element at `startRef` onto the element at
 * `endRef`.
 *
 * Throws from `handle` if no snapshot is available, and from the action if the
 * drag fails.
 */
const drag = defineTool<typeof dragInputSchema>({
  capability: "core",
  schema: {
    name: "browserbase_drag",
    description: "Perform drag and drop between two elements using ref.",
    inputSchema: dragInputSchema,
  },
  handle: async (context: Context, params: DragInput): Promise<ToolResult> => {
    // Resolve both locators from the same snapshot.
    const pageSnapshot = context.snapshotOrDie();
    const startLocator = pageSnapshot.refLocator(params.startRef);
    const endLocator = pageSnapshot.refLocator(params.endRef);

    const startCode = await generateLocator(startLocator);
    const endCode = await generateLocator(endLocator);
    const code = [
      `// Drag ${params.startElement} to ${params.endElement}`,
      `// await page.${startCode}.dragTo(page.${endCode});`,
    ];

    const action = async (): Promise<ToolActionResult> => {
      try {
        await startLocator.dragTo(endLocator, { timeout: ACTION_TIMEOUT_MS });
      } catch (dragError) {
        throw new Error(
          `Failed to drag '${params.startElement}' to '${
            params.endElement
          }'. Error: ${describeError(dragError)}`
        );
      }
      return {
        content: [
          {
            type: "text",
            text: `Dragged ${params.startElement} to ${params.endElement}`,
          },
        ],
      };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});

// --- Tool: Hover ---

/**
 * `browserbase_hover`: hovers over the element identified by `ref`.
 *
 * Throws from `handle` if no snapshot is available, and from the action if the
 * hover fails.
 */
const hover = defineTool<typeof elementSchema>({
  capability: "core",
  schema: {
    name: "browserbase_hover",
    description: "Hover over element on page using ref.",
    inputSchema: elementSchema,
  },
  handle: async (
    context: Context,
    params: ElementInput
  ): Promise<ToolResult> => {
    const locator = context.snapshotOrDie().refLocator(params.ref);

    const code = [
      `// Hover over ${params.element}`,
      `// await page.${await generateLocator(locator)}.hover();`,
    ];

    const action = async (): Promise<ToolActionResult> => {
      try {
        await locator.hover({ timeout: ACTION_TIMEOUT_MS });
      } catch (hoverError) {
        throw new Error(
          `Failed to hover over element '${
            params.element
          }'. Error: ${describeError(hoverError)}`
        );
      }
      return {
        content: [{ type: "text", text: `Hovered over: ${params.element}` }],
      };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});

// --- Tool: Type ---

const typeSchema = elementSchema.extend({
  text: z.string().describe("Text to type into the element"),
  submit: z
    .boolean()
    .optional()
    .describe("Whether to submit entered text (press Enter after)"),
  slowly: z
    .boolean()
    .optional()
    .default(true)
    .describe("Whether to type one character at a time."),
});
type TypeInput = z.infer<typeof typeSchema>;

/**
 * `browserbase_type`: enters `text` into the element identified by `ref`.
 *
 * - `slowly` (schema default `true`): types one character at a time with a
 *   50 ms delay between key presses.
 * - otherwise: waits for the element to be visible, verifies it is editable,
 *   clears it and fills the text in one go.
 * - `submit`: presses Enter on the element afterwards.
 *
 * Throws from `handle` if no snapshot is available, and from the action if any
 * step fails.
 */
const type = defineTool<typeof typeSchema>({
  capability: "core",
  schema: {
    name: "browserbase_type",
    description: "Type text into editable element using ref.",
    inputSchema: typeSchema,
  },
  handle: async (context: Context, params: TypeInput): Promise<ToolResult> => {
    const locator = context.snapshotOrDie().refLocator(params.ref);
    const locatorCode = await generateLocator(locator);

    // `code` and `steps` are built in lock-step: each logged statement has a
    // matching step that is executed later by the action.
    const code: string[] = [];
    const steps: (() => Promise<void>)[] = [];

    if (params.slowly) {
      code.push(
        `// Press "${params.text}" sequentially into "${params.element}"`
      );
      code.push(
        `// await page.${locatorCode}.pressSequentially(${quoteForCode(
          params.text
        )});`
      );
      steps.push(() => locator.pressSequentially(params.text, { delay: 50 }));
    } else {
      code.push(`// Fill "${params.text}" into "${params.element}"`);
      code.push(
        `// await page.${locatorCode}.fill(${quoteForCode(params.text)});`
      );
      steps.push(async () => {
        await locator.waitFor({ state: "visible" });
        if (!(await locator.isEditable())) {
          throw new Error(
            `Element '${params.element}' was visible but not editable.`
          );
        }
        // Clear the field first, then write the new value.
        await locator.fill("", { force: true, timeout: ACTION_TIMEOUT_MS });
        await locator.fill(params.text, {
          force: true,
          timeout: ACTION_TIMEOUT_MS,
        });
      });
    }

    if (params.submit) {
      code.push(`// Submit text`);
      code.push(`// await page.${locatorCode}.press('Enter');`);
      steps.push(() => locator.press("Enter", { timeout: ACTION_TIMEOUT_MS }));
    }

    const action = async (): Promise<ToolActionResult> => {
      try {
        // Run the steps strictly one after another; the first failure aborts.
        for (const step of steps) {
          await step();
        }
      } catch (typeError) {
        throw new Error(
          `Failed to type into or submit element '${
            params.element
          }'. Error: ${describeError(typeError)}`
        );
      }
      return {
        content: [
          {
            type: "text",
            text: `Typed "${params.text}" into: ${params.element}${
              params.submit ? " and submitted" : ""
            }`,
          },
        ],
      };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});

// --- Tool: Select Option ---

const selectOptionSchema = elementSchema.extend({
  values: z
    .array(z.string())
    .describe("Array of values to select in the dropdown."),
});
type SelectOptionInput = z.infer<typeof selectOptionSchema>;

/**
 * `browserbase_select_option`: selects `values` in the dropdown identified by
 * `ref`, after waiting for it to become visible.
 *
 * Throws from `handle` if no snapshot is available, and from the action if the
 * wait or the selection fails.
 */
const selectOption = defineTool<typeof selectOptionSchema>({
  capability: "core",
  schema: {
    name: "browserbase_select_option",
    description: "Select an option in a dropdown using ref.",
    inputSchema: selectOptionSchema,
  },
  handle: async (
    context: Context,
    params: SelectOptionInput
  ): Promise<ToolResult> => {
    const locator = context.snapshotOrDie().refLocator(params.ref);

    const code = [
      `// Select options [${params.values.join(", ")}] in ${params.element}`,
      // JSON.stringify renders the values array for the logged statement.
      `// await page.${await generateLocator(
        locator
      )}.selectOption(${JSON.stringify(params.values)});`,
    ];

    const action = async (): Promise<ToolActionResult> => {
      try {
        await locator.waitFor({
          state: "visible",
          timeout: ACTION_TIMEOUT_MS,
        });
        await locator.selectOption(params.values, {
          timeout: ACTION_TIMEOUT_MS,
        });
      } catch (selectError) {
        throw new Error(
          `Failed to select option(s) in element '${
            params.element
          }'. Error: ${describeError(selectError)}`
        );
      }
      return {
        content: [
          { type: "text", text: `Selected options in: ${params.element}` },
        ],
      };
    };

    return { action, code, captureSnapshot: true, waitForNetwork: true };
  },
});

// --- Tool: Screenshot ---

const screenshotSchema = z.object({
  raw: z
    .boolean()
    .optional()
    .describe(
      "Whether to return without compression (PNG). Default is false (JPEG)."
    ),
  element: z
    .string()
    .optional()
    .describe("Human-readable element description."),
  ref: z
    .string()
    .optional()
    .describe("Exact target element reference from the page snapshot."),
});
type ScreenshotInput = z.infer<typeof screenshotSchema>;

/**
 * `browserbase_take_screenshot`: captures the active page, or a single element
 * when both `element` and `ref` are supplied.
 *
 * The image is written to a file obtained from `outputFile`, as JPEG at
 * quality 50 by default or as PNG when `raw` is set. Unless
 * `config.tools.browserbase_take_screenshot.omitBase64` is truthy, the image
 * is also returned as base64 content; otherwise the content array is empty.
 *
 * Throws from `handle` if only one of `element`/`ref` is given, if there is no
 * active page, or if `ref` is given and no snapshot is available.
 */
const screenshot = defineTool<typeof screenshotSchema>({
  capability: "core",
  schema: {
    name: "browserbase_take_screenshot",
    description: `Take a screenshot of the current page or element using ref.`,
    inputSchema: screenshotSchema,
  },
  handle: async (
    context: Context,
    params: ScreenshotInput
  ): Promise<ToolResult> => {
    // `element` and `ref` only make sense as a pair.
    if (!!params.element !== !!params.ref) {
      throw new Error("Both element and ref must be provided or neither.");
    }

    const page = await context.getActivePage();
    if (!page) {
      throw new Error("No active page found for screenshot");
    }

    // A snapshot is only required when a specific element is targeted.
    let pageSnapshot: PageSnapshot | null = null;
    if (params.ref) {
      pageSnapshot = context.snapshotOrDie();
    }

    const fileType = params.raw ? "png" : "jpeg";
    const fileName = await outputFile(
      context.config,
      `screenshot-${Date.now()}.${fileType}`
    );

    const baseOptions: PageScreenshotOptions = {
      scale: "css",
      timeout: 15000,
    };
    // `quality` is only set for JPEG. Key order is kept stable because the
    // options object is serialised into the code log below.
    const options: PageScreenshotOptions =
      fileType === "jpeg"
        ? { ...baseOptions, type: "jpeg", quality: 50, path: fileName }
        : { ...baseOptions, type: "png", path: fileName };

    const isElementScreenshot = params.element && params.ref;
    const code: string[] = [
      `// Screenshot ${
        isElementScreenshot ? params.element : "viewport"
      } and save it as ${fileName}`,
    ];

    const locator =
      params.ref && pageSnapshot ? pageSnapshot.refLocator(params.ref) : null;

    // The logged options intentionally still include `path`.
    const optionsCode = JSON.stringify(options);
    if (locator) {
      code.push(
        `// await page.${await generateLocator(
          locator
        )}.screenshot(${optionsCode});`
      );
    } else {
      code.push(`// await page.screenshot(${optionsCode});`);
    }

    const action = async (): Promise<ToolActionResult> => {
      const includeBase64 =
        !context.config.tools?.browserbase_take_screenshot?.omitBase64;

      // Element screenshot when a locator was resolved, page screenshot otherwise.
      const screenshotBuffer = locator
        ? await locator.screenshot(options)
        : await page.screenshot(options);

      if (!includeBase64) {
        // The file has been written; nothing is returned inline.
        return { content: [] };
      }

      return {
        content: [
          {
            type: "image",
            // `format` may be redundant next to `mimeType`; kept for now.
            format: fileType,
            mimeType: fileType === "png" ? `image/png` : `image/jpeg`,
            data: screenshotBuffer.toString("base64"),
          },
        ],
      };
    };

    return { code, action, captureSnapshot: true, waitForNetwork: false };
  },
});

/**
 * Returns the locator expression (the part after `page.`) used in the
 * generated code log.
 *
 * NOTE(review): this relies on the underscore-prefixed
 * `_generateLocatorString` method, which is not part of the typed `Locator`
 * interface imported here; confirm it exists in the pinned playwright-core
 * version when upgrading.
 *
 * @param locator Locator to describe.
 * @returns The locator expression as a string.
 * @throws TypeError if the locator does not expose `_generateLocatorString`.
 */
export async function generateLocator(locator: Locator): Promise<string> {
  const internal = locator as LocatorWithInternals;
  if (typeof internal._generateLocatorString !== "function") {
    throw new TypeError(
      "Locator does not expose _generateLocatorString(); cannot generate locator code."
    );
  }
  return internal._generateLocatorString();
}

export default [snapshot, click, drag, hover, type, selectOption, screenshot];

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ampcome-mcps/browserbase-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.