Skip to main content
Glama

rod-mcp

by go-rod
common.go (9.85 kB)
package tools import ( "context" "errors" "fmt" "github.com/charmbracelet/log" "github.com/go-rod/rod-mcp/types" "github.com/go-rod/rod-mcp/utils" "github.com/go-rod/rod/lib/input" "github.com/go-rod/rod/lib/proto" "github.com/mark3labs/mcp-go/mcp" "github.com/mark3labs/mcp-go/server" "os" "path/filepath" "time" ) const ( defaultWaitStableDur = 1 * time.Second defaultDomDiff = 0.2 ) const ( NavigationToolKey = "rod_navigate" GoBackToolKey = "rod_go_back" GoForwardToolKey = "rod_go_forward" ReloadToolKey = "rod_reload" PressKeyToolKey = "rod_press" PdfToolKey = "rod_pdf" ScreenshotToolKey = "rod_screenshot" EvaluateToolKey = "rod_evaluate" CloseBrowserToolKey = "rod_close_browser" ) var ( Navigation = mcp.NewTool("rod_navigate", mcp.WithDescription("Navigate to a URL"), mcp.WithString("url", mcp.Description("URL to navigate to"), mcp.Required()), ) GoBack = mcp.NewTool(GoBackToolKey, mcp.WithDescription("Go back in the browser history, go back to the previous page"), ) GoForward = mcp.NewTool(GoForwardToolKey, mcp.WithDescription("Go forward in the browser history, go to the next page"), ) ReLoad = mcp.NewTool(ReloadToolKey, mcp.WithDescription("Reload the current page"), ) PressKey = mcp.NewTool(PressKeyToolKey, mcp.WithDescription("Press a key on the keyboard"), mcp.WithString("key", mcp.Description("Name of the key to press or a character to generate, such as `ArrowLeft` or `a`"), mcp.Required()), ) Pdf = mcp.NewTool(PdfToolKey, mcp.WithDescription("Generate a PDF from the current page"), mcp.WithString("file_path", mcp.Description("Path to save the PDF file"), mcp.Required()), mcp.WithString("file_name", mcp.Description("Name of the PDF file"), mcp.Required()), ) CloseBrowser = mcp.NewTool(CloseBrowserToolKey, mcp.WithDescription("Close the browser"), ) Screenshot = mcp.NewTool(ScreenshotToolKey, mcp.WithDescription("Take a screenshot of the current page or a specific element"), mcp.WithString("name", mcp.Description("Name of the screenshot"), mcp.Required()), 
mcp.WithString("selector", mcp.Description("CSS selector of the element to take a screenshot of")), mcp.WithNumber("width", mcp.Description("Width in pixels (default: 800)")), mcp.WithNumber("height", mcp.Description("Height in pixels (default: 600)")), ) Evaluate = mcp.NewTool(EvaluateToolKey, mcp.WithDescription("Execute JavaScript in the browser console"), mcp.WithString("script", mcp.Description("A function name or an unnamed function definition"), mcp.Required()), ) ) var ( NavigationHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { url := request.Params.Arguments["url"].(string) if !utils.IsHttp(url) { log.Errorf("Invalid URL: %s", url) return nil, errors.New("invalid URL") } page, err := rodCtx.EnsurePage() if err != nil { log.Errorf("Failed to navigate to %s: %s", url, err.Error()) return nil, errors.New(fmt.Sprintf("Failed to navigate to %s: %s", url, err.Error())) } err = page.Navigate(url) if err != nil { log.Errorf("Failed to navigate to %s: %s", url, err.Error()) return nil, errors.New(fmt.Sprintf("Failed to navigate to %s: %s", url, err.Error())) } page.WaitDOMStable(defaultWaitStableDur, defaultDomDiff) return mcp.NewToolResultText(fmt.Sprintf("Navigated to %s", url)), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: rodCtx.CurrentMode() == types.Text}) } GoBackHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { page, err := rodCtx.ControlledPage() if err != nil { log.Errorf("Failed to go back: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to go back: %s", err.Error())) } err = page.NavigateBack() if err != nil { log.Errorf("Failed to go back: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to go back: %s", err.Error())) } page.WaitDOMStable(defaultWaitStableDur, defaultDomDiff) 
return mcp.NewToolResultText("Go back successfully"), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: rodCtx.CurrentMode() == types.Text}) } GoForwardHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { page, err := rodCtx.ControlledPage() if err != nil { log.Errorf("Failed to go forward: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to go forward: %s", err.Error())) } err = page.NavigateForward() if err != nil { log.Errorf("Failed to go forward: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to go forward: %s", err.Error())) } page.WaitDOMStable(defaultWaitStableDur, defaultDomDiff) return mcp.NewToolResultText("Go forward successfully"), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: rodCtx.CurrentMode() == types.Text}) } ReLoadHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { page, err := rodCtx.ControlledPage() if err != nil { log.Errorf("Failed to reload current page: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to reload current page: %s", err.Error())) } err = page.Reload() if err != nil { log.Errorf("Failed to reload current page: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to reload current page: %s", err.Error())) } page.WaitDOMStable(defaultWaitStableDur, defaultDomDiff) return mcp.NewToolResultText("Reload current page successfully"), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: rodCtx.CurrentMode() == types.Text}) } PressKeyHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { page, err := rodCtx.ControlledPage() if err != nil { log.Errorf("Failed to press key: %s", err.Error()) return 
nil, errors.New(fmt.Sprintf("Failed to press key: %s", err.Error())) } key := request.Params.Arguments["key"].(rune) err = page.Keyboard.Type(input.Key(key)) if err != nil { log.Errorf("Failed to press key %s: %s", string(key), err.Error()) return nil, errors.New(fmt.Sprintf("Failed to press key %s: %s", string(key), err.Error())) } page.WaitDOMStable(defaultWaitStableDur, defaultDomDiff) return mcp.NewToolResultText(fmt.Sprintf("Press key %s successfully", string(key))), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: rodCtx.CurrentMode() == types.Text}) } CloseBrowserHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { err := rodCtx.CloseBrowser() if err != nil { log.Errorf("Failed to close browser: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to close browser: %s", err.Error())) } return mcp.NewToolResultText("Close browser successfully"), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: false}) } EvaluateHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { page, err := rodCtx.ControlledPage() if err != nil { log.Errorf("Failed to evaluate: %s", err.Error()) } script := request.Params.Arguments["script"].(string) r, err := proto.RuntimeEvaluate{ Expression: script, ObjectGroup: "console", IncludeCommandLineAPI: true, }.Call(page) if err != nil { log.Errorf("Failed to evaluate code: %s", err.Error()) return nil, errors.New(fmt.Sprintf("Failed to evaluate code: %s", err.Error())) } return mcp.NewToolResultText(fmt.Sprintf("Evaluate code successfully with result: %s", r.Result.Value.String())), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: false}) } ScreenshotHandler = func(rodCtx *types.Context) server.ToolHandlerFunc { handler := func(ctx 
context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) { page, err := rodCtx.ControlledPage() if err != nil { log.Errorf("Failed to screenshot: %s", err.Error()) } req := &proto.PageCaptureScreenshot{ Format: proto.PageCaptureScreenshotFormatPng, } bin, err := page.Screenshot(false, req) if err != nil { log.Errorf("Failed to screenshot: %s", err.Error()) } fileName := request.Params.Arguments["name"].(string) toFile := []string{"tmp", "screenshots", fileName + ".png"} filePath := filepath.Join(toFile...) err = os.WriteFile(filePath, bin, 0o664) if err != nil { log.Errorf("Failed to screenshot: %s", err.Error()) } return mcp.NewToolResultText(fmt.Sprintf("Save to %s", filePath)), nil } return rodCtx.Execute(handler, types.ToolHandlerCallOpts{WitSnapshot: false}) } ) var ( CommonTools = []mcp.Tool{ Navigation, GoBack, GoForward, ReLoad, PressKey, Screenshot, Evaluate, CloseBrowser, } CommonToolHandlers = map[string]ToolHandler{ NavigationToolKey: NavigationHandler, GoBackToolKey: GoBackHandler, GoForwardToolKey: GoForwardHandler, ReloadToolKey: ReLoadHandler, PressKeyToolKey: PressKeyHandler, ScreenshotToolKey: ScreenshotHandler, EvaluateToolKey: EvaluateHandler, CloseBrowserToolKey: CloseBrowserHandler, } )

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/go-rod/rod-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.