Scenic MCP

Overview Inspect Schema Related Servers Score Discussions

tools.ex•42.1 kB

defmodule ScenicMcp.Tools do @moduledoc """ Tool handlers for MCP server. Provides handlers for: - Keyboard input - Mouse input - Screenshot capture - Viewport inspection All functions return either `{:ok, result}` or `{:error, reason}` tuples for consistent error handling. """ require Logger # ======================================================================== # Scenic Process Lookup # ======================================================================== @doc """ Get the PID of the configured viewport process. Returns `{:ok, pid}` if found, or `{:error, reason}` if not found. """ @spec viewport_pid() :: {:ok, pid()} | {:error, String.t()} def viewport_pid do viewport_name = ScenicMcp.Config.viewport_name() case Process.whereis(viewport_name) do nil -> {:error, "Unable to find Scenic viewport process ':#{viewport_name}'. " <> "Ensure your Scenic application is running and the viewport is registered with this name. " <> "You can configure a different name with: config :scenic_mcp, viewport_name: :your_name"} pid -> {:ok, pid} end end @doc """ Get the viewport state. Returns `{:ok, state}` if successful, or `{:error, reason}` if the viewport cannot be found or the state cannot be retrieved. """ @spec viewport_state() :: {:ok, map()} | {:error, String.t()} def viewport_state do with {:ok, pid} <- viewport_pid() do state = :sys.get_state(pid, 5000) {:ok, state} end catch :exit, reason -> {:error, "Failed to get viewport state: #{inspect(reason)}"} end @doc """ Get the PID of the configured driver process. Returns `{:ok, pid}` if found, or `{:error, reason}` if not found. 
""" @spec driver_pid() :: {:ok, pid()} | {:error, String.t()} def driver_pid do driver_name = ScenicMcp.Config.driver_name() case Process.whereis(driver_name) do pid when is_pid(pid) -> {:ok, pid} _otherwise -> {:error, "Unable to find Scenic driver process ':#{driver_name}'"} end catch :exit, reason -> {:error, "Failed to find driver process: #{inspect(reason)}"} end @doc """ Get the driver state. Returns `{:ok, state}` if successful, or `{:error, reason}` if the driver cannot be found or the state cannot be retrieved. """ @spec driver_state() :: {:ok, any()} | {:error, String.t()} def driver_state do with {:ok, pid} <- driver_pid() do state = :sys.get_state(pid, 5000) {:ok, state} end catch :exit, reason -> {:error, "Failed to get driver state: #{inspect(reason)}"} end # ======================================================================== # Input Handling # ======================================================================== @doc """ Send input to the Scenic driver. Returns `{:ok, :sent}` if successful, or `{:error, reason}` if the driver cannot be found or the input cannot be sent. """ @spec send_input(any()) :: {:ok, :sent} | {:error, String.t()} def send_input(input) do with {:ok, driver_struct} <- driver_state() do Scenic.Driver.send_input(driver_struct, input) {:ok, :sent} end catch :exit, reason -> {:error, "Failed to send input: #{inspect(reason)}"} end # ======================================================================== # Tool Handlers (called from server.ex) # ======================================================================== @doc """ Handle keyboard input. Accepts either: - `%{"text" => string}` - Type text character by character - `%{"key" => string, "modifiers" => list}` - Press special key with optional modifiers Returns `{:ok, result_map}` or `{:error, reason}`. 
""" @spec handle_send_keys(map()) :: {:ok, map()} | {:error, String.t()} def handle_send_keys(%{"text" => text}) when is_binary(text) do with {:ok, driver_struct} <- driver_state() do text |> String.graphemes() |> Enum.each(fn char -> # Convert character to key atom and determine if shift is needed # Scenic uses :key tuples, not :codepoint codepoint = char |> String.to_charlist() |> List.first() {key_atom, modifiers} = codepoint_to_key_with_mods(codepoint) # Send key press and release Scenic.Driver.send_input(driver_struct, {:key, {key_atom, 1, modifiers}}) Process.sleep(5) Scenic.Driver.send_input(driver_struct, {:key, {key_atom, 0, modifiers}}) end) {:ok, %{status: "ok", message: "Text sent: #{text}"}} end end def handle_send_keys(%{"key" => key} = params) when is_binary(key) do with {:ok, driver_struct} <- driver_state() do modifiers = parse_modifiers(Map.get(params, "modifiers", [])) key_atom = normalize_key(key) # Key state: 1 = press, 0 = release Scenic.Driver.send_input(driver_struct, {:key, {key_atom, 1, modifiers}}) Process.sleep(10) Scenic.Driver.send_input(driver_struct, {:key, {key_atom, 0, modifiers}}) {:ok, %{status: "ok", message: "Key sent: #{key}"}} end end def handle_send_keys(_params) do {:error, "Invalid parameters: must provide either 'text' or 'key' parameter"} end @doc """ Handle mouse movement. Accepts `%{"x" => number, "y" => number}`. Returns `{:ok, result_map}` or `{:error, reason}`. """ @spec handle_mouse_move(map()) :: {:ok, map()} | {:error, String.t()} def handle_mouse_move(%{"x" => x, "y" => y}) do with {:ok, driver_struct} <- driver_state() do Scenic.Driver.send_input(driver_struct, {:cursor_pos, {x, y}}) {:ok, %{status: "ok", message: "Mouse moved to (#{x}, #{y})"}} end end def handle_mouse_move(_params) do {:error, "Invalid parameters: must provide 'x' and 'y' coordinates"} end @doc """ Handle mouse clicks. Accepts `%{"x" => number, "y" => number, "button" => string}`. Button is optional and defaults to "left". 
Returns `{:ok, result_map}` or `{:error, reason}`. """ @spec handle_mouse_click(map()) :: {:ok, map()} | {:error, String.t()} def handle_mouse_click(%{"x" => x, "y" => y} = params) do with {:ok, driver_struct} <- driver_state() do button = parse_button(Map.get(params, "button", "left")) # Move to position Scenic.Driver.send_input(driver_struct, {:cursor_pos, {x, y}}) # Click - Scenic format: {:cursor_button, {button, state, modifiers, coords}} # state: 1 = press, 0 = release Scenic.Driver.send_input(driver_struct, {:cursor_button, {button, 1, [], {x, y}}}) Process.sleep(10) Scenic.Driver.send_input(driver_struct, {:cursor_button, {button, 0, [], {x, y}}}) {:ok, %{status: "ok", message: "Mouse clicked at (#{x}, #{y})"}} end end def handle_mouse_click(_params) do {:error, "Invalid parameters: must provide 'x' and 'y' coordinates"} end def inspect_viewport(args \\ nil) do handle_get_scenic_graph(args) end @doc """ Get viewport graph information. Returns `{:ok, result_map}` with viewport structure information, or `{:error, reason}` if the viewport cannot be inspected. """ @spec handle_get_scenic_graph(any()) :: {:ok, map()} | {:error, String.t()} def handle_get_scenic_graph(_args \\ nil) do with {:ok, vp_state} <- viewport_state() do case vp_state do %{script_table: script_table} = state when script_table != nil -> scripts = :ets.tab2list(script_table) visual_description = build_scene_description(scripts) semantic_info = build_semantic_description(Map.get(state, :semantic_table)) {:ok, %{ status: "ok", script_count: length(scripts), visual_description: visual_description, semantic_elements: semantic_info, raw_scripts: Enum.map(scripts, fn {id, _compiled, _pid} -> id {id, _compiled} -> id end) }} _ -> {:error, "No script table found in viewport state. The viewport may not be fully initialized. State keys: #{inspect(Map.keys(vp_state))}"} end end end @doc """ Find clickable elements in the current viewport. 
Returns `{:ok, result_map}` with a list of clickable elements and their center coordinates, or `{:error, reason}` if elements cannot be found. Optional params: - `filter`: Filter by element ID (matches against atom keys) """ @spec find_clickable_elements(map()) :: {:ok, map()} | {:error, String.t()} def find_clickable_elements(params) do with {:ok, vp_state} <- viewport_state() do case Map.get(vp_state, :semantic_table) do nil -> {:error, "No semantic table found - the viewport may not have semantic DOM enabled"} semantic_table -> # Phase 1 Semantic Registration Format # ETS stores: {{scene_name, entry_id}, %Entry{}} # where Entry has: id, type, clickable, screen_bounds, local_bounds, etc. all_entries = :ets.tab2list(semantic_table) |> Enum.map(fn {{scene_name, _entry_id}, entry} -> # entry is a %Scenic.Semantic.Compiler.Entry{} struct {entry.id, entry, scene_name} end) # Group by ID (in case multiple scenes have same ID) # Prefer :_root_ scene, otherwise use first one found flat_elements = all_entries |> Enum.group_by(fn {id, _entry, _scene_name} -> id end) |> Enum.map(fn {_id, versions} -> # Prefer entries from :_root_ scene best_version = Enum.max_by(versions, fn {_id, _entry, scene_name} -> if scene_name in [:_root_, "_root_"], do: 1_000_000, else: 0 end) {id, entry, _scene_name} = best_version {id, entry} end) filter = Map.get(params, "filter") clickable_elements = flat_elements |> Enum.filter(fn {_id, entry} -> # Phase 1: clickable flag is directly on the Entry struct Map.get(entry, :clickable, false) end) |> maybe_filter_by_id(filter) |> Enum.map(fn {id, entry} -> # Phase 1: Use screen_bounds (will fall back to local_bounds in Phase 1) # Bounds format: %{left: x, top: y, width: w, height: h} bounds = entry.screen_bounds # Calculate center from bounds center = if bounds do %{ "x" => bounds.left + bounds.width / 2, "y" => bounds.top + bounds.height / 2 } else nil end # Convert bounds to old format for compatibility bounds_map = if bounds do %{ "left" => 
bounds.left, "top" => bounds.top, "width" => bounds.width, "height" => bounds.height } else nil end %{ id: inspect(id), raw_id: id, type: entry.type, bounds: bounds_map, center: center, clickable: entry.clickable, label: entry.label, role: entry.role, z_index: entry.z_index } |> sanitize_for_json() end) {:ok, %{ status: "ok", count: length(clickable_elements), elements: clickable_elements }} end end end @doc """ Click on an element by its semantic ID. This is a high-level convenience function similar to Playwright's `page.click(selector)`. It finds the element, calculates its center, and clicks it automatically. Params: - `element_id`: The semantic ID to click (string or atom, e.g., ":load_component_button") Returns `{:ok, result_map}` with click details, or `{:error, reason}` if element not found. """ @spec click_element(map()) :: {:ok, map()} | {:error, String.t()} def click_element(%{"element_id" => element_id}) when is_binary(element_id) do with {:ok, result} <- find_clickable_elements(%{"filter" => element_id}), element <- List.first(result.elements) do if element do # Handle both atom and string keys (after sanitization) center = get_in_sanitized(element, [:center]) || get_in_sanitized(element, ["center"]) case center do %{"x" => x, "y" => y} when is_number(x) and is_number(y) -> # Click at the element's center case handle_mouse_click(%{"x" => x, "y" => y}) do {:ok, _click_result} -> {:ok, %{ status: "ok", message: "Clicked element #{element_id}", element: element, clicked_at: %{x: x, y: y} }} {:error, reason} -> {:error, "Failed to click element: #{reason}"} end _ -> {:error, "Element found but has no valid center coordinates: #{inspect(center)}"} end else {:error, "Element '#{element_id}' not found or not clickable"} end end end def click_element(_params) do {:error, "Invalid parameters: must provide 'element_id' parameter"} end @doc """ Move mouse to hover over an element by its semantic ID. 
Similar to Playwright's hover functionality - finds the element and moves the mouse to its center without clicking. Params: - `element_id`: The semantic ID to hover over (string or atom) Returns `{:ok, result_map}` with hover details, or `{:error, reason}` if element not found. """ @spec hover_element(map()) :: {:ok, map()} | {:error, String.t()} def hover_element(%{"element_id" => element_id}) when is_binary(element_id) do with {:ok, result} <- find_clickable_elements(%{"filter" => element_id}), element <- List.first(result.elements) do if element do # Handle both atom and string keys (after sanitization) center = get_in_sanitized(element, [:center]) || get_in_sanitized(element, ["center"]) case center do %{"x" => x, "y" => y} when is_number(x) and is_number(y) -> case handle_mouse_move(%{"x" => x, "y" => y}) do {:ok, _move_result} -> {:ok, %{ status: "ok", message: "Hovering over element #{element_id}", element: element, position: %{x: x, y: y} }} {:error, reason} -> {:error, "Failed to move mouse to element: #{reason}"} end _ -> {:error, "Element found but has no valid center coordinates: #{inspect(center)}"} end else {:error, "Element '#{element_id}' not found"} end end end def hover_element(_params) do {:error, "Invalid parameters: must provide 'element_id' parameter"} end @doc """ Capture a screenshot of the Scenic application. Accepts `%{"format" => "path" | "base64", "filename" => string}`. Both parameters are optional. Returns `{:ok, result_map}` with screenshot information, or `{:error, reason}` if the screenshot cannot be captured. 
""" @spec take_screenshot(map()) :: {:ok, map()} | {:error, String.t()} def take_screenshot(params) do format = Map.get(params, "format", "path") filename = Map.get(params, "filename") path = if filename do if String.ends_with?(filename, ".png") do filename else filename <> ".png" end else timestamp = DateTime.utc_now() |> DateTime.to_string() |> String.replace(~r/[:\s]/, "_") "/tmp/scenic_screenshot_#{timestamp}.png" end with {:ok, vp_pid} <- viewport_pid(), {:ok, driver_pid} <- get_driver_from_viewport(vp_pid), driver_state <- :sys.get_state(driver_pid), :ok <- Scenic.Driver.Local.screenshot(driver_state, path) do if format == "base64" do case File.read(path) do {:ok, binary} -> encoded = Base.encode64(binary) {:ok, %{ status: "ok", path: path, format: "base64", data: encoded, size: byte_size(binary) }} {:error, reason} -> {:error, "Failed to read screenshot file: #{inspect(reason)}"} end else {:ok, %{status: "ok", path: path, format: "path"}} end else {:error, reason} -> {:error, reason} other -> {:error, "Screenshot failed: #{inspect(other)}"} end end # ======================================================================== # Helper Functions # ======================================================================== defp get_driver_from_viewport(vp_pid) do case :sys.get_state(vp_pid) do %{driver_pids: [driver_pid | _]} -> {:ok, driver_pid} _ -> {:error, "No driver found in viewport state. 
Ensure your Scenic driver is properly configured."} end catch :exit, reason -> {:error, "Failed to get driver from viewport: #{inspect(reason)}"} end defp build_scene_description(scripts) do scripts |> Enum.map(fn {id, _compiled_script, _pid} -> {id, nil} {id, _compiled_script} -> {id, nil} other -> {other, nil} end) |> Enum.map(fn {id, _} -> component_name = case id do {name, _uid} when is_atom(name) -> Atom.to_string(name) name when is_atom(name) -> Atom.to_string(name) _ -> inspect(id) end %{ id: inspect(id), component: component_name } end) |> Enum.group_by(& &1.component) |> Enum.map(fn {component, items} -> "#{component} (#{length(items)} instances)" end) |> Enum.join(", ") end defp build_semantic_description(nil), do: %{count: 0, elements: [], summary: "No semantic DOM available"} defp build_semantic_description(semantic_table) do try do # Each entry in semantic_table is {graph_key, %{elements: %{id => element_info}, timestamp: ...}} # We need to flatten the nested elements map # IMPORTANT: Keep only the LATEST version of each element ID (by timestamp) raw_table = :ets.tab2list(semantic_table) IO.inspect(raw_table |> Enum.map(fn {key, data} -> {key, Map.keys(Map.get(data, :elements, %{})), Map.get(data, :timestamp)} end), label: "DEBUG: Raw semantic table entries") flat_elements = raw_table |> Enum.flat_map(fn {graph_key, data} -> timestamp = Map.get(data, :timestamp, 0) # Extract the nested elements map case Map.get(data, :elements) do elements when is_map(elements) -> Enum.map(elements, fn {id, element_info} -> {id, element_info, timestamp, graph_key} end) _ -> [] end end) # Keep the best version of each ID # Prefer entries from :_root_ graph, otherwise take the latest by timestamp |> Enum.group_by(fn {id, _element_info, _timestamp, _graph_key} -> id end) |> Enum.map(fn {_id, versions} -> # Prefer _root_ entries, otherwise take latest timestamp best_version = Enum.max_by(versions, fn {_id, _info, ts, graph_key} -> root_priority = if graph_key in [:_root_, 
"_root_"], do: 1_000_000_000_000, else: 0 root_priority + ts end) {id, element_info, _timestamp, _graph_key} = best_version {id, element_info} end) summary = flat_elements |> Enum.map(fn {_id, element_info} -> element_info |> Map.get(:semantic, %{}) |> Map.get(:type, :unknown) |> to_string() end) |> Enum.frequencies() |> Enum.map(fn {type, count} -> "#{count} #{type}" end) |> Enum.join(", ") %{ count: length(flat_elements), elements: Enum.map(flat_elements, fn {id, element_info} -> element_info |> Map.put(:key, inspect(id)) |> sanitize_for_json() end), summary: summary, by_type: Enum.frequencies( Enum.map(flat_elements, fn {_id, element_info} -> Map.get(element_info, :semantic, %{}) |> Map.get(:type, :unknown) end) ), clickable_count: Enum.count(flat_elements, fn {_id, element_info} -> element_info |> Map.get(:semantic, %{}) |> Map.get(:clickable, false) end) } rescue _ -> %{count: 0, elements: [], summary: "Error reading semantic table"} end end # Convert a character to a key event that applications can understand. # Returns {:ok, key_event} for supported characters, :error otherwise. 
defp char_to_key_event(char) do case char do # Lowercase letters "a" -> {:ok, {:key, {:key_a, 1, []}}} "b" -> {:ok, {:key, {:key_b, 1, []}}} "c" -> {:ok, {:key, {:key_c, 1, []}}} "d" -> {:ok, {:key, {:key_d, 1, []}}} "e" -> {:ok, {:key, {:key_e, 1, []}}} "f" -> {:ok, {:key, {:key_f, 1, []}}} "g" -> {:ok, {:key, {:key_g, 1, []}}} "h" -> {:ok, {:key, {:key_h, 1, []}}} "i" -> {:ok, {:key, {:key_i, 1, []}}} "j" -> {:ok, {:key, {:key_j, 1, []}}} "k" -> {:ok, {:key, {:key_k, 1, []}}} "l" -> {:ok, {:key, {:key_l, 1, []}}} "m" -> {:ok, {:key, {:key_m, 1, []}}} "n" -> {:ok, {:key, {:key_n, 1, []}}} "o" -> {:ok, {:key, {:key_o, 1, []}}} "p" -> {:ok, {:key, {:key_p, 1, []}}} "q" -> {:ok, {:key, {:key_q, 1, []}}} "r" -> {:ok, {:key, {:key_r, 1, []}}} "s" -> {:ok, {:key, {:key_s, 1, []}}} "t" -> {:ok, {:key, {:key_t, 1, []}}} "u" -> {:ok, {:key, {:key_u, 1, []}}} "v" -> {:ok, {:key, {:key_v, 1, []}}} "w" -> {:ok, {:key, {:key_w, 1, []}}} "x" -> {:ok, {:key, {:key_x, 1, []}}} "y" -> {:ok, {:key, {:key_y, 1, []}}} "z" -> {:ok, {:key, {:key_z, 1, []}}} # Uppercase letters (with shift modifier) "A" -> {:ok, {:key, {:key_a, 1, ["shift"]}}} "B" -> {:ok, {:key, {:key_b, 1, ["shift"]}}} "C" -> {:ok, {:key, {:key_c, 1, ["shift"]}}} "D" -> {:ok, {:key, {:key_d, 1, ["shift"]}}} "E" -> {:ok, {:key, {:key_e, 1, ["shift"]}}} "F" -> {:ok, {:key, {:key_f, 1, ["shift"]}}} "G" -> {:ok, {:key, {:key_g, 1, ["shift"]}}} "H" -> {:ok, {:key, {:key_h, 1, ["shift"]}}} "I" -> {:ok, {:key, {:key_i, 1, ["shift"]}}} "J" -> {:ok, {:key, {:key_j, 1, ["shift"]}}} "K" -> {:ok, {:key, {:key_k, 1, ["shift"]}}} "L" -> {:ok, {:key, {:key_l, 1, ["shift"]}}} "M" -> {:ok, {:key, {:key_m, 1, ["shift"]}}} "N" -> {:ok, {:key, {:key_n, 1, ["shift"]}}} "O" -> {:ok, {:key, {:key_o, 1, ["shift"]}}} "P" -> {:ok, {:key, {:key_p, 1, ["shift"]}}} "Q" -> {:ok, {:key, {:key_q, 1, ["shift"]}}} "R" -> {:ok, {:key, {:key_r, 1, ["shift"]}}} "S" -> {:ok, {:key, {:key_s, 1, ["shift"]}}} "T" -> {:ok, {:key, {:key_t, 1, ["shift"]}}} "U" 
-> {:ok, {:key, {:key_u, 1, ["shift"]}}} "V" -> {:ok, {:key, {:key_v, 1, ["shift"]}}} "W" -> {:ok, {:key, {:key_w, 1, ["shift"]}}} "X" -> {:ok, {:key, {:key_x, 1, ["shift"]}}} "Y" -> {:ok, {:key, {:key_y, 1, ["shift"]}}} "Z" -> {:ok, {:key, {:key_z, 1, ["shift"]}}} # Numbers "0" -> {:ok, {:key, {:key_0, 1, []}}} "1" -> {:ok, {:key, {:key_1, 1, []}}} "2" -> {:ok, {:key, {:key_2, 1, []}}} "3" -> {:ok, {:key, {:key_3, 1, []}}} "4" -> {:ok, {:key, {:key_4, 1, []}}} "5" -> {:ok, {:key, {:key_5, 1, []}}} "6" -> {:ok, {:key, {:key_6, 1, []}}} "7" -> {:ok, {:key, {:key_7, 1, []}}} "8" -> {:ok, {:key, {:key_8, 1, []}}} "9" -> {:ok, {:key, {:key_9, 1, []}}} # Common symbols " " -> {:ok, {:key, {:key_space, 1, []}}} "!" -> {:ok, {:key, {:key_1, 1, ["shift"]}}} "\"" -> {:ok, {:key, {:key_apostrophe, 1, ["shift"]}}} "#" -> {:ok, {:key, {:key_3, 1, ["shift"]}}} "$" -> {:ok, {:key, {:key_4, 1, ["shift"]}}} "%" -> {:ok, {:key, {:key_5, 1, ["shift"]}}} "&" -> {:ok, {:key, {:key_7, 1, ["shift"]}}} "'" -> {:ok, {:key, {:key_apostrophe, 1, []}}} "(" -> {:ok, {:key, {:key_9, 1, ["shift"]}}} ")" -> {:ok, {:key, {:key_0, 1, ["shift"]}}} "*" -> {:ok, {:key, {:key_8, 1, ["shift"]}}} "+" -> {:ok, {:key, {:key_equal, 1, ["shift"]}}} "," -> {:ok, {:key, {:key_comma, 1, []}}} "-" -> {:ok, {:key, {:key_minus, 1, []}}} "." -> {:ok, {:key, {:key_period, 1, []}}} "/" -> {:ok, {:key, {:key_slash, 1, []}}} ":" -> {:ok, {:key, {:key_semicolon, 1, ["shift"]}}} ";" -> {:ok, {:key, {:key_semicolon, 1, []}}} "<" -> {:ok, {:key, {:key_comma, 1, ["shift"]}}} "=" -> {:ok, {:key, {:key_equal, 1, []}}} ">" -> {:ok, {:key, {:key_period, 1, ["shift"]}}} "?" 
-> {:ok, {:key, {:key_slash, 1, ["shift"]}}} "@" -> {:ok, {:key, {:key_2, 1, ["shift"]}}} "[" -> {:ok, {:key, {:key_left_bracket, 1, []}}} "\\" -> {:ok, {:key, {:key_backslash, 1, []}}} "]" -> {:ok, {:key, {:key_right_bracket, 1, []}}} "^" -> {:ok, {:key, {:key_6, 1, ["shift"]}}} "_" -> {:ok, {:key, {:key_minus, 1, ["shift"]}}} "`" -> {:ok, {:key, {:key_grave_accent, 1, []}}} "{" -> {:ok, {:key, {:key_left_bracket, 1, ["shift"]}}} "|" -> {:ok, {:key, {:key_backslash, 1, ["shift"]}}} "}" -> {:ok, {:key, {:key_right_bracket, 1, ["shift"]}}} "~" -> {:ok, {:key, {:key_grave_accent, 1, ["shift"]}}} # Unsupported character _ -> :error end end defp normalize_key(key) do case String.downcase(key) do "enter" -> :key_enter "escape" -> :key_escape "tab" -> :key_tab "backspace" -> :key_backspace "delete" -> :key_delete "space" -> :key_space "up" -> :key_up "down" -> :key_down "left" -> :key_left "right" -> :key_right "home" -> :key_home "end" -> :key_end "page_up" -> :key_pageup "page_down" -> :key_pagedown "f1" -> :key_f1 "f2" -> :key_f2 "f3" -> :key_f3 "f4" -> :key_f4 "f5" -> :key_f5 "f6" -> :key_f6 "f7" -> :key_f7 "f8" -> :key_f8 "f9" -> :key_f9 "f10" -> :key_f10 "f11" -> :key_f11 "f12" -> :key_f12 other -> String.to_atom("key_" <> other) end end defp parse_modifiers(modifiers) when is_list(modifiers) do modifiers |> Enum.filter(&(&1 in ["shift", "ctrl", "alt", "cmd", "meta"])) |> Enum.map(&String.to_atom/1) # Convert to atoms for Scenic end defp parse_modifiers(_), do: [] defp parse_button(button) when is_binary(button) do case String.downcase(button) do "left" -> :btn_left "right" -> :btn_right "middle" -> :btn_middle _ -> :btn_left end end defp parse_button(_), do: :btn_left # Convert Unicode codepoint to Scenic key atom with modifiers # Returns {key_atom, modifiers} tuple # Letters and numbers get :key_<char> format (e.g., :key_a, :key_1) # Special characters get their own key atoms, uppercase/symbols include shift defp codepoint_to_key_with_mods(codepoint) when 
is_integer(codepoint) do case codepoint do # Lowercase letters a-z (no shift) c when c >= ?a and c <= ?z -> {String.to_atom("key_#{<<c>>}"), []} # Uppercase letters A-Z (need shift) c when c >= ?A and c <= ?Z -> {String.to_atom("key_#{<<c + 32>>}"), [:shift]} # Numbers 0-9 (no shift) c when c >= ?0 and c <= ?9 -> {String.to_atom("key_#{<<c>>}"), []} # Space 32 -> {:key_space, []} # Common punctuation requiring shift ?! -> {:key_1, [:shift]} ?@ -> {:key_2, [:shift]} ?# -> {:key_3, [:shift]} ?$ -> {:key_4, [:shift]} ?% -> {:key_5, [:shift]} ?^ -> {:key_6, [:shift]} ?& -> {:key_7, [:shift]} ?* -> {:key_8, [:shift]} ?( -> {:key_9, [:shift]} ?) -> {:key_0, [:shift]} ?_ -> {:key_minus, [:shift]} ?+ -> {:key_equal, [:shift]} ?{ -> {:key_open_bracket, [:shift]} ?} -> {:key_close_bracket, [:shift]} ?| -> {:key_backslash, [:shift]} ?: -> {:key_semicolon, [:shift]} ?" -> {:key_apostrophe, [:shift]} ?< -> {:key_comma, [:shift]} ?> -> {:key_period, [:shift]} ?? -> {:key_slash, [:shift]} ?~ -> {:key_grave, [:shift]} # Common punctuation without shift ?- -> {:key_minus, []} ?= -> {:key_equal, []} ?[ -> {:key_open_bracket, []} ?] -> {:key_close_bracket, []} ?\\ -> {:key_backslash, []} ?\; -> {:key_semicolon, []} ?' -> {:key_apostrophe, []} ?, -> {:key_comma, []} ?. 
-> {:key_period, []} ?/ -> {:key_slash, []} ?` -> {:key_grave, []} # Fallback for unknown characters - use generic key name _ -> {:key_unknown, []} end end defp calculate_center(bounds) when is_map(bounds) do # Bounds format: %{left: x, top: y, width: w, height: h} left = Map.get(bounds, :left, 0) top = Map.get(bounds, :top, 0) width = Map.get(bounds, :width, 0) height = Map.get(bounds, :height, 0) %{ x: left + width / 2, y: top + height / 2 } end defp calculate_center(_), do: %{x: 0, y: 0} defp calculate_center_with_transforms(bounds, transforms, graph_key, vp_state) when is_map(bounds) do # Calculate local center local_center = calculate_center(bounds) # DEBUG: See what transforms we're getting IO.inspect(transforms, label: "DEBUG: Element transforms") # Apply element's own translate transform if present # Transforms can be in format: %{translate: {x, y}} or %{pin: ..., translate: ...} element_translate = case Map.get(transforms, :translate) do {tx, ty} when is_number(tx) and is_number(ty) -> {tx, ty} _ -> {0, 0} end {elem_tx, elem_ty} = element_translate IO.inspect({elem_tx, elem_ty}, label: "DEBUG: Element translate") # NEW: Traverse the graph hierarchy and accumulate all parent transforms hierarchy_translate = get_hierarchy_transforms(graph_key, vp_state) {hier_tx, hier_ty} = hierarchy_translate IO.inspect({hier_tx, hier_ty}, label: "DEBUG: Hierarchy translate for #{inspect(graph_key)}") # Accumulate all transforms %{ x: local_center.x + elem_tx + hier_tx, y: local_center.y + elem_ty + hier_ty } end defp calculate_center_with_transforms(bounds, _transforms, _graph_key, _vp_state) do calculate_center(bounds) end @doc """ Traverse the graph hierarchy and accumulate transforms from all parent graphs. Uses scene_script_table which contains parent-child relationships between graphs. ## Algorithm 1. Build a parent map from scene_script_table (child -> parent) 2. Walk up the chain from element's graph_key to :_root_ 3. 
For each graph in the chain, extract and accumulate its transforms 4. Return the total accumulated translate offset ## Example Button in scrolled modal: - Button graph_key: "xyz123" (transforms: translate {20, 630}) - Parent scroll group: "abc456" (transforms: translate {0, -495}) - Parent modal container: "def789" (transforms: translate {300, 100}) - Root: :_root_ Accumulated: {20, 630} + {0, -495} + {300, 100} = {320, 235} """ defp get_hierarchy_transforms(nil, _vp_state), do: {0, 0} defp get_hierarchy_transforms(_graph_key, nil), do: {0, 0} defp get_hierarchy_transforms(graph_key, vp_state) when graph_key in [:_root_, "_root_"] do # Root graph has no parent, no accumulated transform {0, 0} end defp get_hierarchy_transforms(graph_key, vp_state) do scene_script_table = Map.get(vp_state, :scene_script_table) if scene_script_table == nil do IO.puts("DEBUG: No scene_script_table in viewport state") {0, 0} else # Build parent map from scene_script_table parent_map = build_parent_map(scene_script_table) IO.inspect(Map.keys(parent_map), label: "DEBUG: Parent map keys", limit: 10) # Walk up the hierarchy and accumulate transforms accumulate_parent_transforms(graph_key, parent_map, scene_script_table, {0, 0}, 0) end end @doc """ Build a child -> parent map from scene_script_table. scene_script_table entries have format: {graph_key, %{children: [child_key1, child_key2, ...], transforms: [...], ...}} We invert this to create: %{child_key => parent_key} """ defp build_parent_map(scene_script_table) do :ets.tab2list(scene_script_table) |> Enum.reduce(%{}, fn {parent_key, script_info}, acc -> children = Map.get(script_info, :children, []) Enum.reduce(children, acc, fn child_key, acc2 -> Map.put(acc2, child_key, parent_key) end) end) end @doc """ Recursively walk up the parent chain and accumulate transforms. Stops at :_root_ or when no parent is found (max depth 10 for safety). 
""" defp accumulate_parent_transforms(_graph_key, _parent_map, _scene_script_table, acc, depth) when depth > 10 do IO.puts("DEBUG: Max hierarchy depth (10) reached, stopping") acc end defp accumulate_parent_transforms(graph_key, parent_map, scene_script_table, {acc_x, acc_y}, depth) do # Look up this graph's parent case Map.get(parent_map, graph_key) do nil -> # No parent found, we're at the top IO.puts("DEBUG: No parent for #{inspect(graph_key)}, stopping at depth #{depth}") {acc_x, acc_y} parent_key when parent_key in [:_root_, "_root_"] -> # Reached root, stop here IO.puts("DEBUG: Reached root from #{inspect(graph_key)} at depth #{depth}") {acc_x, acc_y} parent_key -> # Get parent's transforms from scene_script_table parent_transform = get_graph_transform_from_scene_script(parent_key, scene_script_table) {parent_tx, parent_ty} = parent_transform IO.puts("DEBUG: Parent #{inspect(parent_key)} has transform {#{parent_tx}, #{parent_ty}}") # Accumulate and recurse new_acc = {acc_x + parent_tx, acc_y + parent_ty} accumulate_parent_transforms(parent_key, parent_map, scene_script_table, new_acc, depth + 1) end end @doc """ Extract translate transform from a scene_script_table entry. scene_script_table has format: {graph_key, %{transforms: [{:translate, {x, y}}, ...], ...}} """ defp get_graph_transform_from_scene_script(graph_key, scene_script_table) do case :ets.lookup(scene_script_table, graph_key) do [] -> IO.puts("DEBUG: Graph #{inspect(graph_key)} not found in scene_script_table") {0, 0} [{^graph_key, script_info}] -> # Extract transforms list transforms_list = Map.get(script_info, :transforms, []) # Find translate transform translate = Enum.find_value(transforms_list, {0, 0}, fn {:translate, {x, y}} when is_number(x) and is_number(y) -> {x, y} _ -> false end) translate other -> IO.puts("DEBUG: Unexpected scene_script_table format: #{inspect(other)}") {0, 0} end end @doc """ DEPRECATED: Old function that only looked at immediate graph transform. 
Replaced by get_hierarchy_transforms which traverses the full parent chain. """ defp get_graph_transform(nil, _vp_state), do: {0, 0} defp get_graph_transform(_graph_key, nil), do: {0, 0} defp get_graph_transform(graph_key, vp_state) when graph_key in [:_root_, "_root_"] do # Root graph has no transform {0, 0} end defp get_graph_transform(graph_key, vp_state) do # Look up the graph's script in the script_table case Map.get(vp_state, :script_table) do nil -> IO.puts("DEBUG: No script_table in viewport state") {0, 0} script_table -> case :ets.lookup(script_table, graph_key) do [] -> IO.puts("DEBUG: Graph key #{inspect(graph_key)} not found in script_table") {0, 0} [{^graph_key, compiled_script} | _] -> extract_translate_from_script(compiled_script, graph_key) [{^graph_key, compiled_script, _pid} | _] -> extract_translate_from_script(compiled_script, graph_key) other -> IO.puts("DEBUG: Unexpected script_table format for #{inspect(graph_key)}: #{inspect(other)}") {0, 0} end end end @doc """ Extract the translate transform from a compiled Scenic script. A compiled script is a binary containing drawing commands. The transform is embedded in the script's metadata/commands. For now, we use a simple approach: inspect the script structure for translate transforms. This may need refinement based on Scenic's internal script format. """ defp extract_translate_from_script(compiled_script, graph_key) do # Compiled scripts are complex binary structures. We need to access # the transform that was applied when the graph was compiled. # # The script contains a :tx (transform matrix) in its metadata. # For translate transforms, this is typically a simple {dx, dy} offset. 
try do # Scenic scripts have a specific structure - they're Erlang terms # Try to pattern match common structures case compiled_script do # Some scripts might have transform info accessible %{tx: tx_matrix} -> extract_translate_from_matrix(tx_matrix) # Compiled script might be a tuple with transform data {_commands, _opts, tx_matrix} -> extract_translate_from_matrix(tx_matrix) # Binary format - harder to parse binary when is_binary(binary) -> # For binary scripts, we'd need to parse Scenic's internal format # This is complex, so for now return {0, 0} and log IO.puts("DEBUG: Binary script for #{inspect(graph_key)} - cannot easily extract transform") IO.puts(" Script size: #{byte_size(binary)} bytes") {0, 0} other -> IO.inspect(other, label: "DEBUG: Unexpected script structure for #{inspect(graph_key)}", limit: 5) {0, 0} end rescue error -> IO.puts("DEBUG: Error extracting transform for #{inspect(graph_key)}: #{inspect(error)}") {0, 0} end end defp extract_translate_from_matrix({dx, dy}), do: {dx, dy} defp extract_translate_from_matrix([1, 0, 0, 1, dx, dy]), do: {dx, dy} defp extract_translate_from_matrix(_), do: {0, 0} defp maybe_filter_by_id(elements, nil), do: elements defp maybe_filter_by_id(elements, filter) when is_binary(filter) do # Try to match the filter against the element key # Support both ":atom_name" and "atom_name" formats filter_atom = filter |> String.trim_leading(":") |> String.to_atom() Enum.filter(elements, fn {key, _data} -> key == filter_atom or inspect(key) =~ filter end) end defp maybe_filter_by_id(elements, _), do: elements # Helper to get values from maps that may have atom or string keys defp get_in_sanitized(map, [key | rest]) when is_map(map) do value = Map.get(map, key) || Map.get(map, to_string(key)) if rest == [] do value else get_in_sanitized(value, rest) end end defp get_in_sanitized(_, _), do: nil # Recursively sanitize data structures to be JSON-encodable # Converts tuples, atoms, and other non-JSON types to strings/basic types 
# Structs also satisfy is_map/1 but generally do not implement Enumerable,
# so they are converted to plain maps first instead of crashing in the
# map clause below. The struct name is dropped; the fields are sanitized.
defp sanitize_for_json(data) when is_struct(data) do
  data |> Map.from_struct() |> sanitize_for_json()
end

# Plain maps: sanitize both keys and values.
defp sanitize_for_json(data) when is_map(data) do
  Map.new(data, fn {k, v} -> {sanitize_for_json(k), sanitize_for_json(v)} end)
end

defp sanitize_for_json(data) when is_list(data), do: Enum.map(data, &sanitize_for_json/1)

# Tuples have no JSON equivalent; use their inspected string form.
defp sanitize_for_json(data) when is_tuple(data), do: inspect(data)

# nil, true and false are atoms but map directly onto JSON literals.
defp sanitize_for_json(data) when is_nil(data) or is_boolean(data), do: data

# Every other atom becomes its name as a string.
defp sanitize_for_json(data) when is_atom(data), do: Atom.to_string(data)

# Binaries and numbers are passed through untouched.
defp sanitize_for_json(data) when is_binary(data) or is_number(data), do: data

# Fallback for any other types (PIDs, refs, etc.)
defp sanitize_for_json(data), do: inspect(data)

@doc """
Dispatch an incoming action map to the matching tool handler.

The map must contain an `"action"` key. Recognised values are
`"inspect_viewport"`, `"send_keys"`, `"send_mouse_move"`,
`"send_mouse_click"`, `"take_screenshot"`, `"find_clickable"`,
`"click_element"` and `"hover_element"`. Except for `"inspect_viewport"`,
the whole action map is forwarded to the handler, whose result is returned
unchanged.

Returns `{:error, "Unknown command"}` for an unrecognised action, and
`{:error, "Invalid action format - must include 'action' key"}` when the
argument has no `"action"` key.
"""
@spec handle_action(term()) :: {:ok, term()} | {:error, term()}
def handle_action(%{"action" => "inspect_viewport"} = _actn),
  do: ScenicMcp.Tools.handle_get_scenic_graph()

def handle_action(%{"action" => "send_keys"} = actn),
  do: ScenicMcp.Tools.handle_send_keys(actn)

def handle_action(%{"action" => "send_mouse_move"} = actn),
  do: ScenicMcp.Tools.handle_mouse_move(actn)

def handle_action(%{"action" => "send_mouse_click"} = actn),
  do: ScenicMcp.Tools.handle_mouse_click(actn)

def handle_action(%{"action" => "take_screenshot"} = actn),
  do: ScenicMcp.Tools.take_screenshot(actn)

def handle_action(%{"action" => "find_clickable"} = actn),
  do: ScenicMcp.Tools.find_clickable_elements(actn)

def handle_action(%{"action" => "click_element"} = actn),
  do: ScenicMcp.Tools.click_element(actn)

def handle_action(%{"action" => "hover_element"} = actn),
  do: ScenicMcp.Tools.hover_element(actn)

# An "action" key is present but its value is not one of the above.
def handle_action(%{"action" => _action}), do: {:error, "Unknown command"}

# Anything without an "action" key, including non-map input.
def handle_action(_), do: {:error, "Invalid action format - must include 'action' key"}
end

Latest Blog Posts

Model Context Protocol Proxies: Enabling Enterprise Control with Virtual MCPs
By Om-Shree-0709 on December 9, 2025.
AI Security
Virtual MCP
Kubernetes Operator
The State of MCP in 2025: Who's Building What and Why It Matters
By punkpeye on December 7, 2025.
mcp
startups
MCP hosting with persistent storage
By punkpeye on December 6, 2025.
changelog

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/scenic-contrib/scenic_mcp_experimental'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.