Skip to main content
Glama
desktop-tools.ts (11.1 kB)
/**
 * ByteBot Desktop API MCP Tools
 *
 * MCP tool definitions for ByteBot computer control operations
 */

import { DesktopClient } from '../clients/desktop-client.js';
import { formatErrorForMCP } from '../utils/error-handler.js';

/** Mouse buttons advertised by the `bytebot_click` schema and accepted by its handler. */
const MOUSE_BUTTONS = ['left', 'right', 'middle'] as const;

/** Scroll directions advertised by the `bytebot_scroll` schema and accepted by its handler. */
const SCROLL_DIRECTIONS = ['up', 'down', 'left', 'right'] as const;

/** Maximum number of base64 characters echoed back in a response preview. */
const BASE64_PREVIEW_LENGTH = 100;

/**
 * Argument types taken straight from the client method signatures, so the
 * handler never has to fall back to `any` and cannot drift from the client.
 */
type MouseButton = Parameters<DesktopClient['clickMouse']>[2];
type ScrollDirection = Parameters<DesktopClient['scroll']>[0];

/**
 * Shape returned by {@link handleDesktopTool}.
 *
 * Declared as a type alias (not an interface) so it stays assignable to
 * consumers that expect an open, index-signature object.
 */
type DesktopToolResult = {
  content: { type: 'text'; text: string }[];
  isError?: boolean;
};

/**
 * Tool definitions for Desktop API
 *
 * @returns One descriptor (`name`, `description`, `inputSchema`) per desktop tool.
 */
export function getDesktopTools() {
  return [
    // Mouse tools
    {
      name: 'bytebot_move_mouse',
      description:
        'Move the mouse cursor to specific screen coordinates. Use this to position the cursor before clicking or dragging.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          x: {
            type: 'number',
            description: 'X coordinate (horizontal position in pixels)',
          },
          y: {
            type: 'number',
            description: 'Y coordinate (vertical position in pixels)',
          },
        },
        required: ['x', 'y'],
      },
    },
    {
      name: 'bytebot_click',
      description:
        'Click at specific screen coordinates. Supports left, right, and middle mouse buttons, as well as double-clicks.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          x: {
            type: 'number',
            description: 'X coordinate (horizontal position in pixels)',
          },
          y: {
            type: 'number',
            description: 'Y coordinate (vertical position in pixels)',
          },
          button: {
            type: 'string',
            enum: [...MOUSE_BUTTONS],
            description: 'Mouse button to click. Default: left',
            default: 'left',
          },
          count: {
            type: 'number',
            description:
              'Number of clicks (1 = single click, 2 = double click). Default: 1',
            default: 1,
          },
        },
        required: ['x', 'y'],
      },
    },
    {
      name: 'bytebot_drag',
      description:
        'Drag the mouse from one position to another. Useful for moving windows, selecting text, or drag-and-drop operations.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          from_x: {
            type: 'number',
            description: 'Starting X coordinate',
          },
          from_y: {
            type: 'number',
            description: 'Starting Y coordinate',
          },
          to_x: {
            type: 'number',
            description: 'Ending X coordinate',
          },
          to_y: {
            type: 'number',
            description: 'Ending Y coordinate',
          },
        },
        required: ['from_x', 'from_y', 'to_x', 'to_y'],
      },
    },
    {
      name: 'bytebot_scroll',
      description:
        'Scroll the screen in a specific direction. Use this to navigate through content.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          direction: {
            type: 'string',
            enum: [...SCROLL_DIRECTIONS],
            description: 'Direction to scroll',
          },
          count: {
            type: 'number',
            description: 'Number of scroll increments. Default: 1',
            default: 1,
          },
        },
        required: ['direction'],
      },
    },

    // Keyboard tools
    {
      name: 'bytebot_type_text',
      description:
        'Type text string as if typing on a keyboard. Use this for entering text in forms, search boxes, etc.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          text: {
            type: 'string',
            description: 'Text to type',
          },
          delay: {
            type: 'number',
            description: 'Optional delay between keystrokes in milliseconds',
          },
        },
        required: ['text'],
      },
    },
    {
      name: 'bytebot_paste_text',
      description:
        'Paste text directly (without typing). Faster than type_text and works with special characters/emojis.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          text: {
            type: 'string',
            description: 'Text to paste',
          },
        },
        required: ['text'],
      },
    },
    {
      name: 'bytebot_press_keys',
      description:
        'Press keyboard keys including modifiers (Ctrl, Shift, Alt, etc.). Use this for keyboard shortcuts like Ctrl+C, Ctrl+V, Alt+Tab.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          keys: {
            type: 'array',
            items: {
              type: 'string',
            },
            description:
              'Array of keys to press together. Examples: ["ctrl", "c"] for copy, ["alt", "tab"] for app switching',
          },
        },
        required: ['keys'],
      },
    },

    // Screen tools
    {
      name: 'bytebot_screenshot',
      description:
        'Capture a screenshot of the entire screen. Returns base64-encoded PNG image data.',
      inputSchema: {
        type: 'object' as const,
        properties: {},
      },
    },
    {
      name: 'bytebot_cursor_position',
      description:
        'Get the current mouse cursor position. Returns {x, y} coordinates.',
      inputSchema: {
        type: 'object' as const,
        properties: {},
      },
    },

    // File I/O tools
    {
      name: 'bytebot_read_file',
      description:
        'Read a file from the filesystem. Returns base64-encoded file content.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          path: {
            type: 'string',
            description: 'Absolute or relative path to the file to read',
          },
        },
        required: ['path'],
      },
    },
    {
      name: 'bytebot_write_file',
      description:
        'Write content to a file on the filesystem. Content must be base64-encoded.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          path: {
            type: 'string',
            description: 'Absolute or relative path to the file to write',
          },
          content: {
            type: 'string',
            description: 'Base64-encoded file content',
          },
        },
        required: ['path', 'content'],
      },
    },

    // System tools
    {
      name: 'bytebot_switch_application',
      description:
        'Switch to a specific application window. Use this to bring an app to the foreground.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          name: {
            type: 'string',
            description:
              'Application name (e.g., "firefox", "terminal", "vscode", "chrome", "safari")',
          },
        },
        required: ['name'],
      },
    },
    {
      name: 'bytebot_wait',
      description:
        'Wait for a specified duration. Use this to add delays between actions or wait for UI updates.',
      inputSchema: {
        type: 'object' as const,
        properties: {
          duration: {
            type: 'number',
            description: 'Duration to wait in milliseconds',
          },
        },
        required: ['duration'],
      },
    },
  ];
}

/** Reads `args[key]` and throws unless it is a finite number. */
function requireNumber(args: Record<string, unknown>, key: string): number {
  const value = args[key];
  if (typeof value !== 'number' || !Number.isFinite(value)) {
    throw new Error(`Invalid argument "${key}": expected a finite number`);
  }
  return value;
}

/** Like {@link requireNumber}, but yields `undefined` when the argument is absent or null. */
function optionalNumber(
  args: Record<string, unknown>,
  key: string
): number | undefined {
  return args[key] == null ? undefined : requireNumber(args, key);
}

/** Reads `args[key]` and throws unless it is a string (empty strings are allowed). */
function requireString(args: Record<string, unknown>, key: string): string {
  const value = args[key];
  if (typeof value !== 'string') {
    throw new Error(`Invalid argument "${key}": expected a string`);
  }
  return value;
}

/** Reads `args[key]` and throws unless it is an array containing only strings. */
function requireStringArray(
  args: Record<string, unknown>,
  key: string
): string[] {
  const value = args[key];
  if (!Array.isArray(value) || !value.every((item) => typeof item === 'string')) {
    throw new Error(`Invalid argument "${key}": expected an array of strings`);
  }
  return value as string[];
}

/**
 * Reads `args[key]` (or `fallback` when the argument is absent or null) and
 * throws unless the value is one of `allowed`.
 */
function requireOneOf(
  args: Record<string, unknown>,
  key: string,
  allowed: readonly string[],
  fallback?: string
): string {
  const value = args[key] ?? fallback;
  if (typeof value !== 'string' || !allowed.includes(value)) {
    throw new Error(
      `Invalid argument "${key}": expected one of ${allowed.join(', ')}`
    );
  }
  return value;
}

/**
 * Tool handlers for Desktop API
 *
 * Validates the arguments for `toolName`, forwards them to the matching
 * `desktopClient` method and formats the outcome as a text response.
 * Nothing is thrown to the caller: validation failures, unknown tool names
 * and client errors are all passed through `formatErrorForMCP` and returned
 * with `isError: true`.
 *
 * @param toolName - Name of the tool to run (one of the names from `getDesktopTools`).
 * @param args - Raw tool arguments; each value is checked at runtime before use.
 * @param desktopClient - Client used to perform the desktop operation.
 * @returns A text result, flagged with `isError: true` on failure.
 */
export async function handleDesktopTool(
  toolName: string,
  args: Record<string, unknown>,
  desktopClient: DesktopClient
): Promise<DesktopToolResult> {
  try {
    // Tools without parameters may be invoked with no arguments object at all.
    const input: Record<string, unknown> = args ?? {};
    let result: unknown;

    switch (toolName) {
      case 'bytebot_move_mouse':
        result = await desktopClient.moveMouse(
          requireNumber(input, 'x'),
          requireNumber(input, 'y')
        );
        break;

      case 'bytebot_click':
        result = await desktopClient.clickMouse(
          requireNumber(input, 'x'),
          requireNumber(input, 'y'),
          requireOneOf(input, 'button', MOUSE_BUTTONS, 'left') as MouseButton,
          // `||` is intentional: a count of 0 falls back to 1, as it always has.
          optionalNumber(input, 'count') || 1
        );
        break;

      case 'bytebot_drag':
        result = await desktopClient.dragMouse(
          requireNumber(input, 'from_x'),
          requireNumber(input, 'from_y'),
          requireNumber(input, 'to_x'),
          requireNumber(input, 'to_y')
        );
        break;

      case 'bytebot_scroll':
        result = await desktopClient.scroll(
          requireOneOf(input, 'direction', SCROLL_DIRECTIONS) as ScrollDirection,
          // `||` is intentional: a count of 0 falls back to 1, as it always has.
          optionalNumber(input, 'count') || 1
        );
        break;

      case 'bytebot_type_text':
        result = await desktopClient.typeText(
          requireString(input, 'text'),
          optionalNumber(input, 'delay')
        );
        break;

      case 'bytebot_paste_text':
        result = await desktopClient.pasteText(requireString(input, 'text'));
        break;

      case 'bytebot_press_keys':
        result = await desktopClient.pressKeys(
          requireStringArray(input, 'keys')
        );
        break;

      case 'bytebot_screenshot': {
        const shot = await desktopClient.screenshot();
        // Validate screenshot size
        if (shot.screenshot) {
          desktopClient.validateScreenshotSize(shot.screenshot);
        }
        result = shot;
        break;
      }

      case 'bytebot_cursor_position':
        result = await desktopClient.getCursorPosition();
        break;

      case 'bytebot_read_file':
        result = await desktopClient.readFile(requireString(input, 'path'));
        break;

      case 'bytebot_write_file':
        result = await desktopClient.writeFile(
          requireString(input, 'path'),
          requireString(input, 'content')
        );
        break;

      case 'bytebot_switch_application':
        result = await desktopClient.switchApplication(
          requireString(input, 'name')
        );
        break;

      case 'bytebot_wait':
        result = await desktopClient.wait(requireNumber(input, 'duration'));
        break;

      default:
        throw new Error(`Unknown tool: ${toolName}`);
    }

    // Format response
    return {
      content: [
        {
          type: 'text',
          text: formatDesktopResponse(result, toolName),
        },
      ],
    };
  } catch (error) {
    const errorInfo = formatErrorForMCP(error);
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${errorInfo.error}${errorInfo.details ? '\n\nDetails:\n' + errorInfo.details : ''}`,
        },
      ],
      isError: true,
    };
  }
}

/**
 * Builds the size line and, for non-empty data, the preview line for a base64 payload.
 *
 * The size is the decoded byte count (3 bytes per 4 characters, minus one
 * byte per trailing `=` padding character). The ellipsis is only appended
 * when the preview is actually shorter than the payload.
 */
function describeBase64(label: string, base64: string): string[] {
  const padding = base64.endsWith('==') ? 2 : base64.endsWith('=') ? 1 : 0;
  const decodedBytes = Math.max(0, Math.floor((base64.length * 3) / 4) - padding);
  const described = [`${label}: ${(decodedBytes / 1024).toFixed(2)} KB`];

  if (base64.length > BASE64_PREVIEW_LENGTH) {
    described.push(
      `Base64 data: ${base64.substring(0, BASE64_PREVIEW_LENGTH)}...`
    );
  } else if (base64.length > 0) {
    described.push(`Base64 data: ${base64}`);
  }
  return described;
}

/**
 * Format desktop action response for display
 *
 * Always starts with a success line for `toolName`, then adds one section per
 * recognised field on `result` (`duration`, `screenshot`, `position`,
 * `content`, `message`). Fields that are absent are skipped, and a result
 * that is not an object produces only the success line.
 *
 * @param result - Value returned by the desktop client; may be of any shape.
 * @param toolName - Tool name shown in the success line.
 * @returns The response lines joined with newlines.
 */
function formatDesktopResponse(result: unknown, toolName: string): string {
  const lines: string[] = [`✓ ${toolName} completed successfully`];

  if (typeof result !== 'object' || result === null) {
    return lines.join('\n');
  }
  const data = result as Record<string, unknown>;

  if (data.duration != null) {
    lines.push(`Duration: ${String(data.duration)}ms`);
  }

  // Add specific result data based on tool type
  if (typeof data.screenshot === 'string' && data.screenshot.length > 0) {
    lines.push(...describeBase64('Screenshot captured', data.screenshot));
  }

  if (typeof data.position === 'object' && data.position !== null) {
    const { x, y } = data.position as { x?: unknown; y?: unknown };
    lines.push(`Cursor position: (${String(x)}, ${String(y)})`);
  }

  // An empty string is valid content (an empty file) and is still reported.
  if (typeof data.content === 'string') {
    lines.push(...describeBase64('File content', data.content));
  }

  if (typeof data.message === 'string' && data.message.length > 0) {
    lines.push(`Message: ${data.message}`);
  }

  return lines.join('\n');
}

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sensuslab/spark-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.