/**
* VSCode Automation MCP Server - Keyboard & Input Tools
*
* Tools for keyboard input, focus management, scrolling, and drag/drop.
*
* @author Sukarth Acharya
* @license MIT
*/
import { z } from 'zod';
import { getVSCodeDriver } from '../vscode-driver.js';
/**
 * Zod shape describing the arguments of the `vscode_press_keys` tool.
 *
 * - `keys` (required): the key or "+"-separated combination to press.
 * - `selector` (optional): CSS selector of an element to focus first.
 * - `count` (optional): number of repetitions; defaults to 1.
 * - `delay` (optional): pause in milliseconds between repetitions; defaults to 50.
 */
export const pressKeysInputSchema = {
  keys: z
    .string()
    .describe('Key combination to press (e.g., "ctrl+s", "ctrl+shift+p", "Enter", "Escape", "ArrowDown")'),
  selector: z
    .string()
    .optional()
    .describe('Optional CSS selector to focus before pressing keys'),
  count: z
    .number()
    .optional()
    .default(1)
    .describe('Number of times to press the key combination'),
  delay: z
    .number()
    .optional()
    .default(50)
    .describe('Delay in ms between key presses when count > 1'),
};
/**
 * Zod shape describing the arguments of the `vscode_focus_element` tool.
 *
 * - `selector` (required): CSS selector of the element to focus.
 * - `scrollIntoView` (optional): whether to scroll the element into view
 *   first; defaults to `true`.
 */
export const focusElementInputSchema = {
  selector: z
    .string()
    .describe('CSS selector of the element to focus'),
  scrollIntoView: z
    .boolean()
    .optional()
    .default(true)
    .describe('Scroll element into view before focusing'),
};
/**
 * Zod shape describing the arguments of the `vscode_scroll` tool.
 *
 * - `selector` (optional): CSS selector of the element to scroll; defaults to 'body'.
 * - `direction` (required): one of 'up', 'down', 'left', 'right', 'top', 'bottom'.
 * - `amount` (optional): distance in pixels for the relative directions; defaults to 300.
 * - `smooth` (optional): request smooth scrolling; defaults to `false`.
 */
export const scrollInputSchema = {
  selector: z
    .string()
    .optional()
    .default('body')
    .describe('CSS selector of the element to scroll'),
  direction: z
    .enum(['up', 'down', 'left', 'right', 'top', 'bottom'])
    .describe('Scroll direction or position'),
  amount: z
    .number()
    .optional()
    .default(300)
    .describe('Scroll amount in pixels (for up/down/left/right)'),
  smooth: z
    .boolean()
    .optional()
    .default(false)
    .describe('Use smooth scrolling'),
};
/**
 * Zod shape describing the arguments of the `vscode_drag_drop` tool.
 *
 * - `sourceSelector` (required): CSS selector of the element to drag.
 * - `targetSelector` (required): CSS selector of the drop target.
 * - `offsetX` / `offsetY` (optional): drop offsets; both default to 0.
 */
export const dragDropInputSchema = {
  sourceSelector: z
    .string()
    .describe('CSS selector of the element to drag'),
  targetSelector: z
    .string()
    .describe('CSS selector of the drop target'),
  offsetX: z
    .number()
    .optional()
    .default(0)
    .describe('X offset within target'),
  offsetY: z
    .number()
    .optional()
    .default(0)
    .describe('Y offset within target'),
};
/**
 * Zod shape describing the arguments of the `vscode_hover` tool.
 *
 * - `selector` (required): CSS selector of the element to hover over.
 * - `duration` (optional): hover time in milliseconds; defaults to 500.
 */
export const hoverInputSchema = {
  selector: z
    .string()
    .describe('CSS selector of the element to hover over'),
  duration: z
    .number()
    .optional()
    .default(500)
    .describe('How long to hover in ms (for triggering hover effects)'),
};
/**
 * Press a key or a "+"-separated key combination, optionally several times.
 *
 * The combination is lower-cased and split on "+". Every token except the last
 * is held down while the last token is sent, and the held keys are then
 * released in reverse order. Tokens are looked up in a table of named keys
 * ("ctrl", "enter", "f5", ...). A final token that is not in the table is sent
 * as literal text; a non-final token must be a named key or a single
 * character. A trailing "+" (as in "+" or "ctrl++") names the plus key itself.
 *
 * @param input.keys - Key or combination, e.g. "ctrl+s", "Escape", "ctrl++".
 * @param input.selector - Optional CSS selector; the matching element is
 *   clicked before the keys are sent.
 * @param input.count - Number of repetitions. Falsy values (undefined, 0)
 *   mean a single press.
 * @param input.delay - Pause in milliseconds between repetitions. Defaults to
 *   50; an explicit 0 disables the pause.
 * @returns One text content item holding pretty-printed JSON:
 *   `{ success: true, keys, count, selector }` on success, or
 *   `{ success: false, error, keys }` when anything inside the try block fails
 *   (invalid combination, element not found, WebDriver error).
 */
export async function pressKeys(input: {
  keys: string;
  selector?: string;
  count?: number;
  delay?: number;
}): Promise<{ content: Array<{ type: 'text'; text: string }> }> {
  const driver = getVSCodeDriver();
  const webDriver = await driver.getDriver();
  // Falsy counts keep their historical meaning of "press once".
  const count = input.count || 1;
  // `??` rather than `||`: an explicit delay of 0 must disable the pause
  // instead of silently turning back into the 50 ms default.
  const delay = input.delay ?? 50;

  try {
    // Normalise case and drop whitespace around the "+" separator.
    const combo = input.keys.trim().toLowerCase().replace(/\s*\+\s*/g, '+');
    if (combo === '') {
      // Previously an empty combination performed nothing yet reported success.
      throw new Error('No keys specified');
    }

    const { By, Key } = await import('selenium-webdriver');

    // A Map instead of a plain object so that tokens such as "constructor"
    // cannot resolve to members inherited from Object.prototype.
    const namedKeys = new Map<string, string>([
      ['ctrl', Key.CONTROL],
      ['control', Key.CONTROL],
      ['alt', Key.ALT],
      ['shift', Key.SHIFT],
      ['meta', Key.META],
      ['cmd', Key.META],
      ['command', Key.META],
      ['enter', Key.ENTER],
      ['return', Key.RETURN],
      ['tab', Key.TAB],
      ['escape', Key.ESCAPE],
      ['esc', Key.ESCAPE],
      ['space', Key.SPACE],
      ['backspace', Key.BACK_SPACE],
      ['delete', Key.DELETE],
      ['up', Key.ARROW_UP],
      ['down', Key.ARROW_DOWN],
      ['left', Key.ARROW_LEFT],
      ['right', Key.ARROW_RIGHT],
      ['arrowup', Key.ARROW_UP],
      ['arrowdown', Key.ARROW_DOWN],
      ['arrowleft', Key.ARROW_LEFT],
      ['arrowright', Key.ARROW_RIGHT],
      ['home', Key.HOME],
      ['end', Key.END],
      ['pageup', Key.PAGE_UP],
      ['pagedown', Key.PAGE_DOWN],
      ['f1', Key.F1],
      ['f2', Key.F2],
      ['f3', Key.F3],
      ['f4', Key.F4],
      ['f5', Key.F5],
      ['f6', Key.F6],
      ['f7', Key.F7],
      ['f8', Key.F8],
      ['f9', Key.F9],
      ['f10', Key.F10],
      ['f11', Key.F11],
      ['f12', Key.F12],
    ]);

    // "+" doubles as the separator, so a trailing "+" ("+", "ctrl++") is
    // peeled off and treated as the plus key before splitting the rest.
    const endsWithPlusKey = combo === '+' || combo.endsWith('++');
    const separated = endsWithPlusKey ? combo.slice(0, -2) : combo;
    const tokens = separated === '' ? [] : separated.split('+');
    if (endsWithPlusKey) {
      tokens.push('+');
    }

    // Resolve every token up front so a malformed combination is rejected
    // with a clear message before anything is clicked or pressed.
    const sequence = tokens.map((token, index) => {
      if (token === '') {
        throw new Error(`Invalid key combination "${input.keys}": empty key name`);
      }
      const named = namedKeys.get(token);
      if (named !== undefined) {
        return named;
      }
      const isFinalToken = index === tokens.length - 1;
      // Held keys go through keyDown/keyUp, which need exactly one character.
      if (!isFinalToken && [...token].length !== 1) {
        throw new Error(`Unknown key "${token}" in combination "${input.keys}"`);
      }
      // Unnamed final tokens are sent as literal text, as before.
      return token;
    });

    // Focus element if specified
    if (input.selector) {
      const element = await webDriver.findElement(By.css(input.selector));
      await element.click();
    }

    const heldKeys = sequence.slice(0, -1);
    const releaseOrder = [...heldKeys].reverse();
    const finalKey = sequence[sequence.length - 1];

    for (let i = 0; i < count; i++) {
      // Fresh action chain per repetition to avoid carrying over stale state;
      // press, send and release are performed as one chain.
      const actions = webDriver.actions({ async: true });
      for (const key of heldKeys) {
        actions.keyDown(key);
      }
      if (finalKey !== undefined) {
        actions.sendKeys(finalKey);
      }
      for (const key of releaseOrder) {
        actions.keyUp(key);
      }
      await actions.perform();

      if (i < count - 1 && delay > 0) {
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: true,
          keys: input.keys,
          count: count,
          selector: input.selector || null,
        }, null, 2),
      }],
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: false,
          error: errorMessage,
          keys: input.keys,
        }, null, 2),
      }],
    };
  }
}
/**
 * Give keyboard focus to the first element matching a CSS selector.
 *
 * Unless `input.scrollIntoView` is explicitly `false`, the element is first
 * scrolled to the vertical centre of its scroll container. Focus is applied
 * by calling the element's `focus()` in the page, and the result reports
 * whether the element actually became `document.activeElement`.
 *
 * @param input.selector - CSS selector of the element to focus.
 * @param input.scrollIntoView - Pass `false` to skip scrolling; any other
 *   value (including omission) scrolls.
 * @returns One text content item holding pretty-printed JSON:
 *   `{ success: true, selector, focused, scrolledIntoView }`, or
 *   `{ success: false, error, selector }` if lookup or scripting fails.
 */
export async function focusElement(input: {
  selector: string;
  scrollIntoView?: boolean;
}): Promise<{ content: Array<{ type: 'text'; text: string }> }> {
  const webDriver = await getVSCodeDriver().getDriver();
  const shouldScroll = input.scrollIntoView !== false;

  // Wraps a payload in the single-text-item response envelope.
  const respond = (payload: Record<string, unknown>) => ({
    content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
  });

  try {
    const { By } = await import('selenium-webdriver');
    const target = await webDriver.findElement(By.css(input.selector));

    if (shouldScroll) {
      await webDriver.executeScript('arguments[0].scrollIntoView({ block: "center" });', target);
    }
    await webDriver.executeScript('arguments[0].focus();', target);

    // focus() is a no-op on non-focusable elements, so verify the outcome.
    const hasFocus = await webDriver.executeScript<boolean>(
      'return document.activeElement === arguments[0];',
      target
    );

    return respond({
      success: true,
      selector: input.selector,
      focused: hasFocus,
      scrolledIntoView: shouldScroll,
    });
  } catch (error) {
    return respond({
      success: false,
      error: error instanceof Error ? error.message : String(error),
      selector: input.selector,
    });
  }
}
/**
 * Scroll an element (or the page) by a relative amount or to an edge.
 *
 * The scrolling runs as a script inside the page. The selector 'body' is
 * mapped to `document.documentElement`; any other selector is resolved with
 * `document.querySelector`. 'up'/'down'/'left'/'right' scroll by `amount`
 * pixels, 'top' scrolls to offset 0 and 'bottom' to the element's
 * `scrollHeight`.
 *
 * The reported positions are read immediately after the scroll call, in the
 * same script. NOTE(review): with `smooth: true` the animation has presumably
 * not finished at that point, so `scrollTop`/`scrolledBy` may lag behind the
 * final position.
 *
 * @param input.selector - CSS selector of the element to scroll. Empty or
 *   omitted means 'body'.
 * @param input.direction - Relative direction or absolute edge.
 * @param input.amount - Pixels to scroll for relative directions. Defaults to
 *   300; an explicit 0 is honoured.
 * @param input.smooth - Use smooth scrolling behaviour. Defaults to `false`.
 * @returns One text content item holding pretty-printed JSON:
 *   `{ success: true, selector, direction, scrollTop, scrollLeft,
 *   scrollHeight, scrollWidth, scrolledBy }`, or
 *   `{ success: false, error, selector }` when the element is not found or
 *   the script fails.
 */
export async function scroll(input: {
  selector?: string;
  direction: 'up' | 'down' | 'left' | 'right' | 'top' | 'bottom';
  amount?: number;
  smooth?: boolean;
}): Promise<{ content: Array<{ type: 'text'; text: string }> }> {
  const driver = getVSCodeDriver();
  const webDriver = await driver.getDriver();
  // `||` on purpose: an empty selector is not usable, so it falls back too.
  const selector = input.selector || 'body';
  // `??` rather than `||`: an explicit amount of 0 must not become 300.
  const amount = input.amount ?? 300;
  const smooth = input.smooth ?? false;

  try {
    const result = await webDriver.executeScript<string>(
      `
      const el = arguments[0] === 'body' ? document.documentElement : document.querySelector(arguments[0]);
      if (!el) return JSON.stringify({ error: 'Element not found' });
      const direction = arguments[1];
      const amount = arguments[2];
      const smooth = arguments[3];
      const behavior = smooth ? 'smooth' : 'auto';
      const beforeTop = el.scrollTop;
      const beforeLeft = el.scrollLeft;
      switch (direction) {
        case 'up':
          el.scrollBy({ top: -amount, behavior });
          break;
        case 'down':
          el.scrollBy({ top: amount, behavior });
          break;
        case 'left':
          el.scrollBy({ left: -amount, behavior });
          break;
        case 'right':
          el.scrollBy({ left: amount, behavior });
          break;
        case 'top':
          el.scrollTo({ top: 0, behavior });
          break;
        case 'bottom':
          el.scrollTo({ top: el.scrollHeight, behavior });
          break;
      }
      return JSON.stringify({
        scrollTop: el.scrollTop,
        scrollLeft: el.scrollLeft,
        scrollHeight: el.scrollHeight,
        scrollWidth: el.scrollWidth,
        scrolledBy: {
          top: el.scrollTop - beforeTop,
          left: el.scrollLeft - beforeLeft
        }
      });
      `,
      selector,
      input.direction,
      amount,
      smooth
    );

    // The in-page script always returns a JSON object, either the scroll
    // metrics or `{ error }` when the selector matched nothing.
    const parsed: { error?: string; [key: string]: unknown } = JSON.parse(result);

    if (parsed.error) {
      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            success: false,
            error: parsed.error,
            selector: selector,
          }, null, 2),
        }],
      };
    }

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: true,
          selector: selector,
          direction: input.direction,
          ...parsed,
        }, null, 2),
      }],
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: false,
          error: errorMessage,
          selector: selector,
        }, null, 2),
      }],
    };
  }
}
/**
 * Drag one element and drop it on another, optionally at an offset.
 *
 * Without an offset the source is dropped on the target via the driver's
 * `dragAndDrop`. With a non-zero `offsetX`/`offsetY` the pointer presses on
 * the source, moves to the target shifted by the offset, and releases there.
 * WebDriver measures element-origin offsets from the element's in-view
 * centre, so (0, 0) is the middle of the target.
 *
 * @param input.sourceSelector - CSS selector of the element to drag.
 * @param input.targetSelector - CSS selector of the drop target.
 * @param input.offsetX - Horizontal drop offset in pixels, relative to the
 *   target. Defaults to 0.
 * @param input.offsetY - Vertical drop offset in pixels, relative to the
 *   target. Defaults to 0.
 * @returns One text content item holding pretty-printed JSON:
 *   `{ success: true, source, target }`, or
 *   `{ success: false, error, source, target }` if an element is missing or
 *   the action fails.
 */
export async function dragDrop(input: {
  sourceSelector: string;
  targetSelector: string;
  offsetX?: number;
  offsetY?: number;
}): Promise<{ content: Array<{ type: 'text'; text: string }> }> {
  const driver = getVSCodeDriver();
  const webDriver = await driver.getDriver();

  try {
    const { By } = await import('selenium-webdriver');
    const source = await webDriver.findElement(By.css(input.sourceSelector));
    const target = await webDriver.findElement(By.css(input.targetSelector));
    const offsetX = input.offsetX ?? 0;
    const offsetY = input.offsetY ?? 0;
    const actions = webDriver.actions({ async: true });

    // Truthiness is intended here: 0 (and NaN) mean "no offset".
    if (offsetX || offsetY) {
      // Previously this branch called dragAndDrop(source, { x, y }), which
      // moves the pointer relative to where the drag started and never
      // touches the target. Spell the gesture out so the drop lands on the
      // target, shifted by the requested offset.
      await actions
        .move({ origin: source })
        .press()
        .move({ origin: target, x: offsetX, y: offsetY })
        .release()
        .perform();
    } else {
      await actions.dragAndDrop(source, target).perform();
    }

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: true,
          source: input.sourceSelector,
          target: input.targetSelector,
        }, null, 2),
      }],
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: false,
          error: errorMessage,
          source: input.sourceSelector,
          target: input.targetSelector,
        }, null, 2),
      }],
    };
  }
}
/**
 * Move the pointer over an element, wait, and report any tooltip that shows.
 *
 * After the wait, the page is queried for the first element matching
 * `.monaco-hover`, `.hover-contents`, `[role="tooltip"]` or `.tippy-box`. If
 * one exists, its text (truncated to 500 characters) and class name are
 * included in the result.
 *
 * @param input.selector - CSS selector of the element to hover over.
 * @param input.duration - Milliseconds to wait after moving the pointer.
 *   Defaults to 500; an explicit 0 is honoured.
 * @returns One text content item holding pretty-printed JSON:
 *   `{ success: true, selector, duration, tooltipFound, tooltipText?,
 *   tooltipClass? }`, or `{ success: false, error, selector }` if the element
 *   is missing or the action fails.
 */
export async function hover(input: {
  selector: string;
  duration?: number;
}): Promise<{ content: Array<{ type: 'text'; text: string }> }> {
  const driver = getVSCodeDriver();
  const webDriver = await driver.getDriver();
  // `??` rather than `||`: an explicit duration of 0 must not become 500.
  const duration = input.duration ?? 500;

  try {
    const { By } = await import('selenium-webdriver');
    const element = await webDriver.findElement(By.css(input.selector));

    const actions = webDriver.actions({ async: true });
    await actions.move({ origin: element }).perform();

    // Give hover effects time to appear.
    await new Promise(resolve => setTimeout(resolve, duration));

    // Look for any tooltip-like element that is now in the DOM.
    const hoverInfo = await webDriver.executeScript<string>(
      `
      const tooltip = document.querySelector('.monaco-hover, .hover-contents, [role="tooltip"], .tippy-box');
      if (tooltip) {
        return JSON.stringify({
          tooltipFound: true,
          tooltipText: tooltip.textContent?.slice(0, 500),
          tooltipClass: tooltip.className
        });
      }
      return JSON.stringify({ tooltipFound: false });
      `
    );
    const parsed: Record<string, unknown> = JSON.parse(hoverInfo);

    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: true,
          selector: input.selector,
          duration: duration,
          ...parsed,
        }, null, 2),
      }],
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [{
        type: 'text',
        text: JSON.stringify({
          success: false,
          error: errorMessage,
          selector: input.selector,
        }, null, 2),
      }],
    };
  }
}