/**
* Browser namespace implementation with WebDriver integration
* FULL PRODUCTION IMPLEMENTATION - NO SHORTCUTS
*/
import { Builder, WebDriver, By, until, WebElement, logging } from 'selenium-webdriver';
import { Options as ChromeOptions } from 'selenium-webdriver/chrome.js';
import { Options as FirefoxOptions } from 'selenium-webdriver/firefox.js';
import { v4 as uuidv4 } from 'uuid';
import fs from 'fs';
import path from 'path';
import os from 'os';
import {
captureScreenshot,
generateImage,
cropImage,
getImageDimensions
} from '../utils/image-utils-canvas.js';
import { createImageResponse, createTextAndImageResponse } from '../types/mcp-responses.js';
import { MCPServer } from '../core/server.js';
import { MCPTool } from '../types/core.js';
import {
BrowserSession,
BrowserSessionOptions,
WindowInfo,
ClickTarget,
WaitCondition,
ScrollParams,
ActResult,
Recording,
RecordingStep,
StartSessionResponse,
OpenResponse,
ScreenshotResponse,
ActionResponse,
EvalResponse,
RecordResponse,
ListWindowsResponse,
WindowStateResponse,
ListRecordingsResponse,
ViewRecordingResponse,
GotoResponse
} from '../types/browser.js';
import { InvalidArgError, SessionNotFoundError } from '../core/errors.js';
import { GoogleGenerativeAI } from '@google/generative-ai';
export class BrowserNamespace {
private mcpServer: MCPServer;
private sessions = new Map<string, BrowserSession>();
private recordings = new Map<string, Recording>();
private geminiClient?: GoogleGenerativeAI;
private recordingsDir: string;
/**
 * Wires the namespace to the MCP server: prepares the on-disk recordings
 * directory, creates the Gemini client when an API key is configured, and
 * registers the session-independent tools.
 *
 * @param mcpServer Server supplying the environment config and tool registry.
 */
constructor(mcpServer: MCPServer) {
  this.mcpServer = mcpServer;
  this.recordingsDir = path.join(os.tmpdir(), 'mcp-browser-recordings');

  // The recordings directory lives under the OS temp dir; create it on first use.
  const recordingsDirMissing = !fs.existsSync(this.recordingsDir);
  if (recordingsDirMissing) {
    fs.mkdirSync(this.recordingsDir, { recursive: true });
  }

  // Gemini stays undefined unless the environment provides a key.
  const envConfig = mcpServer.getEnvConfig();
  const geminiKey = envConfig.GEMINI_API_KEY;
  if (geminiKey) {
    this.geminiClient = new GoogleGenerativeAI(geminiKey);
  }

  this.registerStaticTools();
}
/**
 * Registers the two tools that exist independently of any session:
 * `browser.start_session` and `browser.end_session`.
 */
private registerStaticTools(): void {
  const registry = this.mcpServer.getRegistry();
  const startToolName = 'browser.start_session';
  const endToolName = 'browser.end_session';

  // Shape of the optional `opts` argument accepted by start_session.
  const sessionOptionsSchema = {
    type: 'object',
    properties: {
      driver: { type: 'string', enum: ['local', 'remote'] },
      remote_url: { type: 'string' },
      headless: { type: 'boolean' },
      viewport: {
        type: 'object',
        properties: { w: { type: 'number' }, h: { type: 'number' } }
      },
      user_agent: { type: 'string' },
      proxy: { type: 'string' },
      downloads: {
        type: 'object',
        properties: { dir: { type: 'string' }, accept: { type: 'boolean' } }
      },
      incognito: { type: 'boolean' },
      locale: { type: 'string' },
      timezone: { type: 'string' }
    }
  };

  registry.registerTool(
    startToolName,
    {
      name: startToolName,
      description: 'Start a new browser session',
      inputSchema: {
        type: 'object',
        properties: { opts: sessionOptionsSchema }
      }
    },
    this.startSession.bind(this)
  );

  registry.registerTool(
    endToolName,
    {
      name: endToolName,
      description: 'End a browser session',
      inputSchema: {
        type: 'object',
        properties: { session_id: { type: 'string' } },
        required: ['session_id']
      }
    },
    this.endSession.bind(this)
  );
}
/**
 * Registers every per-session tool under the `browser.<sessionId>.` prefix.
 * Each handler closes over `sessionId` and forwards the relevant fields of
 * the incoming params object to the matching private method.
 *
 * @param sessionId Identifier of the session the tools are bound to.
 */
private registerSessionTools(sessionId: string): void {
  const registry = this.mcpServer.getRegistry();

  type ToolSchema = { type: 'object'; properties: Record<string, any>; required?: string[] };

  // Fresh schema fragments on every call so no two tools share an object.
  const str = () => ({ type: 'string' });
  const num = () => ({ type: 'number' });
  const bool = () => ({ type: 'boolean' });
  const waitMode = () => ({ type: 'string', enum: ['none', 'load', 'idle'] });

  // Registers one tool; `verb` is the last segment of the tool name.
  const register = (
    verb: string,
    description: string,
    inputSchema: ToolSchema,
    handler: (params: any) => Promise<any>
  ): void => {
    const name = `browser.${sessionId}.${verb}`;
    registry.registerSessionTool(sessionId, name, { name, description, inputSchema }, handler);
  };

  register(
    'open',
    'Open a URL',
    { type: 'object', properties: { url: str() }, required: ['url'] },
    (params: any) => this.open(sessionId, params.url)
  );

  register(
    'list_windows',
    'List all browser windows',
    { type: 'object', properties: {} },
    () => this.listWindows(sessionId)
  );

  register(
    'get_window_state',
    'Get window state',
    { type: 'object', properties: { window_id: str() }, required: ['window_id'] },
    (params: any) => this.getWindowState(sessionId, params.window_id)
  );

  register(
    'screenshot',
    'Take a screenshot',
    {
      type: 'object',
      properties: {
        window_id: str(),
        area: {
          type: 'object',
          properties: { x: num(), y: num(), w: num(), h: num() }
        }
      }
    },
    (params: any) => this.screenshot(sessionId, params.window_id, params.area)
  );

  register(
    'click',
    'Click on an element or coordinates',
    {
      type: 'object',
      properties: {
        target: {
          oneOf: [
            {
              type: 'object',
              properties: { selector: str() },
              required: ['selector']
            },
            {
              type: 'object',
              properties: { x: num(), y: num() },
              required: ['x', 'y']
            }
          ]
        },
        wait: waitMode()
      },
      required: ['target']
    },
    (params: any) => this.click(sessionId, params.target, params.wait)
  );

  register(
    'type',
    'Type text into an element',
    {
      type: 'object',
      properties: { selector: str(), text: str(), submit: bool() },
      required: ['selector', 'text']
    },
    (params: any) => this.type(sessionId, params.selector, params.text, params.submit)
  );

  register(
    'scroll',
    'Scroll the window',
    {
      type: 'object',
      properties: {
        window_id: str(),
        by: { type: 'object', properties: { dx: num(), dy: num() } },
        to: { type: 'object', properties: { x: num(), y: num() } }
      },
      required: ['window_id']
    },
    // The whole params object is forwarded; scroll() reads `by` / `to` from it.
    (params: any) => this.scroll(sessionId, params.window_id, params)
  );

  register(
    'eval',
    'Execute JavaScript in the browser',
    {
      type: 'object',
      properties: { window_id: str(), js: str(), sandbox: bool() },
      required: ['window_id', 'js']
    },
    (params: any) => this.eval(sessionId, params.window_id, params.js, params.sandbox)
  );

  register(
    'goto',
    'Navigate to a URL',
    {
      type: 'object',
      properties: { window_id: str(), url: str(), wait: waitMode() },
      required: ['window_id', 'url']
    },
    (params: any) => this.goto(sessionId, params.window_id, params.url, params.wait)
  );

  register(
    'act',
    'AI-driven browser automation',
    {
      type: 'object',
      properties: {
        goal: str(),
        constraints: { type: 'object' },
        max_steps: num(),
        temperature: num()
      },
      required: ['goal']
    },
    (params: any) => this.act(sessionId, params.goal, params.constraints, params.max_steps, params.temperature)
  );

  register(
    'record',
    'Start or stop recording',
    {
      type: 'object',
      properties: {
        window_id: str(),
        cmd: { type: 'string', enum: ['start', 'stop'] },
        opts: {
          type: 'object',
          properties: { har: bool(), screenshots: bool(), fps: num() }
        }
      },
      required: ['window_id', 'cmd']
    },
    (params: any) => this.record(sessionId, params.window_id, params.cmd, params.opts)
  );

  register(
    'replay',
    'Replay a recording',
    {
      type: 'object',
      properties: { recording_id: str(), overrides: { type: 'object' } },
      required: ['recording_id']
    },
    (params: any) => this.replay(sessionId, params.recording_id, params.overrides)
  );

  register(
    'list_recordings',
    'List recordings',
    { type: 'object', properties: { filter: { type: 'object' } } },
    (params: any) => this.listRecordings(sessionId, params.filter)
  );

  register(
    'view_recording',
    'View recording details',
    { type: 'object', properties: { recording_id: str() }, required: ['recording_id'] },
    (params: any) => this.viewRecording(sessionId, params.recording_id)
  );

  register(
    'get_har',
    'Get HAR (HTTP Archive) data',
    { type: 'object', properties: {} },
    () => this.getHar(sessionId)
  );
}
/**
 * Launches a Chrome WebDriver session (local, or remote when
 * `opts.driver === 'remote'` and `opts.remote_url` is set), stores it under a
 * fresh UUID, and registers the per-session tools.
 *
 * If any step after the browser has been built fails, the browser is quit and
 * the partially registered session is removed before the error is rethrown,
 * so a failed start does not leave an orphaned browser process.
 *
 * @param params Optional `opts` controlling headless mode, viewport, user
 *               agent, incognito, locale, proxy, driver location and timezone.
 * @returns The identifier of the new session.
 */
private async startSession(params: { opts?: BrowserSessionOptions }): Promise<StartSessionResponse> {
  const sessionId = uuidv4();
  const opts = params?.opts ?? {};

  const chromeOptions = new ChromeOptions();
  chromeOptions.addArguments('--enable-logging');
  chromeOptions.addArguments('--log-level=0');
  // NOTE(review): --dump-dom is a one-shot headless CLI switch; confirm it is
  // wanted (and harmless) for a WebDriver-controlled browser.
  chromeOptions.addArguments('--dump-dom');

  // Performance logging feeds getHar()/record(), which read logging.Type.PERFORMANCE.
  const loggingPrefs = new logging.Preferences();
  loggingPrefs.setLevel(logging.Type.PERFORMANCE, logging.Level.ALL);
  chromeOptions.setLoggingPrefs(loggingPrefs);

  // Headless is the default; only an explicit `headless: false` shows a window.
  if (opts.headless !== false) {
    chromeOptions.addArguments('--headless=new');
  }
  if (opts.viewport) {
    chromeOptions.addArguments(`--window-size=${opts.viewport.w},${opts.viewport.h}`);
  }
  if (opts.user_agent) {
    // Arguments are handed to the browser as-is (no shell), so wrapping the
    // value in quotes would make the quotes part of the user agent string.
    chromeOptions.addArguments(`--user-agent=${opts.user_agent}`);
  }
  if (opts.incognito) {
    chromeOptions.addArguments('--incognito');
  }
  if (opts.locale) {
    chromeOptions.addArguments(`--lang=${opts.locale}`);
  }
  if (opts.proxy) {
    chromeOptions.addArguments(`--proxy-server=${opts.proxy}`);
  }

  chromeOptions.addArguments('--enable-blink-features=IdleDetection');
  // NOTE(review): perfLoggingPrefs is documented as a ChromeDriver option
  // (Options#setPerfLoggingPrefs); confirm that passing it as a user
  // preference has the intended effect.
  chromeOptions.setUserPreferences({
    'perfLoggingPrefs': {
      'enableNetwork': true,
      'enablePage': true,
      'traceCategories': 'browser,devtools.timeline,devtools'
    }
  });

  // Build the driver; the remote server is only used when a URL was supplied.
  const builder = new Builder().forBrowser('chrome').setChromeOptions(chromeOptions);
  if (opts.driver === 'remote' && opts.remote_url) {
    builder.usingServer(opts.remote_url);
  }
  const driver: WebDriver = await builder.build();

  const registry = this.mcpServer.getRegistry();
  let registeredWithRegistry = false;
  try {
    if (opts.timezone) {
      // The timezone is passed as a script argument instead of being spliced
      // into the source, so it cannot break out of the string literal. The
      // original resolvedOptions() fields are preserved and only timeZone is
      // replaced.
      // NOTE(review): this patches the document that is current at session
      // start; confirm whether the override is expected to survive navigation.
      await driver.executeScript(
        `
        var tz = arguments[0];
        var originalResolvedOptions = Intl.DateTimeFormat.prototype.resolvedOptions;
        Object.defineProperty(Intl.DateTimeFormat.prototype, 'resolvedOptions', {
          value: function() {
            return Object.assign({}, originalResolvedOptions.call(this), { timeZone: tz });
          }
        });
        `,
        opts.timezone
      );
    }

    const createdAt = new Date().toISOString();
    const session: BrowserSession = {
      session_id: sessionId,
      driver,
      options: opts,
      windows: new Map(),
      created_at: createdAt,
      last_activity: createdAt,
      networkLogs: [],
      activeRecording: null
    };
    this.sessions.set(sessionId, session);

    registry.createSession(sessionId, 'browser');
    registeredWithRegistry = true;
    this.registerSessionTools(sessionId);
  } catch (error) {
    // Roll back everything done so far so the failed start leaves no trace.
    this.sessions.delete(sessionId);
    if (registeredWithRegistry) {
      try {
        registry.destroySession(sessionId);
      } catch (destroyError) {
        console.warn(`Failed to roll back registry session ${sessionId}:`, destroyError);
      }
    }
    try {
      await driver.quit();
    } catch (quitError) {
      console.warn(`Failed to quit browser after aborted session start ${sessionId}:`, quitError);
    }
    throw error;
  }

  return { session_id: sessionId };
}
/**
 * Ends a browser session: stops an active recording (best effort), quits the
 * browser, and removes the session from this namespace and from the registry.
 *
 * The bookkeeping runs in a `finally` block, so a failure while stopping the
 * recording or quitting the browser no longer leaves a dead session (or a
 * running screenshot timer) behind. An error from `driver.quit()` is still
 * propagated to the caller.
 *
 * @param params Object carrying the `session_id` to end.
 * @returns `{ ok: true }` once the session has been torn down.
 * @throws SessionNotFoundError when the session id is unknown.
 */
private async endSession(params: { session_id: string }): Promise<{ ok: true }> {
  const sessionId = params.session_id;
  const session = this.sessions.get(sessionId);
  if (!session) {
    throw new SessionNotFoundError(sessionId);
  }

  try {
    if (session.activeRecording) {
      try {
        await this.record(sessionId, '', 'stop', {});
      } catch (error) {
        // Finalising the recording is best effort; the browser must still be closed.
        console.warn(`Failed to stop recording for session ${sessionId}:`, error);
      }
    }
    await session.driver.quit();
  } finally {
    // Make sure no screenshot timer keeps firing against a closed browser.
    if (session.screenshotInterval) {
      clearInterval(session.screenshotInterval);
      session.screenshotInterval = undefined;
    }
    session.activeRecording = null;
    this.sessions.delete(sessionId);
    this.mcpServer.getRegistry().destroySession(sessionId);
  }

  return { ok: true };
}
/**
 * Navigates the session's current window to `url`, records the window in the
 * session's window map, and returns a screenshot of the loaded page.
 *
 * @param sessionId Session to operate on.
 * @param url       Address to load.
 * @returns An image response whose metadata carries the window id, final URL and title.
 */
private async open(sessionId: string, url: string): Promise<any> {
  const { driver, windows } = this.getSession(sessionId);

  await driver.get(url);
  const pageTitle = await driver.getTitle();
  const landedUrl = await driver.getCurrentUrl();
  const handle = await driver.getWindowHandle();

  // Remember the window under its WebDriver handle.
  windows.set(handle, {
    window_id: handle,
    url: landedUrl,
    title: pageTitle,
    focused: true
  });

  const png = await captureScreenshot(driver);
  const metadata = { window_id: handle, url: landedUrl, title: pageTitle };
  return createImageResponse(png, 'image/png', metadata);
}
/**
 * Captures a screenshot of the given window (or of the currently focused one
 * when `windowId` is omitted), optionally cropped to `area`.
 *
 * @param sessionId Session to operate on.
 * @param windowId  Window handle to switch to before capturing.
 * @param area      Crop rectangle passed through to `cropImage`.
 * @returns An image response whose metadata carries the image width, height
 *          and the window id (`'current'` when none was given).
 */
private async screenshot(sessionId: string, windowId?: string, area?: { x: number; y: number; w: number; h: number }): Promise<any> {
  const { driver } = this.getSession(sessionId);
  if (windowId) {
    await driver.switchTo().window(windowId);
  }

  const fullCapture = await captureScreenshot(driver);
  const png = area ? await cropImage(fullCapture, area) : fullCapture;

  // Dimensions are measured on the final (possibly cropped) image.
  const { width, height } = await getImageDimensions(png);
  return createImageResponse(png, 'image/png', {
    width,
    height,
    window_id: windowId || 'current'
  });
}
/**
 * Takes a WebDriver screenshot and decodes its base64 payload.
 *
 * @param driver Driver whose current window is captured.
 * @returns The decoded screenshot bytes.
 */
private async takeScreenshot(driver: WebDriver): Promise<Buffer> {
  return Buffer.from(await driver.takeScreenshot(), 'base64');
}
/**
 * Clicks either the first element matching a CSS selector or a viewport
 * coordinate, optionally waits, and returns a screenshot of the result.
 *
 * Wait modes:
 * - `'load'`: polls `document.readyState` until it is `'complete'` (up to 5 s).
 *   The previous implementation waited for the title to equal the title it
 *   had just read, which is always satisfied immediately.
 * - `'idle'`: fixed 1 s pause (no real network-idle detection).
 *
 * @param sessionId Session to operate on.
 * @param target    `{ selector }` or `{ x, y }`.
 * @param wait      Optional wait mode applied after the click.
 * @returns An image response with `ok`, `action` and the image dimensions.
 * @throws InvalidArgError when `target` has neither a selector nor coordinates.
 */
private async click(sessionId: string, target: ClickTarget, wait?: WaitCondition): Promise<any> {
  const session = this.getSession(sessionId);
  const driver = session.driver;

  if ('selector' in target && target.selector) {
    const element = await driver.findElement(By.css(target.selector));
    await element.click();
  } else if ('x' in target && 'y' in target) {
    await driver.actions().move({ x: target.x, y: target.y }).click().perform();
  } else {
    throw new InvalidArgError('target', 'Must provide either selector or coordinates');
  }

  if (wait === 'load') {
    // Wait for the document (possibly a new one after navigation) to finish loading.
    await driver.wait(
      async () => (await driver.executeScript('return document.readyState')) === 'complete',
      5000
    );
  } else if (wait === 'idle') {
    await driver.sleep(1000);
  }

  const screenshotData = await captureScreenshot(driver);
  const { width, height } = await getImageDimensions(screenshotData);
  return createImageResponse(screenshotData, 'image/png', {
    ok: true,
    action: 'click',
    width,
    height
  });
}
/**
 * Replaces the content of the element matching `selector` with `text`,
 * optionally submits it, and returns a screenshot of the result.
 *
 * @param sessionId Session to operate on.
 * @param selector  CSS selector of the input element.
 * @param text      Text sent to the element after it has been cleared.
 * @param submit    When truthy, `submit()` is called on the element afterwards.
 * @returns An image response with `ok`, `action` and the image dimensions.
 */
private async type(sessionId: string, selector: string, text: string, submit?: boolean): Promise<any> {
  const { driver } = this.getSession(sessionId);

  const field = await driver.findElement(By.css(selector));
  await field.clear();
  await field.sendKeys(text);
  if (submit) {
    await field.submit();
  }

  const png = await captureScreenshot(driver);
  const size = await getImageDimensions(png);
  return createImageResponse(png, 'image/png', {
    ok: true,
    action: 'type',
    width: size.width,
    height: size.height
  });
}
/**
 * Scrolls the given window either by a relative offset (`params.by`) or to an
 * absolute position (`params.to`) and returns a screenshot. When both are
 * present only `by` is applied; when neither is present nothing is scrolled.
 *
 * The offsets are passed to the page as script arguments rather than being
 * interpolated into the script text, so a non-numeric value cannot inject
 * code. Missing components default to 0.
 *
 * @param sessionId Session to operate on.
 * @param windowId  Window handle to switch to before scrolling.
 * @param params    Scroll request carrying `by` and/or `to`.
 * @returns An image response with `ok`, `action` and the image dimensions.
 */
private async scroll(sessionId: string, windowId: string, params: ScrollParams): Promise<any> {
  const session = this.getSession(sessionId);
  const driver = session.driver;
  await driver.switchTo().window(windowId);

  if (params.by) {
    await driver.executeScript(
      'window.scrollBy(arguments[0], arguments[1])',
      params.by.dx ?? 0,
      params.by.dy ?? 0
    );
  } else if (params.to) {
    await driver.executeScript(
      'window.scrollTo(arguments[0], arguments[1])',
      params.to.x ?? 0,
      params.to.y ?? 0
    );
  }

  const screenshotData = await captureScreenshot(driver);
  const { width, height } = await getImageDimensions(screenshotData);
  return createImageResponse(screenshotData, 'image/png', {
    ok: true,
    action: 'scroll',
    width,
    height
  });
}
/**
 * Runs a JavaScript snippet in the given window and returns a screenshot
 * together with the script's return value. Failures while executing the
 * script or capturing the screenshot are reported as `{ error }` instead of
 * being thrown.
 *
 * @param sessionId Session to operate on.
 * @param windowId  Window handle to switch to before executing.
 * @param js        Script source passed to `executeScript`.
 * @param sandbox   Accepted for interface compatibility; not consulted here.
 * @returns An image response with `value`, or `{ error: string }` on failure.
 */
private async eval(sessionId: string, windowId: string, js: string, sandbox?: boolean): Promise<any> {
  const { driver } = this.getSession(sessionId);
  await driver.switchTo().window(windowId);

  try {
    const value = await driver.executeScript(js);
    const png = await captureScreenshot(driver);
    return createImageResponse(png, 'image/png', { value });
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : String(failure);
    return { error: message };
  }
}
/**
 * Navigates the given window to `url`, optionally waits, and returns a
 * screenshot of the page.
 *
 * Wait modes:
 * - `'load'`: polls `document.readyState` until it is `'complete'` (up to 10 s).
 *   The previous implementation waited for the title to equal the title it
 *   had just read, which is always satisfied immediately.
 * - `'idle'`: fixed 2 s pause (no real network-idle detection).
 *
 * @param sessionId Session to operate on.
 * @param windowId  Window handle to switch to before navigating.
 * @param url       Address to load.
 * @param wait      Optional wait mode applied after navigation.
 * @returns An image response whose metadata carries the window id, final URL and title.
 */
private async goto(sessionId: string, windowId: string, url: string, wait?: WaitCondition): Promise<any> {
  const session = this.getSession(sessionId);
  const driver = session.driver;
  await driver.switchTo().window(windowId);
  await driver.get(url);

  if (wait === 'load') {
    await driver.wait(
      async () => (await driver.executeScript('return document.readyState')) === 'complete',
      10000
    );
  } else if (wait === 'idle') {
    await driver.sleep(2000);
  }

  const title = await driver.getTitle();
  const currentUrl = await driver.getCurrentUrl();
  const screenshotData = await captureScreenshot(driver);
  return createImageResponse(screenshotData, 'image/png', {
    window_id: windowId,
    url: currentUrl,
    title
  });
}
/**
 * Drives the browser towards `goal` by repeatedly sending Gemini a screenshot
 * plus the first 5000 characters of the page source and executing the action
 * it proposes (`click`, `type`, `scroll`, `done`, `stuck`).
 *
 * The loop ends when the model answers `done` or `stuck`, when its answer
 * cannot be parsed, when a click/type fails, or when `maxSteps` iterations
 * have run. Running out of steps is reported as `status: 'stuck'`; previously
 * it was reported as `'ok'` even though the model never declared the goal
 * done.
 *
 * @param sessionId   Session to operate on.
 * @param goal        Natural-language goal inserted into the prompt.
 * @param constraints Optional object serialised into the prompt.
 * @param maxSteps    Maximum number of model round-trips (default 10).
 * @param temperature Accepted for interface compatibility; not forwarded to the model here.
 * @returns Status, executed steps, notes and a base64 final screenshot.
 * @throws Error when no Gemini client is configured.
 */
private async act(
  sessionId: string,
  goal: string,
  constraints?: any,
  maxSteps: number = 10,
  temperature: number = 0.7
): Promise<ActResult> {
  if (!this.geminiClient) {
    throw new Error('GEMINI_API_KEY not configured - AI automation requires Gemini');
  }
  const session = this.getSession(sessionId);
  const model = this.geminiClient.getGenerativeModel({
    model: 'gemini-2.0-flash-exp'
  });

  const steps: RecordingStep[] = [];
  let status: 'ok' | 'stuck' | 'failed' = 'ok';
  let notes = '';
  // True once the loop has ended for a reason other than exhausting maxSteps.
  let concluded = false;

  for (let i = 0; i < maxSteps; i++) {
    const screenshot = await captureScreenshot(session.driver);
    const pageSource = await session.driver.getPageSource();
    const currentUrl = await session.driver.getCurrentUrl();

    // The template lines are intentionally unindented: they are sent verbatim.
    const prompt = `
You are automating a web browser to achieve this goal: "${goal}"
${constraints ? `Constraints: ${JSON.stringify(constraints)}` : ''}
Current URL: ${currentUrl}
Step ${i + 1} of ${maxSteps}
Based on the screenshot and HTML, what is the next action to take?
Respond with a JSON object:
{
"action": "click" | "type" | "scroll" | "done" | "stuck",
"selector": "CSS selector if click/type",
"text": "text to type if action is type",
"reason": "why this action",
"progress": "description of progress toward goal"
}
Page HTML summary (first 5000 chars):
${pageSource.substring(0, 5000)}
`;
    const result = await model.generateContent([
      prompt,
      {
        inlineData: {
          data: screenshot.toString('base64'),
          mimeType: 'image/png'
        }
      }
    ]);
    const responseText = result.response.text();

    let action: any;
    try {
      // The model may wrap the JSON in prose; take the outermost {...} span.
      const jsonMatch = responseText.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        action = JSON.parse(jsonMatch[0]);
      } else {
        throw new Error('No JSON found in response');
      }
    } catch (error) {
      status = 'failed';
      notes = `Failed to parse AI response: ${error}`;
      concluded = true;
      break;
    }

    // Record the proposed step before executing it.
    steps.push({
      action: action.action,
      selector: action.selector,
      text: action.text,
      timestamp: new Date().toISOString(),
      url: currentUrl,
      notes: action.reason
    });

    if (action.action === 'done') {
      status = 'ok';
      notes = action.progress;
      concluded = true;
      break;
    } else if (action.action === 'stuck') {
      status = 'stuck';
      notes = action.reason;
      concluded = true;
      break;
    } else if (action.action === 'click' && action.selector) {
      try {
        const element = await session.driver.findElement(By.css(action.selector));
        await element.click();
        await session.driver.sleep(1000); // give the page time to update
      } catch (error) {
        status = 'failed';
        notes = `Failed to click ${action.selector}: ${error}`;
        concluded = true;
        break;
      }
    } else if (action.action === 'type' && action.selector && action.text) {
      try {
        const element = await session.driver.findElement(By.css(action.selector));
        await element.clear();
        await element.sendKeys(action.text);
        await session.driver.sleep(500);
      } catch (error) {
        status = 'failed';
        notes = `Failed to type in ${action.selector}: ${error}`;
        concluded = true;
        break;
      }
    } else if (action.action === 'scroll') {
      await session.driver.executeScript('window.scrollBy(0, 300)');
      await session.driver.sleep(500);
    }
  }

  if (!concluded) {
    // The step budget ran out without a 'done' verdict; do not report success.
    status = 'stuck';
    notes = `Reached max_steps (${maxSteps}) without completing the goal`;
  }

  const finalScreenshot = await captureScreenshot(session.driver);
  return {
    status,
    steps,
    notes,
    questions: [],
    image_name: 'final_state.png',
    finalScreenshot_data: finalScreenshot.toString('base64'),
    finalScreenshot_mimeType: 'image/png'
  } as any;
}
/**
 * Starts or stops a recording on the session.
 *
 * `start` creates a new recording, stores it, optionally drains the
 * performance log (HAR) and optionally begins periodic screenshots written to
 * the recordings directory. Any other `cmd` value is treated as `stop`, which
 * stamps the end time, stops the screenshot timer and converts the
 * performance log to HAR when HAR capture was requested.
 *
 * Fixes over the previous version:
 * - starting while a recording is active clears the previous screenshot timer
 *   instead of leaking it;
 * - the timer callback catches its own errors (no unhandled rejections),
 *   skips a tick while a capture is still in flight, and no longer
 *   dereferences `activeRecording` after it may have been cleared;
 * - a non-positive or non-numeric `fps` falls back to 1;
 * - `activeRecording` is cleared even if fetching the HAR logs fails.
 *
 * NOTE(review): nothing in the visible part of this file appends to
 * `recording.steps`; confirm where user actions are meant to be recorded.
 *
 * @param sessionId Session to operate on.
 * @param windowId  Window id stored on the recording.
 * @param cmd       `'start'` or `'stop'`.
 * @param opts      `har`, `screenshots` and `fps` switches for `start`.
 * @returns `{ recording_id }` for start, `{ stopped: true }` for stop.
 * @throws Error when stopping without an active recording.
 */
private async record(
  sessionId: string,
  windowId: string,
  cmd: 'start' | 'stop',
  opts?: { har?: boolean; screenshots?: boolean; fps?: number }
): Promise<RecordResponse> {
  const session = this.getSession(sessionId);

  if (cmd === 'start') {
    // Close out a recording that is still running so its timer is not leaked.
    if (session.screenshotInterval) {
      clearInterval(session.screenshotInterval);
      session.screenshotInterval = undefined;
    }
    if (session.activeRecording && !session.activeRecording.endTime) {
      session.activeRecording.endTime = new Date().toISOString();
    }

    const recordingId = uuidv4();
    const recording: Recording = {
      id: recordingId,
      sessionId,
      windowId,
      startTime: new Date().toISOString(),
      steps: [],
      har: opts?.har ? { log: { version: '1.2', creator: { name: 'mcp-browser', version: '1.0' }, entries: [] } } : undefined,
      screenshots: [],
      options: opts
    };
    session.activeRecording = recording;
    this.recordings.set(recordingId, recording);

    if (opts?.har) {
      // Reading the performance log drains it, so the HAR starts from here.
      await session.driver.manage().logs().get(logging.Type.PERFORMANCE);
    }

    if (opts?.screenshots) {
      const requestedFps = opts.fps;
      const fps = typeof requestedFps === 'number' && requestedFps > 0 ? requestedFps : 1;
      const interval = 1000 / fps;
      let captureInFlight = false;

      session.screenshotInterval = setInterval(async () => {
        // Skip when this recording is over or the previous capture has not finished.
        if (captureInFlight || session.activeRecording !== recording) {
          return;
        }
        captureInFlight = true;
        try {
          const screenshot = await captureScreenshot(session.driver);
          // The recording may have been stopped while the capture was pending.
          if (session.activeRecording !== recording) {
            return;
          }
          const screenshotPath = path.join(
            this.recordingsDir,
            `${recordingId}_${Date.now()}.png`
          );
          fs.writeFileSync(screenshotPath, screenshot);
          recording.screenshots?.push(screenshotPath);
        } catch (error) {
          console.warn(`Failed to capture screenshot for recording ${recordingId}:`, error);
        } finally {
          captureInFlight = false;
        }
      }, interval);
    }

    return { recording_id: recordingId };
  }

  // stop
  if (!session.activeRecording) {
    throw new Error('No active recording to stop');
  }
  const recording = session.activeRecording;
  recording.endTime = new Date().toISOString();

  if (session.screenshotInterval) {
    clearInterval(session.screenshotInterval);
    session.screenshotInterval = undefined;
  }

  try {
    if (recording.har) {
      const logs = await session.driver.manage().logs().get(logging.Type.PERFORMANCE);
      recording.har = this.logsToHar(logs);
    }
  } finally {
    // The recording is over regardless of whether the HAR could be collected.
    session.activeRecording = null;
  }
  return { stopped: true };
}
/**
 * Replays the steps of a stored recording against the session's browser,
 * pausing 500 ms between steps.
 *
 * Supported step actions: `click` (CSS selector or `{x, y}` coordinates),
 * `type` (CSS selector only; the text may be overridden via
 * `overrides[selector]`), `navigate` and `scroll` (fixed 300 px down).
 * Any failure stops the replay and yields `status: 'error'`; the underlying
 * error is now logged instead of being discarded silently.
 *
 * @param sessionId   Session to replay in.
 * @param recordingId Id of the recording to replay.
 * @param overrides   Optional map from selector to replacement text.
 * @returns A fresh run id and the overall status.
 * @throws Error when the recording id is unknown.
 */
private async replay(
  sessionId: string,
  recordingId: string,
  overrides?: any
): Promise<{ run_id: string; status: 'ok' | 'error' }> {
  const session = this.getSession(sessionId);
  const recording = this.recordings.get(recordingId);
  if (!recording) {
    throw new Error(`Recording ${recordingId} not found`);
  }

  const runId = uuidv4();
  const driver = session.driver;
  try {
    for (const step of recording.steps) {
      if (step.action === 'click' && step.selector) {
        if (typeof step.selector === 'string') {
          const element = await driver.findElement(By.css(step.selector));
          await element.click();
        } else {
          // A non-string selector holds click coordinates.
          await driver.actions()
            .move({ x: step.selector.x, y: step.selector.y })
            .click()
            .perform();
        }
      } else if (step.action === 'type' && step.selector && step.text) {
        if (typeof step.selector === 'string') {
          const element = await driver.findElement(By.css(step.selector));
          await element.clear();
          const textToType = overrides?.[step.selector] || step.text;
          await element.sendKeys(textToType);
        }
      } else if (step.action === 'navigate' && step.url) {
        await driver.get(step.url);
      } else if (step.action === 'scroll') {
        await driver.executeScript('window.scrollBy(0, 300)');
      }
      await driver.sleep(500);
    }
    return { run_id: runId, status: 'ok' };
  } catch (error) {
    // The return shape has no room for details, so surface them in the log.
    console.warn(`Replay ${runId} of recording ${recordingId} failed:`, error);
    return { run_id: runId, status: 'error' };
  }
}
/**
 * Lists the recordings that belong to the given session.
 *
 * @param sessionId Session whose recordings are returned.
 * @param filter    Accepted for interface compatibility; not consulted here.
 * @returns Summary entries (id, display name, start time, step count).
 */
private async listRecordings(
  sessionId: string,
  filter?: any
): Promise<ListRecordingsResponse> {
  const recordings = [];
  for (const rec of this.recordings.values()) {
    if (rec.sessionId !== sessionId) {
      continue;
    }
    recordings.push({
      id: rec.id,
      // Display name uses the first eight characters of the id.
      name: `Recording ${rec.id.substring(0, 8)}`,
      created: rec.startTime,
      step_count: rec.steps.length
    });
  }
  return { recordings };
}
/**
 * Returns the steps of a recording. The screenshot stored at the same index
 * as a step, when its file still exists, is attached to that step as base64.
 *
 * @param sessionId   Not consulted; the recording is looked up by id alone.
 * @param recordingId Id of the recording to view.
 * @returns The steps, each optionally carrying `image_name`,
 *          `step_<i>_data` and `step_<i>_mimeType`.
 * @throws Error when the recording id is unknown.
 */
private async viewRecording(
  sessionId: string,
  recordingId: string
): Promise<ViewRecordingResponse> {
  const recording = this.recordings.get(recordingId);
  if (!recording) {
    throw new Error(`Recording ${recordingId} not found`);
  }

  const stepsWithImages: any[] = recording.steps.map((step, index) => {
    const entry: any = {
      action: step.action,
      selector: step.selector,
      text: step.text
    };

    // Screenshots are paired with steps purely by array position.
    const imagePath = recording.screenshots && recording.screenshots[index];
    if (imagePath && fs.existsSync(imagePath)) {
      const bytes = fs.readFileSync(imagePath);
      entry.image_name = `step_${index}.png`;
      entry[`step_${index}_data`] = bytes.toString('base64');
      entry[`step_${index}_mimeType`] = 'image/png';
    }
    return entry;
  });

  return { steps: stepsWithImages };
}
/**
 * Lists every window of the session with its URL, title and focus flag.
 *
 * Collecting the data requires switching to each window in turn; the
 * originally focused window is restored in a `finally` block so that a
 * failure part-way through does not leave the driver on another window.
 *
 * @param sessionId Session to inspect.
 * @returns One entry per window handle.
 */
private async listWindows(sessionId: string): Promise<ListWindowsResponse> {
  const session = this.getSession(sessionId);
  const driver = session.driver;
  const handles = await driver.getAllWindowHandles();
  const currentHandle = await driver.getWindowHandle();

  const windows: Array<{ window_id: string; url: string; title: string; focused: boolean }> = [];
  try {
    for (const handle of handles) {
      await driver.switchTo().window(handle);
      const url = await driver.getCurrentUrl();
      const title = await driver.getTitle();
      windows.push({
        window_id: handle,
        url,
        title,
        focused: handle === currentHandle
      });
    }
  } finally {
    // Always return focus to the window that had it before the scan.
    await driver.switchTo().window(currentHandle);
  }
  return { windows };
}
/**
 * Reads URL, title, window rectangle and scroll offset of one window.
 *
 * The driver is switched to `windowId` for the reads and switched back to the
 * previously focused window afterwards. The switch back runs in a `finally`
 * block so a failing read does not leave focus on the inspected window.
 *
 * @param sessionId Session to inspect.
 * @param windowId  Handle of the window to describe.
 * @returns URL, title, dimensions, scroll position and whether the window was focused.
 */
private async getWindowState(sessionId: string, windowId: string): Promise<WindowStateResponse> {
  const session = this.getSession(sessionId);
  const driver = session.driver;
  const currentHandle = await driver.getWindowHandle();
  await driver.switchTo().window(windowId);

  try {
    const url = await driver.getCurrentUrl();
    const title = await driver.getTitle();
    const rect = await driver.manage().window().getRect();
    const scrollPosition = await driver.executeScript('return {x: window.scrollX, y: window.scrollY}') as any;
    return {
      url,
      title,
      dimensions: { w: rect.width, h: rect.height },
      scroll: scrollPosition,
      focused: windowId === currentHandle
    };
  } finally {
    if (windowId !== currentHandle) {
      await driver.switchTo().window(currentHandle);
    }
  }
}
/**
 * Fetches the browser's performance log and converts it to a HAR document.
 *
 * @param sessionId Session whose log is read.
 * @returns The HAR document under `har_json`.
 */
private async getHar(sessionId: string): Promise<{ har_json: any }> {
  const { driver } = this.getSession(sessionId);
  const performanceLogs = await driver.manage().logs().get(logging.Type.PERFORMANCE);
  return { har_json: this.logsToHar(performanceLogs) };
}
/**
 * Converts Chrome performance-log entries (DevTools protocol events wrapped
 * in JSON) into a HAR 1.2 document.
 *
 * Fixes over the previous version:
 * - `Network.responseReceived` carries no request object, so every entry used
 *   to throw and be dropped. Request data is now taken from the matching
 *   `Network.requestWillBeSent` event (joined on `requestId`).
 * - `startedDateTime` uses the request's wall-clock time (falling back to the
 *   log entry timestamp) instead of the protocol's monotonic timestamp.
 * - Timing phases are computed only from valid start/end pairs; `time` is the
 *   number of milliseconds until the response headers arrived.
 * - Each top-level `Page.frameNavigated` opens a new page with its own id and
 *   later entries reference it; sub-frame navigations no longer create pages.
 * - Header lookups are case-insensitive and the query string is parsed.
 *
 * Entries that cannot be parsed are skipped with a warning.
 *
 * @param logs Entries read from the PERFORMANCE log.
 * @returns `{ log: { version, creator, pages, entries } }`.
 */
private logsToHar(logs: logging.Entry[]): any {
  const entries: any[] = [];
  const pages: any[] = [];
  // Requests seen so far, keyed by protocol requestId.
  const sentRequests = new Map<string, { request: any; wallTime?: number }>();
  let pageRef = 'page_1';

  const toHeaderList = (headers: any) =>
    Object.entries(headers || {}).map(([name, value]) => ({ name, value }));

  // Case-insensitive header lookup; header names arrive in whatever case was sent.
  const headerValue = (headers: any, wanted: string): string | undefined => {
    const key = Object.keys(headers || {}).find(k => k.toLowerCase() === wanted);
    return key === undefined ? undefined : String(headers[key]);
  };

  // Duration of one timing phase, or `fallback` when the phase did not occur.
  const phase = (timing: any, startKey: string, endKey: string, fallback: number): number => {
    const start = timing?.[startKey];
    const end = timing?.[endKey];
    const valid = typeof start === 'number' && typeof end === 'number' && start >= 0 && end >= start;
    return valid ? end - start : fallback;
  };

  const queryOf = (url: string): Array<{ name: string; value: string }> => {
    try {
      return Array.from(new URL(url).searchParams, ([name, value]) => ({ name, value }));
    } catch {
      return []; // not an absolute URL (e.g. data: or malformed)
    }
  };

  const toIso = (ms: unknown): string =>
    new Date(typeof ms === 'number' && Number.isFinite(ms) ? ms : Date.now()).toISOString();

  for (const log of logs) {
    try {
      const message = JSON.parse(log.message);
      const method = message.message?.method;
      const params = message.message?.params;

      if (method === 'Network.requestWillBeSent') {
        sentRequests.set(params.requestId, { request: params.request, wallTime: params.wallTime });
      } else if (method === 'Network.responseReceived') {
        const response = params.response;
        const sent = sentRequests.get(params.requestId);
        const request = sent?.request ?? {};
        const timing = response.timing;
        // wallTime is seconds since the Unix epoch; the log timestamp is milliseconds.
        const startedMs = typeof sent?.wallTime === 'number' ? sent.wallTime * 1000 : log.timestamp;
        const headersArrived = timing?.receiveHeadersEnd;

        entries.push({
          startedDateTime: toIso(startedMs),
          time: typeof headersArrived === 'number' && headersArrived > 0 ? headersArrived : 0,
          request: {
            method: request.method ?? 'UNKNOWN',
            url: response.url,
            httpVersion: response.protocol || 'HTTP/1.1',
            headers: toHeaderList(request.headers),
            queryString: queryOf(response.url),
            cookies: [],
            headersSize: -1,
            bodySize: request.postData?.length || 0,
            postData: request.postData ? {
              mimeType: headerValue(request.headers, 'content-type') || 'application/octet-stream',
              text: request.postData
            } : undefined
          },
          response: {
            status: response.status,
            statusText: response.statusText,
            httpVersion: response.protocol || 'HTTP/1.1',
            headers: toHeaderList(response.headers),
            cookies: [],
            content: {
              size: response.encodedDataLength || 0,
              mimeType: response.mimeType || 'application/octet-stream',
              // The decoded body size is not part of this event, so no saving is reported.
              compression: 0
            },
            redirectURL: headerValue(response.headers, 'location') || '',
            headersSize: -1,
            bodySize: response.encodedDataLength || 0
          },
          cache: {},
          timings: {
            blocked: 0,
            dns: phase(timing, 'dnsStart', 'dnsEnd', -1),
            connect: phase(timing, 'connectStart', 'connectEnd', -1),
            send: phase(timing, 'sendStart', 'sendEnd', 0),
            wait: phase(timing, 'sendEnd', 'receiveHeadersEnd', 0),
            receive: 0,
            ssl: phase(timing, 'sslStart', 'sslEnd', -1)
          },
          serverIPAddress: response.remoteIPAddress,
          pageref: pageRef
        });
      } else if (method === 'Page.frameNavigated' && !params.frame.parentId) {
        // Only top-level frames open a new HAR page.
        pageRef = `page_${pages.length + 1}`;
        pages.push({
          startedDateTime: toIso(log.timestamp),
          id: pageRef,
          title: params.frame.name || params.frame.url || 'Page',
          pageTimings: {
            onContentLoad: -1,
            onLoad: -1
          }
        });
      }
    } catch (error) {
      // Skip malformed log entries
      console.warn('Failed to parse log entry:', error);
    }
  }

  return {
    log: {
      version: '1.2',
      creator: {
        name: 'mcp-browser',
        version: '1.0'
      },
      pages,
      entries
    }
  };
}
/**
 * Looks up a live session by id.
 *
 * @param sessionId Identifier returned by `startSession`.
 * @returns The stored session.
 * @throws SessionNotFoundError when no session is stored under that id.
 */
private getSession(sessionId: string): BrowserSession {
  const found = this.sessions.get(sessionId);
  if (found) {
    return found;
  }
  throw new SessionNotFoundError(sessionId);
}
/**
 * Shutdown hook: ends every open session (logging, not throwing, on
 * individual failures) and then deletes the recordings directory.
 */
async cleanup(): Promise<void> {
  // Snapshot the ids first; endSession removes entries from the map.
  const openSessionIds = Array.from(this.sessions.keys());
  for (const sessionId of openSessionIds) {
    try {
      await this.endSession({ session_id: sessionId });
    } catch (error) {
      console.warn(`Error cleaning up browser session ${sessionId}:`, error);
    }
  }

  const recordingsDirPresent = fs.existsSync(this.recordingsDir);
  if (!recordingsDirPresent) {
    return;
  }
  fs.rmSync(this.recordingsDir, { recursive: true, force: true });
}
}