Skip to main content
Glama
screen.ts (12.9 kB)
import { z } from 'zod';
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import fs from 'fs';
import { promises as fsPromises } from 'fs';
import path from 'path';
import os from 'os';
import { logAudit } from '../audit.js';
import { PowerShellSession } from '../utils/powerShellSession.js';

// `exec` is only used for fixed command strings; anything that carries a
// caller-supplied path goes through `execFile` so no shell ever parses it.
const execAsync = promisify(exec);
const execFileAsync = promisify(execFile);

/** Timeout (ms) applied to external screenshot commands. */
const CAPTURE_TIMEOUT_MS = 10000;

/** Timeout (ms) applied to external display-query commands. */
const DISPLAY_QUERY_TIMEOUT_MS = 5000;

/** Rectangle in screen coordinates. */
interface Region {
  x: number;
  y: number;
  width: number;
  height: number;
}

// Schema definitions
export const ScreenshotSchema = {
  region: z.object({
    x: z.number(),
    y: z.number(),
    width: z.number(),
    height: z.number(),
  }).optional().describe('Optional region to capture. If not specified, captures entire screen.'),
  monitor: z.number().optional().describe('Monitor index (0-based). Default: primary monitor.'),
  format: z.enum(['png', 'jpg', 'base64']).optional().describe('Output format. Default: base64.'),
  savePath: z.string().optional().describe('If provided, saves screenshot to this path instead of returning base64.'),
};

export const GetScreenInfoSchema = {};

export const WaitForScreenChangeSchema = {
  region: z.object({
    x: z.number(),
    y: z.number(),
    width: z.number(),
    height: z.number(),
  }).optional().describe('Region to monitor for changes'),
  timeout: z.number().optional().describe('Timeout in milliseconds (default: 5000)'),
  threshold: z.number().optional().describe('Change threshold 0-1 (default: 0.1)'),
};

export const FindOnScreenSchema = {
  text: z.string().optional().describe('Text to find on screen (uses OCR)'),
  image: z.string().optional().describe('Path to template image to find'),
  region: z.object({
    x: z.number(),
    y: z.number(),
    width: z.number(),
    height: z.number(),
  }).optional().describe('Region to search within'),
  confidence: z.number().optional().describe('Match confidence 0-1 (default: 0.8)'),
};

/** Extracts a printable message from whatever value was thrown. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/** Wraps a string in the single-text-item tool result shape. */
function textResult(text: string) {
  return {
    content: [{ type: 'text', text }],
  };
}

/** Builds the error tool result shape used by every handler. */
function errorResult(message: string) {
  return {
    content: [{ type: 'text', text: `Error: ${message}` }],
    isError: true,
  };
}

/**
 * Validates a capture region and rounds it to whole pixels.
 *
 * The values are interpolated into PowerShell scripts and command-line
 * arguments, so they must be plain finite integers.
 *
 * @throws Error if any value is not finite or the size is not positive.
 */
function normalizeRegion(region: Region): Region {
  const values = [region.x, region.y, region.width, region.height];
  if (!values.every((value) => Number.isFinite(value))) {
    throw new Error('Region x, y, width and height must be finite numbers');
  }
  const normalized: Region = {
    x: Math.round(region.x),
    y: Math.round(region.y),
    width: Math.round(region.width),
    height: Math.round(region.height),
  };
  if (normalized.width <= 0 || normalized.height <= 0) {
    throw new Error('Region width and height must be positive');
  }
  return normalized;
}

/**
 * Renders a file path as a PowerShell single-quoted string literal.
 *
 * Single quotes are doubled, which is the only escape PowerShell recognises
 * inside single-quoted strings; without it a quote in the path would end the
 * literal and let the remainder run as script. Backslashes are doubled exactly
 * as the previous implementation did.
 * NOTE(review): PowerShell itself does not need the backslash doubling; it is
 * kept in case PowerShellSession.execute depends on it - confirm.
 */
function toPowerShellLiteral(filePath: string): string {
  const escaped = filePath.replace(/\\/g, '\\\\').replace(/'/g, "''");
  return `'${escaped}'`;
}

/** Builds the PowerShell script that captures a region or the primary screen. */
function buildWindowsCaptureScript(targetPath: string, region?: Region): string {
  const target = toPowerShellLiteral(targetPath);
  if (region) {
    return `
      Add-Type -AssemblyName System.Windows.Forms
      Add-Type -AssemblyName System.Drawing
      try {
        $bitmap = New-Object System.Drawing.Bitmap(${region.width}, ${region.height})
        $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
        $graphics.CopyFromScreen(${region.x}, ${region.y}, 0, 0, $bitmap.Size)
        $bitmap.Save(${target})
      } finally {
        if ($graphics) { $graphics.Dispose() }
        if ($bitmap) { $bitmap.Dispose() }
      }
    `;
  }
  return `
      Add-Type -AssemblyName System.Windows.Forms
      Add-Type -AssemblyName System.Drawing
      try {
        $screen = [System.Windows.Forms.Screen]::PrimaryScreen
        $bitmap = New-Object System.Drawing.Bitmap($screen.Bounds.Width, $screen.Bounds.Height)
        $graphics = [System.Drawing.Graphics]::FromImage($bitmap)
        $graphics.CopyFromScreen($screen.Bounds.Location, [System.Drawing.Point]::Empty, $screen.Bounds.Size)
        $bitmap.Save(${target})
      } finally {
        if ($graphics) { $graphics.Dispose() }
        if ($bitmap) { $bitmap.Dispose() }
      }
    `;
}

/**
 * Reads a file fully and then removes it.
 *
 * The removal runs even when the read fails so temporary screenshots are not
 * left behind; a failure to remove is ignored because it is only cleanup.
 */
async function readAndRemove(filePath: string): Promise<Buffer> {
  try {
    return await fsPromises.readFile(filePath);
  } finally {
    await fsPromises.unlink(filePath).catch(() => undefined);
  }
}

/**
 * Platform-specific screenshot implementation.
 *
 * Windows uses a PowerShell script, macOS uses `screencapture`, and other
 * platforms try `scrot` and fall back to `gnome-screenshot`.
 *
 * NOTE: `options.monitor` is accepted but not used by any backend.
 * NOTE: the `gnome-screenshot` fallback always captures the full screen; the
 * requested region is only honoured by `scrot`.
 *
 * @param options.region   Optional rectangle to capture (rounded to whole pixels).
 * @param options.savePath Destination file; a temp file is used when omitted.
 * @returns The path of the written image file.
 * @throws Error prefixed with "Screenshot failed:" on any capture error.
 */
async function captureScreen(options: {
  region?: { x: number; y: number; width: number; height: number };
  monitor?: number;
  savePath?: string;
}): Promise<{ path: string; base64?: string }> {
  const platform = os.platform();
  // pid + random suffix keep concurrent captures in the same millisecond apart.
  const uniqueSuffix = `${Date.now()}_${process.pid}_${Math.random().toString(36).slice(2, 10)}`;
  const tempPath = options.savePath || path.join(os.tmpdir(), `screenshot_${uniqueSuffix}.png`);

  try {
    const region = options.region ? normalizeRegion(options.region) : undefined;

    if (platform === 'win32') {
      // Use PowerShell for Windows screenshot
      await PowerShellSession.getInstance().execute(buildWindowsCaptureScript(tempPath, region));
    } else if (platform === 'darwin') {
      // macOS: use screencapture
      const captureArgs = region
        ? [`-R${region.x},${region.y},${region.width},${region.height}`, '-x', tempPath]
        : ['-x', tempPath];
      await execFileAsync('screencapture', captureArgs, { timeout: CAPTURE_TIMEOUT_MS });
    } else {
      // Linux: use scrot or gnome-screenshot
      const scrotArgs = region
        ? ['-a', `${region.x},${region.y},${region.width},${region.height}`, tempPath]
        : [tempPath];
      try {
        await execFileAsync('scrot', scrotArgs, { timeout: CAPTURE_TIMEOUT_MS });
      } catch {
        // Best-effort fallback when scrot is missing or fails.
        await execFileAsync('gnome-screenshot', ['-f', tempPath], { timeout: CAPTURE_TIMEOUT_MS });
      }
    }

    return { path: tempPath };
  } catch (error: unknown) {
    throw new Error(`Screenshot failed: ${errorMessage(error)}`);
  }
}

/**
 * Get screen/display information.
 *
 * The element shape differs per platform: Windows returns the objects emitted
 * by the PowerShell script, macOS returns `system_profiler` display entries,
 * and other platforms return `{ name, width, height, x, y }` parsed from
 * `xrandr --query`.
 */
async function getDisplayInfo(): Promise<unknown[]> {
  const platform = os.platform();

  if (platform === 'win32') {
    const script = `
      Add-Type -AssemblyName System.Windows.Forms
      [System.Windows.Forms.Screen]::AllScreens | ForEach-Object {
        @{
          DeviceName = $_.DeviceName
          Primary = $_.Primary
          Bounds = @{
            X = $_.Bounds.X
            Y = $_.Bounds.Y
            Width = $_.Bounds.Width
            Height = $_.Bounds.Height
          }
          WorkingArea = @{
            X = $_.WorkingArea.X
            Y = $_.WorkingArea.Y
            Width = $_.WorkingArea.Width
            Height = $_.WorkingArea.Height
          }
        }
      } | ConvertTo-Json
    `;
    const stdout = await PowerShellSession.getInstance().execute(script);
    const result: unknown = JSON.parse(stdout);
    // ConvertTo-Json emits a bare object (not an array) for a single screen.
    return Array.isArray(result) ? result : [result];
  }

  if (platform === 'darwin') {
    const { stdout } = await execAsync(`system_profiler SPDisplaysDataType -json`, {
      timeout: DISPLAY_QUERY_TIMEOUT_MS,
    });
    const data = JSON.parse(stdout) as {
      SPDisplaysDataType?: Array<{ spdisplays_ndrvs?: unknown[] }>;
    };
    // Collect displays from every graphics entry, not just the first one.
    const displays: unknown[] = [];
    for (const gpu of data.SPDisplaysDataType ?? []) {
      displays.push(...(gpu.spdisplays_ndrvs ?? []));
    }
    return displays;
  }

  const { stdout } = await execAsync(`xrandr --query`, { timeout: DISPLAY_QUERY_TIMEOUT_MS });
  const displays: unknown[] = [];
  const regex = /(\S+) connected(?: primary)? (\d+)x(\d+)\+(\d+)\+(\d+)/g;
  let match;
  while ((match = regex.exec(stdout)) !== null) {
    displays.push({
      name: match[1],
      width: Number.parseInt(match[2], 10),
      height: Number.parseInt(match[3], 10),
      x: Number.parseInt(match[4], 10),
      y: Number.parseInt(match[5], 10),
    });
  }
  return displays;
}

// Tool handlers

/**
 * Captures a screenshot and returns it as a tool result.
 *
 * - With `savePath`: the image is written there and the path is reported.
 * - With format `base64` (default): the image is returned inline and the
 *   temporary file is removed.
 * - Otherwise: the path of the temporary file is reported.
 *
 * Failures are audited and returned as an `isError` result rather than thrown.
 */
export async function handleScreenshot(args: {
  region?: { x: number; y: number; width: number; height: number };
  monitor?: number;
  format?: 'png' | 'jpg' | 'base64';
  savePath?: string;
}) {
  try {
    const format = args.format ?? 'base64';
    const result = await captureScreen({
      region: args.region,
      monitor: args.monitor,
      savePath: args.savePath,
    });

    await logAudit('screenshot', args, 'success');

    if (args.savePath) {
      return textResult(JSON.stringify({ saved: true, path: result.path }, null, 2));
    }

    if (format === 'base64') {
      // Reads the image and cleans up the temp file, even if the read fails.
      const imageBuffer = await readAndRemove(result.path);
      const base64 = imageBuffer.toString('base64');

      return {
        content: [
          {
            type: 'image',
            data: base64,
            mimeType: 'image/png',
          },
          {
            type: 'text',
            text: JSON.stringify({
              width: args.region?.width ?? 'full',
              height: args.region?.height ?? 'full',
              format: 'base64/png'
            })
          }
        ],
      };
    }

    return textResult(JSON.stringify({ path: result.path }, null, 2));
  } catch (error: unknown) {
    const message = errorMessage(error);
    await logAudit('screenshot', args, null, message);
    return errorResult(message);
  }
}

/**
 * Reports the platform and the displays found by `getDisplayInfo`.
 *
 * Failures are audited and returned as an `isError` result rather than thrown.
 */
export async function handleGetScreenInfo() {
  try {
    const displays = await getDisplayInfo();
    await logAudit('get_screen_info', {}, 'success');

    return textResult(JSON.stringify({
      platform: os.platform(),
      displayCount: displays.length,
      displays
    }, null, 2));
  } catch (error: unknown) {
    const message = errorMessage(error);
    await logAudit('get_screen_info', {}, null, message);
    return errorResult(message);
  }
}

/**
 * Polls the screen (or a region) until its captured image differs from the
 * initial capture, or until the timeout elapses.
 *
 * Change detection is a byte-for-byte comparison of the captured files.
 * NOTE: `args.threshold` is accepted but not used by this comparison.
 *
 * @param args.timeout Maximum wait in milliseconds (default 5000). An explicit
 *                     0 returns immediately after the initial capture.
 */
export async function handleWaitForScreenChange(args: {
  region?: { x: number; y: number; width: number; height: number };
  timeout?: number;
  threshold?: number;
}) {
  try {
    const timeout = args.timeout ?? 5000;
    const startTime = Date.now();

    // Take initial screenshot
    const initial = await captureScreen({ region: args.region });
    const initialBuffer = await readAndRemove(initial.path);

    // Poll for changes
    while (Date.now() - startTime < timeout) {
      await new Promise(resolve => setTimeout(resolve, 100));

      const current = await captureScreen({ region: args.region });
      const currentBuffer = await readAndRemove(current.path);

      // Simple byte comparison (could be improved with image diff)
      if (!initialBuffer.equals(currentBuffer)) {
        const elapsed = Date.now() - startTime;
        await logAudit('wait_for_screen_change', args, { changed: true, elapsed });
        return textResult(JSON.stringify({ changed: true, elapsed_ms: elapsed }, null, 2));
      }
    }

    // Report the measured wait; a capture can overrun the requested timeout.
    const elapsed = Date.now() - startTime;
    await logAudit('wait_for_screen_change', args, { changed: false, timeout: true });
    return textResult(JSON.stringify({ changed: false, timeout: true, elapsed_ms: elapsed }, null, 2));
  } catch (error: unknown) {
    const message = errorMessage(error);
    await logAudit('wait_for_screen_change', args, null, message);
    return errorResult(message);
  }
}

/**
 * Placeholder for on-screen text/image search.
 *
 * No search is performed: the call is audited as `not_implemented` and a
 * message describing the missing dependencies is returned.
 */
export async function handleFindOnScreen(args: {
  text?: string;
  image?: string;
  region?: { x: number; y: number; width: number; height: number };
  confidence?: number;
}) {
  try {
    // This is a placeholder - real implementation would need OCR (tesseract) or template matching (opencv)
    // For now, we'll indicate the capability exists but needs additional dependencies
    await logAudit('find_on_screen', args, 'not_implemented');

    return textResult(JSON.stringify({
      status: 'requires_dependencies',
      message: 'OCR/template matching requires additional dependencies (tesseract for text, opencv for images). Use screenshot + external OCR as alternative.',
      suggestion: args.text
        ? 'For text finding, take a screenshot and use an external OCR service or library.'
        : 'For image finding, take a screenshot and use template matching with an image processing library.'
    }, null, 2));
  } catch (error: unknown) {
    const message = errorMessage(error);
    await logAudit('find_on_screen', args, null, message);
    return errorResult(message);
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Mnehmos/mnehmos.ooda.mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.