Peekaboo MCP

Overview Inspect Schema Related Servers Score Discussions

MIT License

7,465

820

Peekaboo
Apps
CLI
Tests
CLIAutomationTests

ImageCommandTests.swift•27.2 kB

import Foundation import PeekabooCore import PeekabooFoundation import Testing @testable import PeekabooCLI #if !PEEKABOO_SKIP_AUTOMATION @Suite("ImageCommand Tests", .serialized, .tags(.imageCapture, .unit)) @MainActor struct ImageCommandTests { // MARK: - Test Data & Helpers private static let validFormats: [ImageFormat] = [.png, .jpg] private static let validCaptureModes: [CaptureMode] = [.screen, .window, .multi] private static let validCaptureFocus: [CaptureFocus] = [.background, .foreground] private static func createTestCommand(_ args: [String] = []) throws -> ImageCommand { try ImageCommand.parse(args) } // MARK: - Command Parsing Tests @Test("Basic command parsing with defaults", .tags(.fast)) func imageCommandParsing() throws { // Test basic command parsing let command = try ImageCommand.parse([]) // Verify defaults #expect(command.mode == nil) #expect(command.format == .png) #expect(command.path == nil) #expect(command.app == nil) #expect(command.captureFocus == .auto) #expect(command.retina == false) #expect(command.jsonOutput == false) } @Test("Command with screen mode", .tags(.fast)) func imageCommandWithScreenMode() throws { // Test screen capture mode let command = try ImageCommand.parse(["--mode", "screen"]) #expect(command.mode == .screen) } @Test("Command with app specifier", .tags(.fast)) func imageCommandWithAppSpecifier() throws { // Test app-specific capture let command = try ImageCommand.parse([ "--app", "Finder", ]) #expect(command.mode == nil) // mode is optional #expect(command.app == "Finder") } @Test("Command with PID specifier", .tags(.fast)) func imageCommandWithPIDSpecifier() throws { // Test PID-specific capture let command = try ImageCommand.parse([ "--app", "PID:1234", ]) #expect(command.mode == nil) // mode is optional #expect(command.app == "PID:1234") } @Test("Command with window title", .tags(.fast)) func imageCommandWithWindowTitle() throws { // Test window title capture let command = try ImageCommand.parse([ "--window-title", "Documents", ]) #expect(command.windowTitle == "Documents") } @Test("Command with output path", .tags(.fast)) func imageCommandWithOutput() throws { // Test output path specification let outputPath = "/tmp/test-images" let command = try ImageCommand.parse([ "--path", outputPath, ]) #expect(command.path == outputPath) } @Test("Command with format option", .tags(.fast)) func imageCommandWithFormat() throws { // Test format specification let command = try ImageCommand.parse([ "--format", "jpg", ]) #expect(command.format == .jpg) } @Test("Command with focus option", .tags(.fast)) func imageCommandWithFocus() throws { // Test focus option let command = try ImageCommand.parse([ "--capture-focus", "foreground", ]) #expect(command.captureFocus == .foreground) } @Test("Command with JSON output", .tags(.fast)) func imageCommandWithJSONOutput() throws { // Test JSON output flag let command = try ImageCommand.parse([ "--json-output", ]) #expect(command.jsonOutput == true) } @Test("Command with multi mode", .tags(.fast)) func imageCommandWithMultiMode() throws { // Test multi capture mode let command = try ImageCommand.parse([ "--mode", "multi", ]) #expect(command.mode == .multi) } @Test("Command with screen index", .tags(.fast)) func imageCommandWithScreenIndex() throws { // Test screen index specification let command = try ImageCommand.parse([ "--screen-index", "1", ]) #expect(command.screenIndex == 1) } @Test("Command with analyze option", .tags(.fast)) func imageCommandWithAnalyze() throws { // Test analyze option parsing let command = try ImageCommand.parse([ "--analyze", "What is shown in this image?", ]) #expect(command.analyze == "What is shown in this image?") } @Test("Command with analyze and app", .tags(.fast)) func imageCommandWithAnalyzeAndApp() throws { // Test analyze with app specification let command = try ImageCommand.parse([ "--app", "Safari", "--analyze", "Summarize this webpage", ]) #expect(command.app == "Safari") #expect(command.analyze == "Summarize this webpage") } @Test("Command with analyze and mode", .tags(.fast)) func imageCommandWithAnalyzeAndMode() throws { // Test analyze with different capture modes let command = try ImageCommand.parse([ "--mode", "frontmost", "--analyze", "What errors are shown?", ]) #expect(command.mode == .frontmost) #expect(command.analyze == "What errors are shown?") } @Test("Command with analyze and JSON output", .tags(.fast)) func imageCommandWithAnalyzeAndJSON() throws { // Test analyze with JSON output let command = try ImageCommand.parse([ "--analyze", "Describe the UI", "--json-output", ]) #expect(command.analyze == "Describe the UI") #expect(command.jsonOutput == true) } @Test("Command with retina flag", .tags(.fast)) func imageCommandWithRetinaFlag() throws { let command = try ImageCommand.parse(["--retina"]) #expect(command.retina == true) } // MARK: - Parameterized Command Tests @Test( "Various command combinations", arguments: [ (args: ["--mode", "screen", "--format", "png"], mode: CaptureMode.screen, format: ImageFormat.png), (args: ["--mode", "window", "--format", "jpg"], mode: CaptureMode.window, format: ImageFormat.jpg), (args: ["--mode", "multi", "--json-output"], mode: CaptureMode.multi, format: ImageFormat.png), ] ) func commandCombinations(args: [String], mode: CaptureMode, format: ImageFormat) throws { let command = try ImageCommand.parse(args) #expect(command.mode == mode) #expect(command.format == format) } @Test( "Analyze option with different modes", arguments: [ ( args: ["--mode", "screen", "--analyze", "What is on screen?"], mode: CaptureMode.screen, prompt: "What is on screen?" ), ( args: ["--mode", "window", "--analyze", "Describe this window"], mode: CaptureMode.window, prompt: "Describe this window" ), ( args: ["--mode", "multi", "--analyze", "Compare windows"], mode: CaptureMode.multi, prompt: "Compare windows" ), ( args: ["--mode", "frontmost", "--analyze", "What app is this?"], mode: CaptureMode.frontmost, prompt: "What app is this?" ), ] ) func analyzeWithDifferentModes(args: [String], mode: CaptureMode, prompt: String) throws { let command = try ImageCommand.parse(args) #expect(command.mode == mode) #expect(command.analyze == prompt) } @Test( "Invalid arguments throw errors", arguments: [ ["--mode", "invalid"], ["--format", "bmp"], ["--capture-focus", "neither"], ["--screen-index", "abc"], ] ) func invalidArguments(args: [String]) { #expect(throws: (any Error).self) { try CLIOutputCapture.suppressStderr { _ = try ImageCommand.parse(args) } } } // MARK: - Model Tests @Test("SavedFile model creation", .tags(.fast)) func savedFileModel() { let savedFile = SavedFile( path: "/tmp/screenshot.png", item_label: "Screen 1", window_title: nil, window_id: nil, window_index: nil, mime_type: "image/png" ) #expect(savedFile.path == "/tmp/screenshot.png") #expect(savedFile.item_label == "Screen 1") #expect(savedFile.mime_type == "image/png") } @Test("ImageCaptureData encoding", .tags(.fast)) func imageCaptureDataEncoding() throws { let savedFile = SavedFile( path: "/tmp/test.png", item_label: "Test", window_title: nil, window_id: nil, window_index: nil, mime_type: "image/png" ) let captureData = ImageCaptureData(saved_files: [savedFile]) // Test JSON encoding let encoder = JSONEncoder() // Properties are already in snake_case, no conversion needed let data = try encoder.encode(captureData) #expect(!data.isEmpty) // Test decoding let decoder = JSONDecoder() // Properties are already in snake_case, no conversion needed let decoded = try decoder.decode(ImageCaptureData.self, from: data) #expect(decoded.saved_files.count == 1) #expect(decoded.saved_files[0].path == "/tmp/test.png") } // MARK: - Enum Raw Value Tests @Test("CaptureMode raw values", .tags(.fast)) func captureModeRawValues() { #expect(CaptureMode.screen.rawValue == "screen") #expect(CaptureMode.window.rawValue == "window") #expect(CaptureMode.multi.rawValue == "multi") } @Test("ImageFormat raw values", .tags(.fast)) func imageFormatRawValues() { #expect(ImageFormat.png.rawValue == "png") #expect(ImageFormat.jpg.rawValue == "jpg") } @Test("CaptureFocus raw values", .tags(.fast)) func captureFocusRawValues() { #expect(CaptureFocus.background.rawValue == "background") #expect(CaptureFocus.foreground.rawValue == "foreground") } // MARK: - Mode Determination & Logic Tests @Test("Mode determination logic", .tags(.fast)) func modeDeterminationLogic() throws { // No mode, no app -> should default to screen let screenCommand = try ImageCommand.parse([]) #expect(screenCommand.mode == nil) #expect(screenCommand.app == nil) // No mode, with app -> should infer window mode in actual execution let windowCommand = try ImageCommand.parse(["--app", "Finder"]) #expect(windowCommand.mode == nil) #expect(windowCommand.app == "Finder") // Explicit mode should be preserved let explicitCommand = try ImageCommand.parse(["--mode", "multi"]) #expect(explicitCommand.mode == .multi) } @Test("Default values verification", .tags(.fast)) func defaultValues() throws { let command = try ImageCommand.parse([]) #expect(command.mode == nil) #expect(command.format == .png) #expect(command.path == nil) #expect(command.app == nil) #expect(command.windowTitle == nil) #expect(command.windowIndex == nil) #expect(command.screenIndex == nil) #expect(command.captureFocus == .auto) #expect(command.jsonOutput == false) #expect(command.analyze == nil) } @Test( "Screen index boundary values", arguments: [0, 1, 99, 9999] ) func screenIndexBoundaries(index: Int) throws { let command = try ImageCommand.parse(["--screen-index", String(index)]) #expect(command.screenIndex == index) } @Test( "Window index boundary values", arguments: [0, 1, 10, 9999] ) func windowIndexBoundaries(index: Int) throws { let command = try ImageCommand.parse(["--window-index", String(index)]) #expect(command.windowIndex == index) } @Test("Error handling for invalid combinations", .tags(.fast)) func invalidCombinations() { // Window capture without app should fail in execution // This tests the parsing, execution would fail later do { let command = try ImageCommand.parse(["--mode", "window"]) #expect(command.mode == .window) #expect(command.app == nil) // This would cause execution error } catch { Issue.record("Parsing should succeed even with invalid combinations") } } // MARK: - Window Selection Tests @Test("Prefers the first renderable main window when overlays exist", .tags(.imageCapture)) func imageCommandPrefersRenderableWindow() async throws { let appName = "iTerm2" let overlay = ServiceWindowInfo( windowID: 10, title: "Command Palette", bounds: CGRect(x: 0, y: 0, width: 80, height: 80), isMinimized: false, isMainWindow: false, windowLevel: 0, alpha: 1.0, index: 0 ) let terminal = ServiceWindowInfo( windowID: 11, title: "zsh — main", bounds: CGRect(x: 50, y: 50, width: 1280, height: 720), isMinimized: false, isMainWindow: true, windowLevel: 0, alpha: 1.0, index: 1 ) let windows = [overlay, terminal] let appInfo = ServiceApplicationInfo( processIdentifier: 4242, bundleIdentifier: "com.googlecode.iterm2", name: appName, windowCount: windows.count ) let captureResult = Self.makeCaptureResult(app: appInfo, window: terminal) let captureService = StubScreenCaptureService(permissionGranted: true) var recordedWindowIndex: Int? captureService.captureWindowHandler = { identifier, index, _ in #expect(identifier == appName) recordedWindowIndex = index return captureResult } let applications = StubApplicationService(applications: [appInfo], windowsByApp: [appName: windows]) let windowService = StubWindowService(windowsByApp: [appName: windows]) let services = TestServicesFactory.makePeekabooServices( applications: applications, windows: windowService, screenCapture: captureService ) let outputPath = Self.makeTempCapturePath("iterm.png") var command = try ImageCommand.parse(["--app", appName, "--path", outputPath]) command.captureFocus = .background let runtime = CommandRuntime( configuration: .init(verbose: false, jsonOutput: true, logLevel: nil), services: services ) try await command.run(using: runtime) let index = try #require(recordedWindowIndex) #expect(index == terminal.index) try? FileManager.default.removeItem(atPath: outputPath) } @Test("Honors --window-title when selecting a window", .tags(.imageCapture)) func imageCommandMatchesWindowTitle() async throws { let appName = "LogsApp" let inspector = ServiceWindowInfo( windowID: 20, title: "Inspector", bounds: CGRect(x: 0, y: 0, width: 640, height: 480), isMinimized: false, isMainWindow: false, windowLevel: 0, alpha: 1.0, index: 0 ) let logs = ServiceWindowInfo( windowID: 21, title: "Server Logs", bounds: CGRect(x: 100, y: 80, width: 1024, height: 768), isMinimized: false, isMainWindow: true, windowLevel: 0, alpha: 1.0, index: 1 ) let windows = [inspector, logs] let appInfo = ServiceApplicationInfo( processIdentifier: 5252, bundleIdentifier: "dev.logs.app", name: appName, windowCount: windows.count ) let captureResult = Self.makeCaptureResult(app: appInfo, window: logs) let captureService = StubScreenCaptureService(permissionGranted: true) var recordedWindowIndex: Int? captureService.captureWindowHandler = { _, index, _ in recordedWindowIndex = index return captureResult } let applications = StubApplicationService(applications: [appInfo], windowsByApp: [appName: windows]) let windowService = StubWindowService(windowsByApp: [appName: windows]) let services = TestServicesFactory.makePeekabooServices( applications: applications, windows: windowService, screenCapture: captureService ) let outputPath = Self.makeTempCapturePath("logs.png") var command = try ImageCommand.parse([ "--app", appName, "--window-title", "Logs", "--path", outputPath, ]) command.captureFocus = .background let runtime = CommandRuntime( configuration: .init(verbose: false, jsonOutput: true, logLevel: nil), services: services ) try await command.run(using: runtime) let index = try #require(recordedWindowIndex) #expect(index == logs.index) try? FileManager.default.removeItem(atPath: outputPath) } @Test("Throws when --window-title does not match any window", .tags(.imageCapture)) func imageCommandThrowsWhenWindowTitleMissing() async throws { let appName = "Notes" let notesWindow = ServiceWindowInfo( windowID: 31, title: "All Notes", bounds: CGRect(x: 0, y: 0, width: 900, height: 600), isMinimized: false, isMainWindow: true, windowLevel: 0, alpha: 1.0, index: 0 ) let appInfo = ServiceApplicationInfo( processIdentifier: 6060, bundleIdentifier: "com.example.notes", name: appName, windowCount: 1 ) let captureService = StubScreenCaptureService(permissionGranted: true) let applications = StubApplicationService(applications: [appInfo], windowsByApp: [appName: [notesWindow]]) let windowService = StubWindowService(windowsByApp: [appName: [notesWindow]]) let services = TestServicesFactory.makePeekabooServices( applications: applications, windows: windowService, screenCapture: captureService ) let result = try await InProcessCommandRunner.run( [ "image", "--app", appName, "--window-title", "Nonexistent", "--capture-focus", "background", "--path", Self.makeTempCapturePath("notes.png"), "--json-output", ], services: services ) #expect(result.exitStatus == 1) let response = try JSONDecoder().decode( JSONResponse.self, from: Data(result.combinedOutput.utf8) ) #expect(response.success == false) #expect(response.error?.code == ErrorCode.WINDOW_NOT_FOUND.rawValue) } @Test("Defaults to 1x logical scale", .tags(.imageCapture)) func imageCommandUsesLogicalScaleByDefault() async throws { let screens = [Self.makeScreenInfo(scale: 2.0)] let captureResult = Self.makeScreenCaptureResult(size: CGSize(width: 1200, height: 800), scale: 1.0) let captureService = StubScreenCaptureService(permissionGranted: true) var recordedScale: CaptureScalePreference? captureService.captureScreenHandler = { _, scale in recordedScale = scale return captureResult } let services = TestServicesFactory.makePeekabooServices( screens: screens, screenCapture: captureService ) var command = try ImageCommand.parse([ "--mode", "screen", "--path", Self.makeTempCapturePath("logical.png"), "--json-output", ]) let runtime = CommandRuntime( configuration: .init(verbose: false, jsonOutput: true, logLevel: nil), services: services ) try await command.run(using: runtime) #expect(recordedScale == .logical1x) } @Test("Retina flag opts into native scale", .tags(.imageCapture)) func imageCommandHonorsRetinaFlag() async throws { let screens = [Self.makeScreenInfo(scale: 2.0)] let captureResult = Self.makeScreenCaptureResult(size: CGSize(width: 2400, height: 1600), scale: 2.0) let captureService = StubScreenCaptureService(permissionGranted: true) var recordedScale: CaptureScalePreference? captureService.captureScreenHandler = { _, scale in recordedScale = scale return captureResult } let services = TestServicesFactory.makePeekabooServices( screens: screens, screenCapture: captureService ) var command = try ImageCommand.parse([ "--mode", "screen", "--retina", "--path", Self.makeTempCapturePath("retina.png"), "--json-output", ]) let runtime = CommandRuntime( configuration: .init(verbose: false, jsonOutput: true, logLevel: nil), services: services ) try await command.run(using: runtime) #expect(recordedScale == .native) } @Test("Skips windows marked non-shareable", .tags(.imageCapture)) func imageCommandSkipsNonShareableWindows() async throws { let appName = "Console" let hidden = ServiceWindowInfo( windowID: 90, title: "Console Overlay", bounds: CGRect(x: 0, y: 0, width: 400, height: 200), index: 0, sharingState: .some(.none) ) let visible = ServiceWindowInfo( windowID: 91, title: "Logs", bounds: CGRect(x: 40, y: 40, width: 1400, height: 900), index: 1, sharingState: .readWrite ) let appInfo = ServiceApplicationInfo( processIdentifier: 7070, bundleIdentifier: "dev.console", name: appName, windowCount: 2 ) let captureResult = Self.makeCaptureResult(app: appInfo, window: visible) let captureService = StubScreenCaptureService(permissionGranted: true) var recordedWindowIndex: Int? captureService.captureWindowHandler = { _, index, _ in recordedWindowIndex = index return captureResult } let services = TestServicesFactory.makePeekabooServices( applications: StubApplicationService(applications: [appInfo], windowsByApp: [appName: [hidden, visible]]), windows: StubWindowService(windowsByApp: [appName: [hidden, visible]]), screenCapture: captureService ) let path = Self.makeTempCapturePath("console.png") var command = try ImageCommand.parse(["--app", appName, "--path", path]) command.captureFocus = .background let runtime = CommandRuntime( configuration: .init(verbose: false, jsonOutput: true, logLevel: nil), services: services ) try await command.run(using: runtime) let index = try #require(recordedWindowIndex) #expect(index == visible.index) try? FileManager.default.removeItem(atPath: path) } @Test("Errors when only hidden windows remain", .tags(.imageCapture)) func imageCommandFailsWhenAllWindowsHidden() async throws { let appName = "OverlayApp" let hidden = ServiceWindowInfo( windowID: 101, title: "Overlay", bounds: CGRect(x: 0, y: 0, width: 500, height: 300), sharingState: .some(.none) ) let appInfo = ServiceApplicationInfo( processIdentifier: 9090, bundleIdentifier: "dev.overlay", name: appName, windowCount: 1 ) let captureService = StubScreenCaptureService(permissionGranted: true) let services = TestServicesFactory.makePeekabooServices( applications: StubApplicationService(applications: [appInfo], windowsByApp: [appName: [hidden]]), windows: StubWindowService(windowsByApp: [appName: [hidden]]), screenCapture: captureService ) let result = try await InProcessCommandRunner.run( [ "image", "--app", appName, "--capture-focus", "background", "--path", Self.makeTempCapturePath("overlay.png"), "--json-output", ], services: services ) #expect(result.exitStatus == 1) let response = try JSONDecoder().decode( JSONResponse.self, from: Data(result.combinedOutput.utf8) ) #expect(response.success == false) #expect(response.error?.code == ErrorCode.WINDOW_NOT_FOUND.rawValue) } private static func makeTempCapturePath(_ suffix: String) -> String { FileManager.default .temporaryDirectory .appendingPathComponent("image-command-tests-\(UUID().uuidString)-\(suffix)") .path } private static func makeCaptureResult( app: ServiceApplicationInfo, window: ServiceWindowInfo ) -> CaptureResult { let metadata = CaptureMetadata( size: window.bounds.size, mode: .window, applicationInfo: app, windowInfo: window ) return CaptureResult( imageData: Data(repeating: 0xAB, count: 32), metadata: metadata ) } private static func makeScreenInfo(scale: CGFloat) -> ScreenInfo { ScreenInfo( index: 0, name: "Retina", frame: CGRect(x: 0, y: 0, width: 1200, height: 800), visibleFrame: CGRect(x: 0, y: 0, width: 1200, height: 800), isPrimary: true, scaleFactor: scale, displayID: 1 ) } private static func makeScreenCaptureResult(size: CGSize, scale: CGFloat) -> CaptureResult { let metadata = CaptureMetadata( size: size, mode: .screen, displayInfo: DisplayInfo( index: 0, name: "Retina", bounds: CGRect(origin: .zero, size: CGSize(width: size.width / scale, height: size.height / scale)), scaleFactor: scale ) ) return CaptureResult( imageData: Data(repeating: 0xCD, count: 16), metadata: metadata ) } } #endif

Latest Blog Posts

The 50MB Markdown Files That Broke Our Server
By punkpeye on December 3, 2025.
react
react-router
node-js
OpenTelemetry for Model Context Protocol (MCP) Analytics and Agent Observability
By Om-Shree-0709 on November 29, 2025.
observability
mcp
opentelemetry
Securing Enterprise AI Agents with Unique Identities in the Model Context Protocol (MCP)
By Om-Shree-0709 on November 27, 2025.

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/steipete/Peekaboo'

If you have feedback or need assistance with the MCP directory API, please join our Discord server