Peekaboo MCP

by steipete
SeeCommand.swift • 39.2 kB
import AppKit
import ArgumentParser
import AXorcist
import CoreGraphics
import Foundation
import PeekabooCore
import PeekabooFoundation
import ScreenCaptureKit

/// Capture a screenshot and build an interactive UI map
@available(macOS 14.0, *)
struct SeeCommand: AsyncParsableCommand, VerboseCommand, ErrorHandlingCommand, OutputFormattable, ApplicationResolvable {
    static let configuration = VisionToolDefinitions.see.commandConfiguration

    @Option(help: "Application name to capture, or special values: 'menubar', 'frontmost'")
    var app: String?

    @Option(name: .long, help: "Target application by process ID")
    var pid: Int32?

    @Option(help: "Specific window title to capture")
    var windowTitle: String?

    @Option(help: "Capture mode (screen, window, frontmost)")
    var mode: CaptureMode?

    @Option(help: "Output path for screenshot")
    var path: String?

    @Option(
        name: .long,
        help: "Specific screen index to capture (0-based). If not specified, captures all screens when in screen mode"
    )
    var screenIndex: Int?

    @Flag(help: "Generate annotated screenshot with interaction markers")
    var annotate = false

    @Option(help: "Analyze captured content with AI")
    var analyze: String?

    @Flag(help: "Output in JSON format")
    var jsonOutput = false

    @Flag(name: .shortAndLong, help: "Enable verbose logging for detailed output")
    var verbose = false

    enum CaptureMode: String, ExpressibleByArgument {
        case screen
        case window
        case frontmost
    }

    @MainActor
    mutating func run() async throws {
        let startTime = Date()
        configureVerboseLogging()
        Logger.shared.setJsonOutputMode(self.jsonOutput)
        if self.jsonOutput && !self.verbose {
            // Ensure timing/operation logs appear in JSON debug_logs
            Logger.shared.setVerboseMode(true)
        }

        Logger.shared.operationStart("see_command", metadata: [
            "app": self.app ?? "none",
            "mode": self.mode?.rawValue ?? "auto",
            "annotate": self.annotate,
            "hasAnalyzePrompt": self.analyze != nil,
        ])

        do {
            // Check permissions
            Logger.shared.verbose("Checking screen recording permissions", category: "Permissions")
            try await requireScreenRecordingPermission()
            Logger.shared.verbose("Screen recording permission granted", category: "Permissions")

            // Perform capture and element detection
            Logger.shared.verbose("Starting capture and detection phase", category: "Capture")
            let captureResult = try await performCaptureWithDetection()
            Logger.shared.verbose("Capture completed successfully", category: "Capture", metadata: [
                "sessionId": captureResult.sessionId,
                "elementCount": captureResult.elements.all.count,
                "screenshotSize": self.getFileSize(captureResult.screenshotPath) ?? 0,
            ])

            // Generate annotated screenshot if requested
            var annotatedPath: String?
            if self.annotate {
                Logger.shared.operationStart("generate_annotations")
                annotatedPath = try await self.generateAnnotatedScreenshot(
                    sessionId: captureResult.sessionId,
                    originalPath: captureResult.screenshotPath
                )
                Logger.shared.operationComplete("generate_annotations", metadata: [
                    "annotatedPath": annotatedPath ?? "none",
                ])
            }

            // Perform AI analysis if requested
            var analysisResult: SeeAnalysisData?
            if let prompt = analyze {
                // Pre-analysis diagnostics
                let fileSize = (try? FileManager.default
                    .attributesOfItem(atPath: captureResult.screenshotPath)[.size] as? Int) ?? 0
                Logger.shared.verbose(
                    "Starting AI analysis",
                    category: "AI",
                    metadata: [
                        "imagePath": captureResult.screenshotPath,
                        "imageSizeBytes": fileSize,
                        "promptLength": prompt.count
                    ]
                )
                Logger.shared.operationStart("ai_analysis", metadata: ["promptPreview": String(prompt.prefix(80))])
                Logger.shared.startTimer("ai_generate")
                analysisResult = try await self.performAnalysisDetailed(
                    imagePath: captureResult.screenshotPath,
                    prompt: prompt
                )
                Logger.shared.stopTimer("ai_generate")
                Logger.shared.operationComplete(
                    "ai_analysis",
                    success: analysisResult != nil,
                    metadata: [
                        "provider": analysisResult?.provider ?? "unknown",
                        "model": analysisResult?.model ?? "unknown"
                    ]
                )
            }

            // Output results
            let executionTime = Date().timeIntervalSince(startTime)
            Logger.shared.operationComplete("see_command", metadata: [
                "executionTimeMs": Int(executionTime * 1000),
                "success": true,
            ])

            if self.jsonOutput {
                await self.outputJSONResults(
                    sessionId: captureResult.sessionId,
                    screenshotPath: captureResult.screenshotPath,
                    annotatedPath: annotatedPath,
                    metadata: captureResult.metadata,
                    elements: captureResult.elements,
                    analysis: analysisResult,
                    executionTime: executionTime
                )
            } else {
                await self.outputTextResults(
                    sessionId: captureResult.sessionId,
                    screenshotPath: captureResult.screenshotPath,
                    annotatedPath: annotatedPath,
                    metadata: captureResult.metadata,
                    elements: captureResult.elements,
                    analysis: analysisResult,
                    executionTime: executionTime
                )
            }
        } catch {
            Logger.shared.operationComplete("see_command", success: false, metadata: [
                "error": error.localizedDescription,
            ])
            self.handleError(error) // Use protocol's error handling
            throw ExitCode.failure
        }
    }

    private func getFileSize(_ path: String) -> Int? {
        try? FileManager.default.attributesOfItem(atPath: path)[.size] as? Int
    }

    private func performCaptureWithDetection() async throws -> CaptureAndDetectionResult {
        // Handle special app cases
        let captureResult: CaptureResult
        if let appName = self.app?.lowercased() {
            switch appName {
            case "menubar":
                Logger.shared.verbose("Capturing menu bar area", category: "Capture")
                captureResult = try await self.captureMenuBar()
            case "frontmost":
                Logger.shared.verbose("Capturing frontmost window (via --app frontmost)", category: "Capture")
                captureResult = try await PeekabooServices.shared.screenCapture.captureFrontmost()
            default:
                // Use normal capture logic
                captureResult = try await self.performStandardCapture()
            }
        } else {
            // Use normal capture logic
            captureResult = try await self.performStandardCapture()
        }

        // Save screenshot
        Logger.shared.startTimer("file_write")
        let outputPath = try saveScreenshot(captureResult.imageData)
        Logger.shared.stopTimer("file_write")

        // Create window context from capture metadata
        let windowContext = WindowContext(
            applicationName: captureResult.metadata.applicationInfo?.name,
            windowTitle: captureResult.metadata.windowInfo?.title,
            windowBounds: captureResult.metadata.windowInfo?.bounds
        )

        // Detect UI elements with window context
        Logger.shared.operationStart("element_detection")
        let detectionResult = try await PeekabooServices.shared.automation.detectElements(
            in: captureResult.imageData,
            sessionId: nil,
            windowContext: windowContext
        )
        Logger.shared.operationComplete("element_detection")

        // Update the result with the correct screenshot path
        let resultWithPath = ElementDetectionResult(
            sessionId: detectionResult.sessionId,
            screenshotPath: outputPath,
            elements: detectionResult.elements,
            metadata: detectionResult.metadata
        )

        // Store the result in session
        try await PeekabooServices.shared.sessions.storeDetectionResult(
            sessionId: detectionResult.sessionId,
            result: resultWithPath
        )

        return CaptureAndDetectionResult(
            sessionId: detectionResult.sessionId,
            screenshotPath: outputPath,
            elements: detectionResult.elements,
            metadata: detectionResult.metadata
        )
    }

    private func performStandardCapture() async throws -> CaptureResult {
        let effectiveMode = self.determineMode()
        Logger.shared.verbose(
            "Determined capture mode",
            category: "Capture",
            metadata: ["mode": effectiveMode.rawValue]
        )
        Logger.shared.operationStart("capture_phase", metadata: ["mode": effectiveMode.rawValue])

        switch effectiveMode {
        case .screen:
            // Handle screen capture with multi-screen support
            let result = try await self.performScreenCapture()
            Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
            return result

        case .window:
            if self.app != nil || self.pid != nil {
                let appIdentifier = try self.resolveApplicationIdentifier()
                Logger.shared.verbose("Initiating window capture", category: "Capture", metadata: [
                    "app": appIdentifier,
                    "windowTitle": self.windowTitle ?? "any",
                ])

                // Find specific window if title is provided
                if let title = windowTitle {
                    Logger.shared.verbose(
                        "Searching for window with title",
                        category: "WindowSearch",
                        metadata: ["title": title]
                    )
                    let windowsOutput = try await PeekabooServices.shared.applications.listWindows(
                        for: appIdentifier,
                        timeout: nil
                    )
                    Logger.shared.verbose(
                        "Found windows",
                        category: "WindowSearch",
                        metadata: ["count": windowsOutput.data.windows.count]
                    )
                    if let windowIndex = windowsOutput.data.windows.firstIndex(where: { $0.title.contains(title) }) {
                        Logger.shared.verbose(
                            "Window found at index",
                            category: "WindowSearch",
                            metadata: ["index": windowIndex]
                        )
                        Logger.shared.startTimer("window_capture")
                        let result = try await PeekabooServices.shared.screenCapture.captureWindow(
                            appIdentifier: appIdentifier,
                            windowIndex: windowIndex
                        )
                        Logger.shared.stopTimer("window_capture")
                        Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
                        return result
                    } else {
                        Logger.shared.error(
                            "Window not found with title",
                            category: "WindowSearch",
                            metadata: ["title": title]
                        )
                        throw CaptureError.windowNotFound
                    }
                } else {
                    let result = try await PeekabooServices.shared.screenCapture.captureWindow(
                        appIdentifier: appIdentifier,
                        windowIndex: nil
                    )
                    Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
                    return result
                }
            } else {
                throw ValidationError("--app or --pid is required for window mode")
            }

        case .frontmost:
            Logger.shared.verbose("Capturing frontmost window")
            let result = try await PeekabooServices.shared.screenCapture.captureFrontmost()
            Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
            return result
        }
    }

    private func captureMenuBar() async throws -> CaptureResult {
        // Get the main screen bounds
        guard let mainScreen = NSScreen.main else {
            throw PeekabooError.captureFailed("No main screen found")
        }

        // Menu bar is at the top of the screen
        let menuBarHeight: CGFloat = 24.0 // Standard macOS menu bar height
        let menuBarRect = CGRect(
            x: mainScreen.frame.origin.x,
            y: mainScreen.frame.origin.y + mainScreen.frame.height - menuBarHeight,
            width: mainScreen.frame.width,
            height: menuBarHeight
        )

        // Capture the menu bar area
        return try await PeekabooServices.shared.screenCapture.captureArea(menuBarRect)
    }

    private func saveScreenshot(_ imageData: Data) throws -> String {
        let outputPath: String
        if let providedPath = path {
            outputPath = NSString(string: providedPath).expandingTildeInPath
        } else {
            let timestamp = Date().timeIntervalSince1970
            let filename = "peekaboo_see_\(Int(timestamp)).png"
            let defaultPath = ConfigurationManager.shared.getDefaultSavePath(cliValue: nil)
            outputPath = (defaultPath as NSString).appendingPathComponent(filename)
        }

        // Create directory if needed
        let directory = (outputPath as NSString).deletingLastPathComponent
        try FileManager.default.createDirectory(
            atPath: directory,
            withIntermediateDirectories: true
        )

        // Save the image
        try imageData.write(to: URL(fileURLWithPath: outputPath))
        Logger.shared.verbose("Saved screenshot to: \(outputPath)")

        return outputPath
    }

    private func generateAnnotatedScreenshot(
        sessionId: String,
        originalPath: String
    ) async throws -> String {
        // Get detection result from session
        guard let detectionResult = try await PeekabooServices.shared.sessions.getDetectionResult(sessionId: sessionId)
        else {
            Logger.shared.info("No detection result found for session")
            return originalPath
        }

        // Create annotated image
        let annotatedPath = (originalPath as NSString).deletingPathExtension + "_annotated.png"

        // Load original image
        guard let nsImage = NSImage(contentsOfFile: originalPath) else {
            throw CaptureError.fileIOError("Failed to load image from \(originalPath)")
        }

        // Get image size
        let imageSize = nsImage.size

        // Create bitmap context
        guard let bitmapRep = NSBitmapImageRep(
            bitmapDataPlanes: nil,
            pixelsWide: Int(imageSize.width),
            pixelsHigh: Int(imageSize.height),
            bitsPerSample: 8,
            samplesPerPixel: 4,
            hasAlpha: true,
            isPlanar: false,
            colorSpaceName: .calibratedRGB,
            bytesPerRow: 0,
            bitsPerPixel: 0
        ) else {
            throw CaptureError.captureFailure("Failed to create bitmap representation")
        }

        // Draw into context
        NSGraphicsContext.saveGraphicsState()
        guard let context = NSGraphicsContext(bitmapImageRep: bitmapRep) else {
            Logger.shared.error("Failed to create graphics context")
            throw CaptureError.captureFailure("Failed to create graphics context")
        }
        NSGraphicsContext.current = context
        Logger.shared.verbose("Graphics context created successfully")

        // Draw original image
        nsImage.draw(in: NSRect(origin: .zero, size: imageSize))
        Logger.shared.verbose("Original image drawn")

        // Configure text attributes - smaller font for less occlusion
        let fontSize: CGFloat = 8
        let textAttributes: [NSAttributedString.Key: Any] = [
            .font: NSFont.systemFont(ofSize: fontSize, weight: .semibold),
            .foregroundColor: NSColor.white,
        ]

        // Role-based colors from spec
        let roleColors: [ElementType: NSColor] = [
            .button: NSColor(red: 0, green: 0.48, blue: 1.0, alpha: 1.0), // #007AFF
            .textField: NSColor(red: 0.204, green: 0.78, blue: 0.349, alpha: 1.0), // #34C759
            .link: NSColor(red: 0, green: 0.48, blue: 1.0, alpha: 1.0), // #007AFF
            .checkbox: NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0), // #8E8E93
            .slider: NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0), // #8E8E93
            .menu: NSColor(red: 0, green: 0.48, blue: 1.0, alpha: 1.0), // #007AFF
        ]

        // Draw UI elements
        let enabledElements = detectionResult.elements.all.filter(\.isEnabled)
        if enabledElements.isEmpty {
            Logger.shared.info("No enabled elements to annotate. Total elements: \(detectionResult.elements.all.count)")
            print("⚠️ No interactive UI elements found to annotate")
            return originalPath // Return original image if no elements to annotate
        }

        Logger.shared.info(
            "Annotating \(enabledElements.count) enabled elements out of \(detectionResult.elements.all.count) total"
        )
        Logger.shared.verbose("Image size: \(imageSize)")

        // Calculate window origin from element bounds if we have elements
        var windowOrigin = CGPoint.zero
        if !detectionResult.elements.all.isEmpty {
            // Find the leftmost and topmost element to estimate window origin
            let minX = detectionResult.elements.all.map(\.bounds.minX).min() ?? 0
            let minY = detectionResult.elements.all.map(\.bounds.minY).min() ?? 0
            windowOrigin = CGPoint(x: minX, y: minY)
            Logger.shared.verbose("Estimated window origin from elements: \(windowOrigin)")
        }

        // Convert all element bounds to window-relative coordinates and flip Y
        var elementRects: [(element: DetectedElement, rect: NSRect)] = []
        for element in enabledElements {
            let elementFrame = CGRect(
                x: element.bounds.origin.x - windowOrigin.x,
                y: element.bounds.origin.y - windowOrigin.y,
                width: element.bounds.width,
                height: element.bounds.height
            )
            let rect = NSRect(
                x: elementFrame.origin.x,
                y: imageSize.height - elementFrame.origin.y - elementFrame.height, // Flip Y coordinate
                width: elementFrame.width,
                height: elementFrame.height
            )
            elementRects.append((element: element, rect: rect))
        }

        // Create smart label placer for intelligent label positioning
        let labelPlacer = SmartLabelPlacer(image: nsImage, fontSize: fontSize, debugMode: verbose)

        // Draw elements and calculate label positions
        var labelPositions: [(rect: NSRect, connection: NSPoint?, element: DetectedElement)] = []

        for (element, rect) in elementRects {
            Logger.shared.verbose(
                "Drawing element: \(element.id), type: \(element.type), original bounds: \(element.bounds), window rect: \(rect)"
            )

            // Get color for element type
            let color = roleColors[element.type] ?? NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0)

            // Draw bounding box
            color.withAlphaComponent(0.5).setFill()
            rect.fill()
            color.setStroke()
            let path = NSBezierPath(rect: rect)
            path.lineWidth = 2
            path.stroke()

            // Calculate label size
            let idString = NSAttributedString(string: element.id, attributes: textAttributes)
            let textSize = idString.size()
            let labelPadding: CGFloat = 4
            let labelSize = NSSize(width: textSize.width + labelPadding * 2, height: textSize.height + labelPadding)

            // Use smart label placer to find best position
            if let placement = labelPlacer.findBestLabelPosition(
                for: element,
                elementRect: rect,
                labelSize: labelSize,
                existingLabels: labelPositions.map { ($0.rect, $0.element) },
                allElements: elementRects
            ) {
                labelPositions.append((
                    rect: placement.labelRect,
                    connection: placement.connectionPoint,
                    element: element
                ))
            }
        }

        // NOTE: Old placement code removed - now using SmartLabelPlacer
        // [OLD CODE REMOVED - lines 483-785 contained the old placement logic]

        // Draw all labels and connection lines
        for (labelRect, connectionPoint, element) in labelPositions {
            // Draw connection line if label is outside - make it more subtle
            if let connection = connectionPoint {
                NSColor.black.withAlphaComponent(0.3).setStroke()
                let linePath = NSBezierPath()
                linePath.lineWidth = 0.5

                // Draw line from connection point to nearest edge of label
                linePath.move(to: connection)

                // Find the closest point on label rectangle to the connection point
                let closestX = max(labelRect.minX, min(connection.x, labelRect.maxX))
                let closestY = max(labelRect.minY, min(connection.y, labelRect.maxY))
                linePath.line(to: NSPoint(x: closestX, y: closestY))
                linePath.stroke()
            }

            // Draw label background - more transparent to show content beneath
            NSColor.black.withAlphaComponent(0.7).setFill()
            NSBezierPath(roundedRect: labelRect, xRadius: 1, yRadius: 1).fill()

            // Draw label border (same color as element) - thinner for less occlusion
            let color = roleColors[element.type] ?? NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0)
            color.withAlphaComponent(0.8).setStroke()
            let borderPath = NSBezierPath(roundedRect: labelRect, xRadius: 1, yRadius: 1)
            borderPath.lineWidth = 0.5
            borderPath.stroke()

            // Draw label text
            let idString = NSAttributedString(string: element.id, attributes: textAttributes)
            idString.draw(at: NSPoint(x: labelRect.origin.x + 4, y: labelRect.origin.y + 2))
        }

        NSGraphicsContext.restoreGraphicsState()

        // Save annotated image
        guard let pngData = bitmapRep.representation(using: .png, properties: [:]) else {
            throw CaptureError.captureFailure("Failed to create PNG data")
        }
        try pngData.write(to: URL(fileURLWithPath: annotatedPath))
        Logger.shared.verbose("Created annotated screenshot: \(annotatedPath)")

        // Log annotation info only in non-JSON mode
        if !self.jsonOutput {
            let interactableElements = detectionResult.elements.all.filter(\.isEnabled)
            print("📝 Created annotated screenshot with \(interactableElements.count) interactive elements")
        }

        return annotatedPath
    }

    // [OLD CODE REMOVED - massive cleanup of duplicate placement logic]
}

// MARK: - Supporting Types

private struct CaptureAndDetectionResult {
    let sessionId: String
    let screenshotPath: String
    let elements: DetectedElements
    let metadata: DetectionMetadata
}

private struct SessionPaths {
    let raw: String
    let annotated: String
    let map: String
}

// MARK: - JSON Output Structure (matching original)

struct UIElementSummary: Codable {
    let id: String
    let role: String
    let title: String?
    let label: String?
    let identifier: String?
    let is_actionable: Bool
    let keyboard_shortcut: String?
}

struct SeeAnalysisData: Codable {
    let provider: String
    let model: String
    let text: String
}

struct SeeResult: Codable {
    let session_id: String
    let screenshot_raw: String
    let screenshot_annotated: String
    let ui_map: String
    let application_name: String?
    let window_title: String?
    let is_dialog: Bool
    let element_count: Int
    let interactable_count: Int
    let capture_mode: String
    let analysis: SeeAnalysisData?
    let execution_time: TimeInterval
    let ui_elements: [UIElementSummary]
    let menu_bar: MenuBarSummary?
    var success: Bool = true
}

struct MenuBarSummary: Codable {
    let menus: [MenuSummary]

    struct MenuSummary: Codable {
        let title: String
        let item_count: Int
        let enabled: Bool
        let items: [MenuItemSummary]
    }

    struct MenuItemSummary: Codable {
        let title: String
        let enabled: Bool
        let keyboard_shortcut: String?
    }
}

// MARK: - Format Helpers Extension

extension SeeCommand {
    private func performAnalysisDetailed(imagePath: String, prompt: String) async throws -> SeeAnalysisData {
        // Use PeekabooCore AI service which is configured via ConfigurationManager/Tachikoma
        let ai = await PeekabooAIService()
        let res = try await ai.analyzeImageFileDetailed(at: imagePath, question: prompt, model: nil)
        return SeeAnalysisData(provider: res.provider, model: res.model, text: res.text)
    }

    private func determineMode() -> CaptureMode {
        if let mode = self.mode {
            mode
        } else if self.app != nil || self.windowTitle != nil {
            // If app or window title is specified, default to window mode
            .window
        } else {
            // Otherwise default to frontmost
            .frontmost
        }
    }

    // MARK: - Output Methods

    @MainActor
    private func outputJSONResults(
        sessionId: String,
        screenshotPath: String,
        annotatedPath: String?,
        metadata: DetectionMetadata,
        elements: DetectedElements,
        analysis: SeeAnalysisData?,
        executionTime: TimeInterval
    ) async {
        // Build UI element summaries
        let uiElements: [UIElementSummary] = elements.all.map { element in
            UIElementSummary(
                id: element.id,
                role: element.type.rawValue,
                title: element.attributes["title"],
                label: element.label,
                identifier: element.attributes["identifier"],
                is_actionable: element.isEnabled,
                keyboard_shortcut: element.attributes["keyboardShortcut"]
            )
        }

        // Build session paths
        let sessionPaths = SessionPaths(
            raw: screenshotPath,
            annotated: annotatedPath ?? screenshotPath,
            map: PeekabooServices.shared.sessions.getSessionStoragePath() + "/\(sessionId)/map.json"
        )

        // Structured analysis is passed in
        let output = await SeeResult(
            session_id: sessionId,
            screenshot_raw: sessionPaths.raw,
            screenshot_annotated: sessionPaths.annotated,
            ui_map: sessionPaths.map,
            application_name: metadata.windowContext?.applicationName,
            window_title: metadata.windowContext?.windowTitle,
            is_dialog: metadata.isDialog,
            element_count: metadata.elementCount,
            interactable_count: elements.all.count { $0.isEnabled },
            capture_mode: self.determineMode().rawValue,
            analysis: analysis,
            execution_time: executionTime,
            ui_elements: uiElements,
            menu_bar: self.getMenuBarItemsSummary()
        )

        outputSuccessCodable(data: output)
    }

    @MainActor
    private func getMenuBarItemsSummary() async -> MenuBarSummary {
        // Get menu bar items from service
        var menuExtras: [MenuExtraInfo] = []
        do {
            menuExtras = try await PeekabooServices.shared.menu.listMenuExtras()
        } catch {
            // If there's an error, just return empty array
            menuExtras = []
        }

        // Group items into menu categories
        // For now, we'll create a simplified view showing each menu bar item as a "menu"
        let menus = menuExtras.map { extra in
            MenuBarSummary.MenuSummary(
                title: extra.title,
                item_count: 1, // Each menu bar item is treated as a single menu
                enabled: true,
                items: [
                    MenuBarSummary.MenuItemSummary(
                        title: extra.title,
                        enabled: true,
                        keyboard_shortcut: nil
                    )
                ]
            )
        }

        return MenuBarSummary(menus: menus)
    }

    @MainActor
    private func outputTextResults(
        sessionId: String,
        screenshotPath: String,
        annotatedPath: String?,
        metadata: DetectionMetadata,
        elements: DetectedElements,
        analysis: SeeAnalysisData?,
        executionTime: TimeInterval
    ) async {
        let sessionPaths = SessionPaths(
            raw: screenshotPath,
            annotated: annotatedPath ?? screenshotPath,
            map: PeekabooServices.shared.sessions.getSessionStoragePath() + "/\(sessionId)/map.json"
        )
        let interactableCount = elements.all.count { $0.isEnabled }

        print("✅ Screenshot captured successfully")
        print("📍 Session ID: \(sessionId)")
        print("🖼 Raw screenshot: \(sessionPaths.raw)")
        if let annotated = annotatedPath {
            print("🎯 Annotated: \(annotated)")
        }
        print("🗺️ UI map: \(sessionPaths.map)")
        print("🔍 Found \(metadata.elementCount) UI elements (\(interactableCount) interactive)")

        if let app = metadata.windowContext?.applicationName {
            print("📱 Application: \(app)")
        }
        if let window = metadata.windowContext?.windowTitle {
            let windowType = metadata.isDialog ? "Dialog" : "Window"
            let icon = metadata.isDialog ? "🗨️" : "🪟"
            print("\(icon) \(windowType): \(window)")
        }

        // Show menu bar items
        // Get menu bar items from service
        let menuExtras: [MenuExtraInfo]
        do {
            menuExtras = try await PeekabooServices.shared.menu.listMenuExtras()
        } catch {
            // If there's an error, just return empty array
            menuExtras = []
        }

        if !menuExtras.isEmpty {
            print("📊 Menu Bar Items: \(menuExtras.count)")
            for item in menuExtras.prefix(10) { // Show first 10
                print("  • \(item.title)")
            }
            if menuExtras.count > 10 {
                print("  ... and \(menuExtras.count - 10) more")
            }
        }

        if let analysis = analysis {
            print("🤖 Analysis (\(analysis.provider)/\(analysis.model)):")
            print(analysis.text)
        }

        print("⏱️ Completed in \(String(format: "%.2f", executionTime))s")
    }
}

// MARK: - Multi-Screen Support

extension SeeCommand {
    private func performScreenCapture() async throws -> CaptureResult {
        // Log warning if annotation was requested for full screen captures
        if self.annotate {
            Logger.shared.info("Annotation is disabled for full screen captures due to performance constraints")
        }

        Logger.shared.verbose("Initiating screen capture", category: "Capture")
        Logger.shared.startTimer("screen_capture")
        defer { Logger.shared.stopTimer("screen_capture") }

        if let index = self.screenIndex ?? (self.analyze != nil ? 0 : nil) {
            // Capture specific screen
            Logger.shared.verbose("Capturing specific screen", category: "Capture", metadata: ["screenIndex": index])
            let result = try await PeekabooServices.shared.screenCapture.captureScreen(displayIndex: index)

            // Add display info to output
            if let displayInfo = result.metadata.displayInfo {
                let bounds = displayInfo.bounds
                print(
                    "🖥️ Display \(index): \(displayInfo.name ?? "Display \(index)") (\(Int(bounds.width))×\(Int(bounds.height)))"
                )
            }

            Logger.shared.verbose("Screen capture completed", category: "Capture", metadata: [
                "mode": "screen-index",
                "screenIndex": index,
                "imageBytes": result.imageData.count
            ])
            return result
        } else {
            // Capture all screens
            Logger.shared.verbose("Capturing all screens", category: "Capture")
            let results = try await self.captureAllScreens()

            if results.isEmpty {
                throw CaptureError.captureFailure("Failed to capture any screens")
            }

            // Save all screenshots except the first (which will be saved by the normal flow)
            print("📸 Captured \(results.count) screen(s):")
            for (index, result) in results.enumerated() {
                if index > 0 {
                    // Save additional screenshots
                    let screenPath: String
                    if let basePath = self.path {
                        // User specified a path - add screen index to filename
                        let directory = (basePath as NSString).deletingLastPathComponent
                        let filename = (basePath as NSString).lastPathComponent
                        let nameWithoutExt = (filename as NSString).deletingPathExtension
                        let ext = (filename as NSString).pathExtension
                        screenPath = (directory as NSString)
                            .appendingPathComponent("\(nameWithoutExt)_screen\(index).\(ext)")
                    } else {
                        // Default path with screen index
                        let timestamp = ISO8601DateFormatter().string(from: Date())
                        screenPath = "screenshot_\(timestamp)_screen\(index).png"
                    }

                    // Save the screenshot
                    try result.imageData.write(to: URL(fileURLWithPath: screenPath))

                    // Display info about this screen
                    if let displayInfo = result.metadata.displayInfo {
                        let fileSize = self.getFileSize(screenPath) ?? 0
                        let bounds = displayInfo.bounds
                        print(
                            "  🖥️ Display \(index): \(displayInfo.name ?? "Display \(index)") (\(Int(bounds.width))×\(Int(bounds.height))) → \(screenPath) (\(self.formatFileSize(Int64(fileSize))))"
                        )
                    }
                } else {
                    // First screen will be saved by the normal flow, just show info
                    if let displayInfo = result.metadata.displayInfo {
                        let bounds = displayInfo.bounds
                        print(
                            "  🖥️ Display \(index): \(displayInfo.name ?? "Display \(index)") (\(Int(bounds.width))×\(Int(bounds.height))) → (primary)"
                        )
                    }
                }
            }

            // Return the primary screen result (first one)
            Logger.shared.verbose("Multi-screen capture completed", category: "Capture", metadata: [
                "count": results.count,
                "primaryBytes": results.first?.imageData.count ?? 0
            ])
            return results[0]
        }
    }
}

// MARK: - Multi-Screen Support

extension SeeCommand {
    private func captureAllScreens() async throws -> [CaptureResult] {
        var results: [CaptureResult] = []

        // Get available displays from the screen capture service
        let content = try await SCShareableContent.current
        let displays = content.displays

        Logger.shared.info("Found \(displays.count) display(s) to capture")

        for (index, display) in displays.enumerated() {
            Logger.shared.verbose("Capturing display \(index)", category: "MultiScreen", metadata: [
                "displayID": display.displayID,
                "width": display.width,
                "height": display.height
            ])

            do {
                let result = try await PeekabooServices.shared.screenCapture.captureScreen(displayIndex: index)

                // Update path to include screen index if capturing multiple screens
                if displays.count > 1 {
                    let updatedResult = self.updateCaptureResultPath(result, screenIndex: index, displayInfo: display)
                    results.append(updatedResult)
                } else {
                    results.append(result)
                }
            } catch {
                Logger.shared.error("Failed to capture display \(index): \(error)")
                // Continue capturing other screens even if one fails
            }
        }

        if results.isEmpty {
            throw CaptureError.captureFailure("Failed to capture any screens")
        }

        return results
    }

    private func updateCaptureResultPath(
        _ result: CaptureResult,
        screenIndex: Int,
        displayInfo: SCDisplay
    ) -> CaptureResult {
        // Since CaptureResult is immutable and doesn't have a path property,
        // we can't update the path. Just return the original result.
        // The saved path is already included in result.savedPath if it was saved.
        result
    }

    private func formatFileSize(_ bytes: Int64) -> String {
        let formatter = ByteCountFormatter()
        formatter.countStyle = .file
        return formatter.string(fromByteCount: bytes)
    }
}
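
Usage (illustrative). Assuming the binary is installed as peekaboo and this command is registered under the name see (the actual names come from VisionToolDefinitions.see, which is not part of this file), a window capture with annotation and AI analysis might be invoked like this, with flag spellings following swift-argument-parser's default kebab-case conversion of the property names declared above (windowTitle becomes --window-title, jsonOutput becomes --json-output):

# Hypothetical invocation - binary and subcommand names are assumptions
peekaboo see --app Safari --window-title "Example" --annotate --analyze "What is on screen?" --json-output

Note that without --mode, determineMode() picks window mode here because --app is present; with no app or window title it falls back to capturing the frontmost window.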

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/steipete/Peekaboo'
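
Assuming the response is JSON (as the description above suggests) and jq is installed, you can pretty-print it:

curl -s 'https://glama.ai/api/mcp/v1/servers/steipete/Peekaboo' | jq .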

If you have feedback or need assistance with the MCP directory API, please join our Discord server.