import AppKit
import ArgumentParser
import AXorcist
import CoreGraphics
import Foundation
import PeekabooCore
import PeekabooFoundation
import ScreenCaptureKit
/// Capture a screenshot and build an interactive UI map
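///
/// Illustrative invocations (option names follow the properties declared below;
/// the `peekaboo` binary name is assumed from the filenames used elsewhere in this file):
///
///     peekaboo see --app Safari --annotate
///     peekaboo see --mode screen --screen-index 0 --json-output
///     peekaboo see --app frontmost --analyze "What is shown on screen?"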
@available(macOS 14.0, *)
struct SeeCommand: AsyncParsableCommand, VerboseCommand, ErrorHandlingCommand, OutputFormattable,
ApplicationResolvable {
static let configuration = VisionToolDefinitions.see.commandConfiguration
@Option(help: "Application name to capture, or special values: 'menubar', 'frontmost'")
var app: String?
@Option(name: .long, help: "Target application by process ID")
var pid: Int32?
@Option(help: "Specific window title to capture")
var windowTitle: String?
@Option(help: "Capture mode (screen, window, frontmost)")
var mode: CaptureMode?
@Option(help: "Output path for screenshot")
var path: String?
@Option(
name: .long,
help: "Specific screen index to capture (0-based). If not specified, captures all screens when in screen mode"
)
var screenIndex: Int?
@Flag(help: "Generate annotated screenshot with interaction markers")
var annotate = false
@Option(help: "Analyze captured content with AI")
var analyze: String?
@Flag(help: "Output in JSON format")
var jsonOutput = false
@Flag(name: .shortAndLong, help: "Enable verbose logging for detailed output")
var verbose = false
enum CaptureMode: String, ExpressibleByArgument {
case screen
case window
case frontmost
}
@MainActor
mutating func run() async throws {
let startTime = Date()
configureVerboseLogging()
Logger.shared.setJsonOutputMode(self.jsonOutput)
if self.jsonOutput && !self.verbose {
// Ensure timing/operation logs appear in JSON debug_logs
Logger.shared.setVerboseMode(true)
}
Logger.shared.operationStart("see_command", metadata: [
"app": self.app ?? "none",
"mode": self.mode?.rawValue ?? "auto",
"annotate": self.annotate,
"hasAnalyzePrompt": self.analyze != nil,
])
do {
// Check permissions
Logger.shared.verbose("Checking screen recording permissions", category: "Permissions")
try await requireScreenRecordingPermission()
Logger.shared.verbose("Screen recording permission granted", category: "Permissions")
// Perform capture and element detection
Logger.shared.verbose("Starting capture and detection phase", category: "Capture")
let captureResult = try await performCaptureWithDetection()
Logger.shared.verbose("Capture completed successfully", category: "Capture", metadata: [
"sessionId": captureResult.sessionId,
"elementCount": captureResult.elements.all.count,
"screenshotSize": self.getFileSize(captureResult.screenshotPath) ?? 0,
])
// Generate annotated screenshot if requested
var annotatedPath: String?
if self.annotate {
Logger.shared.operationStart("generate_annotations")
annotatedPath = try await self.generateAnnotatedScreenshot(
sessionId: captureResult.sessionId,
originalPath: captureResult.screenshotPath
)
Logger.shared.operationComplete("generate_annotations", metadata: [
"annotatedPath": annotatedPath ?? "none",
])
}
// Perform AI analysis if requested
var analysisResult: SeeAnalysisData?
if let prompt = analyze {
// Pre-analysis diagnostics
let fileSize = self.getFileSize(captureResult.screenshotPath) ?? 0
Logger.shared.verbose(
"Starting AI analysis",
category: "AI",
metadata: [
"imagePath": captureResult.screenshotPath,
"imageSizeBytes": fileSize,
"promptLength": prompt.count
]
)
Logger.shared.operationStart("ai_analysis", metadata: ["promptPreview": String(prompt.prefix(80))])
Logger.shared.startTimer("ai_generate")
analysisResult = try await self.performAnalysisDetailed(
imagePath: captureResult.screenshotPath,
prompt: prompt
)
Logger.shared.stopTimer("ai_generate")
Logger.shared.operationComplete(
"ai_analysis",
success: analysisResult != nil,
metadata: [
"provider": analysisResult?.provider ?? "unknown",
"model": analysisResult?.model ?? "unknown"
]
)
}
// Output results
let executionTime = Date().timeIntervalSince(startTime)
Logger.shared.operationComplete("see_command", metadata: [
"executionTimeMs": Int(executionTime * 1000),
"success": true,
])
if self.jsonOutput {
await self.outputJSONResults(
sessionId: captureResult.sessionId,
screenshotPath: captureResult.screenshotPath,
annotatedPath: annotatedPath,
metadata: captureResult.metadata,
elements: captureResult.elements,
analysis: analysisResult,
executionTime: executionTime
)
} else {
await self.outputTextResults(
sessionId: captureResult.sessionId,
screenshotPath: captureResult.screenshotPath,
annotatedPath: annotatedPath,
metadata: captureResult.metadata,
elements: captureResult.elements,
analysis: analysisResult,
executionTime: executionTime
)
}
} catch {
Logger.shared.operationComplete("see_command", success: false, metadata: [
"error": error.localizedDescription,
])
self.handleError(error) // Use protocol's error handling
throw ExitCode.failure
}
}
private func getFileSize(_ path: String) -> Int? {
try? FileManager.default.attributesOfItem(atPath: path)[.size] as? Int
}
private func performCaptureWithDetection() async throws -> CaptureAndDetectionResult {
// Handle special app cases
let captureResult: CaptureResult
if let appName = self.app?.lowercased() {
switch appName {
case "menubar":
Logger.shared.verbose("Capturing menu bar area", category: "Capture")
captureResult = try await self.captureMenuBar()
case "frontmost":
Logger.shared.verbose("Capturing frontmost window (via --app frontmost)", category: "Capture")
captureResult = try await PeekabooServices.shared.screenCapture.captureFrontmost()
default:
// Use normal capture logic
captureResult = try await self.performStandardCapture()
}
} else {
// Use normal capture logic
captureResult = try await self.performStandardCapture()
}
// Save screenshot
Logger.shared.startTimer("file_write")
let outputPath = try saveScreenshot(captureResult.imageData)
Logger.shared.stopTimer("file_write")
// Create window context from capture metadata
let windowContext = WindowContext(
applicationName: captureResult.metadata.applicationInfo?.name,
windowTitle: captureResult.metadata.windowInfo?.title,
windowBounds: captureResult.metadata.windowInfo?.bounds
)
// Detect UI elements with window context
Logger.shared.operationStart("element_detection")
let detectionResult = try await PeekabooServices.shared.automation.detectElements(
in: captureResult.imageData,
sessionId: nil,
windowContext: windowContext
)
Logger.shared.operationComplete("element_detection")
// Update the result with the correct screenshot path
let resultWithPath = ElementDetectionResult(
sessionId: detectionResult.sessionId,
screenshotPath: outputPath,
elements: detectionResult.elements,
metadata: detectionResult.metadata
)
// Store the result in session
try await PeekabooServices.shared.sessions.storeDetectionResult(
sessionId: detectionResult.sessionId,
result: resultWithPath
)
return CaptureAndDetectionResult(
sessionId: detectionResult.sessionId,
screenshotPath: outputPath,
elements: detectionResult.elements,
metadata: detectionResult.metadata
)
}
private func performStandardCapture() async throws -> CaptureResult {
let effectiveMode = self.determineMode()
Logger.shared.verbose(
"Determined capture mode",
category: "Capture",
metadata: ["mode": effectiveMode.rawValue]
)
Logger.shared.operationStart("capture_phase", metadata: ["mode": effectiveMode.rawValue])
switch effectiveMode {
case .screen:
// Handle screen capture with multi-screen support
let result = try await self.performScreenCapture()
Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
return result
case .window:
if self.app != nil || self.pid != nil {
let appIdentifier = try self.resolveApplicationIdentifier()
Logger.shared.verbose("Initiating window capture", category: "Capture", metadata: [
"app": appIdentifier,
"windowTitle": self.windowTitle ?? "any",
])
// Find specific window if title is provided
if let title = windowTitle {
Logger.shared.verbose(
"Searching for window with title",
category: "WindowSearch",
metadata: ["title": title]
)
let windowsOutput = try await PeekabooServices.shared.applications.listWindows(
for: appIdentifier,
timeout: nil
)
Logger.shared.verbose(
"Found windows",
category: "WindowSearch",
metadata: ["count": windowsOutput.data.windows.count]
)
if let windowIndex = windowsOutput.data.windows.firstIndex(where: { $0.title.contains(title) }) {
Logger.shared.verbose(
"Window found at index",
category: "WindowSearch",
metadata: ["index": windowIndex]
)
Logger.shared.startTimer("window_capture")
let result = try await PeekabooServices.shared.screenCapture.captureWindow(
appIdentifier: appIdentifier,
windowIndex: windowIndex
)
Logger.shared.stopTimer("window_capture")
Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
return result
} else {
Logger.shared.error(
"Window not found with title",
category: "WindowSearch",
metadata: ["title": title]
)
throw CaptureError.windowNotFound
}
} else {
let result = try await PeekabooServices.shared.screenCapture.captureWindow(
appIdentifier: appIdentifier,
windowIndex: nil
)
Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
return result
}
} else {
throw ValidationError("--app or --pid is required for window mode")
}
case .frontmost:
Logger.shared.verbose("Capturing frontmost window")
let result = try await PeekabooServices.shared.screenCapture.captureFrontmost()
Logger.shared.operationComplete("capture_phase", metadata: ["mode": effectiveMode.rawValue])
return result
}
}
private func captureMenuBar() async throws -> CaptureResult {
// Get the main screen bounds
guard let mainScreen = NSScreen.main else {
throw PeekabooError.captureFailed("No main screen found")
}
// Menu bar is at the top of the screen
let menuBarHeight: CGFloat = 24.0 // Standard macOS menu bar height
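// Note: 24 pt matches traditional displays; the menu bar is taller on notched Macs.
// If exact coverage mattered, the difference between NSScreen.frame.maxY and
// NSScreen.visibleFrame.maxY would give the actual height for the current screen.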
let menuBarRect = CGRect(
x: mainScreen.frame.origin.x,
y: mainScreen.frame.origin.y + mainScreen.frame.height - menuBarHeight,
width: mainScreen.frame.width,
height: menuBarHeight
)
// Capture the menu bar area
return try await PeekabooServices.shared.screenCapture.captureArea(menuBarRect)
}
private func saveScreenshot(_ imageData: Data) throws -> String {
let outputPath: String
if let providedPath = path {
outputPath = NSString(string: providedPath).expandingTildeInPath
} else {
let timestamp = Date().timeIntervalSince1970
let filename = "peekaboo_see_\(Int(timestamp)).png"
let defaultPath = ConfigurationManager.shared.getDefaultSavePath(cliValue: nil)
outputPath = (defaultPath as NSString).appendingPathComponent(filename)
}
// Create directory if needed
let directory = (outputPath as NSString).deletingLastPathComponent
try FileManager.default.createDirectory(
atPath: directory,
withIntermediateDirectories: true
)
// Save the image
try imageData.write(to: URL(fileURLWithPath: outputPath))
Logger.shared.verbose("Saved screenshot to: \(outputPath)")
return outputPath
}
private func generateAnnotatedScreenshot(
sessionId: String,
originalPath: String
) async throws -> String {
// Get detection result from session
guard let detectionResult = try await PeekabooServices.shared.sessions.getDetectionResult(sessionId: sessionId)
else {
Logger.shared.info("No detection result found for session")
return originalPath
}
// Create annotated image
let annotatedPath = (originalPath as NSString).deletingPathExtension + "_annotated.png"
// Load original image
guard let nsImage = NSImage(contentsOfFile: originalPath) else {
throw CaptureError.fileIOError("Failed to load image from \(originalPath)")
}
// Get image size
let imageSize = nsImage.size
// Create bitmap context
guard let bitmapRep = NSBitmapImageRep(
bitmapDataPlanes: nil,
pixelsWide: Int(imageSize.width),
pixelsHigh: Int(imageSize.height),
bitsPerSample: 8,
samplesPerPixel: 4,
hasAlpha: true,
isPlanar: false,
colorSpaceName: .calibratedRGB,
bytesPerRow: 0,
bitsPerPixel: 0
)
else {
throw CaptureError.captureFailure("Failed to create bitmap representation")
}
// Draw into context
NSGraphicsContext.saveGraphicsState()
guard let context = NSGraphicsContext(bitmapImageRep: bitmapRep) else {
Logger.shared.error("Failed to create graphics context")
throw CaptureError.captureFailure("Failed to create graphics context")
}
NSGraphicsContext.current = context
Logger.shared.verbose("Graphics context created successfully")
// Draw original image
nsImage.draw(in: NSRect(origin: .zero, size: imageSize))
Logger.shared.verbose("Original image drawn")
// Configure text attributes - smaller font for less occlusion
let fontSize: CGFloat = 8
let textAttributes: [NSAttributedString.Key: Any] = [
.font: NSFont.systemFont(ofSize: fontSize, weight: .semibold),
.foregroundColor: NSColor.white,
]
// Role-based colors from spec
let roleColors: [ElementType: NSColor] = [
.button: NSColor(red: 0, green: 0.48, blue: 1.0, alpha: 1.0), // #007AFF
.textField: NSColor(red: 0.204, green: 0.78, blue: 0.349, alpha: 1.0), // #34C759
.link: NSColor(red: 0, green: 0.48, blue: 1.0, alpha: 1.0), // #007AFF
.checkbox: NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0), // #8E8E93
.slider: NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0), // #8E8E93
.menu: NSColor(red: 0, green: 0.48, blue: 1.0, alpha: 1.0), // #007AFF
]
// Draw UI elements
let enabledElements = detectionResult.elements.all.filter(\.isEnabled)
if enabledElements.isEmpty {
Logger.shared.info("No enabled elements to annotate. Total elements: \(detectionResult.elements.all.count)")
print("⚠️ No interactive UI elements found to annotate")
NSGraphicsContext.restoreGraphicsState() // Balance the save above before bailing out
return originalPath // Return original image if no elements to annotate
}
Logger.shared.info(
"Annotating \(enabledElements.count) enabled elements out of \(detectionResult.elements.all.count) total"
)
Logger.shared.verbose("Image size: \(imageSize)")
// Calculate window origin from element bounds if we have elements
var windowOrigin = CGPoint.zero
if !detectionResult.elements.all.isEmpty {
// Find the leftmost and topmost element to estimate window origin
let minX = detectionResult.elements.all.map(\.bounds.minX).min() ?? 0
let minY = detectionResult.elements.all.map(\.bounds.minY).min() ?? 0
windowOrigin = CGPoint(x: minX, y: minY)
Logger.shared.verbose("Estimated window origin from elements: \(windowOrigin)")
}
// Convert all element bounds to window-relative coordinates and flip Y
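// The detected bounds use top-left-origin screen coordinates, while AppKit drawing
// uses a bottom-left origin, so each rect is made window-relative and then mirrored
// against the image height.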
var elementRects: [(element: DetectedElement, rect: NSRect)] = []
for element in enabledElements {
let elementFrame = CGRect(
x: element.bounds.origin.x - windowOrigin.x,
y: element.bounds.origin.y - windowOrigin.y,
width: element.bounds.width,
height: element.bounds.height
)
let rect = NSRect(
x: elementFrame.origin.x,
y: imageSize.height - elementFrame.origin.y - elementFrame.height, // Flip Y coordinate
width: elementFrame.width,
height: elementFrame.height
)
elementRects.append((element: element, rect: rect))
}
// Create smart label placer for intelligent label positioning
let labelPlacer = SmartLabelPlacer(image: nsImage, fontSize: fontSize, debugMode: verbose)
// Draw elements and calculate label positions
var labelPositions: [(rect: NSRect, connection: NSPoint?, element: DetectedElement)] = []
for (element, rect) in elementRects {
Logger.shared
.verbose(
"Drawing element: \(element.id), type: \(element.type), original bounds: \(element.bounds), window rect: \(rect)"
)
// Get color for element type
let color = roleColors[element.type] ?? NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0)
// Draw bounding box
color.withAlphaComponent(0.5).setFill()
rect.fill()
color.setStroke()
let path = NSBezierPath(rect: rect)
path.lineWidth = 2
path.stroke()
// Calculate label size
let idString = NSAttributedString(string: element.id, attributes: textAttributes)
let textSize = idString.size()
let labelPadding: CGFloat = 4
let labelSize = NSSize(width: textSize.width + labelPadding * 2, height: textSize.height + labelPadding)
// Use smart label placer to find best position
if let placement = labelPlacer.findBestLabelPosition(
for: element,
elementRect: rect,
labelSize: labelSize,
existingLabels: labelPositions.map { ($0.rect, $0.element) },
allElements: elementRects
) {
labelPositions.append((
rect: placement.labelRect,
connection: placement.connectionPoint,
element: element
))
}
}
// NOTE: The old inline label-placement logic was removed; SmartLabelPlacer handles positioning now.
// Draw all labels and connection lines
for (labelRect, connectionPoint, element) in labelPositions {
// Draw connection line if label is outside - make it more subtle
if let connection = connectionPoint {
NSColor.black.withAlphaComponent(0.3).setStroke()
let linePath = NSBezierPath()
linePath.lineWidth = 0.5
// Draw line from connection point to nearest edge of label
linePath.move(to: connection)
// Find the closest point on label rectangle to the connection point
let closestX = max(labelRect.minX, min(connection.x, labelRect.maxX))
let closestY = max(labelRect.minY, min(connection.y, labelRect.maxY))
linePath.line(to: NSPoint(x: closestX, y: closestY))
linePath.stroke()
}
// Draw label background - more transparent to show content beneath
NSColor.black.withAlphaComponent(0.7).setFill()
NSBezierPath(roundedRect: labelRect, xRadius: 1, yRadius: 1).fill()
// Draw label border (same color as element) - thinner for less occlusion
let color = roleColors[element.type] ?? NSColor(red: 0.557, green: 0.557, blue: 0.576, alpha: 1.0)
color.withAlphaComponent(0.8).setStroke()
let borderPath = NSBezierPath(roundedRect: labelRect, xRadius: 1, yRadius: 1)
borderPath.lineWidth = 0.5
borderPath.stroke()
// Draw label text
let idString = NSAttributedString(string: element.id, attributes: textAttributes)
idString.draw(at: NSPoint(x: labelRect.origin.x + 4, y: labelRect.origin.y + 2))
}
NSGraphicsContext.restoreGraphicsState()
// Save annotated image
guard let pngData = bitmapRep.representation(using: .png, properties: [:]) else {
throw CaptureError.captureFailure("Failed to create PNG data")
}
try pngData.write(to: URL(fileURLWithPath: annotatedPath))
Logger.shared.verbose("Created annotated screenshot: \(annotatedPath)")
// Log annotation info only in non-JSON mode
if !self.jsonOutput {
let interactableElements = detectionResult.elements.all.filter(\.isEnabled)
print("π Created annotated screenshot with \(interactableElements.count) interactive elements")
}
return annotatedPath
}
// (Old duplicate placement logic removed.)
}
// MARK: - Supporting Types
private struct CaptureAndDetectionResult {
let sessionId: String
let screenshotPath: String
let elements: DetectedElements
let metadata: DetectionMetadata
}
private struct SessionPaths {
let raw: String
let annotated: String
let map: String
}
// MARK: - JSON Output Structure (matching original)
struct UIElementSummary: Codable {
let id: String
let role: String
let title: String?
let label: String?
let identifier: String?
let is_actionable: Bool
let keyboard_shortcut: String?
}
struct SeeAnalysisData: Codable {
let provider: String
let model: String
let text: String
}
struct SeeResult: Codable {
let session_id: String
let screenshot_raw: String
let screenshot_annotated: String
let ui_map: String
let application_name: String?
let window_title: String?
let is_dialog: Bool
let element_count: Int
let interactable_count: Int
let capture_mode: String
let analysis: SeeAnalysisData?
let execution_time: TimeInterval
let ui_elements: [UIElementSummary]
let menu_bar: MenuBarSummary?
var success: Bool = true
}
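// Illustrative payload shape (values invented; keys mirror the properties above):
// {
//   "session_id": "1736012345-001",
//   "screenshot_raw": "/tmp/peekaboo_see_1736012345.png",
//   "screenshot_annotated": "/tmp/peekaboo_see_1736012345_annotated.png",
//   "ui_map": "<session storage>/1736012345-001/map.json",
//   "application_name": "Safari",
//   "window_title": "Start Page",
//   "is_dialog": false,
//   "element_count": 42,
//   "interactable_count": 17,
//   "capture_mode": "window",
//   "analysis": { "provider": "openai", "model": "gpt-4o", "text": "..." },
//   "execution_time": 1.27,
//   "ui_elements": [ { "id": "B1", "role": "button", "is_actionable": true } ],
//   "menu_bar": { "menus": [] },
//   "success": true
// }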
struct MenuBarSummary: Codable {
let menus: [MenuSummary]
struct MenuSummary: Codable {
let title: String
let item_count: Int
let enabled: Bool
let items: [MenuItemSummary]
}
struct MenuItemSummary: Codable {
let title: String
let enabled: Bool
let keyboard_shortcut: String?
}
}
// MARK: - Format Helpers Extension
extension SeeCommand {
private func performAnalysisDetailed(imagePath: String, prompt: String) async throws -> SeeAnalysisData {
// Use PeekabooCore AI service which is configured via ConfigurationManager/Tachikoma
let ai = await PeekabooAIService()
let res = try await ai.analyzeImageFileDetailed(at: imagePath, question: prompt, model: nil)
return SeeAnalysisData(provider: res.provider, model: res.model, text: res.text)
}
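/// Resolution order: an explicit --mode wins; otherwise --app or --window-title
/// implies window capture, and everything else falls back to frontmost.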
private func determineMode() -> CaptureMode {
if let mode = self.mode {
mode
} else if self.app != nil || self.windowTitle != nil {
// If app or window title is specified, default to window mode
.window
} else {
// Otherwise default to frontmost
.frontmost
}
}
// MARK: - Output Methods
@MainActor
private func outputJSONResults(
sessionId: String,
screenshotPath: String,
annotatedPath: String?,
metadata: DetectionMetadata,
elements: DetectedElements,
analysis: SeeAnalysisData?,
executionTime: TimeInterval
) async {
// Build UI element summaries
let uiElements: [UIElementSummary] = elements.all.map { element in
UIElementSummary(
id: element.id,
role: element.type.rawValue,
title: element.attributes["title"],
label: element.label,
identifier: element.attributes["identifier"],
is_actionable: element.isEnabled,
keyboard_shortcut: element.attributes["keyboardShortcut"]
)
}
// Build session paths
let sessionPaths = SessionPaths(
raw: screenshotPath,
annotated: annotatedPath ?? screenshotPath,
map: PeekabooServices.shared.sessions.getSessionStoragePath() + "/\(sessionId)/map.json"
)
// Structured analysis is passed in
let output = await SeeResult(
session_id: sessionId,
screenshot_raw: sessionPaths.raw,
screenshot_annotated: sessionPaths.annotated,
ui_map: sessionPaths.map,
application_name: metadata.windowContext?.applicationName,
window_title: metadata.windowContext?.windowTitle,
is_dialog: metadata.isDialog,
element_count: metadata.elementCount,
interactable_count: elements.all.count { $0.isEnabled },
capture_mode: self.determineMode().rawValue,
analysis: analysis,
execution_time: executionTime,
ui_elements: uiElements,
menu_bar: self.getMenuBarItemsSummary()
)
outputSuccessCodable(data: output)
}
@MainActor
private func getMenuBarItemsSummary() async -> MenuBarSummary {
// Get menu bar items from service
var menuExtras: [MenuExtraInfo] = []
do {
menuExtras = try await PeekabooServices.shared.menu.listMenuExtras()
} catch {
// If there's an error, just return empty array
menuExtras = []
}
// Group items into menu categories
// For now, we'll create a simplified view showing each menu bar item as a "menu"
let menus = menuExtras.map { extra in
MenuBarSummary.MenuSummary(
title: extra.title,
item_count: 1, // Each menu bar item is treated as a single menu
enabled: true,
items: [
MenuBarSummary.MenuItemSummary(
title: extra.title,
enabled: true,
keyboard_shortcut: nil
)
]
)
}
return MenuBarSummary(menus: menus)
}
@MainActor
private func outputTextResults(
sessionId: String,
screenshotPath: String,
annotatedPath: String?,
metadata: DetectionMetadata,
elements: DetectedElements,
analysis: SeeAnalysisData?,
executionTime: TimeInterval
) async {
let sessionPaths = SessionPaths(
raw: screenshotPath,
annotated: annotatedPath ?? screenshotPath,
map: PeekabooServices.shared.sessions.getSessionStoragePath() + "/\(sessionId)/map.json"
)
let interactableCount = elements.all.count { $0.isEnabled }
print("β
Screenshot captured successfully")
print("π Session ID: \(sessionId)")
print("πΌ Raw screenshot: \(sessionPaths.raw)")
if let annotated = annotatedPath {
print("π― Annotated: \(annotated)")
}
print("πΊοΈ UI map: \(sessionPaths.map)")
print("π Found \(metadata.elementCount) UI elements (\(interactableCount) interactive)")
if let app = metadata.windowContext?.applicationName {
print("π± Application: \(app)")
}
if let window = metadata.windowContext?.windowTitle {
let windowType = metadata.isDialog ? "Dialog" : "Window"
let icon = metadata.isDialog ? "🗨️" : "🪟"
print("\(icon) \(windowType): \(window)")
}
// Show menu bar items
// Get menu bar items from service
let menuExtras: [MenuExtraInfo]
do {
menuExtras = try await PeekabooServices.shared.menu.listMenuExtras()
} catch {
// If there's an error, just return empty array
menuExtras = []
}
if !menuExtras.isEmpty {
print("π Menu Bar Items: \(menuExtras.count)")
for item in menuExtras.prefix(10) { // Show first 10
print(" β’ \(item.title)")
}
if menuExtras.count > 10 {
print(" ... and \(menuExtras.count - 10) more")
}
}
if let analysis = analysis {
print("π€ Analysis (")
print("\(analysis.provider)/\(analysis.model)):")
print(analysis.text)
}
print("β±οΈ Completed in \(String(format: "%.2f", executionTime))s")
}
}
// MARK: - Multi-Screen Support
extension SeeCommand {
private func performScreenCapture() async throws -> CaptureResult {
// Log warning if annotation was requested for full screen captures
if self.annotate {
Logger.shared.info("Annotation is disabled for full screen captures due to performance constraints")
}
Logger.shared.verbose("Initiating screen capture", category: "Capture")
Logger.shared.startTimer("screen_capture")
defer {
Logger.shared.stopTimer("screen_capture")
}
if let index = self.screenIndex ?? (self.analyze != nil ? 0 : nil) {
// Capture specific screen
Logger.shared.verbose("Capturing specific screen", category: "Capture", metadata: ["screenIndex": index])
let result = try await PeekabooServices.shared.screenCapture.captureScreen(displayIndex: index)
// Add display info to output
if let displayInfo = result.metadata.displayInfo {
let bounds = displayInfo.bounds
print(
"π₯οΈ Display \(index): \(displayInfo.name ?? "Display \(index)") (\(Int(bounds.width))Γ\(Int(bounds.height)))"
)
}
Logger.shared.verbose("Screen capture completed", category: "Capture", metadata: [
"mode": "screen-index",
"screenIndex": index,
"imageBytes": result.imageData.count
])
return result
} else {
// Capture all screens
Logger.shared.verbose("Capturing all screens", category: "Capture")
let results = try await self.captureAllScreens()
if results.isEmpty {
throw CaptureError.captureFailure("Failed to capture any screens")
}
// Save all screenshots except the first (which will be saved by the normal flow)
print("πΈ Captured \(results.count) screen(s):")
for (index, result) in results.enumerated() {
if index > 0 {
// Save additional screenshots
let screenPath: String
if let basePath = self.path {
// User specified a path - add screen index to filename
let directory = (basePath as NSString).deletingLastPathComponent
let filename = (basePath as NSString).lastPathComponent
let nameWithoutExt = (filename as NSString).deletingPathExtension
let ext = (filename as NSString).pathExtension
screenPath = (directory as NSString)
.appendingPathComponent("\(nameWithoutExt)_screen\(index).\(ext)")
} else {
// Default path with screen index
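// Note: this default writes into the current working directory (unlike
// saveScreenshot(_:), which uses ConfigurationManager's default save path),
// and the ISO8601 timestamp contains ':' characters.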
let timestamp = ISO8601DateFormatter().string(from: Date())
screenPath = "screenshot_\(timestamp)_screen\(index).png"
}
// Save the screenshot
try result.imageData.write(to: URL(fileURLWithPath: screenPath))
// Display info about this screen
if let displayInfo = result.metadata.displayInfo {
let fileSize = self.getFileSize(screenPath) ?? 0
let bounds = displayInfo.bounds
print(
" π₯οΈ Display \(index): \(displayInfo.name ?? "Display \(index)") (\(Int(bounds.width))Γ\(Int(bounds.height))) β \(screenPath) (\(self.formatFileSize(Int64(fileSize))))"
)
}
} else {
// First screen will be saved by the normal flow, just show info
if let displayInfo = result.metadata.displayInfo {
let bounds = displayInfo.bounds
print(
" π₯οΈ Display \(index): \(displayInfo.name ?? "Display \(index)") (\(Int(bounds.width))Γ\(Int(bounds.height))) β (primary)"
)
}
}
}
// Return the primary screen result (first one)
Logger.shared.verbose("Multi-screen capture completed", category: "Capture", metadata: [
"count": results.count,
"primaryBytes": results.first?.imageData.count ?? 0
])
return results[0]
}
}
}
// MARK: - Multi-Screen Support
extension SeeCommand {
private func captureAllScreens() async throws -> [CaptureResult] {
var results: [CaptureResult] = []
// Get available displays from the screen capture service
let content = try await SCShareableContent.current
let displays = content.displays
Logger.shared.info("Found \(displays.count) display(s) to capture")
for (index, display) in displays.enumerated() {
Logger.shared.verbose("Capturing display \(index)", category: "MultiScreen", metadata: [
"displayID": display.displayID,
"width": display.width,
"height": display.height
])
do {
let result = try await PeekabooServices.shared.screenCapture.captureScreen(displayIndex: index)
// Update path to include screen index if capturing multiple screens
if displays.count > 1 {
let updatedResult = self.updateCaptureResultPath(result, screenIndex: index, displayInfo: display)
results.append(updatedResult)
} else {
results.append(result)
}
} catch {
Logger.shared.error("Failed to capture display \(index): \(error)")
// Continue capturing other screens even if one fails
}
}
if results.isEmpty {
throw CaptureError.captureFailure("Failed to capture any screens")
}
return results
}
private func updateCaptureResultPath(
_ result: CaptureResult,
screenIndex: Int,
displayInfo: SCDisplay
) -> CaptureResult {
// Since CaptureResult is immutable and doesn't have a path property,
// we can't update the path. Just return the original result.
// The saved path is already included in result.savedPath if it was saved.
result
}
private func formatFileSize(_ bytes: Int64) -> String {
let formatter = ByteCountFormatter()
formatter.countStyle = .file
return formatter.string(fromByteCount: bytes)
}
}