#!/usr/bin/env node
// 🎯 VISUAL AUTOMATION - The Coffee Spitter!
// This is what makes people go "WTF, this is possible?!"
import { execSync } from 'child_process';
import { promises as fs } from 'fs';
export class VisualAutomation {
constructor() {
this.screenshotPath = '/tmp/automator-screenshot.png';
}
// Take screenshot and analyze with AI
async analyzeScreen(region = null) {
const regionFlag = region ? `-R${region.x},${region.y},${region.width},${region.height}` : '';
// Capture screenshot
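// screencapture flags: -x suppresses the capture sound, -R limits the capture
// to the given x,y,width,height rectangle.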
execSync(`screencapture -x ${regionFlag} ${this.screenshotPath}`);
// In a real implementation, this could send the screenshot to a vision model
// (hosted or local; see the hypothetical analyzeWithVisionModel() sketch below).
// For now, we'll use OCR
const text = await this.ocrScreenshot();
return {
screenshot: this.screenshotPath,
text: text,
elements: await this.detectUIElements(),
suggestions: await this.suggestActions(text)
};
}
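// Hypothetical sketch (not part of the original): one way the "vision model" path
// could look, assuming Node 18+ (global fetch), an OpenAI-compatible chat endpoint,
// and an OPENAI_API_KEY environment variable. Model name and prompt are placeholders.
async analyzeWithVisionModel(prompt = 'Describe the visible UI and suggest the next action.') {
  const imageBase64 = await fs.readFile(this.screenshotPath, { encoding: 'base64' });
  const response = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`
    },
    body: JSON.stringify({
      model: 'gpt-4o-mini', // placeholder model name
      messages: [{
        role: 'user',
        content: [
          { type: 'text', text: prompt },
          { type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } }
        ]
      }]
    })
  });
  const data = await response.json();
  return data.choices?.[0]?.message?.content ?? '';
}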
// OCR the screenshot
async ocrScreenshot() {
try {
// Using macOS built-in OCR (Monterey+)
const script = `
use framework "Vision"
use framework "Foundation"
on ocrImage(imagePath)
set theImage to current application's NSImage's alloc()'s initWithContentsOfFile:imagePath
set theImageRep to theImage's representations()'s objectAtIndex:0
set theCGImage to theImageRep's CGImage()
set request to current application's VNRecognizeTextRequest's alloc()'s init()
request's setRecognitionLevel:(current application's VNRequestTextRecognitionLevelAccurate)
set handler to current application's VNImageRequestHandler's alloc()'s initWithCGImage:theCGImage options:(missing value)
handler's performRequests:{request} |error|:(missing value)
set results to request's results()
set textItems to {}
repeat with observation in results
set end of textItems to (observation's topCandidates:1)'s firstObject()'s |string|() as text
end repeat
return textItems
end ocrImage
return ocrImage("${this.screenshotPath}")
`;
// The AppleScriptObjC source is full of apostrophes ("application's"), so pass it
// via stdin rather than inlining it in a single-quoted shell argument; it is plain
// AppleScript, not JavaScript, so no -l flag is needed.
const result = execSync('osascript', { input: script, encoding: 'utf8' });
return result.trim();
} catch (error) {
// Fall back to third-party OCR if needed (see the hypothetical ocrWithTesseract() sketch below)
console.error('OCR failed:', error);
return '';
}
}
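// Hypothetical sketch (not part of the original): a third-party OCR fallback using
// the tesseract CLI, assuming it is installed (e.g. `brew install tesseract`).
async ocrWithTesseract() {
  try {
    // `tesseract <image> stdout` prints the recognized text to standard output.
    return execSync(`tesseract ${this.screenshotPath} stdout`, { encoding: 'utf8' }).trim();
  } catch (error) {
    console.error('Tesseract OCR failed:', error);
    return '';
  }
}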
// Detect UI elements (buttons, text fields, etc.)
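// Note: UI scripting through System Events requires the process running this script
// (e.g. your terminal) to be granted Accessibility permission under
// System Settings > Privacy & Security > Accessibility.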
async detectUIElements() {
const script = `
tell application "System Events"
set frontApp to name of first application process whose frontmost is true
tell process frontApp
set allElements to entire contents of window 1
set elementsList to {}
repeat with elem in allElements
try
set elemInfo to {class:class of elem, title:title of elem, enabled:enabled of elem}
set end of elementsList to elemInfo
end try
end repeat
return elementsList
end tell
end tell
`;
try {
const result = execSync(`osascript -e '${script}'`, { encoding: 'utf8' });
return this.parseUIElements(result);
} catch (error) {
return [];
}
}
// AI-powered action suggestions
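// e.g. suggestActions('TypeError: request failed') resolves to
//   [{ type: 'error_handler', action: 'search_for_solution', confidence: 0.9 }]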
async suggestActions(screenText) {
const suggestions = [];
// Error detection
if (screenText.match(/error|failed|exception|crash/i)) {
suggestions.push({
type: 'error_handler',
action: 'search_for_solution',
confidence: 0.9
});
}
// Meeting detection
if (screenText.match(/zoom|teams|meet|join meeting/i)) {
suggestions.push({
type: 'meeting_mode',
action: 'enable_focus_mode',
confidence: 0.85
});
}
// Code detection
if (screenText.match(/function|class|import|const|let|var/i)) {
suggestions.push({
type: 'code_assist',
action: 'format_and_analyze',
confidence: 0.8
});
}
return suggestions;
}
// The killer feature: Watch and learn
async watchAndLearn(duration = 60000) {
const patterns = [];
// Sample the screen every 5 seconds; the timeout below stops sampling and turns
// the collected observations into a workflow. Note that each sample runs a
// screenshot plus OCR, so a pass may take a meaningful fraction of the interval.
const interval = setInterval(async () => {
const analysis = await this.analyzeScreen();
patterns.push({
timestamp: Date.now(),
...analysis
});
}, 5000); // Analyze every 5 seconds
return new Promise(resolve => {
setTimeout(() => {
clearInterval(interval);
resolve(this.generateWorkflow(patterns));
}, duration);
});
}
// Generate workflow from patterns
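// Example of the shape produced (values illustrative):
//   { name: 'AI-Generated Workflow', triggers: [],
//     actions: [{ action: 'search_for_solution', frequency: 4, automated: true }],
//     conditions: [] }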
generateWorkflow(patterns) {
// Analyze patterns and create automation
const workflow = {
name: 'AI-Generated Workflow',
triggers: [],
actions: [],
conditions: []
};
// Find repeated actions
const actionCounts = {};
patterns.forEach(p => {
p.suggestions.forEach(s => {
const key = s.action;
actionCounts[key] = (actionCounts[key] || 0) + 1;
});
});
// Create workflow from most common actions
Object.entries(actionCounts)
.sort(([,a], [,b]) => b - a)
.slice(0, 5)
.forEach(([action, count]) => {
workflow.actions.push({
action,
frequency: count,
automated: true
});
});
return workflow;
}
// Parse UI elements from AppleScript output
parseUIElements(output) {
  // osascript flattens the record list into comma-separated "key:value" text,
  // e.g. "class:button, title:OK, enabled:true, class:text field, ...".
  const elements = [];
  // Split on "class:" so each chunk describes one element.
  output.split(/class:/).slice(1).forEach(chunk => {
    const type = chunk.split(',')[0].trim();
    if (/button|text field|menu/.test(type)) {
      elements.push({
        type,
        properties: chunk.trim()
      });
    }
  });
  return elements;
}
}
// 💪 DEMO: The "Holy Shit" Moment
export async function holyShitDemo() {
const automation = new VisualAutomation();
console.log('🎯 Starting Visual Automation Demo...\n');
// Demo 1: Screenshot ā Action
console.log('📸 Taking screenshot and analyzing...');
const analysis = await automation.analyzeScreen();
console.log('📝 Found text:', analysis.text);
console.log('🎮 UI Elements:', analysis.elements.length);
console.log('💡 Suggestions:', analysis.suggestions);
// Demo 2: Watch and Learn
console.log('\n👀 Watching your screen for patterns (10 seconds)...');
const workflow = await automation.watchAndLearn(10000);
console.log('🤖 Generated Workflow:', workflow);
// Demo 3: Error Detection
if (/error/i.test(analysis.text)) {
console.log('\n🚨 Error detected! Automatically searching for solution...');
// Keep the search query to a sane length for a URL
execSync(`open "https://www.google.com/search?q=${encodeURIComponent(analysis.text.slice(0, 120))}"`);
}
return '🎉 Mind = Blown!';
}
// Export for use in main automator
export default VisualAutomation;
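// Minimal sketch (assumption, not in the original): run the demo when this file is
// executed directly, e.g. `node visual-automation.js`.
if (process.argv[1] && process.argv[1].endsWith('visual-automation.js')) {
  holyShitDemo().then(console.log).catch(console.error);
}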