/**
* Simple VRAM Manager using Maintenance Workflows
*
* This is a pragmatic approach to VRAM management that works WITH
* ComfyUI's existing LRU cache system rather than trying to fight it.
*
* Core Strategy:
* 1. Monitor VRAM usage via existing ComfyUI endpoints
* 2. Run lightweight workflows to trigger LRU cache eviction
* 3. Use SD 1.5 (2GB) to push out FLUX (11GB) when needed
* 4. Schedule maintenance during idle periods
*/
import fetch from 'node-fetch';
import { runMaintenanceWorkflow, MaintenanceScheduler } from '../workflows/maintenance-workflow.js';
/**
 * SAFETY PRINCIPLES:
 * 1. Protect GPU above all else - refuse operations if unsafe
 * 2. Trust but verify - validate all operations
 * 3. Handle edge cases gracefully - never crash
 * 4. Think strategically - prevent issues before they occur
 * 5. Do no harm - better to refuse than risk hardware
 *
 * SimpleVRAMManager polls the ComfyUI stats endpoints, tracks VRAM usage
 * against the configured thresholds and runs maintenance workflows
 * (via `runMaintenanceWorkflow`) when usage is too high.
 */
export class SimpleVRAMManager {
  /**
   * @param {object} comfyuiClient - Client object; `serverAddress` is read to
   *   build the stats URLs and the client is handed to the maintenance helpers.
   * @param {object} [options]
   * @param {number} [options.warningThreshold=0.70] - Usage ratio logged as moderate.
   * @param {number} [options.criticalThreshold=0.80] - Usage ratio counted as a high reading.
   * @param {number} [options.cleanupThreshold=0.85] - Usage ratio that triggers cleanup.
   * @param {number} [options.emergencyThreshold=0.95] - Usage ratio that enters emergency mode.
   * @param {number} [options.checkInterval=60000] - Monitoring period in ms.
   * @param {number} [options.idleTimeout=1800000] - Idle time in ms.
   * @param {number} [options.cooldownPeriod=300000] - Minimum ms between non-emergency cleanups.
   * @param {string} [options.defaultStrategy='sd15'] - Workflow strategy for high usage.
   * @param {string} [options.aggressiveStrategy='empty'] - Workflow strategy for critical usage.
   * @param {boolean} [options.autoCleanup=true]
   * @param {boolean} [options.preRequestCheck=true]
   * @param {boolean} [options.refuseOnEmergency=true]
   * @param {boolean} [options.verifyCleanup=true]
   * @param {boolean} [options.logging=true]
   * @param {number} [options.cacheLRUSize=3]
   * @param {number} [options.settleDelay=2000] - Ms to wait after a cleanup before re-reading stats.
   * @param {number} [options.statsTimeout=5000] - Ms before a stats request is aborted (0 disables).
   */
  constructor(comfyuiClient, options = {}) {
    this.client = comfyuiClient;
    this.config = {
      // Thresholds (conservative for GPU safety)
      warningThreshold: options.warningThreshold || 0.70, // 70% VRAM usage (safer)
      criticalThreshold: options.criticalThreshold || 0.80, // 80% VRAM usage (safer)
      cleanupThreshold: options.cleanupThreshold || 0.85, // 85% triggers cleanup (safer)
      emergencyThreshold: options.emergencyThreshold || 0.95, // 95% emergency mode
      // Timing
      checkInterval: options.checkInterval || 60000, // Check every minute
      idleTimeout: options.idleTimeout || 30 * 60 * 1000, // 30 minutes idle (matching plan)
      cooldownPeriod: options.cooldownPeriod || 5 * 60 * 1000, // 5 min between cleanups
      // Explicit finite checks (not `||`) so that 0 is an accepted value here
      settleDelay:
        Number.isFinite(options.settleDelay) && options.settleDelay >= 0
          ? options.settleDelay
          : 2000,
      statsTimeout:
        Number.isFinite(options.statsTimeout) && options.statsTimeout >= 0
          ? options.statsTimeout
          : 5000,
      // Strategies
      defaultStrategy: options.defaultStrategy || 'sd15', // Use SD1.5 to evict FLUX
      aggressiveStrategy: options.aggressiveStrategy || 'empty', // Minimal workflow
      // Features
      autoCleanup: options.autoCleanup !== false,
      preRequestCheck: options.preRequestCheck !== false, // Check before heavy operations
      refuseOnEmergency: options.refuseOnEmergency !== false, // Refuse ops in emergency
      verifyCleanup: options.verifyCleanup !== false, // Verify cleanup worked
      logging: options.logging !== false,
      // LRU cache size for ComfyUI (from --cache-lru parameter)
      cacheLRUSize: options.cacheLRUSize || 3
    };
    this.state = {
      lastCheck: null,
      lastCleanup: null,
      currentUsage: 0,
      isMonitoring: false,
      isChecking: false, // True while a monitored check is in flight
      consecutiveHighReadings: 0,
      consecutiveCleanupFailures: 0, // Track cleanup failures
      emergencyMode: false, // GPU protection mode
      statCheckFailures: 0 // Track stat check failures
    };
    // Initialize maintenance scheduler
    this.scheduler = new MaintenanceScheduler(comfyuiClient, {
      idleTimeout: this.config.idleTimeout,
      periodicInterval: 30 * 60 * 1000, // Every 30 minutes
      strategy: this.config.defaultStrategy,
      enabled: this.config.autoCleanup
    });
    // Track last activity for idle detection
    this.lastActivity = Date.now();
    // Track if we're currently processing a heavy operation
    this.isProcessingHeavy = false;
  }

  /**
   * Get VRAM stats from ComfyUI.
   * Tries each known stats endpoint in order and returns the first payload
   * that parses and validates.
   *
   * @returns {Promise<object|null>} Parsed stats, or null when no endpoint
   *   produced usable data. Never rejects.
   */
  async getVRAMStats() {
    try {
      // Try multiple endpoints in order of preference
      const endpoints = [
        '/system_stats', // Confirmed working endpoint
        '/stats', // Crystools endpoint (backup)
        '/api/stats' // Alternative endpoint (backup)
      ];
      for (const endpoint of endpoints) {
        try {
          const data = await this.fetchStatsJson(endpoint);
          const stats = data ? this.parseVRAMStats(data) : null;
          // TRUST BUT VERIFY: Validate stats
          if (stats && this.validateStats(stats)) {
            this.state.statCheckFailures = 0; // Reset failure counter
            return stats;
          }
        } catch (e) {
          // Deliberate best effort: a failing endpoint just means we try the next one
          continue;
        }
      }
      // EDGE CASE: No stats available
      this.state.statCheckFailures++;
      // Enter emergency mode after multiple failures
      if (this.state.statCheckFailures >= 5) {
        this.log('🚨 CRITICAL: Cannot monitor GPU - entering emergency mode', 'error');
        this.state.emergencyMode = true;
      }
      this.log('⚠️ No VRAM stats available - assuming high usage for safety', 'warn');
      return null;
    } catch (error) {
      this.log(`Error getting VRAM stats: ${error.message}`, 'error');
      return null;
    }
  }

  /**
   * Fetch one stats endpoint and decode its JSON body.
   * The request (including reading the body) is aborted after
   * `config.statsTimeout` ms when AbortController is available, so a hung
   * server cannot stall monitoring.
   *
   * @param {string} endpoint - Path appended to `http://<serverAddress>`.
   * @returns {Promise<*>} Decoded JSON, or null for a non-OK response.
   */
  async fetchStatsJson(endpoint) {
    const url = `http://${this.client.serverAddress}${endpoint}`;
    const timeoutMs = this.config.statsTimeout;
    const controller =
      typeof AbortController !== 'undefined' && timeoutMs > 0
        ? new AbortController()
        : null;
    const timer = controller
      ? setTimeout(() => controller.abort(), timeoutMs)
      : null;
    try {
      const response = controller
        ? await fetch(url, { signal: controller.signal })
        : await fetch(url);
      if (!response.ok) {
        return null;
      }
      return await response.json();
    } finally {
      // Always release the timer so it cannot keep the process alive
      if (timer !== null) {
        clearTimeout(timer);
      }
    }
  }

  /**
   * Validate stats for sanity.
   * TRUST BUT VERIFY principle
   *
   * @param {object|null} stats - Result of parseVRAMStats.
   * @returns {boolean} True when total/usage/totalGB are finite and plausible.
   */
  validateStats(stats) {
    if (!stats) return false;
    // NaN compares false against everything, so it must be rejected explicitly
    const finite =
      Number.isFinite(stats.total) &&
      Number.isFinite(stats.usage) &&
      Number.isFinite(stats.totalGB);
    // Check for obviously invalid values
    if (!finite || stats.total <= 0 || stats.usage > 1 || stats.usage < 0) {
      this.log('⚠️ Invalid stats detected', 'warn');
      return false;
    }
    // Check for suspiciously low values (might indicate error)
    if (stats.totalGB < 1) { // Less than 1GB total VRAM is suspicious
      this.log('⚠️ Suspiciously low VRAM total', 'warn');
      return false;
    }
    return true;
  }

  /**
   * Parse VRAM stats from various formats.
   * Supports `{ devices: [{ type: 'cuda', vram_total, vram_free }] }` and
   * `{ gpu: { vram_total, vram_used, vram_free, vram_usage } }`.
   *
   * @param {*} data - Decoded JSON payload from a stats endpoint.
   * @returns {object|null} `{ total, used, free, usage, totalGB, usedGB, freeGB }`
   *   (byte counts, usage as a 0..1 ratio) or null when the shape is not recognised.
   */
  parseVRAMStats(data) {
    if (!data || typeof data !== 'object') {
      return null;
    }
    const bytesPerGB = 1024 ** 3;
    // Handle different response formats
    if (data.devices) {
      if (!Array.isArray(data.devices)) {
        return null;
      }
      // Standard format
      for (const device of data.devices) {
        if (device && device.type === 'cuda') {
          const total = device.vram_total || 0;
          const free = device.vram_free || 0;
          const used = total - free;
          return {
            total,
            used,
            free,
            usage: total > 0 ? used / total : 0,
            totalGB: total / bytesPerGB,
            usedGB: used / bytesPerGB,
            freeGB: free / bytesPerGB
          };
        }
      }
    } else if (data.gpu) {
      // Alternative format
      const total = data.gpu.vram_total || 0;
      const free = data.gpu.vram_free || 0;
      // Derive missing fields instead of silently reporting 0 (= "GPU is empty")
      const used =
        typeof data.gpu.vram_used === 'number' ? data.gpu.vram_used : total - free;
      let usage = 0;
      if (typeof data.gpu.vram_usage === 'number') {
        usage = data.gpu.vram_usage;
      } else if (total > 0) {
        usage = used / total;
      }
      return {
        total,
        used,
        free,
        usage,
        totalGB: total / bytesPerGB,
        usedGB: used / bytesPerGB,
        freeGB: free / bytesPerGB
      };
    }
    return null;
  }

  /**
   * Tell whether the last successful cleanup is more recent than the cooldown.
   *
   * @returns {boolean} True when a non-emergency cleanup should be skipped.
   */
  isCleanupOnCooldown() {
    const sinceLastCleanup = Date.now() - (this.state.lastCleanup || 0);
    return sinceLastCleanup < this.config.cooldownPeriod;
  }

  /**
   * Check VRAM and trigger cleanup if needed.
   *
   * @returns {Promise<object>} `{ success: true, stats }` when nothing had to
   *   be done, `{ success: false, reason: 'Cooldown active' }` when a cleanup
   *   was skipped, otherwise the result of performCleanup.
   */
  async checkAndCleanup() {
    const stats = await this.getVRAMStats();
    // EDGE CASE: No stats = assume worst case for safety
    if (!stats) {
      // GPU PROTECTION: Assume high usage when we can't verify
      this.log('⚠️ No stats - assuming 90% usage for safety', 'warn');
      // Without a cooldown an unreachable stats endpoint would launch a
      // maintenance workflow on every single check
      if (this.isCleanupOnCooldown()) {
        this.log('⏱️ Cleanup on cooldown, waiting...', 'info');
        return { success: false, reason: 'Cooldown active' };
      }
      // `estimated` marks the numbers as placeholders, not measurements
      return await this.performCleanup({ usage: 0.9, usedGB: 20, totalGB: 24, estimated: true });
    }
    this.state.currentUsage = stats.usage;
    this.state.lastCheck = Date.now();
    // Log current status
    this.log(
      `VRAM: ${stats.usedGB.toFixed(1)}/${stats.totalGB.toFixed(1)}GB ` +
      `(${(stats.usage * 100).toFixed(1)}%) | ` +
      `Free: ${stats.freeGB.toFixed(1)}GB`
    );
    // CRITICAL: Check emergency threshold first
    if (stats.usage >= this.config.emergencyThreshold) {
      this.log(`🚨 EMERGENCY: VRAM at ${(stats.usage * 100).toFixed(1)}%!`, 'error');
      this.state.emergencyMode = true;
      // Force immediate cleanup
      const result = await this.performCleanup(stats);
      // GPU PROTECTION: If cleanup fails, recommend service restart
      if (!result || !result.success) {
        this.log('🛑 CRITICAL: Cleanup failed in emergency - restart recommended!', 'error');
      }
      return result;
    }
    // Check normal cleanup threshold
    if (stats.usage >= this.config.cleanupThreshold) {
      this.state.consecutiveHighReadings++;
      // Check cooldown
      if (this.isCleanupOnCooldown()) {
        this.log('⏱️ Cleanup on cooldown, waiting...', 'info');
        return { success: false, reason: 'Cooldown active' };
      }
      // Trigger cleanup
      this.log(`🚨 VRAM critical (${(stats.usage * 100).toFixed(1)}%), triggering cleanup...`, 'warn');
      return await this.performCleanup(stats);
    }
    if (stats.usage >= this.config.criticalThreshold) {
      this.state.consecutiveHighReadings++;
      this.log(`⚠️ VRAM high (${(stats.usage * 100).toFixed(1)}%)`, 'warn');
      // Schedule cleanup if multiple high readings. Resetting the counter and
      // honouring the cooldown keeps this from firing on every later check.
      if (this.state.consecutiveHighReadings >= 3 && !this.isCleanupOnCooldown()) {
        this.state.consecutiveHighReadings = 0;
        return await this.performCleanup(stats);
      }
      return { success: true, stats };
    }
    if (stats.usage >= this.config.warningThreshold) {
      this.log(`📊 VRAM moderate (${(stats.usage * 100).toFixed(1)}%)`, 'info');
    }
    this.state.consecutiveHighReadings = 0;
    // A verified reading below the critical threshold means the GPU is safe
    // again; without this, emergency mode would stay latched forever.
    if (this.state.emergencyMode) {
      this.log('GPU state verified below critical threshold - leaving emergency mode', 'info');
      this.state.emergencyMode = false;
      this.state.consecutiveCleanupFailures = 0;
    }
    return { success: true, stats };
  }

  /**
   * Count a failed maintenance workflow and enter emergency mode after
   * three consecutive failures.
   */
  recordWorkflowFailure() {
    this.state.consecutiveCleanupFailures++;
    if (this.state.consecutiveCleanupFailures >= 3) {
      this.log('🚨 Multiple cleanup failures - entering emergency mode', 'error');
      this.state.emergencyMode = true;
    }
  }

  /**
   * Perform VRAM cleanup using maintenance workflow.
   *
   * @param {object} stats - Stats that triggered the cleanup (`usage`,
   *   `usedGB`). Set `estimated: true` when the values are placeholders; the
   *   freed-memory verification is then skipped because it would be meaningless.
   * @returns {Promise<object>} On a verified run:
   *   `{ success: true, freedGB, oldUsage, newUsage, effective }`.
   *   When verification is disabled/impossible: the workflow result as-is.
   *   On failure: the failing workflow result or `{ success: false, error }`.
   *   Never rejects.
   */
  async performCleanup(stats) {
    this.log('🧹 Starting VRAM cleanup...', 'info');
    // Determine strategy based on severity
    let strategy;
    if (stats.usage >= 0.95) {
      strategy = this.config.aggressiveStrategy; // Empty workflow for critical
      this.log('Using aggressive strategy (>95% usage)', 'warn');
    } else if (stats.usage >= 0.85) {
      strategy = this.config.defaultStrategy; // SD1.5 for high usage
      this.log('Using default strategy (>85% usage)', 'info');
    } else {
      strategy = 'noise'; // Noise for moderate
      this.log('Using light strategy (<85% usage)', 'info');
    }
    try {
      // Run maintenance workflow to trigger LRU cache eviction
      const result = await runMaintenanceWorkflow(this.client, strategy);
      // EDGE CASE: Handle cleanup failure
      if (!result || !result.success) {
        this.recordWorkflowFailure();
        return result || { success: false, error: 'Maintenance workflow returned no result' };
      }
      this.state.lastCleanup = Date.now();
      // Wait a moment for cache to settle
      await new Promise(resolve => setTimeout(resolve, this.config.settleDelay));
      if (!this.config.verifyCleanup || stats.estimated) {
        return result;
      }
      // TRUST BUT VERIFY: Check new stats
      const newStats = await this.getVRAMStats();
      if (!newStats) {
        return result;
      }
      const freedGB = stats.usedGB - newStats.usedGB;
      // VERIFY: Check if cleanup was effective
      const effective = freedGB >= 0.5; // Less than 500MB freed counts as ineffective
      if (!effective) {
        this.log(`⚠️ Cleanup ineffective: only freed ${freedGB.toFixed(1)}GB`, 'warn');
        this.state.consecutiveCleanupFailures++;
      } else {
        this.log(
          `✅ Cleanup verified: freed ${freedGB.toFixed(1)}GB ` +
          `(${stats.usedGB.toFixed(1)}GB → ${newStats.usedGB.toFixed(1)}GB)`,
          'success'
        );
        this.state.consecutiveCleanupFailures = 0;
        // Clear emergency only if usage really left the emergency zone
        if (newStats.usage < this.config.emergencyThreshold) {
          this.state.emergencyMode = false;
        }
      }
      return {
        success: true,
        freedGB,
        oldUsage: stats.usage,
        newUsage: newStats.usage,
        effective
      };
    } catch (error) {
      this.log(`❌ Cleanup failed: ${error.message}`, 'error');
      // A throwing workflow is a failed cleanup just like a failed result
      this.recordWorkflowFailure();
      return { success: false, error: error.message };
    }
  }

  /**
   * Run one monitoring check, skipping it when the previous one is still in
   * flight, and never letting a rejection escape into the timer callback.
   *
   * @returns {Promise<void>}
   */
  async runMonitoredCheck() {
    if (this.state.isChecking) {
      return;
    }
    this.state.isChecking = true;
    try {
      await this.checkAndCleanup();
    } catch (error) {
      this.log(`Monitoring check failed: ${error.message}`, 'error');
    } finally {
      this.state.isChecking = false;
    }
  }

  /**
   * Start monitoring: one immediate check, then one every
   * `config.checkInterval` ms. Calling it while monitoring is a no-op.
   */
  start() {
    if (this.state.isMonitoring) {
      return;
    }
    this.state.isMonitoring = true;
    this.log('🚀 Starting VRAM monitoring', 'info');
    // Initial check
    this.runMonitoredCheck();
    // Set up periodic checks
    this.checkInterval = setInterval(() => {
      this.runMonitoredCheck();
    }, this.config.checkInterval);
  }

  /**
   * Stop monitoring and stop the maintenance scheduler.
   * Calling it while not monitoring is a no-op.
   */
  stop() {
    if (!this.state.isMonitoring) {
      return;
    }
    this.state.isMonitoring = false;
    if (this.checkInterval) {
      clearInterval(this.checkInterval);
      this.checkInterval = null;
    }
    this.scheduler.stop();
    this.log('⏹️ VRAM monitoring stopped', 'info');
  }

  /**
   * Force immediate cleanup, regardless of thresholds and cooldown.
   *
   * @returns {Promise<object>} Result of performCleanup.
   */
  async forceCleanup() {
    this.log('💪 Forcing immediate cleanup', 'info');
    const stats = await this.getVRAMStats();
    // Without real stats assume the worst and flag the numbers as placeholders
    return await this.performCleanup(
      stats || { usage: 1, usedGB: 0, totalGB: 0, estimated: true }
    );
  }

  /**
   * Update activity (called when processing requests)
   */
  updateActivity() {
    this.lastActivity = Date.now();
    this.scheduler.updateActivity();
  }

  /**
   * Pre-request check for heavy operations (e.g., FLUX generation).
   *
   * @param {string} [modelType='flux'] - One of 'flux', 'sd15', 'upscale';
   *   anything else uses the 'other' threshold.
   * @returns {Promise<object>} `{ success: true, ... }` when the operation may
   *   proceed; `{ success: false, reason, ... }` when it is refused; or the
   *   performCleanup result when a cleanup had to run first.
   */
  async preRequestCheck(modelType = 'flux') {
    // GPU PROTECTION: Always check in emergency mode
    if (this.state.emergencyMode && this.config.refuseOnEmergency) {
      this.log('❌ Operation refused - emergency mode active', 'error');
      return {
        success: false,
        reason: 'Emergency mode - GPU protection active',
        emergency: true
      };
    }
    if (!this.config.preRequestCheck) {
      return { success: true, reason: 'Pre-request check disabled' };
    }
    const stats = await this.getVRAMStats();
    if (!stats) {
      // DO NO HARM: Refuse operations when we can't verify GPU state
      if (this.config.refuseOnEmergency) {
        this.log('❌ Operation refused - cannot verify GPU state', 'error');
        return { success: false, reason: 'Cannot verify GPU state - refusing for safety' };
      }
      return { success: true, reason: 'No stats available, proceeding with caution' };
    }
    // Different thresholds for different models
    const thresholds = {
      flux: 0.75, // Need more headroom for FLUX (11GB)
      sd15: 0.90, // SD1.5 only needs 2GB
      upscale: 0.85, // Upscaling needs moderate headroom
      other: 0.80
    };
    // Own-property lookup so names like 'constructor' fall back to `other`
    const threshold = Object.prototype.hasOwnProperty.call(thresholds, modelType)
      ? thresholds[modelType]
      : thresholds.other;
    if (stats.usage <= threshold) {
      return { success: true, stats };
    }
    this.log(
      `⚠️ High VRAM before ${modelType} operation: ${(stats.usage * 100).toFixed(1)}%`,
      'warn'
    );
    // GPU PROTECTION: Don't attempt heavy operations in emergency
    if (stats.usage >= this.config.emergencyThreshold) {
      this.log('❌ Operation refused - VRAM critically high', 'error');
      return {
        success: false,
        reason: 'VRAM critically high - operation refused for GPU safety',
        stats
      };
    }
    // Perform cleanup before heavy operation
    const cleanup = await this.performCleanup(stats);
    if (cleanup && cleanup.success) {
      // Wait for cache to settle
      await new Promise(resolve => setTimeout(resolve, this.config.settleDelay));
      this.log('✅ Pre-request cleanup completed', 'success');
    }
    return cleanup;
  }

  /**
   * Check if system is idle
   *
   * @returns {boolean} True when no activity was recorded for `config.idleTimeout` ms.
   */
  isIdle() {
    const idleTime = Date.now() - this.lastActivity;
    return idleTime >= this.config.idleTimeout;
  }

  /**
   * Get current status
   *
   * @returns {object} Snapshot of the monitoring state and active thresholds.
   */
  getStatus() {
    const idleTime = Date.now() - this.lastActivity;
    return {
      monitoring: this.state.isMonitoring,
      lastCheck: this.state.lastCheck,
      lastCleanup: this.state.lastCleanup,
      currentUsage: this.state.currentUsage,
      consecutiveHighReadings: this.state.consecutiveHighReadings,
      consecutiveCleanupFailures: this.state.consecutiveCleanupFailures,
      emergencyMode: this.state.emergencyMode,
      statCheckFailures: this.state.statCheckFailures,
      isIdle: this.isIdle(),
      idleTimeSeconds: Math.floor(idleTime / 1000),
      gpuSafe: !this.state.emergencyMode && this.state.currentUsage < this.config.criticalThreshold,
      config: {
        warningThreshold: this.config.warningThreshold,
        criticalThreshold: this.config.criticalThreshold,
        cleanupThreshold: this.config.cleanupThreshold,
        emergencyThreshold: this.config.emergencyThreshold
      }
    };
  }

  /**
   * Logging helper. Writes a timestamped line to the console unless
   * `config.logging` is false.
   *
   * @param {string} message
   * @param {string} [level='info'] - 'info', 'warn', 'error' or 'success'.
   */
  log(message, level = 'info') {
    if (!this.config.logging) return;
    const timestamp = new Date().toISOString();
    const prefix = {
      info: 'ℹ️',
      warn: '⚠️',
      error: '❌',
      success: '✅'
    }[level] || '📝';
    console.log(`[${timestamp}] ${prefix} ${message}`);
  }
}
/**
 * Example usage in MCP server
 *
 * Creates a SimpleVRAMManager for the client, starts monitoring, wraps the
 * client's `generateImage` / `upscaleImage` (when present) so they record
 * activity and run a pre-request VRAM check, and schedules a forced cleanup
 * every day at 03:00 local time.
 *
 * A wrapped call rejects with an Error (`code: 'VRAM_CHECK_FAILED'`, the check
 * result on `vramCheck`) when the pre-request check does not succeed, so a
 * refused operation is really not executed.
 *
 * @param {object} comfyuiClient - Client whose methods get wrapped in place.
 * @returns {SimpleVRAMManager} The started manager.
 */
export function setupVRAMManagement(comfyuiClient) {
  const vramManager = new SimpleVRAMManager(comfyuiClient, {
    warningThreshold: 0.75,
    criticalThreshold: 0.85,
    cleanupThreshold: 0.90,
    checkInterval: 60000, // Check every minute
    idleTimeout: 30 * 60 * 1000, // 30 minutes idle
    autoCleanup: true,
    preRequestCheck: true, // Check before heavy operations
    defaultStrategy: 'sd15', // Use SD1.5 to evict FLUX
    cacheLRUSize: 3 // Match ComfyUI's --cache-lru setting
  });
  // Start monitoring
  vramManager.start();

  // Run the pre-request check and turn a failed check into a rejection;
  // ignoring the result would let "refused" operations run anyway.
  const ensureSafeToRun = async (modelType) => {
    const check = await vramManager.preRequestCheck(modelType);
    if (check && check.success === false) {
      const detail = check.reason || check.error || 'pre-request check failed';
      const refusal = new Error(`VRAM check refused ${modelType} operation: ${detail}`);
      refusal.code = 'VRAM_CHECK_FAILED';
      refusal.vramCheck = check;
      throw refusal;
    }
  };

  // Model names are matched case-insensitively; non-string values never match
  const isFluxRequest = (params) =>
    Boolean(params) &&
    typeof params.model === 'string' &&
    params.model.toLowerCase().includes('flux');

  // Hook into request processing with pre-request checks
  if (typeof comfyuiClient.generateImage === 'function') {
    const originalGenerateImage = comfyuiClient.generateImage;
    comfyuiClient.generateImage = async function(params, ...args) {
      vramManager.updateActivity();
      // Pre-request check for FLUX operations
      if (isFluxRequest(params)) {
        await ensureSafeToRun('flux');
      }
      return originalGenerateImage.apply(this, [params, ...args]);
    };
  }
  // Hook into upscaling
  if (comfyuiClient.upscaleImage) {
    const originalUpscale = comfyuiClient.upscaleImage;
    comfyuiClient.upscaleImage = async function(...args) {
      vramManager.updateActivity();
      await ensureSafeToRun('upscale');
      return originalUpscale.apply(this, args);
    };
  }
  // Schedule daily maintenance at 3 AM (if needed)
  const scheduleDailyMaintenance = () => {
    const now = new Date();
    const next3AM = new Date();
    next3AM.setHours(3, 0, 0, 0);
    if (next3AM <= now) {
      next3AM.setDate(next3AM.getDate() + 1);
    }
    const msUntil3AM = next3AM - now;
    setTimeout(() => {
      // Handle the rejection here: nothing awaits a timer callback
      Promise.resolve()
        .then(() => vramManager.forceCleanup())
        .catch((error) => {
          vramManager.log(`Daily maintenance failed: ${error.message}`, 'error');
        });
      scheduleDailyMaintenance(); // Schedule next day
    }, msUntil3AM);
  };
  scheduleDailyMaintenance();
  return vramManager;
}
export default SimpleVRAMManager;