// state-backup-manager.ts
/**
* State Backup Manager - Automatic Evolution State Snapshots
*
* Provides comprehensive backup and restoration capabilities for GEPA state:
* - Evolution state snapshots
* - Trajectory data backup
* - Configuration and settings backup
* - Point-in-time recovery
* - Incremental and differential backups
*/
import { EventEmitter } from 'events';
import { existsSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, readdirSync, statSync } from 'fs';
import { join } from 'path';
import { createHash } from 'crypto';
import { gzipSync, gunzipSync } from 'zlib';
import type { EvolutionConfig, PromptCandidate, ExecutionTrajectory } from '../../types/gepa';
import { ResilienceSystem } from '../resilience/index';
import { ComponentStatus, type ComponentHealth } from './component-recovery-manager';
/**
 * Settings accepted by the state backup manager.
 */
export interface BackupConfig {
  /** Directory that receives component files and `<id>.meta.json` metadata files. */
  backupDirectory: string;

  /** Upper bound on retained backups; older ones beyond this count are pruned. */
  maxBackups: number;

  /** When true, component payloads are gzip-compressed and stored with a `.gz` suffix. */
  compressionEnabled: boolean;

  /** When true, SHA-256 checksums are compared during integrity validation and restore. */
  checksumValidation: boolean;

  /** When false, every backup is recorded with type `'full'`. */
  incrementalBackups: boolean;

  /** Backups older than this many days are pruned. */
  backupRetentionDays: number;

  /** Milliseconds between `'autoBackupTrigger'` events; the timer is only started for values above 0. */
  autoBackupInterval: number;

  /** Copied into each entry's `encrypted` flag. NOTE(review): no encryption step is visible in this file. */
  encryptionEnabled: boolean;

  /** NOTE(review): not read by the code visible in this file - confirm intended use. */
  backupMetadataTracking: boolean;
}
/**
 * Catalogue record describing one stored backup. Serialized as `<id>.meta.json`.
 */
export interface BackupEntry {
  /** Unique backup identifier; also the prefix of the backup's file names. */
  id: string;

  /** Moment the backup was created. */
  timestamp: Date;

  /** Optional human-readable label. */
  label?: string;

  /** Backup strategy recorded for this entry. */
  type:
    | 'full'
    | 'incremental'
    | 'differential';

  /** Combined stored size of all components, in bytes. */
  size: number;

  /** Hex digest representing the content of the backup as a whole. */
  checksum: string;

  /** Whether compression was enabled when the backup was written. */
  compressed: boolean;

  /** Whether encryption was flagged as enabled when the backup was written. */
  encrypted: boolean;

  /** The individual files that make up the backup. */
  components: Array<BackupComponent>;

  /** Descriptive information captured at backup time. */
  metadata: BackupMetadata;

  /** Estimated restore duration, in seconds. */
  restoreTimeEstimate: number;
}
/**
 * One file belonging to a backup, plus what is needed to verify and restore it.
 */
export interface BackupComponent {
  /** Component name; restore options select components by this value. */
  name: string;

  /** Category of data held by the component; decides which restore routine handles it. */
  type:
    | 'evolution-state'
    | 'trajectory-data'
    | 'configuration'
    | 'cache'
    | 'metrics';

  /** Size of the stored file in bytes. */
  size: number;

  /** SHA-256 hex digest of the stored file bytes. */
  checksum: string;

  /** Location of the stored file. */
  path: string;

  /** Dependencies of this component. NOTE(review): always empty in the visible writers. */
  dependencies: Array<string>;

  /** Non-critical components may be skipped on restore via `skipNonCritical`. */
  critical: boolean;
}
/**
 * Descriptive information recorded alongside a backup.
 */
export interface BackupMetadata {
  /** Version string of the system that produced the backup. */
  systemVersion: string;

  /** Evolution generation number at backup time. */
  evolutionGeneration: number;

  /** Number of candidates in the population at backup time. */
  activePopulationSize: number;

  /** Size of the Pareto frontier at backup time. */
  paretoFrontierSize: number;

  /** Number of trajectories covered by the backup. */
  totalTrajectories: number;

  /** Hash of the evolution configuration; an empty string when no configuration was captured. */
  configurationHash: string;

  /** Why the backup was taken. */
  backupReason: string;

  /** Named numeric performance figures captured with the backup. */
  performanceMetrics: Record<string, number>;

  /** Warnings noted while the backup was produced. */
  warnings: Array<string>;
}
/**
 * Optional switches controlling a restore operation. Every field may be omitted.
 */
export interface RestoreOptions {
  /** Names of the specific components to restore; when omitted, all (or all critical) components are used. */
  components?: Array<string>;

  /** Integrity validation runs unless this is explicitly `false`. */
  validateIntegrity?: boolean;

  /** Take a snapshot labelled `pre-restore-snapshot` before restoring. */
  createPreRestoreBackup?: boolean;

  /** When true, the restore continues even if integrity validation reported failures. */
  overwriteExisting?: boolean;

  /** Restore only critical components; applies only when `components` is not given. */
  skipNonCritical?: boolean;

  /** Timeout in milliseconds. NOTE(review): not read by the code visible in this file. */
  restoreTimeout?: number;
}
/**
 * Outcome report of a restore operation.
 */
export interface RestoreResult {
  /** True only when no component failed to restore. */
  success: boolean;

  /** Identifier of the backup that was restored from. */
  backupId: string;

  /** Names of components restored successfully. */
  restoredComponents: Array<string>;

  /** Names of components whose restore threw. */
  failedComponents: Array<string>;

  /** Human-readable descriptions of the problems encountered. */
  warnings: Array<string>;

  /** Wall-clock duration of the restore, in milliseconds. */
  restoreTime: number;

  /** Identifier of the snapshot taken before restoring, when one was requested. */
  preRestoreBackupId?: string;

  /** Per-component integrity validation results; empty when validation was skipped. */
  integrityChecks: Array<IntegrityCheckResult>;
}
/**
 * Result of verifying one backup component against its recorded metadata.
 */
export interface IntegrityCheckResult {
  /** Name of the component that was checked. */
  component: string;

  /** Overall verdict for the component. */
  valid: boolean;

  /** Whether the file's digest equals the recorded checksum. */
  checksumMatch: boolean;

  /** Whether the file's size on disk equals the recorded size. */
  sizeMatch: boolean;

  /** Whether dependencies are satisfied. NOTE(review): the visible validator never sets this to false. */
  dependenciesValid: boolean;

  /** Descriptions of every problem found for this component. */
  errors: Array<string>;
}
/**
 * State Backup Manager Implementation
 *
 * Writes snapshots of GEPA state into `config.backupDirectory` and keeps an in-memory
 * registry of them. Each backup consists of one file per component
 * (`<id>_<part>.json[.gz]`) plus a `<id>.meta.json` file holding its {@link BackupEntry}.
 *
 * Events emitted: `initialized`, `error`, `backupCreated`, `backupDeleted`,
 * `componentRestored`, `componentRestoreFailed`, `restoreCompleted`, `restoreFailed`,
 * `autoBackupTrigger`, `evolutionStateRestored`, `trajectoryDataRestored`,
 * `configurationRestored`, `metricsRestored`.
 */
export class StateBackupManager extends EventEmitter {
  private config: BackupConfig;
  /** In-memory index of known backups, keyed by backup id. */
  private backupRegistry: Map<string, BackupEntry> = new Map();
  private autoBackupTimer: NodeJS.Timeout | undefined;
  /** Timestamp of the most recent backup known to this instance, if any. */
  private lastBackupTime: Date | null = null;
  private resilience: ResilienceSystem;

  /**
   * @param config Overrides merged over the built-in defaults. The backup directory is
   *   created immediately if it does not exist.
   */
  constructor(config: Partial<BackupConfig> = {}) {
    super();
    this.config = {
      backupDirectory: join(process.cwd(), '.gepa', 'backups'),
      maxBackups: 50,
      compressionEnabled: true,
      checksumValidation: true,
      incrementalBackups: true,
      backupRetentionDays: 30,
      autoBackupInterval: 3600000, // 1 hour
      encryptionEnabled: false,
      backupMetadataTracking: true,
      ...config
    };
    this.resilience = ResilienceSystem.getInstance();
    this.ensureBackupDirectory();
  }

  /**
   * Initialize backup manager: load metadata from disk, prune old backups, verify the
   * directory is writable and start the auto-backup timer.
   *
   * @throws Rethrows any failure after notifying `'error'` listeners (if there are any).
   */
  async initialize(): Promise<void> {
    try {
      await this.loadBackupRegistry();
      await this.cleanupOldBackups();
      // Verify before starting the timer so a failed initialization leaves no timer behind.
      await this.verifyBackupDirectory();
      this.startAutoBackup();
      this.emit('initialized', {
        backupCount: this.backupRegistry.size,
        lastBackup: this.lastBackupTime
      });
    } catch (error) {
      // EventEmitter throws its own error when 'error' is emitted without a listener,
      // which would hide the real failure from the caller.
      if (this.listenerCount('error') > 0) {
        this.emit('error', { operation: 'initialize', error });
      }
      throw error;
    }
  }

  /**
   * Create evolution state backup (evolution state, configuration and metrics components).
   *
   * @param evolutionState State to snapshot.
   * @param label Optional label; also recorded as the backup reason.
   * @returns The registered backup entry.
   */
  async createEvolutionStateBackup(
    evolutionState: {
      config: EvolutionConfig;
      population: PromptCandidate[];
      generation: number;
      paretoFrontier: any;
      metrics: any;
    },
    label?: string
  ): Promise<BackupEntry> {
    return this.resilience.executeWithFullProtection(
      async () => {
        const backupId = this.generateBackupId();
        const timestamp = new Date();
        const backupType = this.determineBackupType();

        const components: BackupComponent[] = [
          await this.createEvolutionStateComponent(backupId, evolutionState),
          await this.createConfigurationComponent(backupId, evolutionState.config),
          await this.createMetricsComponent(backupId, evolutionState.metrics)
        ];

        const totalSize = components.reduce((sum, comp) => sum + comp.size, 0);
        const totalChecksum = this.calculateCombinedChecksum(components);

        const metadata: BackupMetadata = {
          systemVersion: process.env.npm_package_version || '1.0.0',
          evolutionGeneration: evolutionState.generation,
          activePopulationSize: evolutionState.population.length,
          paretoFrontierSize: evolutionState.paretoFrontier?.size || 0,
          totalTrajectories: 0, // Will be updated when trajectory data is added
          configurationHash: this.hashObject(evolutionState.config),
          backupReason: label || 'evolution-state-snapshot',
          performanceMetrics: evolutionState.metrics?.performance || {},
          warnings: []
        };

        const backupEntry: BackupEntry = {
          id: backupId,
          timestamp,
          label: label || `backup-${Date.now()}`,
          type: backupType,
          size: totalSize,
          checksum: totalChecksum,
          compressed: this.config.compressionEnabled,
          encrypted: this.config.encryptionEnabled,
          components,
          metadata,
          restoreTimeEstimate: this.estimateRestoreTime(totalSize)
        };

        await this.saveBackupEntry(backupEntry);
        this.backupRegistry.set(backupId, backupEntry);
        this.lastBackupTime = timestamp;

        await this.cleanupOldBackups();
        this.emit('backupCreated', backupEntry);
        return backupEntry;
      },
      {
        serviceName: 'state-backup',
        context: {
          name: 'create-evolution-backup',
          priority: 'high'
        }
      }
    );
  }

  /**
   * Create trajectory data backup (a single trajectory component, always typed `'full'`).
   *
   * @param trajectories Trajectories to store.
   * @param label Optional label; also recorded as the backup reason.
   * @returns The registered backup entry.
   */
  async createTrajectoryDataBackup(
    trajectories: ExecutionTrajectory[],
    label?: string
  ): Promise<BackupEntry> {
    return this.resilience.executeWithFullProtection(
      async () => {
        const backupId = this.generateBackupId();
        const timestamp = new Date();

        const trajectoryComponent = await this.createTrajectoryComponent(backupId, trajectories);
        const totalSize = trajectoryComponent.size;

        const metadata: BackupMetadata = {
          systemVersion: process.env.npm_package_version || '1.0.0',
          evolutionGeneration: 0,
          activePopulationSize: 0,
          paretoFrontierSize: 0,
          totalTrajectories: trajectories.length,
          configurationHash: '',
          backupReason: label || 'trajectory-data-backup',
          performanceMetrics: {},
          warnings: []
        };

        const backupEntry: BackupEntry = {
          id: backupId,
          timestamp,
          label: label || `backup-${Date.now()}`,
          type: 'full',
          size: totalSize,
          checksum: trajectoryComponent.checksum,
          compressed: this.config.compressionEnabled,
          encrypted: this.config.encryptionEnabled,
          components: [trajectoryComponent],
          metadata,
          restoreTimeEstimate: this.estimateRestoreTime(totalSize)
        };

        await this.saveBackupEntry(backupEntry);
        this.backupRegistry.set(backupId, backupEntry);
        // Trajectory backups count as backups everywhere else (registry loading, health,
        // pruning), so they update the last-backup time and respect the retention limits too.
        this.lastBackupTime = timestamp;

        await this.cleanupOldBackups();
        this.emit('backupCreated', backupEntry);
        return backupEntry;
      },
      {
        serviceName: 'state-backup',
        context: {
          name: 'create-trajectory-backup',
          priority: 'medium'
        }
      }
    );
  }

  /**
   * Restore system state from backup.
   *
   * Individual component failures are collected in the result rather than thrown.
   *
   * @param backupId Identifier of a registered backup.
   * @param options Restore switches; see {@link RestoreOptions}.
   * @returns A report of restored/failed components and integrity checks.
   * @throws If the backup is unknown, or integrity validation fails and
   *   `options.overwriteExisting` is not set.
   */
  async restoreFromBackup(
    backupId: string,
    options: RestoreOptions = {}
  ): Promise<RestoreResult> {
    return this.resilience.executeWithFullProtection(
      async () => {
        const startTime = Date.now();
        const backup = this.backupRegistry.get(backupId);
        if (!backup) {
          throw new Error(`Backup not found: ${backupId}`);
        }

        const result: RestoreResult = {
          success: false,
          backupId,
          restoredComponents: [],
          failedComponents: [],
          warnings: [],
          restoreTime: 0,
          integrityChecks: []
        };

        try {
          if (options.createPreRestoreBackup) {
            const preBackup = await this.createQuickBackup('pre-restore-snapshot');
            result.preRestoreBackupId = preBackup.id;
          }

          // Validation is opt-out: it runs unless explicitly disabled.
          if (options.validateIntegrity !== false) {
            const integrityChecks = await this.validateBackupIntegrity(backup);
            result.integrityChecks = integrityChecks;
            const hasFailures = integrityChecks.some(check => !check.valid);
            if (hasFailures && !options.overwriteExisting) {
              throw new Error('Backup integrity validation failed');
            }
          }

          const requested = options.components;
          const componentsToRestore = requested
            ? backup.components.filter(comp => requested.includes(comp.name))
            : backup.components.filter(comp => !options.skipNonCritical || comp.critical);

          for (const component of componentsToRestore) {
            try {
              await this.restoreComponent(component, options);
              result.restoredComponents.push(component.name);
              this.emit('componentRestored', {
                backupId,
                component: component.name
              });
            } catch (error) {
              result.failedComponents.push(component.name);
              result.warnings.push(
                `Failed to restore ${component.name}: ${StateBackupManager.describeError(error)}`
              );
              this.emit('componentRestoreFailed', {
                backupId,
                component: component.name,
                error
              });
            }
          }

          result.success = result.failedComponents.length === 0;
          result.restoreTime = Date.now() - startTime;
          this.emit('restoreCompleted', result);
          return result;
        } catch (error) {
          result.success = false;
          result.restoreTime = Date.now() - startTime;
          result.warnings.push(`Restore failed: ${StateBackupManager.describeError(error)}`);
          this.emit('restoreFailed', { backupId, error });
          throw error;
        }
      },
      {
        serviceName: 'state-backup',
        context: {
          name: 'restore-from-backup',
          priority: 'critical'
        }
      }
    );
  }

  /**
   * List available backups, newest first.
   *
   * @param filter Optional criteria; all given criteria must match. `label` and
   *   `component` are substring matches.
   */
  getAvailableBackups(filter?: {
    type?: BackupEntry['type'];
    since?: Date;
    label?: string;
    component?: string;
  }): BackupEntry[] {
    let backups = Array.from(this.backupRegistry.values());
    if (filter) {
      const { type, since, label, component } = filter;
      if (type) {
        backups = backups.filter(b => b.type === type);
      }
      if (since) {
        backups = backups.filter(b => b.timestamp >= since);
      }
      if (label) {
        backups = backups.filter(b => b.label?.includes(label));
      }
      if (component) {
        backups = backups.filter(b => b.components.some(c => c.name.includes(component)));
      }
    }
    return backups.sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
  }

  /**
   * Get backup details.
   *
   * @returns The entry, or `null` when the id is unknown.
   */
  getBackupDetails(backupId: string): BackupEntry | null {
    return this.backupRegistry.get(backupId) || null;
  }

  /**
   * Delete backup: removes its component files, its metadata file and its registry entry.
   *
   * File removal is best-effort; failures are logged and do not abort the deletion.
   *
   * @throws If the backup id is not registered.
   */
  async deleteBackup(backupId: string): Promise<void> {
    const backup = this.backupRegistry.get(backupId);
    if (!backup) {
      throw new Error(`Backup not found: ${backupId}`);
    }

    // The metadata file must go as well, otherwise the backup is re-registered
    // (with missing component files) the next time the registry is loaded.
    const metaPath = join(this.config.backupDirectory, `${backupId}.meta.json`);
    const paths = [...backup.components.map(component => component.path), metaPath];
    for (const path of paths) {
      try {
        if (existsSync(path)) {
          unlinkSync(path);
        }
      } catch (error) {
        // eslint-disable-next-line no-console
        console.warn(`Failed to delete backup file: ${path}`, error);
      }
    }

    this.backupRegistry.delete(backupId);
    await this.saveBackupRegistry();
    this.emit('backupDeleted', { backupId });
  }

  /**
   * Get health status derived from backup count, age of the last backup (minutes)
   * and total stored size.
   */
  async getHealthStatus(): Promise<ComponentHealth> {
    const backupCount = this.backupRegistry.size;
    const lastBackupAge = this.lastBackupTime
      ? (Date.now() - this.lastBackupTime.getTime()) / 60000
      : Infinity;
    const storageUsage = Array.from(this.backupRegistry.values())
      .reduce((sum, backup) => sum + backup.size, 0);

    const recommendations: string[] = [];
    let status: 'healthy' | 'degraded' | 'critical' = 'healthy';

    if (backupCount === 0) {
      status = 'critical';
      recommendations.push('No backups available - create initial backup');
    }
    if (lastBackupAge > 120) { // 2 hours
      status = status === 'critical' ? 'critical' : 'degraded';
      recommendations.push('Last backup is over 2 hours old');
    }
    if (backupCount > this.config.maxBackups * 0.9) {
      recommendations.push('Approaching maximum backup limit');
    }

    return {
      status: status as ComponentStatus,
      lastCheck: new Date(),
      metrics: { backupCount, lastBackupAge, storageUsage },
      errors: [],
      warnings: [],
      recommendations,
      uptime: Date.now() - (this.lastBackupTime?.getTime() || Date.now()),
      recoveryCount: 0
    };
  }

  /**
   * Private helper methods
   */

  /** Extracts a printable message from anything that may have been thrown. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Creates the backup directory (including parents) when it is missing. */
  private ensureBackupDirectory(): void {
    if (!existsSync(this.config.backupDirectory)) {
      mkdirSync(this.config.backupDirectory, { recursive: true });
    }
  }

  /** Builds an id of the form `backup_<epoch-ms>_<random base36>`. */
  private generateBackupId(): string {
    const timestamp = Date.now();
    const random = Math.random().toString(36).substring(2, 15);
    return `backup_${timestamp}_${random}`;
  }

  /**
   * Chooses the type recorded for the next backup.
   *
   * NOTE(review): the component writers in this class always store complete payloads;
   * the type is recorded as metadata only.
   */
  private determineBackupType(): 'full' | 'incremental' | 'differential' {
    if (!this.config.incrementalBackups || this.backupRegistry.size === 0) {
      return 'full';
    }
    // Simple strategy: every 10th backup is full, others are incremental
    return this.backupRegistry.size % 10 === 0 ? 'full' : 'incremental';
  }

  /**
   * Serializes `payload` as JSON, optionally gzips it, writes it to the backup
   * directory and returns the describing component record.
   *
   * @param fileStem Middle part of the file name: `<backupId>_<fileStem>.json[.gz]`.
   */
  private writeComponent(
    backupId: string,
    kind: BackupComponent['type'],
    fileStem: string,
    payload: unknown,
    critical: boolean
  ): BackupComponent {
    // JSON.stringify returns undefined for undefined/function payloads, which cannot be
    // written to disk; store JSON null in that case.
    const json: string = JSON.stringify(payload, null, 2) ?? 'null';
    const useGzip = this.config.compressionEnabled;
    const bytes = useGzip ? gzipSync(json) : Buffer.from(json);
    const path = join(
      this.config.backupDirectory,
      `${backupId}_${fileStem}.json${useGzip ? '.gz' : ''}`
    );
    writeFileSync(path, bytes);
    return {
      name: kind,
      type: kind,
      size: bytes.length,
      // Digest of the bytes as stored, i.e. after compression.
      checksum: createHash('sha256').update(bytes).digest('hex'),
      path,
      dependencies: [],
      critical
    };
  }

  private async createEvolutionStateComponent(
    backupId: string,
    evolutionState: any
  ): Promise<BackupComponent> {
    return this.writeComponent(backupId, 'evolution-state', 'evolution_state', evolutionState, true);
  }

  private async createConfigurationComponent(
    backupId: string,
    config: any
  ): Promise<BackupComponent> {
    return this.writeComponent(backupId, 'configuration', 'configuration', config, true);
  }

  private async createMetricsComponent(
    backupId: string,
    metrics: any
  ): Promise<BackupComponent> {
    return this.writeComponent(backupId, 'metrics', 'metrics', metrics, false);
  }

  private async createTrajectoryComponent(
    backupId: string,
    trajectories: ExecutionTrajectory[]
  ): Promise<BackupComponent> {
    return this.writeComponent(backupId, 'trajectory-data', 'trajectories', trajectories, true);
  }

  /** SHA-256 over the concatenation of the component checksums, in component order. */
  private calculateCombinedChecksum(components: BackupComponent[]): string {
    const combinedChecksums = components.map(c => c.checksum).join('');
    return createHash('sha256').update(combinedChecksums).digest('hex');
  }

  /**
   * Returns a copy of `value` whose object keys are sorted at every nesting level,
   * so that serialization does not depend on property insertion order.
   */
  private canonicalize(value: unknown): unknown {
    if (Array.isArray(value)) {
      return value.map(item => this.canonicalize(item));
    }
    if (value !== null && typeof value === 'object') {
      const source = value as Record<string, unknown> & { toJSON?: () => unknown };
      // Respect custom serialization (e.g. Date) the same way JSON.stringify would.
      if (typeof source.toJSON === 'function') {
        return this.canonicalize(source.toJSON());
      }
      const sorted: Record<string, unknown> = {};
      for (const key of Object.keys(source).sort()) {
        sorted[key] = this.canonicalize(source[key]);
      }
      return sorted;
    }
    return value;
  }

  /**
   * SHA-256 of a key-order-independent JSON form of `obj`.
   *
   * Keys are sorted recursively. Passing the top-level keys as a `JSON.stringify`
   * replacer array would also filter nested objects by that list and silently drop
   * nested properties from the hash.
   */
  private hashObject(obj: unknown): string {
    const str: string = JSON.stringify(this.canonicalize(obj)) ?? 'null';
    return createHash('sha256').update(str).digest('hex');
  }

  /** Whole seconds needed at an assumed processing rate of 10 MB/s. */
  private estimateRestoreTime(sizeBytes: number): number {
    const processingRate = 10 * 1024 * 1024; // 10 MB/s
    return Math.ceil(sizeBytes / processingRate);
  }

  /** Writes the entry to `<id>.meta.json` in the backup directory. */
  private async saveBackupEntry(backup: BackupEntry): Promise<void> {
    const backupFile = join(this.config.backupDirectory, `${backup.id}.meta.json`);
    writeFileSync(backupFile, JSON.stringify(backup, null, 2));
  }

  /**
   * Populates the registry from every `*.meta.json` file in the backup directory.
   * Unreadable or invalid files are logged and skipped.
   */
  private async loadBackupRegistry(): Promise<void> {
    if (!existsSync(this.config.backupDirectory)) {
      return;
    }
    const metaFiles = readdirSync(this.config.backupDirectory)
      .filter(f => f.endsWith('.meta.json'));

    for (const metaFile of metaFiles) {
      try {
        const path = join(this.config.backupDirectory, metaFile);
        const parsed = JSON.parse(readFileSync(path, 'utf8')) as BackupEntry;
        if (!this.validateBackupEntry(parsed)) {
          continue;
        }
        // JSON has no date type: the timestamp comes back as an ISO string and has to
        // be revived, otherwise every later `timestamp.getTime()` call throws.
        const timestamp = new Date(parsed.timestamp);
        if (Number.isNaN(timestamp.getTime())) {
          // eslint-disable-next-line no-console
          console.warn(`Skipping backup metadata with invalid timestamp: ${metaFile}`);
          continue;
        }
        const backup: BackupEntry = { ...parsed, timestamp };
        this.backupRegistry.set(backup.id, backup);
        if (!this.lastBackupTime || timestamp > this.lastBackupTime) {
          this.lastBackupTime = timestamp;
        }
      } catch (error) {
        // eslint-disable-next-line no-console
        console.warn(`Failed to load backup metadata: ${metaFile}`, error);
      }
    }
  }

  /** Minimal structural check of a parsed metadata object. */
  private validateBackupEntry(backup: BackupEntry): boolean {
    return !!(
      backup &&
      backup.id &&
      backup.timestamp &&
      backup.type &&
      Array.isArray(backup.components)
    );
  }

  /** Writes all registry entries to `registry.json` in the backup directory. */
  private async saveBackupRegistry(): Promise<void> {
    const registryFile = join(this.config.backupDirectory, 'registry.json');
    const data = JSON.stringify(Array.from(this.backupRegistry.entries()), null, 2);
    writeFileSync(registryFile, data);
  }

  /**
   * Checks each component file for existence, recorded size and (when
   * `checksumValidation` is enabled) SHA-256 digest.
   */
  private async validateBackupIntegrity(backup: BackupEntry): Promise<IntegrityCheckResult[]> {
    const results: IntegrityCheckResult[] = [];
    for (const component of backup.components) {
      const result: IntegrityCheckResult = {
        component: component.name,
        valid: true,
        checksumMatch: false,
        sizeMatch: false,
        dependenciesValid: true,
        errors: []
      };
      try {
        if (!existsSync(component.path)) {
          result.valid = false;
          result.errors.push('File not found');
          results.push(result);
          continue;
        }

        const stats = statSync(component.path);
        result.sizeMatch = stats.size === component.size;
        if (!result.sizeMatch) {
          result.errors.push(`Size mismatch: expected ${component.size}, got ${stats.size}`);
        }

        if (this.config.checksumValidation) {
          const fileData = readFileSync(component.path);
          const actualChecksum = createHash('sha256').update(fileData).digest('hex');
          result.checksumMatch = actualChecksum === component.checksum;
          if (!result.checksumMatch) {
            result.errors.push('Checksum mismatch');
          }
        } else {
          // Checksum comparison disabled: treat as matching.
          result.checksumMatch = true;
        }

        result.valid = result.sizeMatch && result.checksumMatch && result.dependenciesValid;
      } catch (error) {
        result.valid = false;
        result.errors.push(`Validation error: ${StateBackupManager.describeError(error)}`);
      }
      results.push(result);
    }
    return results;
  }

  /**
   * Reads one component file, verifies it, decodes it and hands the parsed data to
   * the restore routine for its type.
   *
   * @throws If the file is missing, the checksum differs, or the content cannot be decoded.
   */
  private async restoreComponent(
    component: BackupComponent,
    _options: RestoreOptions
  ): Promise<void> {
    if (!existsSync(component.path)) {
      throw new Error(`Backup file not found: ${component.path}`);
    }
    const fileData = readFileSync(component.path);

    // Verify the stored bytes before decompressing them, so corruption is reported
    // as a checksum failure rather than as a gzip error.
    if (this.config.checksumValidation) {
      const actualChecksum = createHash('sha256').update(fileData).digest('hex');
      if (actualChecksum !== component.checksum) {
        throw new Error(`Checksum validation failed for ${component.name}`);
      }
    }

    // Decide by how the file was written (its `.gz` suffix), not by the current
    // configuration, which may have changed since the backup was taken.
    const data = component.path.endsWith('.gz') ? gunzipSync(fileData) : fileData;
    const parsedData = JSON.parse(data.toString());

    switch (component.type) {
      case 'evolution-state':
        await this.restoreEvolutionState(parsedData);
        break;
      case 'trajectory-data':
        await this.restoreTrajectoryData(parsedData);
        break;
      case 'configuration':
        await this.restoreConfiguration(parsedData);
        break;
      case 'metrics':
        await this.restoreMetrics(parsedData);
        break;
      default:
        // eslint-disable-next-line no-console
        console.warn(`Unknown component type: ${component.type}`);
    }
  }

  private async restoreEvolutionState(data: any): Promise<void> {
    // Placeholder: only announces the restored data; no engine is updated here.
    this.emit('evolutionStateRestored', data);
  }

  private async restoreTrajectoryData(data: ExecutionTrajectory[]): Promise<void> {
    // Placeholder: only announces how many trajectories were read.
    this.emit('trajectoryDataRestored', { count: data.length });
  }

  private async restoreConfiguration(data: any): Promise<void> {
    // Placeholder: only announces the restored data.
    this.emit('configurationRestored', data);
  }

  private async restoreMetrics(data: any): Promise<void> {
    // Placeholder: only announces the restored data.
    this.emit('metricsRestored', data);
  }

  /**
   * Deletes backups beyond `maxBackups` (keeping the newest) and backups older than
   * `backupRetentionDays`.
   */
  private async cleanupOldBackups(): Promise<void> {
    const newestFirst = Array.from(this.backupRegistry.values())
      .sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
    const cutoffDate = new Date(
      Date.now() - this.config.backupRetentionDays * 24 * 60 * 60 * 1000
    );

    // Build one list so a backup that is both surplus and expired is deleted only
    // once; deleting it a second time would throw "Backup not found".
    const doomed = newestFirst.filter(
      (backup, index) => index >= this.config.maxBackups || backup.timestamp < cutoffDate
    );
    for (const backup of doomed) {
      await this.deleteBackup(backup.id);
    }
  }

  /**
   * Confirms the backup directory is writable by creating and removing a probe file.
   *
   * @throws If the probe file cannot be written or removed.
   */
  private async verifyBackupDirectory(): Promise<void> {
    try {
      const testFile = join(this.config.backupDirectory, '.test_write');
      writeFileSync(testFile, 'test');
      unlinkSync(testFile);
    } catch {
      throw new Error(`Backup directory not writable: ${this.config.backupDirectory}`);
    }
  }

  /**
   * Starts the interval that emits `'autoBackupTrigger'`. The manager itself does not
   * create backups on the timer; listeners are expected to react to the event.
   */
  private startAutoBackup(): void {
    // Never keep two timers alive when initialize() is called more than once.
    if (this.autoBackupTimer) {
      clearInterval(this.autoBackupTimer);
      this.autoBackupTimer = undefined;
    }
    if (this.config.autoBackupInterval > 0) {
      this.autoBackupTimer = setInterval(() => {
        this.emit('autoBackupTrigger');
      }, this.config.autoBackupInterval);
    }
  }

  /**
   * Creates a backup from a hard-coded mock evolution state.
   *
   * NOTE(review): placeholder - it does not capture the live system state.
   */
  private async createQuickBackup(label: string): Promise<BackupEntry> {
    const mockEvolutionState = {
      config: {
        taskDescription: 'Mock evolution state',
        populationSize: 10,
        maxGenerations: 50,
        mutationRate: 0.1
      },
      population: [],
      generation: 0,
      paretoFrontier: null,
      metrics: {}
    };
    return this.createEvolutionStateBackup(mockEvolutionState, label);
  }

  /**
   * Cleanup resources: stops the auto-backup timer, persists the registry and
   * removes all event listeners.
   */
  async cleanup(): Promise<void> {
    if (this.autoBackupTimer) {
      clearInterval(this.autoBackupTimer);
      this.autoBackupTimer = undefined;
    }
    await this.saveBackupRegistry();
    this.removeAllListeners();
  }
}