// health-checker.ts (10.2 kB)
/**
* Health check system for monitoring system status
*/
import { stat } from 'node:fs/promises';

import { logger } from '../logging/logger.js';
import { metricsCollector } from './metrics-collector.js';
/**
 * Status levels reported by an individual health check and by the
 * aggregated system health.
 */
export type HealthStatus =
  | 'healthy'
  | 'degraded'
  | 'unhealthy';
/**
 * Recorded outcome of one execution of a named health check.
 */
export interface HealthCheck {
  /** Name the check was registered under. */
  name: string;

  /** Status reported by the check. */
  status: HealthStatus;

  /** Human-readable explanation of the status. */
  message: string;

  /** Timestamp string recording when the check last finished. */
  lastChecked: string;

  /** How long the check took, in milliseconds. */
  duration: number;

  /** Optional free-form diagnostic data supplied by the check. */
  details?: Record<string, any>;
}
/**
 * Aggregated health snapshot: an overall status plus every individual
 * check result that contributed to it.
 */
export interface SystemHealth {
  /** Overall status derived from the individual checks. */
  status: HealthStatus;

  /** Timestamp string recording when this snapshot was assembled. */
  timestamp: string;

  /** Uptime figure in milliseconds, as populated by whoever builds the snapshot. */
  uptime: number;

  /** Version string of the running application. */
  version: string;

  /** Individual check results included in this snapshot. */
  checks: HealthCheck[];

  /** Number of checks per status (`healthy`, `degraded`, `unhealthy`). */
  summary: Record<HealthStatus, number>;
}
/**
 * Signature of a pluggable health check: an async function that resolves
 * with a status, a message and, optionally, extra diagnostic details.
 */
export type HealthCheckFunction = () => Promise<
  Pick<HealthCheck, 'status' | 'message' | 'details'>
>;
/**
 * Registry and runner for named health checks.
 *
 * Checks are async functions registered under a unique name. `runChecks`
 * executes all of them in parallel, bounds each one with a timeout, stores
 * the individual results and rolls them up into a single `SystemHealth`
 * snapshot. A set of default checks (memory, cpu, requests, cache, disk) is
 * registered on construction.
 */
export class HealthChecker {
  /** Per-check timeout used when the constructor receives no override. */
  private static readonly DEFAULT_CHECK_TIMEOUT_MS = 5000;

  private readonly checks = new Map<string, HealthCheckFunction>();
  private readonly lastResults = new Map<string, HealthCheck>();
  private readonly checkTimeoutMs: number;
  private checkInterval: NodeJS.Timeout | null = null;
  private isRunning = false;

  /**
   * @param options.checkTimeoutMs Maximum time in milliseconds a single check
   *   may take before it is reported as unhealthy. Defaults to 5000.
   */
  constructor(options: { checkTimeoutMs?: number } = {}) {
    this.checkTimeoutMs = options.checkTimeoutMs ?? HealthChecker.DEFAULT_CHECK_TIMEOUT_MS;
    this.registerDefaultChecks();
  }

  /**
   * Register a health check. Registering under an existing name replaces the
   * previous check.
   *
   * @param name Unique name of the check.
   * @param checkFn Async function producing the check result.
   */
  public registerCheck(name: string, checkFn: HealthCheckFunction): void {
    this.checks.set(name, checkFn);
    logger.debug(`Health check registered: ${name}`);
  }

  /**
   * Remove a health check together with its last recorded result.
   *
   * @param name Name the check was registered under.
   */
  public unregisterCheck(name: string): void {
    this.checks.delete(name);
    this.lastResults.delete(name);
    logger.debug(`Health check unregistered: ${name}`);
  }

  /**
   * Run all registered health checks in parallel.
   *
   * A check that throws, rejects or exceeds the timeout is reported as
   * `unhealthy`; this method itself does not reject because of a failing
   * check.
   *
   * @returns Snapshot with the overall status and every check result.
   */
  public async runChecks(): Promise<SystemHealth> {
    const startTime = Date.now();
    logger.debug('Running health checks', { checkCount: this.checks.size });

    const results = await Promise.all(
      Array.from(this.checks.entries(), ([name, checkFn]) => this.runSingleCheck(name, checkFn))
    );

    const { status, summary } = HealthChecker.summarize(results);
    const systemHealth: SystemHealth = {
      status,
      timestamp: new Date().toISOString(),
      // Process uptime in milliseconds, the same figure getLastResults()
      // reports. (This used to be the elapsed time of the check run.)
      uptime: process.uptime() * 1000,
      version: process.env.npm_package_version || '1.0.0',
      checks: results,
      summary
    };

    logger.info('Health check completed', {
      status,
      duration: Date.now() - startTime,
      summary
    });
    return systemHealth;
  }

  /**
   * Build a snapshot from the most recently recorded result of each check
   * without running anything.
   *
   * @returns The snapshot, or `null` when no result has been recorded yet.
   */
  public getLastResults(): SystemHealth | null {
    if (this.lastResults.size === 0) {
      return null;
    }
    const results = Array.from(this.lastResults.values());
    const { status, summary } = HealthChecker.summarize(results);
    return {
      status,
      timestamp: new Date().toISOString(),
      uptime: process.uptime() * 1000,
      version: process.env.npm_package_version || '1.0.0',
      checks: results,
      summary
    };
  }

  /**
   * Start running all checks on a fixed interval. Logs a warning and does
   * nothing when periodic checks are already running.
   *
   * @param intervalMs Delay between runs in milliseconds.
   */
  public startPeriodicChecks(intervalMs: number = 60000): void {
    if (this.isRunning) {
      logger.warn('Health checks already running');
      return;
    }
    this.isRunning = true;
    this.checkInterval = setInterval(async () => {
      try {
        await this.runChecks();
      } catch (error) {
        logger.error('Periodic health check failed', error as Error);
      }
    }, intervalMs);
    logger.info('Periodic health checks started', { intervalMs });
  }

  /**
   * Stop periodic health checks. Safe to call when none are running.
   */
  public stopPeriodicChecks(): void {
    if (this.checkInterval) {
      clearInterval(this.checkInterval);
      this.checkInterval = null;
    }
    this.isRunning = false;
    logger.info('Periodic health checks stopped');
  }

  /** Count results per status and derive the overall (worst) status. */
  private static summarize(
    results: readonly HealthCheck[]
  ): { status: HealthStatus; summary: SystemHealth['summary'] } {
    const summary = {
      healthy: results.filter(r => r.status === 'healthy').length,
      degraded: results.filter(r => r.status === 'degraded').length,
      unhealthy: results.filter(r => r.status === 'unhealthy').length
    };
    let status: HealthStatus = 'healthy';
    if (summary.unhealthy > 0) {
      status = 'unhealthy';
    } else if (summary.degraded > 0) {
      status = 'degraded';
    }
    return { status, summary };
  }

  /** Run one check bounded by the timeout and convert any failure into an unhealthy result. */
  private async runSingleCheck(name: string, checkFn: HealthCheckFunction): Promise<HealthCheck> {
    const checkStart = Date.now();
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    try {
      const timeout = new Promise<never>((_, reject) => {
        timeoutHandle = setTimeout(
          () => reject(new Error('Health check timeout')),
          this.checkTimeoutMs
        );
      });
      const result = await Promise.race([checkFn(), timeout]);
      const check: HealthCheck = {
        name,
        status: result.status,
        message: result.message,
        lastChecked: new Date().toISOString(),
        duration: Date.now() - checkStart
      };
      if (result.details !== undefined) {
        check.details = result.details;
      }
      return this.remember(name, checkFn, check);
    } catch (error) {
      const check: HealthCheck = {
        name,
        status: 'unhealthy',
        message: error instanceof Error ? error.message : 'Unknown error',
        lastChecked: new Date().toISOString(),
        duration: Date.now() - checkStart
      };
      logger.error(`Health check failed: ${name}`, error as Error);
      return this.remember(name, checkFn, check);
    } finally {
      // Without this every completed check would leave a live timer behind
      // until the timeout elapsed, keeping the event loop busy.
      clearTimeout(timeoutHandle);
    }
  }

  /** Store a result as the latest one for its check, unless the check was unregistered or replaced meanwhile. */
  private remember(name: string, checkFn: HealthCheckFunction, check: HealthCheck): HealthCheck {
    if (this.checks.get(name) === checkFn) {
      this.lastResults.set(name, check);
    }
    return check;
  }

  /**
   * Register the default health checks: memory, cpu, requests, cache and disk.
   */
  private registerDefaultChecks(): void {
    // Memory usage: degraded above 75 %, unhealthy above 90 %.
    this.registerCheck('memory', async () => {
      const metrics = metricsCollector.getSystemMetrics();
      const memoryPercentage = metrics.memory.percentage;
      const details = { memoryPercentage, memoryUsed: metrics.memory.used };
      if (memoryPercentage > 90) {
        return {
          status: 'unhealthy',
          message: `Memory usage critical: ${memoryPercentage.toFixed(2)}%`,
          details
        };
      }
      if (memoryPercentage > 75) {
        return {
          status: 'degraded',
          message: `Memory usage high: ${memoryPercentage.toFixed(2)}%`,
          details
        };
      }
      return {
        status: 'healthy',
        message: `Memory usage normal: ${memoryPercentage.toFixed(2)}%`,
        details
      };
    });

    // CPU usage: degraded above 75 %, unhealthy above 90 %.
    this.registerCheck('cpu', async () => {
      const metrics = metricsCollector.getSystemMetrics();
      const cpuUsage = metrics.cpu.usage;
      const details = { cpuUsage, loadAverage: metrics.cpu.loadAverage };
      if (cpuUsage > 90) {
        return {
          status: 'unhealthy',
          message: `CPU usage critical: ${cpuUsage.toFixed(2)}%`,
          details
        };
      }
      if (cpuUsage > 75) {
        return {
          status: 'degraded',
          message: `CPU usage high: ${cpuUsage.toFixed(2)}%`,
          details
        };
      }
      return {
        status: 'healthy',
        message: `CPU usage normal: ${cpuUsage.toFixed(2)}%`,
        details
      };
    });

    // Request error rate: degraded above 5 %, unhealthy above 10 %.
    this.registerCheck('requests', async () => {
      const metrics = metricsCollector.getSystemMetrics();
      const errorRate = metrics.requests.errorRate;
      const details = {
        errorRate,
        totalRequests: metrics.requests.total,
        activeRequests: metrics.requests.active
      };
      if (errorRate > 10) {
        return {
          status: 'unhealthy',
          message: `Request error rate critical: ${errorRate.toFixed(2)}%`,
          details
        };
      }
      if (errorRate > 5) {
        return {
          status: 'degraded',
          message: `Request error rate elevated: ${errorRate.toFixed(2)}%`,
          details
        };
      }
      return {
        status: 'healthy',
        message: `Request error rate normal: ${errorRate.toFixed(2)}%`,
        details
      };
    });

    // Cache performance: degraded when a non-empty cache has a hit rate below 30 %.
    this.registerCheck('cache', async () => {
      const metrics = metricsCollector.getSystemMetrics();
      const hitRate = metrics.cache.hitRate;
      const details = {
        hitRate,
        entries: metrics.cache.entries,
        size: metrics.cache.size
      };
      if (hitRate < 30 && metrics.cache.entries > 0) {
        return {
          status: 'degraded',
          message: `Cache hit rate low: ${hitRate.toFixed(2)}%`,
          details
        };
      }
      return {
        status: 'healthy',
        message: `Cache performance good: ${hitRate.toFixed(2)}% hit rate`,
        details
      };
    });

    // Disk access: only verifies that the working directory can be stat'ed.
    // This is a basic check - in production you'd want to check actual disk usage.
    this.registerCheck('disk', async () => {
      try {
        const workingDirectory = process.cwd();
        // Uses the statically imported fs API: `require` is not defined in an
        // ES module, and the resulting error would be swallowed by the catch
        // below and misreported as a disk failure.
        await stat(workingDirectory);
        return {
          status: 'healthy',
          message: 'Disk access working',
          details: { workingDirectory }
        };
      } catch (error) {
        return {
          status: 'unhealthy',
          message: 'Disk access failed',
          details: { error: error instanceof Error ? error.message : 'Unknown error' }
        };
      }
    });
  }
}
// Export singleton instance.
// Constructing it when this module is first loaded registers the default
// checks; periodic execution only begins once startPeriodicChecks() is called.
export const healthChecker = new HealthChecker();