/**
* Health check endpoints for Kubernetes and load balancers
*
* Implements standard health probe patterns:
* - /health - Overall health status
* - /health/live - Liveness probe (is the process running?)
* - /health/ready - Readiness probe (can it accept traffic?)
*/
import { Express, Request, Response } from 'express';
import { createLogger } from './logger.js';
import { getConfig } from '../config/index.js';
// Module-level logger obtained from createLogger('health'); every log call in this file goes through it.
const logger = createLogger('health');
/**
* Result reported for a single health check.
*/
export interface HealthCheckResult {
/** Outcome of the check. */
status: 'healthy' | 'degraded' | 'unhealthy';
/** Optional human-readable detail, e.g. why the check is degraded or unhealthy. */
message?: string;
/** Time the check took, in milliseconds. The registry's runAll() fills this in when it runs the check. */
durationMs?: number;
}
/**
* Health check function type: takes no arguments and resolves to a HealthCheckResult.
*
* A check that throws or rejects is recorded as 'unhealthy' by the registry's runAll().
*/
export type HealthCheck = () => Promise<HealthCheckResult>;
/**
* Overall health response, as produced by the registry's runAll() and served by GET /health.
*/
export interface HealthResponse {
/** Aggregate status: 'unhealthy' if any check is unhealthy or throws, else 'degraded' if any check is degraded, else 'healthy'. */
status: 'healthy' | 'degraded' | 'unhealthy';
/** Taken from the configuration's serverVersion. */
version: string;
/** Whole seconds elapsed since the registry's start time. */
uptime: number;
/** ISO 8601 timestamp (Date#toISOString) taken when the response was built. */
timestamp: string;
/** Individual check results keyed by the name each check was registered under. */
checks: Record<string, HealthCheckResult>;
}
/**
 * Health check registry.
 *
 * Holds the named health checks and the start time used to report uptime.
 * Registering a name that already exists replaces the earlier check.
 */
class HealthCheckRegistry {
  private readonly checks = new Map<string, HealthCheck>();
  private startTime = Date.now();

  /**
   * Register a health check.
   *
   * @param name - Key the check is stored (and later reported) under.
   * @param check - Async function producing the check's result.
   */
  register(name: string, check: HealthCheck): void {
    this.checks.set(name, check);
    logger.debug('Registered health check', { name });
  }

  /**
   * Unregister a health check. Unknown names are ignored.
   *
   * @param name - Key the check was registered under.
   */
  unregister(name: string): void {
    this.checks.delete(name);
  }

  /**
   * Run all health checks and aggregate them into a single response.
   *
   * Checks are started together and awaited concurrently, so the latency of
   * this call follows the slowest check rather than the sum of all of them.
   * The `checks` record still lists results in registration order.
   *
   * The overall status is 'unhealthy' if any check is unhealthy or fails,
   * otherwise 'degraded' if any check is degraded, otherwise 'healthy'.
   *
   * @returns The aggregated health response.
   * @throws Whatever `getConfig()` throws; failures of individual checks are
   *         captured in the response instead of being thrown.
   */
  async runAll(): Promise<HealthResponse> {
    const config = getConfig();

    // Spreading the map takes a snapshot, so register/unregister calls made
    // while checks are in flight cannot change this run.
    const outcomes = await Promise.all(
      [...this.checks].map(async ([name, check]) => ({
        name,
        result: await this.runCheck(check),
      })),
    );

    const results: Record<string, HealthCheckResult> = {};
    let overallStatus: 'healthy' | 'degraded' | 'unhealthy' = 'healthy';
    for (const { name, result } of outcomes) {
      results[name] = result;
      // Escalate only: 'unhealthy' wins over 'degraded', which wins over 'healthy'.
      if (result.status === 'unhealthy') {
        overallStatus = 'unhealthy';
      } else if (result.status === 'degraded' && overallStatus !== 'unhealthy') {
        overallStatus = 'degraded';
      }
    }

    return {
      status: overallStatus,
      version: config.serverVersion,
      uptime: Math.floor((Date.now() - this.startTime) / 1000),
      timestamp: new Date().toISOString(),
      checks: results,
    };
  }

  /**
   * Check if the service is live (process running, not deadlocked)
   *
   * @returns Always `true` in the current implementation.
   */
  isLive(): boolean {
    // Basic liveness: can we execute code?
    // In more complex scenarios, check for deadlocks, event loop delays, etc.
    return true;
  }

  /**
   * Check if the service is ready to accept traffic.
   *
   * Runs every registered check via `runAll()`.
   *
   * @returns `false` only when the overall status is 'unhealthy'; a
   *          'degraded' service still counts as ready.
   */
  async isReady(): Promise<boolean> {
    const response = await this.runAll();
    return response.status !== 'unhealthy';
  }

  /**
   * Reset start time (for testing)
   */
  resetStartTime(): void {
    this.startTime = Date.now();
  }

  /**
   * Clear all checks (for testing)
   */
  clear(): void {
    this.checks.clear();
  }

  /**
   * Run one check, timing it and turning any failure into an 'unhealthy' result.
   * Never rejects.
   */
  private async runCheck(check: HealthCheck): Promise<HealthCheckResult> {
    const startedAt = Date.now();
    try {
      const result = await check();
      // The type promises a result, but untyped callers can still resolve to
      // nothing; report that as a failed check with a clear message.
      if (result == null) {
        throw new TypeError('Health check returned no result');
      }
      return { ...result, durationMs: Date.now() - startedAt };
    } catch (error) {
      return {
        status: 'unhealthy',
        message: error instanceof Error ? error.message : 'Check failed',
        durationMs: Date.now() - startedAt,
      };
    }
  }
}
// Global registry: the single module-level instance shared by getHealthRegistry(),
// registerHealthCheck() and the routes installed by mountHealthEndpoints().
const registry = new HealthCheckRegistry();
/**
* Get the health check registry
*
* @returns The module-level registry instance; every call returns the same object.
*/
export function getHealthRegistry(): HealthCheckRegistry {
return registry;
}
/**
* Register a health check on the module-level registry.
*
* @param name - Key the check is stored under; registering an existing name replaces the previous check.
* @param check - Async function producing the check's result.
*/
export function registerHealthCheck(name: string, check: HealthCheck): void {
registry.register(name, check);
}
/**
 * Attach the three health routes to an Express application.
 *
 * - `GET /health`       - full report from every registered check; 503 when the
 *                         overall status is 'unhealthy' or the run itself throws,
 *                         200 otherwise.
 * - `GET /health/live`  - 200 `alive` or 503 `dead`, depending on `registry.isLive()`.
 * - `GET /health/ready` - 200 `ready` or 503 `not_ready`, depending on
 *                         `registry.isReady()`; 503 as well if that call throws.
 *
 * The configuration object is fetched once, when the routes are mounted.
 *
 * @param app - Express application the routes are registered on.
 */
export function mountHealthEndpoints(app: Express): void {
  const settings = getConfig();
  const isoNow = (): string => new Date().toISOString();

  // Aggregate report: executes every registered check.
  app.get('/health', async (_req: Request, res: Response) => {
    try {
      const report = await registry.runAll();
      res.status(report.status === 'unhealthy' ? 503 : 200).json(report);
    } catch (error) {
      logger.error('Health check error', { error: String(error) });
      res.status(503).json({
        status: 'unhealthy',
        version: settings.serverVersion,
        timestamp: isoNow(),
        error: 'Health check failed',
      });
    }
  });

  // Liveness: answers 200 for as long as the registry reports the process alive.
  app.get('/health/live', (_req: Request, res: Response) => {
    const alive = registry.isLive();
    res.status(alive ? 200 : 503).json({
      status: alive ? 'alive' : 'dead',
      timestamp: isoNow(),
    });
  });

  // Readiness: 503 tells the caller not to route traffic here yet.
  app.get('/health/ready', async (_req: Request, res: Response) => {
    try {
      const ready = await registry.isReady();
      res.status(ready ? 200 : 503).json({
        status: ready ? 'ready' : 'not_ready',
        timestamp: isoNow(),
      });
    } catch (error) {
      logger.error('Readiness check error', { error: String(error) });
      res.status(503).json({
        status: 'not_ready',
        timestamp: isoNow(),
        error: 'Readiness check failed',
      });
    }
  });

  const endpoints = ['/health', '/health/live', '/health/ready'];
  logger.debug('Health endpoints mounted', { endpoints });
}
/**
 * Common health checks.
 *
 * Each member is a factory: call it with its settings to get a `HealthCheck`
 * that can be registered with the health registry.
 */
export const CommonHealthChecks = {
  /**
   * Memory usage check.
   *
   * Compares the heap currently in use (`process.memoryUsage().heapUsed`)
   * against the threshold. Reports 'degraded' above it, never 'unhealthy'.
   *
   * @param thresholdMb - Heap usage limit in megabytes (default 512).
   */
  memory: (thresholdMb: number = 512): HealthCheck => async () => {
    const used = process.memoryUsage().heapUsed / 1024 / 1024;
    if (used > thresholdMb) {
      return {
        status: 'degraded',
        message: `High memory usage: ${used.toFixed(2)}MB`,
      };
    }
    return { status: 'healthy' };
  },

  /**
   * Event loop lag check.
   *
   * Measures how long a `setImmediate` callback takes to run and reports
   * 'degraded' when that exceeds the threshold.
   *
   * @param thresholdMs - Lag limit in whole milliseconds (default 100).
   */
  eventLoop: (thresholdMs: number = 100): HealthCheck => async () => {
    // Use the monotonic clock. Date.now() follows the wall clock, so a clock
    // adjustment between the two reads would show up as bogus or negative lag.
    const start = process.hrtime.bigint();
    await new Promise((resolve) => setImmediate(resolve));
    // Nanoseconds to whole milliseconds, so the message keeps its integer format.
    const lag = Math.floor(Number(process.hrtime.bigint() - start) / 1e6);
    if (lag > thresholdMs) {
      return {
        status: 'degraded',
        message: `High event loop lag: ${lag}ms`,
      };
    }
    return { status: 'healthy' };
  },

  /**
   * Database connectivity check.
   *
   * 'healthy' when `checkFn` resolves to a truthy value. 'unhealthy' when it
   * resolves falsy or throws; a thrown Error's message is passed through.
   *
   * @param checkFn - Probe that resolves to `true` when the database is reachable.
   */
  database: (checkFn: () => Promise<boolean>): HealthCheck => async () => {
    try {
      const connected = await checkFn();
      if (connected) {
        return { status: 'healthy' };
      }
      return {
        status: 'unhealthy',
        message: 'Database connection failed',
      };
    } catch (error) {
      return {
        status: 'unhealthy',
        message: error instanceof Error ? error.message : 'Database check failed',
      };
    }
  },

  /**
   * External API connectivity check.
   *
   * Same shape as the database check, but a failure is reported as
   * 'degraded' rather than 'unhealthy'.
   *
   * @param name - Label for the API, used in the failure messages.
   * @param checkFn - Probe that resolves to `true` when the API is reachable.
   */
  externalApi: (name: string, checkFn: () => Promise<boolean>): HealthCheck => async () => {
    try {
      const connected = await checkFn();
      if (connected) {
        return { status: 'healthy' };
      }
      return {
        status: 'degraded',
        message: `${name} API unavailable`,
      };
    } catch (error) {
      return {
        status: 'degraded',
        message: error instanceof Error ? error.message : `${name} check failed`,
      };
    }
  },
};