/**
* 健康检查服务实现
*/
import * as os from 'os';

import { logger } from '@/utils/logger';
import { IHealthService, HealthStatusResponse, HealthMetricsResponse } from '@/types';
import { SerialEngine } from '@/core/SerialEngine';
/**
 * Tunable settings for the health service.
 */
interface HealthServiceConfig {
  /** Delay between monitoring cycles; handed to `setInterval`, so milliseconds. */
  checkInterval: number;

  /** Maximum age, in milliseconds, of a sample kept in the metrics history. */
  metricsRetentionTime: number;

  /** Limits above which a check fails or an alert is logged. */
  alertThresholds: {
    /** Errors divided by requests, as a fraction (0.1 means 10%). */
    errorRate: number;
    /** Average response time; reported with an "ms" suffix in check messages. */
    responseTime: number;
    /** Memory-usage ratio, as a fraction (0.8 means 80%). */
    memoryUsage: number;
  };
}
/**
 * One point-in-time sample of process and engine metrics.
 */
interface SystemMetrics {
  /** ISO-8601 time at which the sample was taken. */
  timestamp: string;

  /** Value of `process.uptime()` when sampled. */
  uptime: number;

  /** Snapshot from `process.memoryUsage()`. */
  memoryUsage: NodeJS.MemoryUsage;

  /** Snapshot from `process.cpuUsage()`. */
  cpuUsage: NodeJS.CpuUsage;

  /** Active port count reported by the serial engine. */
  activePorts: number;

  /** Active session count reported by the serial engine. */
  activeSessions: number;

  /** Cumulative request count at sample time. */
  totalRequests: number;

  /** Cumulative error count at sample time. */
  errorCount: number;

  /** Average response time at sample time. */
  averageResponseTime: number;
}
/**
 * Aggregated outcome of one full health-check run.
 */
interface HealthCheckResult {
  /** ISO-8601 time at which the run finished. */
  timestamp: string;

  /** Wall-clock time the whole run took, in milliseconds. */
  duration: number;

  /**
   * Overall verdict: `unhealthy` if any check failed, otherwise `degraded`
   * if any check warned, otherwise `healthy`.
   */
  status: 'healthy' | 'degraded' | 'unhealthy';

  /** The individual check results the verdict was derived from. */
  checks: Array<HealthCheck>;
}
/**
 * Outcome of a single health check.
 */
interface HealthCheck {
  /** Stable identifier of the check, e.g. `memory_usage`. */
  name: string;

  /** Verdict for this check. */
  status: 'pass' | 'warn' | 'fail';

  /** Human-readable summary of the verdict. */
  message: string;

  /** Time the check took, in milliseconds. */
  duration: number;

  /** Optional check-specific measurements; the shape differs per check. */
  metrics?: any;
}
/**
 * Rolling history of monitoring samples, stored as parallel arrays:
 * index `i` of every array belongs to the same sample.
 */
interface MetricsHistory {
  /** ISO-8601 time of each sample. */
  timestamps: Array<string>;

  /** Error rate of each sample, as a fraction. */
  errorRates: Array<number>;

  /** Average response time of each sample. */
  responseTimes: Array<number>;

  /** Memory-usage ratio of each sample, as a fraction. */
  memoryUsages: Array<number>;

  /** Active port count of each sample. */
  portCounts: Array<number>;
}
/**
 * Health-check service.
 *
 * Provides on-demand health checks (`getStatus`), aggregate metrics
 * (`getMetrics`) and an optional periodic monitoring loop that samples
 * metrics into a bounded in-memory history and logs threshold alerts.
 */
export class HealthService implements IHealthService {
  private serialEngine: SerialEngine;
  private config: HealthServiceConfig;
  private metricsHistory: MetricsHistory;
  private isMonitoring: boolean = false;
  private monitoringInterval?: NodeJS.Timeout;
  private lastHealthCheck?: HealthCheckResult;
  private systemStartTime: number;

  /**
   * @param serialEngine Engine whose `getEngineStats()` supplies port and session statistics.
   * @param config Optional overrides. Fields that are missing or not numbers keep
   *   their defaults, and `alertThresholds` is merged field by field.
   */
  constructor(
    serialEngine: SerialEngine,
    config?: Partial<HealthServiceConfig>
  ) {
    this.serialEngine = serialEngine;
    this.systemStartTime = Date.now();

    const defaults: HealthServiceConfig = {
      checkInterval: 30000, // 30 seconds
      metricsRetentionTime: 24 * 60 * 60 * 1000, // 24 hours
      alertThresholds: {
        errorRate: 0.1, // 10%
        responseTime: 1000, // 1 second
        memoryUsage: 0.8 // 80%
      }
    };
    this.config = HealthService.mergeConfig(defaults, config);
    this.metricsHistory = HealthService.createEmptyHistory();

    logger.debug('HealthService initialized', this.config);
  }

  /**
   * Runs every health check, remembers the aggregated result (see
   * `getLastHealthCheck`) and returns a status summary.
   *
   * Never rejects: on failure the error is logged and a zeroed summary is returned.
   */
  async getStatus(): Promise<HealthStatusResponse> {
    try {
      const startTime = Date.now();
      const checks = await this.performHealthChecks();
      const status = this.determineOverallStatus(checks);

      this.lastHealthCheck = {
        status,
        checks,
        timestamp: new Date().toISOString(),
        duration: Date.now() - startTime
      };

      // Take one snapshot so all reported memory fields are mutually consistent.
      const memory = process.memoryUsage();

      // NOTE(review): `status` is always 'ok' here, even when the checks above are
      // degraded/unhealthy. The allowed values of HealthStatusResponse.status are
      // not visible from this file, so the original value is kept - confirm.
      return {
        status: 'ok',
        uptime: process.uptime(),
        version: this.getVersion(),
        active_ports: this.getActivePortCount(),
        active_sessions: this.getActiveSessionCount(),
        total_requests: this.getTotalRequests(),
        error_count: this.getTotalErrors(),
        memory_usage: {
          rss: memory.rss,
          heapTotal: memory.heapTotal,
          heapUsed: memory.heapUsed,
          external: memory.external
        }
      };
    } catch (error) {
      logger.error('Failed to get health status', error as Error);
      // NOTE(review): the failure path also reports 'ok'; see the note above.
      return {
        status: 'ok',
        uptime: process.uptime(),
        version: this.getVersion(),
        active_ports: 0,
        active_sessions: 0,
        total_requests: 0,
        error_count: 1,
        memory_usage: process.memoryUsage()
      };
    }
  }

  /**
   * Returns aggregate metrics derived from the sampled history and the
   * engine's current statistics.
   *
   * Never rejects: on failure the error is logged and zeroed metrics are returned.
   */
  async getMetrics(): Promise<HealthMetricsResponse> {
    try {
      return {
        status: 'ok',
        metrics: {
          response_time: this.calculateResponseTimeStats(),
          throughput: this.calculateThroughputStats(),
          errors: this.calculateErrorStats(),
          ports: this.calculatePortStats()
        }
      };
    } catch (error) {
      logger.error('Failed to get metrics', error as Error);
      return {
        status: 'ok',
        metrics: {
          response_time: { avg: 0, min: 0, max: 0, p95: 0 },
          throughput: { requests_per_second: 0, bytes_per_second: 0 },
          errors: { count: 0, rate: 0, by_type: {} },
          ports: { total: 0, active: 0, errors: 0 }
        }
      };
    }
  }

  /**
   * Starts the periodic monitoring loop and runs one cycle immediately.
   * Logs a warning and does nothing if monitoring is already running.
   */
  startMonitoring(): void {
    if (this.isMonitoring) {
      logger.warn('Health monitoring is already running');
      return;
    }

    logger.info('Starting health monitoring');
    this.isMonitoring = true;

    // performMonitoringCycle catches its own errors, so the promise is
    // deliberately not awaited.
    this.monitoringInterval = setInterval(() => {
      void this.performMonitoringCycle();
    }, this.config.checkInterval);

    // Sample once right away instead of waiting for the first tick.
    void this.performMonitoringCycle();
  }

  /**
   * Stops the periodic monitoring loop.
   * Logs a warning and does nothing if monitoring is not running.
   */
  stopMonitoring(): void {
    if (!this.isMonitoring) {
      logger.warn('Health monitoring is not running');
      return;
    }

    logger.info('Stopping health monitoring');
    this.isMonitoring = false;

    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
      this.monitoringInterval = undefined;
    }
  }

  /**
   * Runs all individual checks and returns their results in a fixed order:
   * memory, error rate, response time, serial ports, disk space, system load.
   */
  private async performHealthChecks(): Promise<HealthCheck[]> {
    // The checks are independent of each other; Promise.all keeps the result order.
    return Promise.all([
      this.checkMemoryUsage(),
      this.checkErrorRate(),
      this.checkResponseTime(),
      this.checkSerialPorts(),
      this.checkDiskSpace(),
      this.checkSystemLoad()
    ]);
  }

  /**
   * Checks memory usage against `alertThresholds.memoryUsage`:
   * fail above the threshold, warn above 80% of it.
   */
  private async checkMemoryUsage(): Promise<HealthCheck> {
    const startTime = Date.now();
    const memUsage = process.memoryUsage();
    const memoryUsagePercent = this.computeMemoryUsageRatio(memUsage);
    const threshold = this.config.alertThresholds.memoryUsage;

    let status: 'pass' | 'fail' | 'warn' = 'pass';
    let message = 'Memory usage is normal';
    if (memoryUsagePercent > threshold) {
      status = 'fail';
      message = `Memory usage is critical: ${(memoryUsagePercent * 100).toFixed(2)}%`;
    } else if (memoryUsagePercent > threshold * 0.8) {
      status = 'warn';
      message = `Memory usage is high: ${(memoryUsagePercent * 100).toFixed(2)}%`;
    }

    return {
      name: 'memory_usage',
      status,
      message,
      duration: Date.now() - startTime,
      metrics: {
        rss: memUsage.rss,
        heapUsed: memUsage.heapUsed,
        heapTotal: memUsage.heapTotal,
        external: memUsage.external,
        usagePercent: memoryUsagePercent * 100
      }
    };
  }

  /**
   * Checks the cumulative error rate against `alertThresholds.errorRate`:
   * fail above the threshold, warn above half of it.
   */
  private async checkErrorRate(): Promise<HealthCheck> {
    const startTime = Date.now();
    const totalRequests = this.getTotalRequests();
    const errorCount = this.getTotalErrors();
    // With no requests yet the rate is defined as 0 rather than NaN.
    const errorRate = totalRequests > 0 ? errorCount / totalRequests : 0;
    const threshold = this.config.alertThresholds.errorRate;

    let status: 'pass' | 'fail' | 'warn' = 'pass';
    let message = 'Error rate is normal';
    if (errorRate > threshold) {
      status = 'fail';
      message = `Error rate is critical: ${(errorRate * 100).toFixed(2)}%`;
    } else if (errorRate > threshold * 0.5) {
      status = 'warn';
      message = `Error rate is elevated: ${(errorRate * 100).toFixed(2)}%`;
    }

    return {
      name: 'error_rate',
      status,
      message,
      duration: Date.now() - startTime,
      metrics: {
        errorRate: errorRate * 100,
        totalRequests,
        errorCount
      }
    };
  }

  /**
   * Checks the average response time against `alertThresholds.responseTime`:
   * fail above the threshold, warn above half of it.
   */
  private async checkResponseTime(): Promise<HealthCheck> {
    const startTime = Date.now();
    const avgResponseTime = this.getAverageResponseTime();
    const threshold = this.config.alertThresholds.responseTime;

    let status: 'pass' | 'fail' | 'warn' = 'pass';
    let message = 'Response time is normal';
    if (avgResponseTime > threshold) {
      status = 'fail';
      message = `Response time is critical: ${avgResponseTime}ms`;
    } else if (avgResponseTime > threshold * 0.5) {
      status = 'warn';
      message = `Response time is elevated: ${avgResponseTime}ms`;
    }

    return {
      name: 'response_time',
      status,
      message,
      duration: Date.now() - startTime,
      metrics: {
        averageResponseTime: avgResponseTime
      }
    };
  }

  /**
   * Reports active/total port counts and warns when any port has a
   * non-zero error count.
   */
  private async checkSerialPorts(): Promise<HealthCheck> {
    const startTime = Date.now();
    const engineStats = this.serialEngine.getEngineStats();
    const activePorts = engineStats.activePorts;
    const totalPorts = engineStats.totalPorts;

    let status: 'pass' | 'fail' | 'warn' = 'pass';
    let message = `${activePorts}/${totalPorts} ports are active`;

    const portsWithErrors = Object.entries(engineStats.portStats)
      .filter(([_, stats]: [string, any]) => stats.errorCount > 0);
    if (portsWithErrors.length > 0) {
      status = 'warn';
      message = `${portsWithErrors.length} ports have errors`;
    }

    return {
      name: 'serial_ports',
      status,
      message,
      duration: Date.now() - startTime,
      metrics: {
        activePorts,
        totalPorts,
        portsWithErrors: portsWithErrors.length
      }
    };
  }

  /**
   * Checks disk usage: fail above 90%, warn above 80%.
   * Returns a `warn` result if the statistics cannot be obtained.
   */
  private async checkDiskSpace(): Promise<HealthCheck> {
    const startTime = Date.now();
    try {
      const stats = await this.getDiskStats();
      // Guard against a zero-sized volume so the ratio never becomes NaN/Infinity.
      const usagePercent = stats.total > 0 ? stats.used / stats.total : 0;

      let status: 'pass' | 'fail' | 'warn' = 'pass';
      let message = `Disk usage: ${(usagePercent * 100).toFixed(2)}%`;
      if (usagePercent > 0.9) {
        status = 'fail';
        message = `Disk usage is critical: ${(usagePercent * 100).toFixed(2)}%`;
      } else if (usagePercent > 0.8) {
        status = 'warn';
        message = `Disk usage is high: ${(usagePercent * 100).toFixed(2)}%`;
      }

      return {
        name: 'disk_space',
        status,
        message,
        duration: Date.now() - startTime,
        metrics: stats
      };
    } catch (error) {
      // The error detail is not attached: HealthCheck has no field for it.
      return {
        name: 'disk_space',
        status: 'warn',
        message: 'Unable to check disk space',
        duration: Date.now() - startTime
      };
    }
  }

  /**
   * Checks the 1-minute load average per CPU: fail above 2.0, warn above 1.0.
   * Returns a `warn` result if the load cannot be read.
   */
  private async checkSystemLoad(): Promise<HealthCheck> {
    const startTime = Date.now();
    try {
      const loadAvg = os.loadavg();
      const cpuCount = os.cpus().length;
      // os.cpus() may be empty on some platforms; avoid dividing by zero.
      const load1Min = loadAvg[0] / Math.max(cpuCount, 1);

      let status: 'pass' | 'fail' | 'warn' = 'pass';
      let message = `System load: ${loadAvg[0].toFixed(2)} (${cpuCount} CPUs)`;
      if (load1Min > 2.0) {
        status = 'fail';
        message = `System load is critical: ${loadAvg[0].toFixed(2)}`;
      } else if (load1Min > 1.0) {
        status = 'warn';
        message = `System load is high: ${loadAvg[0].toFixed(2)}`;
      }

      return {
        name: 'system_load',
        status,
        message,
        duration: Date.now() - startTime,
        metrics: {
          loadAvg: loadAvg,
          cpuCount,
          loadPerCpu: load1Min
        }
      };
    } catch (error) {
      // The error detail is not attached: HealthCheck has no field for it.
      return {
        name: 'system_load',
        status: 'warn',
        message: 'Unable to check system load',
        duration: Date.now() - startTime
      };
    }
  }

  /**
   * Folds individual results into one verdict: any `fail` gives `unhealthy`,
   * otherwise any `warn` gives `degraded`, otherwise `healthy`.
   */
  private determineOverallStatus(checks: HealthCheck[]): 'healthy' | 'unhealthy' | 'degraded' {
    if (checks.some(check => check.status === 'fail')) {
      return 'unhealthy';
    }
    if (checks.some(check => check.status === 'warn')) {
      return 'degraded';
    }
    return 'healthy';
  }

  /**
   * One monitoring tick: sample metrics, append them to the history, drop
   * expired samples and evaluate alerts. Errors are logged, never thrown.
   */
  private async performMonitoringCycle(): Promise<void> {
    try {
      const metrics = this.collectCurrentMetrics();
      this.updateMetricsHistory(metrics);
      this.cleanupOldMetrics();
      await this.checkAlerts(metrics);
    } catch (error) {
      logger.error('Health monitoring cycle failed', error as Error);
    }
  }

  /**
   * Takes a snapshot of process and engine metrics.
   */
  private collectCurrentMetrics(): SystemMetrics {
    const engineStats = this.serialEngine.getEngineStats();
    return {
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      memoryUsage: process.memoryUsage(),
      cpuUsage: process.cpuUsage(),
      activePorts: engineStats.activePorts,
      activeSessions: engineStats.components.portSession.activeSessions,
      totalRequests: this.getTotalRequests(),
      errorCount: this.getTotalErrors(),
      averageResponseTime: this.getAverageResponseTime()
    };
  }

  /**
   * Appends one sample to every history series.
   */
  private updateMetricsHistory(metrics: SystemMetrics): void {
    const history = this.metricsHistory;
    history.timestamps.push(new Date().toISOString());
    history.errorRates.push(
      metrics.totalRequests > 0 ? metrics.errorCount / metrics.totalRequests : 0
    );
    history.responseTimes.push(metrics.averageResponseTime);
    // Same ratio as the memory health check and the memory alert.
    history.memoryUsages.push(this.computeMemoryUsageRatio(metrics.memoryUsage));
    history.portCounts.push(metrics.activePorts);
  }

  /**
   * Removes every sample older than `metricsRetentionTime` from all history
   * series, in place and in a single pass per series.
   */
  private cleanupOldMetrics(): void {
    const cutoffTime = Date.now() - this.config.metricsRetentionTime;
    const history = this.metricsHistory;

    // Decide staleness once per sample so that all series drop the same indices.
    // Unparseable timestamps compare as NaN and are therefore kept.
    const stale = history.timestamps.map(
      timestamp => new Date(timestamp).getTime() < cutoffTime
    );
    if (!stale.some(Boolean)) {
      return;
    }

    const compact = <T>(series: T[]): void => {
      let write = 0;
      for (let read = 0; read < series.length; read++) {
        if (stale[read]) {
          continue;
        }
        series[write++] = series[read];
      }
      series.length = write;
    };

    compact(history.timestamps);
    compact(history.errorRates);
    compact(history.responseTimes);
    compact(history.memoryUsages);
    compact(history.portCounts);
  }

  /**
   * Logs a warning for each alert threshold the given sample exceeds.
   */
  private async checkAlerts(metrics: SystemMetrics): Promise<void> {
    const alerts: string[] = [];

    const memoryUsage = this.computeMemoryUsageRatio(metrics.memoryUsage);
    if (memoryUsage > this.config.alertThresholds.memoryUsage) {
      alerts.push(`High memory usage: ${(memoryUsage * 100).toFixed(2)}%`);
    }

    const errorRate = metrics.totalRequests > 0 ? metrics.errorCount / metrics.totalRequests : 0;
    if (errorRate > this.config.alertThresholds.errorRate) {
      alerts.push(`High error rate: ${(errorRate * 100).toFixed(2)}%`);
    }

    for (const alert of alerts) {
      logger.warn('Health alert', { alert, metrics });
      // TODO: forward to an alerting system
    }
  }

  /**
   * Returns disk statistics.
   *
   * NOTE(review): this is placeholder data (an empty 1 GB volume); no real
   * disk query is performed, so the disk check can currently never warn or fail.
   */
  private async getDiskStats(): Promise<any> {
    return { total: 1000000000, used: 0, free: 1000000000 };
  }

  /**
   * Computes avg/min/max/p95 over the most recent 100 response-time samples.
   * Returns all zeros when there are no samples.
   */
  private calculateResponseTimeStats(): any {
    const responseTimes = this.metricsHistory.responseTimes.slice(-100);
    if (responseTimes.length === 0) {
      return { avg: 0, min: 0, max: 0, p95: 0 };
    }

    const sorted = [...responseTimes].sort((a, b) => a - b);
    const sum = responseTimes.reduce((a, b) => a + b, 0);
    // Nearest-rank percentile: the smallest value with at least 95% of samples
    // at or below it. Integer arithmetic avoids floating-point drift in the rank.
    const p95Index = Math.ceil((sorted.length * 95) / 100) - 1;

    return {
      avg: sum / responseTimes.length,
      min: sorted[0],
      max: sorted[sorted.length - 1],
      p95: sorted[p95Index]
    };
  }

  /**
   * Computes throughput figures over the last 60 seconds of history.
   *
   * NOTE(review): both figures are placeholders. `requests_per_second` counts
   * monitoring samples (not requests) in the window, and `bytes_per_second` is
   * simulated from response times. Replace once real counters are available.
   */
  private calculateThroughputStats(): any {
    const windowStart = Date.now() - 60000;
    const { timestamps, responseTimes } = this.metricsHistory;

    let sampleCount = 0;
    let simulatedBytes = 0;
    timestamps.forEach((timestamp, index) => {
      if (new Date(timestamp).getTime() > windowStart) {
        sampleCount++;
        simulatedBytes += responseTimes[index] * 100; // simulated byte count
      }
    });

    return {
      requests_per_second: sampleCount / 60,
      bytes_per_second: simulatedBytes / 60
    };
  }

  /**
   * Computes error statistics: the cumulative error count and the mean error
   * rate (as a percentage) over the most recent 100 samples.
   */
  private calculateErrorStats(): any {
    const errorRates = this.metricsHistory.errorRates.slice(-100);
    if (errorRates.length === 0) {
      return { count: 0, rate: 0, by_type: {} };
    }

    const avgRate = errorRates.reduce((sum, rate) => sum + rate, 0) / errorRates.length;
    const totalErrors = this.getTotalErrors();
    return {
      count: totalErrors,
      rate: avgRate * 100,
      by_type: {
        // TODO: break errors down by type
        "serial_error": totalErrors
      }
    };
  }

  /**
   * Summarises port totals and the summed per-port error count.
   */
  private calculatePortStats(): any {
    const engineStats = this.serialEngine.getEngineStats();
    return {
      total: engineStats.totalPorts,
      active: engineStats.activePorts,
      errors: Object.values(engineStats.portStats)
        .reduce((sum: number, stats: any) => sum + stats.errorCount, 0)
    };
  }

  /**
   * Returns copies of the most recent 100 entries of every history series.
   */
  private getMetricsHistory(): any {
    return {
      timestamps: this.metricsHistory.timestamps.slice(-100),
      errorRates: this.metricsHistory.errorRates.slice(-100),
      responseTimes: this.metricsHistory.responseTimes.slice(-100),
      memoryUsages: this.metricsHistory.memoryUsages.slice(-100),
      portCounts: this.metricsHistory.portCounts.slice(-100)
    };
  }

  /**
   * Reads the version from package.json; returns 'unknown' if it cannot be loaded.
   */
  private getVersion(): string {
    try {
      const packageJson = require('../../../package.json');
      return packageJson.version || 'unknown';
    } catch {
      return 'unknown';
    }
  }

  /**
   * Returns the engine's active port count.
   */
  private getActivePortCount(): number {
    return this.serialEngine.getEngineStats().activePorts;
  }

  /**
   * Returns the engine's active session count.
   */
  private getActiveSessionCount(): number {
    return this.serialEngine.getEngineStats().components.portSession.activeSessions;
  }

  /**
   * Returns the total request count, taken as the sum of per-port write counts.
   */
  private getTotalRequests(): number {
    return Object.values(this.serialEngine.getEngineStats().portStats)
      .reduce((sum: number, stats: any) => sum + stats.writeCount, 0);
  }

  /**
   * Returns the total error count, summed over all ports.
   */
  private getTotalErrors(): number {
    return Object.values(this.serialEngine.getEngineStats().portStats)
      .reduce((sum: number, stats: any) => sum + stats.errorCount, 0);
  }

  /**
   * Returns the average response time.
   */
  private getAverageResponseTime(): number {
    // TODO: compute the real response time
    return 100; // simulated value
  }

  /**
   * Returns the log directory name.
   */
  private getLogDirectory(): string {
    return 'logs';
  }

  /**
   * Returns the result recorded by the most recent successful `getStatus`
   * call, or `undefined` if none has completed yet.
   */
  getLastHealthCheck(): HealthCheckResult | undefined {
    return this.lastHealthCheck;
  }

  /**
   * Returns a copy of the current configuration; mutating it (including its
   * `alertThresholds`) does not affect the service.
   */
  getConfig(): HealthServiceConfig {
    return HealthService.mergeConfig(this.config);
  }

  /**
   * Applies configuration overrides. Fields that are missing or not numbers
   * are left unchanged, and `alertThresholds` is merged field by field. If
   * monitoring is running it is restarted so a new interval takes effect.
   */
  updateConfig(newConfig: Partial<HealthServiceConfig>): void {
    this.config = HealthService.mergeConfig(this.config, newConfig);

    if (this.isMonitoring) {
      this.stopMonitoring();
      this.startMonitoring();
    }

    logger.info('HealthService configuration updated', newConfig);
  }

  /**
   * Stops monitoring (if running) and clears the metrics history.
   * Errors are logged, never thrown.
   */
  dispose(): void {
    try {
      logger.info('Disposing HealthService...');

      // Only stop when running, so disposing an idle service logs no
      // "not running" warning.
      if (this.isMonitoring) {
        this.stopMonitoring();
      }

      this.metricsHistory = HealthService.createEmptyHistory();
      logger.info('HealthService disposed');
    } catch (error) {
      logger.error('Failed to dispose HealthService', error as Error);
    }
  }

  /**
   * Memory usage as a fraction of total system memory, counting the V8 heap
   * in use plus external memory. Shared by the health check, the history and
   * the alerts so they all compare the same quantity against the threshold.
   */
  private computeMemoryUsageRatio(usage: NodeJS.MemoryUsage): number {
    const totalMem = os.totalmem();
    return totalMem > 0 ? (usage.heapUsed + usage.external) / totalMem : 0;
  }

  /**
   * Creates a history with every series empty.
   */
  private static createEmptyHistory(): MetricsHistory {
    return {
      timestamps: [],
      errorRates: [],
      responseTimes: [],
      memoryUsages: [],
      portCounts: []
    };
  }

  /**
   * Returns a new configuration consisting of `base` with the numeric fields
   * of `overrides` applied. Fields that are not numbers (for example
   * `undefined`) are ignored, and the result shares no objects with its inputs.
   */
  private static mergeConfig(
    base: HealthServiceConfig,
    overrides?: Partial<HealthServiceConfig>
  ): HealthServiceConfig {
    const pick = (candidate: number | undefined, fallback: number): number =>
      typeof candidate === 'number' ? candidate : fallback;

    const source: Partial<HealthServiceConfig> = overrides || {};
    const thresholds: Partial<HealthServiceConfig['alertThresholds']> =
      source.alertThresholds || {};

    return {
      checkInterval: pick(source.checkInterval, base.checkInterval),
      metricsRetentionTime: pick(source.metricsRetentionTime, base.metricsRetentionTime),
      alertThresholds: {
        errorRate: pick(thresholds.errorRate, base.alertThresholds.errorRate),
        responseTime: pick(thresholds.responseTime, base.alertThresholds.responseTime),
        memoryUsage: pick(thresholds.memoryUsage, base.alertThresholds.memoryUsage)
      }
    };
  }
}