// healthService.ts (10.8 kB)
import { McpConfigManager } from '@src/config/mcpConfigManager.js';
import { MCP_SERVER_VERSION } from '@src/constants.js';
import { AgentConfigManager } from '@src/core/server/agentConfig.js';
import { ServerManager } from '@src/core/server/serverManager.js';
import { ClientStatus } from '@src/core/types/index.js';
import logger from '@src/logger/logger.js';
/**
* Health status levels reported as the overall `status` of a health check.
*
* The level is derived by `HealthService.determineOverallHealth` and mapped to an
* HTTP status code by `HealthService.getHttpStatusCode`.
*/
export enum HealthStatus {
/** Configuration is loaded and every known server is healthy; reported as HTTP 200. */
HEALTHY = 'healthy',
/** Configuration is loaded but there are no servers, or more than half (not all) are healthy; still HTTP 200. */
DEGRADED = 'degraded',
/** Configuration failed to load, or half or fewer of the servers are healthy; reported as HTTP 503. */
UNHEALTHY = 'unhealthy',
}
/**
* MCP server health information: one entry per client known to the server manager.
*/
export interface McpServerHealth {
/** Key under which the client is registered in the server manager's client map. */
name: string;
/** Raw client status as reported by the server manager. */
status: ClientStatus;
/** True only when `status` equals `ClientStatus.Connected`. */
healthy: boolean;
/** Copied from the client's `lastConnected` field, when present. */
lastConnected?: Date;
/** Message of the client's last error; passed through error sanitization before being returned. */
lastError?: string;
/** Tags taken from the client's transport configuration. */
tags?: string[];
}
/**
* System health metrics describing the running process.
*
* Depending on the configured health detail level, some of these fields are
* replaced by placeholder values (0 / empty string) before the response is returned.
*/
export interface SystemHealth {
/** Whole seconds elapsed since the HealthService instance was constructed. */
uptime: number;
/** Heap usage figures derived from `process.memoryUsage()`. */
memory: {
/** `heapUsed` in megabytes, rounded to two decimals (whole MB at the 'basic' level, 0 at 'minimal'). */
used: number;
/** `heapTotal` in megabytes, rounded to two decimals (whole MB at the 'basic' level, 0 at 'minimal'). */
total: number;
/** `heapUsed / heapTotal` as a percentage, rounded to two decimals (0 at 'minimal'). */
percentage: number;
};
/** Process identification; blanked out (0 / '') at the 'minimal' and 'basic' detail levels. */
process: {
/** `process.pid`. */
pid: number;
/** `process.version`. */
nodeVersion: string;
/** `process.platform`. */
platform: string;
/** `process.arch`. */
arch: string;
};
}
/**
* Complete health check response as returned by `HealthService.performHealthCheck`.
*/
export interface HealthCheckResponse {
/** Overall status derived from the server counts and whether configuration loaded. */
status: HealthStatus;
/** ISO-8601 timestamp (`Date#toISOString`) taken when the response was assembled. */
timestamp: string;
/** Value of `MCP_SERVER_VERSION`. */
version: string;
/** Process-level metrics. */
system: SystemHealth;
/** Aggregated view of the clients held by the current server manager. */
servers: {
/** Number of clients known to the server manager (0 when there is no manager or the lookup failed). */
total: number;
/** Number of clients whose status is `ClientStatus.Connected`. */
healthy: number;
/** Number of clients in any other status. */
unhealthy: number;
/** Per-server entries; emptied at the 'minimal' detail level. */
details: McpServerHealth[];
};
/** Summary of the loaded configuration; counts and auth flag are zeroed/false at the 'minimal' detail level. */
configuration: {
/** False when reading the configuration threw; true otherwise. */
loaded: boolean;
/** Number of entries in the transport configuration. */
serverCount: number;
/** Entries whose `disabled` flag is not truthy. */
enabledCount: number;
/** `serverCount - enabledCount`. */
disabledCount: number;
/** Result of the agent configuration's `isAuthEnabled()`. */
authEnabled: boolean;
/** 'http' when configuration was read successfully, 'unknown' when reading it failed. */
transport: string;
};
}
/**
 * Health service for monitoring system and MCP server status.
 *
 * A lazily created singleton (see {@link HealthService.getInstance}) that assembles a
 * {@link HealthCheckResponse} from three sources: process metrics, the clients held by
 * `ServerManager.current`, and the MCP/agent configuration. Every response is passed
 * through a sanitizer driven by `AgentConfigManager.getHealthDetailLevel()` before it
 * is handed back to the caller.
 */
export class HealthService {
  /** Lazily created singleton; stays `undefined` until the first `getInstance()` call. */
  private static instance: HealthService | undefined;

  /** Epoch milliseconds captured at construction; the reference point for `uptime`. */
  private readonly startTime: number;

  /** Agent configuration used to pick the health detail level. */
  private readonly agentConfig: AgentConfigManager;

  private constructor() {
    this.startTime = Date.now();
    this.agentConfig = AgentConfigManager.getInstance();
  }

  /**
   * Get singleton instance, creating it on first use.
   *
   * @returns The shared HealthService.
   */
  public static getInstance(): HealthService {
    if (!HealthService.instance) {
      HealthService.instance = new HealthService();
    }
    return HealthService.instance;
  }

  /**
   * Perform complete health check.
   *
   * Collects system, server and configuration health, derives the overall status,
   * and returns the response after sanitizing it for the configured detail level.
   *
   * @returns The sanitized health check response.
   * @throws Re-throws any error raised while assembling or sanitizing the response,
   *   after logging it.
   */
  public async performHealthCheck(): Promise<HealthCheckResponse> {
    try {
      const systemHealth = this.getSystemHealth();
      const serverHealth = await this.getServerHealth();
      const configHealth = this.getConfigurationHealth();
      const overallStatus = this.determineOverallHealth(serverHealth, configHealth);

      const fullResponse: HealthCheckResponse = {
        status: overallStatus,
        timestamp: new Date().toISOString(),
        version: MCP_SERVER_VERSION,
        system: systemHealth,
        servers: serverHealth,
        configuration: configHealth,
      };

      // Apply security configuration to sanitize response
      return this.sanitizeHealthResponse(fullResponse);
    } catch (error) {
      logger.error('Health check failed:', error);
      throw error;
    }
  }

  /**
   * Sanitize health response based on security configuration.
   *
   * - `'minimal'`: status, timestamp, version, uptime, server counts and the `loaded`
   *   flag only; everything else is replaced by placeholder values.
   * - `'basic'`: per-server details with sanitized errors, whole-MB memory figures,
   *   and blanked process information.
   * - `'full'` (and any unrecognised level): everything, with error messages sanitized.
   *
   * @param response The complete, unsanitized response.
   * @returns A new response object; the input is not mutated.
   */
  private sanitizeHealthResponse(response: HealthCheckResponse): HealthCheckResponse {
    const detailLevel = this.agentConfig.getHealthDetailLevel();

    switch (detailLevel) {
      case 'minimal':
        // Minimal: only basic status, no sensitive details
        return {
          status: response.status,
          timestamp: response.timestamp,
          version: response.version,
          system: {
            uptime: response.system.uptime,
            memory: { used: 0, total: 0, percentage: 0 },
            process: { pid: 0, nodeVersion: '', platform: '', arch: '' },
          },
          servers: {
            total: response.servers.total,
            healthy: response.servers.healthy,
            unhealthy: response.servers.unhealthy,
            details: [],
          },
          configuration: {
            loaded: response.configuration.loaded,
            serverCount: 0,
            enabledCount: 0,
            disabledCount: 0,
            authEnabled: false,
            transport: 'http',
          },
        };

      case 'basic':
        // Basic: some server info but sanitized errors and no sensitive system details
        return {
          ...response,
          system: {
            uptime: response.system.uptime,
            memory: {
              used: Math.round(response.system.memory.used),
              total: Math.round(response.system.memory.total),
              percentage: response.system.memory.percentage,
            },
            process: { pid: 0, nodeVersion: '', platform: '', arch: '' },
          },
          servers: {
            ...response.servers,
            // Fields are listed explicitly so anything added to the server entry later
            // is not exposed at this level by accident.
            details: response.servers.details.map((server) => ({
              name: server.name,
              status: server.status,
              healthy: server.healthy,
              lastConnected: server.lastConnected,
              lastError: this.sanitizeErrorMessage(server.lastError),
              tags: server.tags,
            })),
          },
        };

      case 'full':
      default:
        // Full: all details but with basic error sanitization
        return {
          ...response,
          servers: {
            ...response.servers,
            details: response.servers.details.map((server) => ({
              ...server,
              lastError: this.sanitizeErrorMessage(server.lastError),
            })),
          },
        };
    }
  }

  /**
   * Sanitize error messages to prevent information leakage.
   *
   * Redaction steps, in order:
   * 1. `name=value` / `name: value` pairs whose name ends in password, token, key,
   *    secret or auth (so `api_key=...` and `access_token: ...` are covered too).
   * 2. Whole URLs of any scheme, including an embedded `user:pass@` part. This runs
   *    before step 3 on purpose: redacting the credentials first would leave
   *    `scheme://[REDACTED_CREDENTIAL]host`, which no longer looks like a URL, and the
   *    host would leak.
   * 3. Remaining bare `user:pass@` fragments.
   * 4. File paths ending in a known config/source extension.
   * 5. IPv4 addresses.
   * 6. `localhost` references (with optional port).
   *
   * The result is finally truncated to 100 characters.
   *
   * @param errorMessage Raw error text; may be undefined or empty.
   * @returns The redacted, truncated message, or `undefined` when there is no message.
   */
  private sanitizeErrorMessage(errorMessage?: string): string | undefined {
    if (!errorMessage) return undefined;

    return errorMessage
      // Bounded prefix keeps matching linear while still swallowing names like "x-api-key"
      .replace(/[\w-]{0,40}(?:password|token|key|secret|auth)\s*[:=]\s*[^\s]+/gi, '[REDACTED_CREDENTIAL]') // Key-value credentials
      .replace(/\b[a-z][a-z0-9+.-]*:\/\/[^\s]+/gi, '[REDACTED_URL]') // Complete URLs (any scheme, with or without userinfo)
      .replace(/\b[\w.-]+:[\w.-]+@/gi, '[REDACTED_CREDENTIAL]') // User:password@ patterns outside URLs
      .replace(/(?:[A-Za-z]:)?[/\\][\w.-]+(?:[/\\][\w.-]+)*\.(?:json|js|ts|py|yml|yaml|conf|ini)\b/g, '[REDACTED_PATH]') // File paths
      .replace(/\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g, '[REDACTED_IP]') // IP addresses
      .replace(/\blocalhost(?::\d+)?\b/gi, '[REDACTED_HOST]') // Localhost references
      .substring(0, 100); // Limit error message length
  }

  /**
   * Get system health metrics.
   *
   * @returns Uptime in whole seconds since this service was constructed, heap usage
   *   in megabytes (two decimals) with the used/total percentage, and process details.
   */
  private getSystemHealth(): SystemHealth {
    const { heapUsed, heapTotal } = process.memoryUsage();
    const toMegabytes = (bytes: number): number => Math.round((bytes / 1024 / 1024) * 100) / 100;

    return {
      uptime: Math.floor((Date.now() - this.startTime) / 1000),
      memory: {
        used: toMegabytes(heapUsed), // MB
        total: toMegabytes(heapTotal), // MB
        // Guard the division so a zero heapTotal yields 0 instead of NaN/Infinity
        percentage: heapTotal > 0 ? Math.round((heapUsed / heapTotal) * 100 * 100) / 100 : 0,
      },
      process: {
        pid: process.pid,
        nodeVersion: process.version,
        platform: process.platform,
        arch: process.arch,
      },
    };
  }

  /**
   * Get MCP server health status.
   *
   * A client counts as healthy only when its status is `ClientStatus.Connected`.
   * When there is no current server manager, or reading the clients throws, an empty
   * result (all counts 0) is returned; the error is logged rather than propagated.
   *
   * @returns Totals plus one detail entry per client.
   */
  private async getServerHealth(): Promise<HealthCheckResponse['servers']> {
    try {
      const serverManager = ServerManager.current;
      if (!serverManager) {
        return {
          total: 0,
          healthy: 0,
          unhealthy: 0,
          details: [],
        };
      }

      const clients = serverManager.getClients();
      const serverDetails: McpServerHealth[] = [];
      let healthyCount = 0;
      let unhealthyCount = 0;

      for (const [name, clientInfo] of clients.entries()) {
        const isHealthy = clientInfo.status === ClientStatus.Connected;
        if (isHealthy) {
          healthyCount++;
        } else {
          unhealthyCount++;
        }

        serverDetails.push({
          name,
          status: clientInfo.status,
          healthy: isHealthy,
          lastConnected: clientInfo.lastConnected,
          lastError: clientInfo.lastError?.message,
          tags: clientInfo.transport.tags,
        });
      }

      return {
        total: clients.size,
        healthy: healthyCount,
        unhealthy: unhealthyCount,
        details: serverDetails,
      };
    } catch (error) {
      logger.error('Error getting server health:', error);
      return {
        total: 0,
        healthy: 0,
        unhealthy: 0,
        details: [],
      };
    }
  }

  /**
   * Get configuration health status.
   *
   * Counts the entries of the MCP transport configuration and how many of them are
   * not flagged `disabled`. If reading the configuration throws, the error is logged
   * and a `loaded: false` result with zeroed counts is returned.
   *
   * @returns The configuration summary.
   */
  private getConfigurationHealth(): HealthCheckResponse['configuration'] {
    try {
      const mcpConfig = McpConfigManager.getInstance();
      const agentConfig = AgentConfigManager.getInstance();
      const config = mcpConfig.getTransportConfig();

      const serverCount = Object.keys(config).length;
      const enabledCount = Object.values(config).filter((server) => !server.disabled).length;
      const disabledCount = serverCount - enabledCount;

      return {
        loaded: true,
        serverCount,
        enabledCount,
        disabledCount,
        authEnabled: agentConfig.isAuthEnabled(),
        transport: 'http', // Since this is the HTTP transport layer
      };
    } catch (error) {
      logger.error('Error getting configuration health:', error);
      return {
        loaded: false,
        serverCount: 0,
        enabledCount: 0,
        disabledCount: 0,
        authEnabled: false,
        transport: 'unknown',
      };
    }
  }

  /**
   * Determine overall health status based on component status.
   *
   * Rules, evaluated in order:
   * 1. Configuration not loaded -> UNHEALTHY.
   * 2. No servers at all -> DEGRADED.
   * 3. Every server healthy -> HEALTHY.
   * 4. Strictly more than half healthy -> DEGRADED.
   * 5. Otherwise (half or fewer healthy) -> UNHEALTHY.
   *
   * @param serverHealth Server totals; only `total` and `healthy` are consulted.
   * @param configHealth Whether the configuration loaded.
   * @returns The overall status.
   */
  private determineOverallHealth(
    serverHealth: { total: number; healthy: number; unhealthy: number },
    configHealth: { loaded: boolean },
  ): HealthStatus {
    // Configuration must be loaded
    if (!configHealth.loaded) {
      return HealthStatus.UNHEALTHY;
    }

    // With no servers there is nothing failing, but also nothing to serve: degraded
    if (serverHealth.total === 0) {
      return HealthStatus.DEGRADED;
    }

    // If all servers are healthy, system is healthy
    if (serverHealth.healthy === serverHealth.total) {
      return HealthStatus.HEALTHY;
    }

    // Strictly more than half healthy: degraded. Exactly half does NOT qualify.
    if (serverHealth.healthy > serverHealth.total / 2) {
      return HealthStatus.DEGRADED;
    }

    // Half or fewer of the servers are healthy: unhealthy
    return HealthStatus.UNHEALTHY;
  }

  /**
   * Get HTTP status code based on health status.
   *
   * @param healthStatus Overall status of a health check.
   * @returns 200 for HEALTHY and DEGRADED, 503 for UNHEALTHY, 500 for any other value.
   */
  public getHttpStatusCode(healthStatus: HealthStatus): number {
    switch (healthStatus) {
      case HealthStatus.HEALTHY:
        return 200;
      case HealthStatus.DEGRADED:
        return 200; // Still operational, but with warnings
      case HealthStatus.UNHEALTHY:
        return 503; // Service unavailable
      default:
        return 500; // Internal server error
    }
  }
}
// Also expose the class as the module's default export (same binding as the named export above).
export default HealthService;