Skip to main content
Glama
ooples

MCP Console Automation Server

DiagnosticsManager.ts (38.1 kB)
import { EventEmitter } from 'events';
import { writeFileSync, existsSync, mkdirSync } from 'fs';
import { join } from 'path';
import { Logger } from '../utils/logger.js';

/** A single recorded diagnostic event. */
interface DiagnosticEvent {
  id: string;
  timestamp: Date;
  level: 'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal';
  category: string;
  operation: string;
  sessionId?: string;
  message: string;
  data?: any;
  stack?: string;
  duration?: number;
  metadata?: Record<string, any>;
}

/** Everything the manager tracks for one session. */
interface SessionDiagnostics {
  sessionId: string;
  createdAt: Date;
  lastActivity: Date;
  state: 'initializing' | 'ready' | 'active' | 'idle' | 'error' | 'terminated';
  operations: OperationTrace[];
  errors: ErrorTrace[];
  performance: PerformanceMetrics;
  networkStats?: NetworkDiagnostics;
  resourceUsage: ResourceMetrics;
  stateTransitions: StateTransition[];
  commandQueue: CommandQueueDiagnostics;
  healthChecks: HealthCheckResult[];
}

/** Start/end record of one tracked operation. */
interface OperationTrace {
  operationId: string;
  operationName: string;
  startTime: Date;
  endTime?: Date;
  duration?: number;
  status: 'pending' | 'running' | 'completed' | 'failed' | 'timeout';
  input?: any;
  output?: any;
  error?: string;
  retries?: number;
  metadata?: Record<string, any>;
}

/** One recorded error together with its recovery bookkeeping. */
interface ErrorTrace {
  errorId: string;
  timestamp: Date;
  errorType: string;
  errorMessage: string;
  stack?: string;
  context: Record<string, any>;
  recoveryAttempted: boolean;
  recoverySuccess?: boolean;
  impact: 'low' | 'medium' | 'high' | 'critical';
}

/** Aggregated timing and success figures (times in milliseconds). */
interface PerformanceMetrics {
  avgResponseTime: number;
  maxResponseTime: number;
  minResponseTime: number;
  totalOperations: number;
  failedOperations: number;
  successRate: number;
  throughput: number;
  latencyP50: number;
  latencyP95: number;
  latencyP99: number;
}

interface NetworkDiagnostics {
  connectionType: string;
  connectionState: 'connected' | 'disconnected' | 'reconnecting' | 'error';
  latency: number;
  packetLoss: number;
  bandwidth: number;
  lastPingTime?: Date;
  reconnectAttempts: number;
  sshDetails?: {
    host: string;
    port: number;
    username: string;
    authMethod: 'password' | 'key' | 'agent';
    compressionEnabled: boolean;
    keepAliveInterval: number;
    lastKeepAlive?: Date;
  };
}

interface ResourceMetrics {
  cpuUsage: number;
  memoryUsage: number;
  memoryLimit: number;
  outputBufferSize: number;
  commandQueueSize: number;
  activeHandlers: number;
  fileDescriptors?: number;
}

interface StateTransition {
  fromState: string;
  toState: string;
  timestamp: Date;
  trigger: string;
  duration: number;
  metadata?: Record<string, any>;
}

interface CommandQueueDiagnostics {
  queueSize: number;
  processingRate: number;
  averageWaitTime: number;
  droppedCommands: number;
  retriedCommands: number;
  lastProcessedTime?: Date;
  queueState: 'idle' | 'processing' | 'blocked' | 'error';
  blockReason?: string;
}

interface HealthCheckResult {
  timestamp: Date;
  checkType: string;
  status: 'healthy' | 'degraded' | 'unhealthy';
  details: Record<string, any>;
  recommendations?: string[];
}

interface DiagnosticReport {
  reportId: string;
  generatedAt: Date;
  systemInfo: SystemDiagnostics;
  sessions: SessionDiagnostics[];
  recentEvents: DiagnosticEvent[];
  alerts: DiagnosticAlert[];
  recommendations: string[];
  performanceSummary: PerformanceMetrics;
  errorSummary: ErrorSummary;
}

interface SystemDiagnostics {
  version: string;
  uptime: number;
  platform: string;
  nodeVersion: string;
  totalSessions: number;
  activeSessions: number;
  failedSessions: number;
  systemResources: ResourceMetrics;
  configuration: Record<string, any>;
}

interface DiagnosticAlert {
  alertId: string;
  timestamp: Date;
  severity: 'info' | 'warning' | 'error' | 'critical';
  category: string;
  message: string;
  affectedSessions?: string[];
  suggestedAction?: string;
  autoResolved: boolean;
  resolvedAt?: Date;
}

interface ErrorSummary {
  totalErrors: number;
  errorsByType: Record<string, number>;
  errorsBySession: Record<string, number>;
  topErrors: { error: string; count: number; lastOccurred: Date }[];
  errorTrend: 'increasing' | 'stable' | 'decreasing';
}

interface DiagnosticMetrics {
  totalSessions: number;
  failedSessions: number;
  averageResponseTime: number;
  successRate: number;
  sessions: Record<string, any>;
}

/** Options accepted by the constructor and by `getInstance`. */
interface DiagnosticsManagerConfig {
  enableDiagnostics?: boolean;
  verboseLogging?: boolean;
  persistDiagnostics?: boolean;
  diagnosticsPath?: string;
  maxEventHistory?: number;
  metricsIntervalMs?: number;
}

/** Default cap on the in-memory event history. */
const DEFAULT_MAX_EVENTS = 10000;
/** Cap on the number of samples kept per performance series. */
const MAX_SAMPLES_PER_SERIES = 1000;

/**
 * Returns the last `count` items of `items` as a new array.
 *
 * Unlike a bare `slice(-count)`, a count of 0 (or less) yields an empty array
 * instead of the whole input (`slice(-0)` is `slice(0)`).
 */
function takeLast<T>(items: readonly T[], count: number): T[] {
  return count > 0 ? items.slice(-count) : [];
}

/**
 * Collects diagnostic events, per-session traces, errors, performance samples
 * and alerts in memory, and can write a JSON report to disk.
 *
 * Emits `'diagnostic-event'` for every recorded event and `'diagnostic-alert'`
 * for every raised alert.
 */
export class DiagnosticsManager extends EventEmitter {
  private static instance: DiagnosticsManager | null = null;

  private logger: Logger;
  private events: DiagnosticEvent[] = [];
  private sessionDiagnostics: Map<string, SessionDiagnostics> = new Map();
  private alerts: DiagnosticAlert[] = [];
  private operationTraces: Map<string, OperationTrace> = new Map();
  private performanceData: Map<string, number[]> = new Map();
  private maxEventsStored = DEFAULT_MAX_EVENTS;
  private diagnosticsDir = './diagnostics';
  private metricsInterval: NodeJS.Timeout | null = null;
  private isRecording = true;
  private verboseMode = false;

  /**
   * @param config Optional settings. Recording is on unless
   *   `enableDiagnostics` is explicitly `false`; a metrics timer is started
   *   only when `metricsIntervalMs` is set; the diagnostics directory is
   *   created up front only when `persistDiagnostics` is set.
   */
  constructor(private config: DiagnosticsManagerConfig = {}) {
    super();
    this.logger = new Logger('DiagnosticsManager');
    this.diagnosticsDir = config.diagnosticsPath || './diagnostics';
    // A zero, negative or missing limit falls back to the default: a
    // non-positive limit would make the `slice(-limit)` trimming misbehave.
    this.maxEventsStored =
      config.maxEventHistory && config.maxEventHistory > 0
        ? config.maxEventHistory
        : DEFAULT_MAX_EVENTS;
    this.verboseMode = config.verboseLogging || false;
    this.isRecording = config.enableDiagnostics !== false;

    if (config.persistDiagnostics) {
      this.ensureDiagnosticsDirectory();
    }

    if (config.metricsIntervalMs) {
      this.startMetricsCollection(config.metricsIntervalMs);
    }

    this.logger.info('DiagnosticsManager initialized', { config });
  }

  /**
   * Returns the shared instance, creating it on first use.
   *
   * `config` is only honoured by the call that creates the instance; later
   * calls return the existing instance unchanged.
   */
  static getInstance(config?: DiagnosticsManagerConfig): DiagnosticsManager {
    if (!DiagnosticsManager.instance) {
      DiagnosticsManager.instance = new DiagnosticsManager(config || {});
    }
    return DiagnosticsManager.instance;
  }

  /** Creates the diagnostics directory (recursively) if it does not exist. */
  private ensureDiagnosticsDirectory() {
    if (!existsSync(this.diagnosticsDir)) {
      mkdirSync(this.diagnosticsDir, { recursive: true });
    }
  }

  // Core diagnostic recording

  /**
   * Stores an event, logs it, evaluates alert conditions and emits
   * `'diagnostic-event'`.
   *
   * @returns The generated event id, or `''` when recording is disabled.
   */
  recordEvent(event: Omit<DiagnosticEvent, 'id' | 'timestamp'>): string {
    if (!this.isRecording) return '';

    const diagnosticEvent: DiagnosticEvent = {
      id: this.generateEventId(),
      timestamp: new Date(),
      ...event,
    };

    this.events.push(diagnosticEvent);

    // Trim old events
    if (this.events.length > this.maxEventsStored) {
      this.events = this.events.slice(-this.maxEventsStored);
    }

    // Log based on level
    this.logEvent(diagnosticEvent);

    // Check for alert conditions
    this.checkAlertConditions(diagnosticEvent);

    // Emit for real-time monitoring
    this.emit('diagnostic-event', diagnosticEvent);

    return diagnosticEvent.id;
  }

  /**
   * Forwards an event to the logger. Only `error`/`fatal` events are logged
   * unless verbose mode is on.
   */
  private logEvent(event: DiagnosticEvent) {
    const logMessage = `[${event.category}] ${event.operation}: ${event.message}`;

    if (
      this.verboseMode ||
      event.level === 'error' ||
      event.level === 'fatal'
    ) {
      const logData = {
        sessionId: event.sessionId,
        data: event.data,
        metadata: event.metadata,
        stack: event.stack,
      };

      switch (event.level) {
        case 'trace':
        case 'debug':
          this.logger.debug(logMessage, logData);
          break;
        case 'info':
          this.logger.info(logMessage, logData);
          break;
        case 'warn':
          this.logger.warn(logMessage, logData);
          break;
        case 'error':
          this.logger.error(logMessage, logData);
          break;
        case 'fatal':
          this.logger.error(`FATAL: ${logMessage}`, logData);
          break;
      }
    }
  }

  // Session lifecycle tracking

  /**
   * Creates a fresh diagnostics record for `sessionId` in state
   * `'initializing'` (replacing any existing record for that id) and records
   * a `session_start` event carrying `metadata`.
   */
  startSessionDiagnostics(
    sessionId: string,
    metadata?: Record<string, any>
  ): void {
    const diagnostics: SessionDiagnostics = {
      sessionId,
      createdAt: new Date(),
      lastActivity: new Date(),
      state: 'initializing',
      operations: [],
      errors: [],
      performance: this.createEmptyPerformanceMetrics(),
      resourceUsage: this.getCurrentResourceMetrics(),
      stateTransitions: [],
      commandQueue: this.createEmptyCommandQueueDiagnostics(),
      healthChecks: [],
    };

    this.sessionDiagnostics.set(sessionId, diagnostics);

    this.recordEvent({
      level: 'info',
      category: 'session',
      operation: 'session_start',
      sessionId,
      message: `Session ${sessionId} diagnostics started`,
      data: metadata,
    });
  }

  /**
   * Moves a session to `newState` and appends a state transition.
   *
   * The transition's `duration` is the time in milliseconds the session spent
   * in the state it is leaving (measured from the previous transition, or
   * from session creation for the first one). For an unknown session only a
   * warning event is recorded.
   */
  updateSessionState(
    sessionId: string,
    newState: SessionDiagnostics['state'],
    trigger: string
  ): void {
    const diagnostics = this.sessionDiagnostics.get(sessionId);
    if (!diagnostics) {
      this.recordEvent({
        level: 'warn',
        category: 'session',
        operation: 'state_update_failed',
        sessionId,
        message: `Cannot update state for unknown session ${sessionId}`,
      });
      return;
    }

    const oldState = diagnostics.state;
    const now = new Date();
    const previousTransition =
      diagnostics.stateTransitions[diagnostics.stateTransitions.length - 1];
    const enteredOldStateAt = previousTransition
      ? previousTransition.timestamp
      : diagnostics.createdAt;

    diagnostics.state = newState;
    diagnostics.lastActivity = now;

    const transition: StateTransition = {
      fromState: oldState,
      toState: newState,
      timestamp: now,
      trigger,
      duration: Math.max(0, now.getTime() - enteredOldStateAt.getTime()),
    };

    diagnostics.stateTransitions.push(transition);

    this.recordEvent({
      level: 'debug',
      category: 'session',
      operation: 'state_transition',
      sessionId,
      message: `Session state: ${oldState} -> ${newState}`,
      data: { transition, trigger },
    });
  }

  // Operation tracking

  /**
   * Starts tracing an operation and returns its generated id.
   *
   * The trace is always stored; it is attached to the session's operation
   * list only when diagnostics exist for `sessionId`.
   */
  startOperation(
    sessionId: string,
    operationName: string,
    input?: any
  ): string {
    const operationId = this.generateOperationId();
    const trace: OperationTrace = {
      operationId,
      operationName,
      startTime: new Date(),
      status: 'running',
      input,
    };

    this.operationTraces.set(operationId, trace);

    // Add to session diagnostics
    const diagnostics = this.sessionDiagnostics.get(sessionId);
    if (diagnostics) {
      diagnostics.operations.push(trace);
      diagnostics.lastActivity = new Date();
    }

    this.recordEvent({
      level: 'debug',
      category: 'operation',
      operation: 'start',
      sessionId,
      message: `Starting operation: ${operationName}`,
      data: { operationId, input },
    });

    return operationId;
  }

  /**
   * Completes a traced operation: stamps end time, duration and status,
   * updates the owning session's performance metrics and records an `end`
   * event (level `error` for `'failed'`, `debug` otherwise).
   *
   * Unknown operation ids are logged as a warning and otherwise ignored.
   */
  endOperation(
    operationId: string,
    status: 'completed' | 'failed' | 'timeout',
    output?: any,
    error?: string
  ): void {
    const trace = this.operationTraces.get(operationId);
    if (!trace) {
      this.logger.warn(`Cannot end unknown operation: ${operationId}`);
      return;
    }

    trace.endTime = new Date();
    trace.duration = trace.endTime.getTime() - trace.startTime.getTime();
    trace.status = status;
    trace.output = output;
    trace.error = error;

    // Resolve the owning session once; it is needed for both the metrics and
    // the event (so per-session event queries include operation completions).
    const sessionId = this.findSessionForOperation(operationId);

    // Update performance metrics
    this.updatePerformanceMetrics(trace, sessionId);

    this.recordEvent({
      level: status === 'failed' ? 'error' : 'debug',
      category: 'operation',
      operation: 'end',
      sessionId,
      message: `Operation ${trace.operationName} ${status}`,
      data: { operationId, duration: trace.duration, status, error },
      duration: trace.duration,
    });
  }

  // Error tracking

  /**
   * Records an error against a session and returns the generated error id.
   *
   * `'critical'` impact is recorded as a `fatal` event, everything else as
   * `error`. `'high'` and `'critical'` impacts additionally raise an alert.
   */
  recordError(
    sessionId: string,
    error: Error | string,
    context: Record<string, any>,
    impact: ErrorTrace['impact'] = 'medium'
  ): string {
    const errorId = this.generateErrorId();
    const errorTrace: ErrorTrace = {
      errorId,
      timestamp: new Date(),
      errorType: error instanceof Error ? error.constructor.name : 'Error',
      errorMessage: error instanceof Error ? error.message : error,
      stack: error instanceof Error ? error.stack : undefined,
      context,
      recoveryAttempted: false,
      impact,
    };

    const diagnostics = this.sessionDiagnostics.get(sessionId);
    if (diagnostics) {
      diagnostics.errors.push(errorTrace);
      diagnostics.lastActivity = new Date();
    }

    this.recordEvent({
      level: impact === 'critical' ? 'fatal' : 'error',
      category: 'error',
      operation: 'error_recorded',
      sessionId,
      message: errorTrace.errorMessage,
      data: { errorId, context },
      stack: errorTrace.stack,
    });

    // Check if we need to raise an alert
    if (impact === 'critical' || impact === 'high') {
      this.raiseAlert({
        severity: impact === 'critical' ? 'critical' : 'error',
        category: 'error',
        message: `High impact error in session ${sessionId}: ${errorTrace.errorMessage}`,
        affectedSessions: [sessionId],
        suggestedAction:
          'Investigate immediately and consider session recovery',
      });
    }

    return errorId;
  }

  /**
   * Marks the error with id `errorId` (searched across all sessions) as
   * having had a recovery attempt and records the outcome. Does nothing when
   * no session holds that error.
   */
  recordRecoveryAttempt(errorId: string, success: boolean): void {
    // Find the error across all sessions
    for (const diagnostics of this.sessionDiagnostics.values()) {
      const error = diagnostics.errors.find((e) => e.errorId === errorId);
      if (error) {
        error.recoveryAttempted = true;
        error.recoverySuccess = success;

        this.recordEvent({
          level: success ? 'info' : 'warn',
          category: 'recovery',
          operation: 'recovery_attempt',
          sessionId: diagnostics.sessionId,
          message: `Recovery ${success ? 'successful' : 'failed'} for error ${errorId}`,
          data: { errorId, success },
        });
        break;
      }
    }
  }

  // Performance monitoring

  /**
   * Appends a sample to the `sessionId:metricName` series (last 1000 kept)
   * and records a warning event for `response_time` samples above 5000.
   */
  recordPerformanceMetric(
    sessionId: string,
    metricName: string,
    value: number
  ): void {
    const key = `${sessionId}:${metricName}`;
    let values = this.performanceData.get(key);
    if (!values) {
      values = [];
      this.performanceData.set(key, values);
    }

    values.push(value);

    // Keep only recent values (last 1000)
    if (values.length > MAX_SAMPLES_PER_SERIES) {
      values.shift();
    }

    // Check for performance degradation
    if (metricName === 'response_time' && value > 5000) {
      this.recordEvent({
        level: 'warn',
        category: 'performance',
        operation: 'slow_response',
        sessionId,
        message: `Slow response time detected: ${value}ms`,
        data: { metric: metricName, value },
      });
    }
  }

  /**
   * Folds a finished operation into its session's performance metrics.
   *
   * @param operation The finished trace; ignored while `duration` is unset.
   * @param sessionId Owning session, looked up from the trace when omitted.
   */
  private updatePerformanceMetrics(
    operation: OperationTrace,
    sessionId: string | undefined = this.findSessionForOperation(
      operation.operationId
    )
  ): void {
    // A duration of 0 ms is a valid measurement; only skip when it is unset.
    if (operation.duration === undefined) return;
    if (!sessionId) return;

    const diagnostics = this.sessionDiagnostics.get(sessionId);
    if (!diagnostics) return;

    const metrics = diagnostics.performance;

    // Update metrics
    metrics.totalOperations++;
    // Anything that did not complete (failed or timed out) counts as a
    // failure, matching calculateSuccessRate().
    if (operation.status !== 'completed') {
      metrics.failedOperations++;
    }

    // Update response times. The series must be stored back into the map,
    // otherwise every call would start from an empty history.
    const key = `${sessionId}:response_time`;
    let responseTimes = this.performanceData.get(key);
    if (!responseTimes) {
      responseTimes = [];
      this.performanceData.set(key, responseTimes);
    }
    responseTimes.push(operation.duration);
    if (responseTimes.length > MAX_SAMPLES_PER_SERIES) {
      responseTimes.shift();
    }

    const sorted = [...responseTimes].sort((a, b) => a - b);
    metrics.avgResponseTime =
      sorted.reduce((a, b) => a + b, 0) / sorted.length;
    metrics.minResponseTime = sorted[0] ?? 0;
    metrics.maxResponseTime = sorted[sorted.length - 1] ?? 0;

    // Calculate percentiles
    metrics.latencyP50 = this.getPercentile(sorted, 50);
    metrics.latencyP95 = this.getPercentile(sorted, 95);
    metrics.latencyP99 = this.getPercentile(sorted, 99);

    metrics.successRate =
      ((metrics.totalOperations - metrics.failedOperations) /
        metrics.totalOperations) *
      100;

    // Calculate throughput (operations per second); a session younger than
    // 1 ms would otherwise divide by zero.
    const sessionAge = Date.now() - diagnostics.createdAt.getTime();
    metrics.throughput =
      sessionAge > 0 ? (metrics.totalOperations / sessionAge) * 1000 : 0;
  }

  /**
   * Nearest-rank percentile of an ascending-sorted array; 0 for an empty one.
   */
  private getPercentile(sortedValues: number[], percentile: number): number {
    if (sortedValues.length === 0) return 0;
    const index = Math.ceil((percentile / 100) * sortedValues.length) - 1;
    const clamped = Math.min(sortedValues.length - 1, Math.max(0, index));
    return sortedValues[clamped] ?? 0;
  }

  // Health checks

  /**
   * Appends a health check result to a known session (last 100 kept) and
   * records an error event when the status is `'unhealthy'`. Unknown sessions
   * are ignored.
   */
  recordHealthCheck(
    sessionId: string,
    checkType: string,
    status: HealthCheckResult['status'],
    details: Record<string, any>
  ): void {
    const diagnostics = this.sessionDiagnostics.get(sessionId);
    if (!diagnostics) return;

    const healthCheck: HealthCheckResult = {
      timestamp: new Date(),
      checkType,
      status,
      details,
      recommendations: this.generateHealthRecommendations(status, details),
    };

    diagnostics.healthChecks.push(healthCheck);

    // Keep only recent health checks
    if (diagnostics.healthChecks.length > 100) {
      diagnostics.healthChecks = diagnostics.healthChecks.slice(-100);
    }

    if (status === 'unhealthy') {
      this.recordEvent({
        level: 'error',
        category: 'health',
        operation: 'health_check_failed',
        sessionId,
        message: `Health check failed: ${checkType}`,
        data: { status, details },
      });
    }
  }

  /**
   * Maps well-known flags in `details` to human-readable recommendations for
   * `'unhealthy'` and `'degraded'` results; `'healthy'` yields none.
   */
  private generateHealthRecommendations(
    status: HealthCheckResult['status'],
    details: Record<string, any>
  ): string[] {
    const recommendations: string[] = [];

    if (status === 'unhealthy') {
      if (details.connectionError) {
        recommendations.push('Check network connectivity');
        recommendations.push('Verify firewall settings');
        recommendations.push('Consider reconnecting the session');
      }
      if (details.highMemoryUsage) {
        recommendations.push('Clear output buffers');
        recommendations.push('Consider restarting the session');
      }
      if (details.commandQueueBlocked) {
        recommendations.push('Check for hanging commands');
        recommendations.push('Consider sending interrupt signal (Ctrl+C)');
      }
    } else if (status === 'degraded') {
      if (details.slowResponse) {
        recommendations.push('Monitor network latency');
        recommendations.push('Check system resources');
      }
      if (details.highErrorRate) {
        recommendations.push('Review recent errors');
        recommendations.push('Consider session recovery');
      }
    }

    return recommendations;
  }

  // Alert management

  /**
   * Stores an alert (last 100 kept), emits `'diagnostic-alert'` and records
   * an `alert_raised` event in category `'alert'`.
   */
  private raiseAlert(
    alert: Omit<
      DiagnosticAlert,
      'alertId' | 'timestamp' | 'autoResolved' | 'resolvedAt'
    >
  ): void {
    const diagnosticAlert: DiagnosticAlert = {
      alertId: this.generateAlertId(),
      timestamp: new Date(),
      autoResolved: false,
      ...alert,
    };

    this.alerts.push(diagnosticAlert);

    // Keep only recent alerts
    if (this.alerts.length > 100) {
      this.alerts = this.alerts.slice(-100);
    }

    this.emit('diagnostic-alert', diagnosticAlert);

    this.recordEvent({
      level:
        alert.severity === 'critical'
          ? 'fatal'
          : alert.severity === 'error'
            ? 'error'
            : 'warn',
      category: 'alert',
      operation: 'alert_raised',
      message: alert.message,
      data: { alertId: diagnosticAlert.alertId, severity: alert.severity },
    });
  }

  /**
   * Raises alerts for fatal events, for more than 10 error events in the last
   * minute, and for more than 5 "not found" events in the last 5 minutes.
   */
  private checkAlertConditions(event: DiagnosticEvent): void {
    // Events in the 'alert' category are the bookkeeping record of an alert
    // that was just raised. Evaluating them again would raise a new critical
    // alert for every critical alert (fatal -> alert -> fatal -> ...) and
    // recurse until the stack overflows.
    if (event.category === 'alert') return;

    const now = Date.now();

    // Check for critical patterns
    if (event.level === 'fatal') {
      this.raiseAlert({
        severity: 'critical',
        category: 'system',
        message: `Fatal error detected: ${event.message}`,
        affectedSessions: event.sessionId ? [event.sessionId] : undefined,
        suggestedAction: 'Immediate investigation required',
      });
    }

    // Check for error patterns
    if (event.level === 'error') {
      const recentErrors = this.events.filter(
        (e) => e.level === 'error' && e.timestamp.getTime() > now - 60000 // Last minute
      );

      if (recentErrors.length > 10) {
        this.raiseAlert({
          severity: 'warning',
          category: 'error_rate',
          message: `High error rate detected: ${recentErrors.length} errors in last minute`,
          suggestedAction:
            'Review error logs and consider system health check',
        });
      }
    }

    // Check for session issues
    if (event.category === 'session' && event.message.includes('not found')) {
      const sessionLostEvents = this.events.filter(
        (e) =>
          e.message.includes('not found') &&
          e.timestamp.getTime() > now - 300000 // Last 5 minutes
      );

      if (sessionLostEvents.length > 5) {
        this.raiseAlert({
          severity: 'error',
          category: 'session_management',
          message: `Multiple session loss events detected: ${sessionLostEvents.length} in last 5 minutes`,
          suggestedAction: 'Check session manager and consider restart',
        });
      }
    }
  }

  // Diagnostic reports

  /** Alias for {@link generateDiagnosticReport}. */
  generateReport(): DiagnosticReport {
    return this.generateDiagnosticReport();
  }

  /**
   * Builds a report from the current in-memory state (all sessions, the last
   * 100 events, unresolved alerts, summaries and recommendations) and writes
   * it to disk when `persistDiagnostics` is configured.
   */
  generateDiagnosticReport(): DiagnosticReport {
    // Computed once and shared with the recommendations below.
    const performanceSummary = this.calculateOverallPerformance();
    const errorSummary = this.generateErrorSummary();

    const report: DiagnosticReport = {
      reportId: this.generateReportId(),
      generatedAt: new Date(),
      systemInfo: this.getSystemDiagnostics(),
      sessions: Array.from(this.sessionDiagnostics.values()),
      recentEvents: this.events.slice(-100), // Last 100 events
      alerts: this.alerts.filter((a) => !a.autoResolved),
      recommendations: this.generateSystemRecommendations(
        errorSummary,
        performanceSummary
      ),
      performanceSummary,
      errorSummary,
    };

    // Persist report if configured
    if (this.config.persistDiagnostics) {
      this.persistReport(report);
    }

    return report;
  }

  /** Snapshot of process information and session counts. */
  private getSystemDiagnostics(): SystemDiagnostics {
    const sessions = Array.from(this.sessionDiagnostics.values());

    return {
      version: '1.0.0',
      uptime: process.uptime(),
      platform: process.platform,
      nodeVersion: process.version,
      totalSessions: sessions.length,
      activeSessions: sessions.filter((s) => s.state === 'active').length,
      failedSessions: sessions.filter((s) => s.state === 'error').length,
      systemResources: this.getCurrentResourceMetrics(),
      configuration: this.config,
    };
  }

  /**
   * Samples process-wide resource figures.
   *
   * `cpuUsage` is cumulative user CPU time in seconds, `memoryUsage` is
   * `heapUsed` and `memoryLimit` is `heapTotal`, both in MB. `fileDescriptors`
   * is the number of entries from `process.getActiveResourcesInfo()` where
   * that API exists (never on win32).
   */
  private getCurrentResourceMetrics(): ResourceMetrics {
    const memUsage = process.memoryUsage();

    return {
      cpuUsage: process.cpuUsage().user / 1000000, // Convert to seconds
      memoryUsage: memUsage.heapUsed / 1024 / 1024, // MB
      memoryLimit: memUsage.heapTotal / 1024 / 1024, // MB
      outputBufferSize: 0, // Would need to be tracked
      commandQueueSize: 0, // Would need to be tracked
      activeHandlers: this.listenerCount('diagnostic-event'),
      fileDescriptors:
        process.platform === 'win32'
          ? undefined
          : process.getActiveResourcesInfo?.()?.length,
    };
  }

  /**
   * Aggregates per-session performance metrics.
   *
   * Response-time figures only consider sessions that have finished at least
   * one operation, so idle sessions do not pull the minimum and average to 0.
   * The average is the mean of the per-session averages; throughput is the
   * mean over all sessions; percentiles are computed over the pooled
   * `response_time` samples. With no finished operations the empty metrics
   * (100% success rate, all timings 0) are returned.
   */
  private calculateOverallPerformance(): PerformanceMetrics {
    const overall = this.createEmptyPerformanceMetrics();
    const entries = Array.from(this.sessionDiagnostics.entries());
    const measured = entries.filter(
      ([, session]) => session.performance.totalOperations > 0
    );

    if (measured.length === 0) {
      return overall;
    }

    overall.minResponseTime = Number.MAX_VALUE;
    const pooledSamples: number[] = [];

    // Aggregate metrics from all sessions that have data
    for (const [sessionId, session] of measured) {
      const metrics = session.performance;
      overall.totalOperations += metrics.totalOperations;
      overall.failedOperations += metrics.failedOperations;
      overall.maxResponseTime = Math.max(
        overall.maxResponseTime,
        metrics.maxResponseTime
      );
      overall.minResponseTime = Math.min(
        overall.minResponseTime,
        metrics.minResponseTime
      );
      overall.avgResponseTime += metrics.avgResponseTime;

      const samples = this.performanceData.get(`${sessionId}:response_time`);
      if (samples) {
        for (const sample of samples) {
          pooledSamples.push(sample);
        }
      }
    }

    // Average the averages
    overall.avgResponseTime /= measured.length;

    let throughputSum = 0;
    for (const [, session] of entries) {
      throughputSum += session.performance.throughput;
    }
    overall.throughput = throughputSum / entries.length;

    overall.successRate =
      ((overall.totalOperations - overall.failedOperations) /
        overall.totalOperations) *
      100;

    if (pooledSamples.length > 0) {
      pooledSamples.sort((a, b) => a - b);
      overall.latencyP50 = this.getPercentile(pooledSamples, 50);
      overall.latencyP95 = this.getPercentile(pooledSamples, 95);
      overall.latencyP99 = this.getPercentile(pooledSamples, 99);
    }

    return overall;
  }

  /**
   * Summarises the errors attached to sessions: counts by type and session,
   * the ten most frequent messages, and a trend comparing the last hour with
   * the hour before it (20% tolerance either way).
   */
  private generateErrorSummary(): ErrorSummary {
    const allErrors: ErrorTrace[] = [];
    const errorsByType: Record<string, number> = {};
    const errorsBySession: Record<string, number> = {};

    // Collect all errors
    this.sessionDiagnostics.forEach((diagnostics, sessionId) => {
      diagnostics.errors.forEach((error) => {
        allErrors.push(error);
        errorsByType[error.errorType] =
          (errorsByType[error.errorType] || 0) + 1;
        errorsBySession[sessionId] = (errorsBySession[sessionId] || 0) + 1;
      });
    });

    // Calculate top errors
    const errorCounts = new Map<string, { count: number; lastOccurred: Date }>();
    for (const error of allErrors) {
      const entry = errorCounts.get(error.errorMessage);
      if (!entry) {
        errorCounts.set(error.errorMessage, {
          count: 1,
          lastOccurred: error.timestamp,
        });
        continue;
      }
      entry.count++;
      if (error.timestamp > entry.lastOccurred) {
        entry.lastOccurred = error.timestamp;
      }
    }

    const topErrors = Array.from(errorCounts.entries())
      .map(([error, data]) => ({ error, ...data }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 10);

    // Determine trend
    const now = Date.now();
    const recentErrors = allErrors.filter(
      (e) => e.timestamp.getTime() > now - 3600000
    ); // Last hour
    const olderErrors = allErrors.filter(
      (e) =>
        e.timestamp.getTime() > now - 7200000 &&
        e.timestamp.getTime() <= now - 3600000
    ); // Previous hour

    let errorTrend: ErrorSummary['errorTrend'] = 'stable';
    if (recentErrors.length > olderErrors.length * 1.2) {
      errorTrend = 'increasing';
    } else if (recentErrors.length < olderErrors.length * 0.8) {
      errorTrend = 'decreasing';
    }

    return {
      totalErrors: allErrors.length,
      errorsByType,
      errorsBySession,
      topErrors,
      errorTrend,
    };
  }

  /**
   * Derives system-level recommendations from the error summary, overall
   * performance, session states and current heap usage.
   *
   * @param errorSummary Precomputed summary; computed here when omitted.
   * @param performance Precomputed overall metrics; computed here when omitted.
   */
  private generateSystemRecommendations(
    errorSummary: ErrorSummary = this.generateErrorSummary(),
    performance: PerformanceMetrics = this.calculateOverallPerformance()
  ): string[] {
    const recommendations: string[] = [];
    const sessions = Array.from(this.sessionDiagnostics.values());

    // Check error trends
    if (errorSummary.errorTrend === 'increasing') {
      recommendations.push(
        'Error rate is increasing - investigate recent changes'
      );
    }

    if (errorSummary.totalErrors > 100) {
      recommendations.push(
        'High error count detected - review error logs and consider system health check'
      );
    }

    // Check performance
    if (performance.avgResponseTime > 1000) {
      recommendations.push(
        'Average response time is high - check network and system resources'
      );
    }

    if (performance.successRate < 95) {
      recommendations.push(
        'Success rate below 95% - investigate failing operations'
      );
    }

    // Check sessions
    const now = Date.now();
    const staleSessions = sessions.filter(
      (s) => s.state === 'idle' && now - s.lastActivity.getTime() > 3600000 // 1 hour
    );

    if (staleSessions.length > 0) {
      recommendations.push(
        `${staleSessions.length} idle sessions detected - consider cleanup`
      );
    }

    const erroredSessions = sessions.filter((s) => s.state === 'error');
    if (erroredSessions.length > 0) {
      recommendations.push(
        `${erroredSessions.length} sessions in error state - consider recovery or restart`
      );
    }

    // Check resource usage
    const resources = this.getCurrentResourceMetrics();
    if (resources.memoryUsage / resources.memoryLimit > 0.8) {
      recommendations.push(
        'High memory usage detected - consider increasing memory limit or cleaning up sessions'
      );
    }

    return recommendations;
  }

  /**
   * Writes the report as pretty-printed JSON into the diagnostics directory.
   * Failures are logged, never thrown.
   */
  private persistReport(report: DiagnosticReport): void {
    try {
      // The directory may have been removed since construction.
      this.ensureDiagnosticsDirectory();
      const filename = `diagnostic-report-${report.reportId}-${new Date().toISOString().replace(/:/g, '-')}.json`;
      const filepath = join(this.diagnosticsDir, filename);
      writeFileSync(filepath, JSON.stringify(report, null, 2));
      this.logger.info(`Diagnostic report saved to ${filepath}`);
    } catch (error) {
      this.logger.error('Failed to persist diagnostic report:', error);
    }
  }

  // Utility methods

  /** Builds `<prefix>-<epoch ms>-<up to 9 random base-36 chars>`. */
  private generateId(prefix: string): string {
    // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
    return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
  }

  private generateEventId(): string {
    return this.generateId('evt');
  }

  private generateOperationId(): string {
    return this.generateId('op');
  }

  private generateErrorId(): string {
    return this.generateId('err');
  }

  private generateAlertId(): string {
    return this.generateId('alert');
  }

  private generateReportId(): string {
    return this.generateId('report');
  }

  /** Returns the id of the session whose operation list holds `operationId`. */
  private findSessionForOperation(operationId: string): string | undefined {
    for (const [sessionId, diagnostics] of this.sessionDiagnostics) {
      if (diagnostics.operations.some((op) => op.operationId === operationId)) {
        return sessionId;
      }
    }
    return undefined;
  }

  private createEmptyPerformanceMetrics(): PerformanceMetrics {
    return {
      avgResponseTime: 0,
      maxResponseTime: 0,
      minResponseTime: 0,
      totalOperations: 0,
      failedOperations: 0,
      successRate: 100,
      throughput: 0,
      latencyP50: 0,
      latencyP95: 0,
      latencyP99: 0,
    };
  }

  private createEmptyCommandQueueDiagnostics(): CommandQueueDiagnostics {
    return {
      queueSize: 0,
      processingRate: 0,
      averageWaitTime: 0,
      droppedCommands: 0,
      retriedCommands: 0,
      queueState: 'idle',
    };
  }

  /** Starts the periodic metrics timer (stopped again by `cleanup`). */
  private startMetricsCollection(intervalMs: number): void {
    this.metricsInterval = setInterval(() => {
      this.collectMetrics();
    }, intervalMs);
  }

  /**
   * Refreshes `resourceUsage` for every `active` or `ready` session and
   * records a trace event per session.
   */
  private collectMetrics(): void {
    // Collect metrics for all active sessions
    this.sessionDiagnostics.forEach((diagnostics, sessionId) => {
      if (diagnostics.state === 'active' || diagnostics.state === 'ready') {
        const resources = this.getCurrentResourceMetrics();
        diagnostics.resourceUsage = resources;

        // Record as event for tracking
        this.recordEvent({
          level: 'trace',
          category: 'metrics',
          operation: 'collect',
          sessionId,
          message: 'Metrics collected',
          data: resources,
        });
      }
    });
  }

  // Public API for getting diagnostic info

  getSessionDiagnostics(sessionId: string): SessionDiagnostics | undefined {
    return this.sessionDiagnostics.get(sessionId);
  }

  getAllSessionDiagnostics(): SessionDiagnostics[] {
    return Array.from(this.sessionDiagnostics.values());
  }

  /** Alerts that have not been marked auto-resolved. */
  getActiveAlerts(): DiagnosticAlert[] {
    return this.alerts.filter((a) => !a.autoResolved);
  }

  getErrorsForSession(sessionId: string): ErrorTrace[] {
    const diagnostics = this.sessionDiagnostics.get(sessionId);
    return diagnostics ? diagnostics.errors : [];
  }

  /**
   * Metrics for one session (empty metrics when it is unknown), or the
   * aggregate over all sessions when `sessionId` is omitted.
   */
  getPerformanceMetrics(sessionId?: string): PerformanceMetrics {
    if (sessionId) {
      const diagnostics = this.sessionDiagnostics.get(sessionId);
      return diagnostics
        ? diagnostics.performance
        : this.createEmptyPerformanceMetrics();
    }
    return this.calculateOverallPerformance();
  }

  // Public API methods

  /** Alias for {@link recordEvent}. */
  trackEvent(event: Omit<DiagnosticEvent, 'id' | 'timestamp'>): string {
    return this.recordEvent(event);
  }

  /**
   * Appends `duration` to the series named `operation` (last 1000 kept) and
   * records a trace event.
   */
  trackPerformance(operation: string, duration: number): void {
    let data = this.performanceData.get(operation);
    if (!data) {
      data = [];
      this.performanceData.set(operation, data);
    }

    data.push(duration);

    // Keep only last 1000 data points per operation
    if (data.length > MAX_SAMPLES_PER_SERIES) {
      this.performanceData.set(operation, data.slice(-MAX_SAMPLES_PER_SERIES));
    }

    this.recordEvent({
      level: 'trace',
      category: 'performance',
      operation: operation,
      message: `Operation completed in ${duration}ms`,
      duration: duration,
    });
  }

  /**
   * Records an arbitrary thrown value as an error event.
   *
   * The error is attached to a session only when `context.sessionId` names a
   * known session. Accepts any value, including `null`/`undefined` and plain
   * strings, without throwing.
   */
  trackError(error: Error | any, context?: Record<string, any>): void {
    const errorMessage: string = error?.message
      ? String(error.message)
      : String(error);

    const errorTrace: ErrorTrace = {
      errorId: this.generateErrorId(),
      timestamp: new Date(),
      errorType: error?.name || 'Unknown',
      errorMessage,
      stack: error?.stack,
      context: context || {},
      recoveryAttempted: false,
      impact: this.assessErrorImpact(error),
    };

    // Add to session diagnostics if sessionId is in context
    if (context?.sessionId) {
      const sessionDiag = this.sessionDiagnostics.get(context.sessionId);
      if (sessionDiag) {
        sessionDiag.errors.push(errorTrace);
        sessionDiag.lastActivity = new Date();
      }
    }

    this.recordEvent({
      level: 'error',
      category: 'error',
      operation: context?.operation || 'unknown',
      sessionId: context?.sessionId,
      message: errorMessage,
      data: errorTrace,
      stack: error?.stack,
    });
  }

  /**
   * Classifies an error by substrings of its message (or of the value itself
   * when a plain string was thrown). Values without a string message are
   * `'low'`.
   */
  private assessErrorImpact(
    error: any
  ): 'low' | 'medium' | 'high' | 'critical' {
    const message: string =
      typeof error === 'string'
        ? error
        : typeof error?.message === 'string'
          ? error.message
          : '';

    if (message.includes('Session not found')) return 'high';
    if (message.includes('timeout')) return 'medium';
    if (message.includes('ECONNREFUSED')) return 'critical';
    return 'low';
  }

  /** Compact metrics snapshot across all sessions. */
  getMetrics(): DiagnosticMetrics {
    return {
      totalSessions: this.sessionDiagnostics.size,
      failedSessions: Array.from(this.sessionDiagnostics.values()).filter(
        (s) => s.state === 'error'
      ).length,
      averageResponseTime: this.calculateAverageResponseTime(),
      successRate: this.calculateSuccessRate(),
      sessions: this.getSessionMetrics(),
    };
  }

  /** Mean over every sample in every performance series; 0 when empty. */
  private calculateAverageResponseTime(): number {
    let total = 0;
    let sampleCount = 0;
    this.performanceData.forEach((durations) => {
      for (const duration of durations) {
        total += duration;
        sampleCount++;
      }
    });

    return sampleCount === 0 ? 0 : total / sampleCount;
  }

  /**
   * Percentage of all known operation traces whose status is `'completed'`
   * (operations still running count against the rate); 100 when there are
   * no traces.
   */
  private calculateSuccessRate(): number {
    const total = this.operationTraces.size;
    if (total === 0) return 100;

    let successful = 0;
    this.operationTraces.forEach((op) => {
      if (op.status === 'completed') successful++;
    });

    return (successful / total) * 100;
  }

  private getSessionMetrics(): Record<string, any> {
    const metrics: Record<string, any> = {};

    this.sessionDiagnostics.forEach((diag, sessionId) => {
      metrics[sessionId] = {
        state: diag.state,
        operationCount: diag.operations.length,
        errorCount: diag.errors.length,
        performance: diag.performance,
      };
    });

    return metrics;
  }

  /** The most recent `count` events with level `'error'`, oldest first. */
  getRecentErrors(count: number = 10): any[] {
    const errorEvents = this.events.filter((e) => e.level === 'error');
    return takeLast(errorEvents, count).map((e) => ({
      timestamp: e.timestamp,
      message: e.message,
      sessionId: e.sessionId,
      data: e.data,
    }));
  }

  /** The most recent `count` events, optionally limited to one session. */
  getRecentEvents(count: number = 100, sessionId?: string): DiagnosticEvent[] {
    const events = sessionId
      ? this.events.filter((e) => e.sessionId === sessionId)
      : this.events;
    return takeLast(events, count);
  }

  /**
   * Runs three threshold checks: fewer than 40 sessions, fewer than 5 of the
   * 10 most recent events being errors, and heap usage under 500 MB.
   * `healthy` is true only when every check passes.
   */
  async getHealthStatus(): Promise<{
    healthy: boolean;
    checks: Record<string, any>;
  }> {
    const sessionCount = this.sessionDiagnostics.size;
    const recentErrorCount = this.getRecentErrors(10).length;
    const heapUsed = process.memoryUsage().heapUsed;

    const checks: Record<string, any> = {
      sessions: {
        healthy: sessionCount < 40,
        message: `${sessionCount}/50 sessions`,
      },
      errors: {
        healthy: recentErrorCount < 5,
        message: `${recentErrorCount} recent errors`,
      },
      memory: {
        healthy: heapUsed < 500 * 1024 * 1024,
        message: `${Math.floor(heapUsed / 1024 / 1024)}MB used`,
      },
    };

    const healthy = Object.values(checks).every((c) => c.healthy);

    return { healthy, checks };
  }

  getEventCount(): number {
    return this.events.length;
  }

  /** Drops the oldest events so that at most `maxEvents` remain. */
  trimEvents(maxEvents: number): void {
    if (this.events.length > maxEvents) {
      this.events = takeLast(this.events, maxEvents);
    }
  }

  // Control methods

  setVerboseMode(verbose: boolean): void {
    this.verboseMode = verbose;
    this.logger.info(`Verbose mode ${verbose ? 'enabled' : 'disabled'}`);
  }

  setRecording(enabled: boolean): void {
    this.isRecording = enabled;
    this.logger.info(
      `Diagnostic recording ${enabled ? 'enabled' : 'disabled'}`
    );
  }

  /**
   * Clears stored diagnostics.
   *
   * With a `sessionId`, removes that session's record, events, operation
   * traces and performance series. Without one, clears everything including
   * alerts.
   */
  clearDiagnostics(sessionId?: string): void {
    if (sessionId) {
      // Release the traces owned by this session so they do not accumulate.
      const diagnostics = this.sessionDiagnostics.get(sessionId);
      if (diagnostics) {
        for (const operation of diagnostics.operations) {
          this.operationTraces.delete(operation.operationId);
        }
      }

      // Per-session series are keyed `${sessionId}:${metricName}`.
      // NOTE(review): a session id that itself contains ':' could share this
      // prefix with another session's id - confirm ids never contain ':'.
      const seriesPrefix = `${sessionId}:`;
      for (const key of Array.from(this.performanceData.keys())) {
        if (key.startsWith(seriesPrefix)) {
          this.performanceData.delete(key);
        }
      }

      this.sessionDiagnostics.delete(sessionId);
      this.events = this.events.filter((e) => e.sessionId !== sessionId);
      this.logger.info(`Cleared diagnostics for session ${sessionId}`);
    } else {
      this.sessionDiagnostics.clear();
      this.events = [];
      this.alerts = [];
      this.operationTraces.clear();
      this.performanceData.clear();
      this.logger.info('Cleared all diagnostics');
    }
  }

  // Cleanup

  /**
   * Stops the metrics timer, writes a final report when persistence is
   * configured and sessions exist, and removes all event listeners.
   */
  cleanup(): void {
    if (this.metricsInterval) {
      clearInterval(this.metricsInterval);
      this.metricsInterval = null;
    }

    // Generate final report if configured
    if (this.config.persistDiagnostics && this.sessionDiagnostics.size > 0) {
      this.generateDiagnosticReport();
    }

    this.removeAllListeners();
    this.logger.info('DiagnosticsManager cleaned up');
  }
}

export default DiagnosticsManager;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ooples/mcp-console-automation'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.