import { EventEmitter } from 'eventemitter3';
import { Logger } from 'winston';
import {
AzureCloudShellSession,
AzureBastionSession,
AzureArcSession,
AzureTokenInfo,
AzureResourceInfo
} from '../types/index.js';
/**
 * Aggregated monitoring state maintained by `AzureMonitoring` and returned
 * from `AzureMonitoring.getMetrics()`.
 */
export interface AzureMonitoringMetrics {
/** Counts of sessions currently registered for monitoring. */
sessionCount: {
/** Number of registered sessions. */
total: number;
/** Keyed by session type; this file uses 'cloud-shell', 'bastion' and 'arc'. */
byType: Record<string, number>;
/** Session counts keyed by region name. */
byRegion: Record<string, number>;
/** Session counts keyed by status. */
byStatus: Record<string, number>;
};
/** Counters fed by `recordAuthenticationEvent`. */
authentication: {
/** Number of 'token-refresh' events recorded. */
tokenRefreshCount: number;
/** Number of refreshed tokens that were already inside the expiry-warning window. */
tokenExpirySoon: number;
/** Number of 'auth-failure' events recorded. */
authFailures: number;
/** Time of the most recent 'token-refresh' event; null until the first one. */
lastTokenRefresh: Date | null;
};
/** Connection statistics fed by `recordConnectionEvent`. */
performance: {
averageConnectionTime: number;
/** Mean latency of successful connections; compared against `maxLatencyMs`, so milliseconds. */
averageLatency: number;
/** Number of 'success' connection events counted. */
connectionSuccess: number;
/** Number of 'failure' connection events counted. */
connectionFailures: number;
reconnectAttempts: number;
};
/** Azure resources referenced by registered sessions. */
resources: {
/** `subscription` values taken from registered sessions. */
activeSubscriptions: Set<string>;
/** `resourceGroup` values taken from registered sessions. */
activeResourceGroups: Set<string>;
/** `location` values taken from registered sessions. */
activeRegions: Set<string>;
/** Utilization per resource as a fraction (0.8 means 80%), compared against `quotaUtilizationThreshold`. */
quotaUtilization: Record<string, number>;
};
/** Error counters fed by `recordErrorEvent`; `authenticationErrors` is also bumped by 'auth-failure' events. */
errors: {
azureApiErrors: number;
networkErrors: number;
authenticationErrors: number;
configurationErrors: number;
};
/** Cost tracking fed by `updateCostEstimates`. */
costs: {
/** Latest cost estimate per session id. */
estimatedSessionCosts: Record<string, number>;
/** Sum of all values in `estimatedSessionCosts`. */
totalEstimatedCost: number;
/** Human-readable suggestions regenerated by the routine monitoring checks. */
costOptimizationSuggestions: string[];
};
}
/**
 * Result of `AzureMonitoring.performHealthCheck()`: one entry per checked
 * area plus an overall verdict.
 */
export interface AzureHealthCheck {
/** When the health check was taken. */
timestamp: Date;
/** 'critical' if any check failed, otherwise 'warning' if any check warned, otherwise 'healthy'. */
overall: 'healthy' | 'warning' | 'critical';
checks: {
/** Token expiry and authentication-failure status. */
authentication: {
status: 'pass' | 'warn' | 'fail';
message: string;
/** Number of registered sessions whose token is inside the expiry-warning window. */
tokensExpiringSoon?: number;
/** Time of the last recorded token refresh, when there has been one. */
lastRefresh?: Date;
};
/** Connection failure-rate status. */
connectivity: {
status: 'pass' | 'warn' | 'fail';
message: string;
/** Number of sessions registered at check time. */
activeConnections: number;
/** Failures divided by (successes + failures); 0 when no connections were recorded. */
failureRate: number;
};
/** Quota utilization status. */
resources: {
status: 'pass' | 'warn' | 'fail';
message: string;
/** Utilization per resource as a fraction (0.8 means 80%). */
quotaUtilization: Record<string, number>;
/** One entry per resource above the configured quota threshold. */
recommendations?: string[];
};
/** Latency status. */
performance: {
status: 'pass' | 'warn' | 'fail';
message: string;
/** Mean latency of successful connections, in milliseconds. */
averageLatency: number;
/** Successes divided by (successes + failures); 1 when no connections were recorded. */
connectionSuccessRate: number;
};
};
/** Follow-up actions derived from the individual checks and the cost total. */
recommendations: string[];
}
/**
 * Thresholds that drive alerts and health-check verdicts. The
 * `AzureMonitoring` constructor supplies defaults for every field.
 */
export interface AzureAlertConfig {
/** Warn when a token expires within this many hours (default 2). */
tokenExpiryWarningHours: number;
/** Maximum tolerated connection failure rate as a fraction (default 0.15, i.e. 15%). */
maxConnectionFailureRate: number;
/** Maximum tolerated average latency in milliseconds (default 5000). */
maxLatencyMs: number;
/** Quota utilization fraction above which a resource is flagged (default 0.8, i.e. 80%). */
quotaUtilizationThreshold: number;
/** Total estimated cost above which a cost alert is raised (default 100, documented as $100 per day). */
costThresholdDaily: number;
}
/**
 * Event map for `AzureMonitoring`: event name to listener signature.
 */
interface AzureMonitoringEvents {
/** A session's token is inside the warning window; `expiresIn` is in hours. */
'token-expiry-warning': (sessionId: string, expiresIn: number) => void;
/** The connection failure rate (a fraction) exceeded `maxConnectionFailureRate`. */
'connection-failure-threshold': (failureRate: number) => void;
/** The average latency (milliseconds) exceeded `maxLatencyMs`. */
'performance-degradation': (latency: number) => void;
/** Quota utilization alert for a resource; `utilization` is a fraction. */
'quota-warning': (resource: string, utilization: number) => void;
/** The total estimated cost exceeded `costThresholdDaily`; the payload is that total. */
'cost-alert': (dailyCost: number) => void;
/** A health check finished; the payload is its full result. */
'health-check-completed': (result: AzureHealthCheck) => void;
}
/** Any Azure session shape that `AzureMonitoring` can track. */
type MonitoredAzureSession = AzureCloudShellSession | AzureBastionSession | AzureArcSession;

/**
 * Tracks Azure sessions (Cloud Shell, Bastion, Arc) and derives metrics,
 * alerts and health checks from the events callers record on it.
 *
 * A background timer is started by the constructor and runs the routine
 * checks once a minute; call {@link AzureMonitoring.stop} to release it.
 * `sessionCount.byStatus` and `performance.reconnectAttempts` are part of the
 * metrics shape but are not maintained by this class.
 */
export class AzureMonitoring extends EventEmitter<AzureMonitoringEvents> {
  /** Period of the background routine checks. */
  private static readonly ROUTINE_CHECK_INTERVAL_MS = 60 * 1000;
  private static readonly MS_PER_HOUR = 60 * 60 * 1000;
  /** A session with no activity for this long is reported as idle. */
  private static readonly IDLE_SESSION_THRESHOLD_MS = 30 * 60 * 1000;
  /** More auth failures than this makes the authentication check fail. */
  private static readonly AUTH_FAILURE_FAIL_THRESHOLD = 5;

  private metrics: AzureMonitoringMetrics;
  private alertConfig: AzureAlertConfig;
  private sessions: Map<string, MonitoredAzureSession> = new Map();
  private connectionStats: Map<string, { startTime: Date; endTime?: Date; success: boolean; latency?: number }> = new Map();
  private monitoringInterval: NodeJS.Timeout | null = null;
  private logger: Logger;

  /**
   * @param logger Destination for debug/info/warn/error output.
   * @param config Optional threshold overrides; properties that are missing
   *   or explicitly `undefined` keep their default value.
   */
  constructor(logger: Logger, config?: Partial<AzureAlertConfig>) {
    super();
    this.logger = logger;
    this.alertConfig = AzureMonitoring.resolveAlertConfig(config);
    this.metrics = AzureMonitoring.createEmptyMetrics();
    this.startMonitoring();
  }

  /**
   * Register a new Azure session for monitoring.
   *
   * Registering an id that is already tracked replaces the previous entry
   * instead of counting the session twice.
   */
  registerSession(session: MonitoredAzureSession): void {
    const previous = this.sessions.get(session.sessionId);
    if (previous) {
      this.removeFromCounts(previous);
    }

    this.sessions.set(session.sessionId, session);

    const sessionType = AzureMonitoring.resolveSessionType(session);
    const counts = this.metrics.sessionCount;
    counts.total++;
    counts.byType[sessionType] = (counts.byType[sessionType] ?? 0) + 1;
    if ('location' in session) {
      counts.byRegion[session.location] = (counts.byRegion[session.location] ?? 0) + 1;
    }

    if (previous) {
      // The replaced entry may have referenced different resources.
      this.refreshActiveResources();
    } else {
      this.trackResources(session);
    }

    this.logger.debug(`Registered Azure ${sessionType} session for monitoring: ${session.sessionId}`);
  }

  /**
   * Unregister an Azure session from monitoring. Unknown ids are ignored.
   *
   * Connection statistics for the session are dropped and the active
   * subscription / resource-group / region sets are rebuilt from the
   * sessions that remain.
   */
  unregisterSession(sessionId: string): void {
    const session = this.sessions.get(sessionId);
    if (!session) {
      return;
    }

    this.sessions.delete(sessionId);
    this.connectionStats.delete(sessionId);
    this.removeFromCounts(session);
    this.refreshActiveResources();

    this.logger.debug(`Unregistered Azure session from monitoring: ${sessionId}`);
  }

  /**
   * Record an authentication event.
   *
   * @param eventType 'token-refresh' or 'auth-failure'.
   * @param tokenInfo For 'token-refresh', the refreshed token; used to count
   *   tokens that are already inside the expiry-warning window.
   */
  recordAuthenticationEvent(eventType: 'token-refresh' | 'auth-failure', tokenInfo?: AzureTokenInfo): void {
    switch (eventType) {
      case 'token-refresh':
        this.metrics.authentication.tokenRefreshCount++;
        this.metrics.authentication.lastTokenRefresh = new Date();
        if (tokenInfo) {
          this.checkTokenExpiry(tokenInfo);
        }
        break;
      case 'auth-failure':
        this.metrics.authentication.authFailures++;
        this.metrics.errors.authenticationErrors++;
        break;
    }
  }

  /**
   * Record a connection lifecycle event for a session.
   *
   * 'success' and 'failure' are only counted when a matching 'start' was
   * recorded first; otherwise they are ignored.
   *
   * @param latency Measured latency in milliseconds for 'success'. When
   *   omitted or not finite, the time elapsed since 'start' is used. A value
   *   of 0 is kept as a real measurement.
   */
  recordConnectionEvent(sessionId: string, eventType: 'start' | 'success' | 'failure', latency?: number): void {
    switch (eventType) {
      case 'start': {
        this.connectionStats.set(sessionId, { startTime: new Date(), success: false });
        break;
      }
      case 'success': {
        const stat = this.connectionStats.get(sessionId);
        if (stat) {
          const endTime = new Date();
          stat.success = true;
          stat.endTime = endTime;
          stat.latency =
            latency !== undefined && Number.isFinite(latency)
              ? latency
              : endTime.getTime() - stat.startTime.getTime();
          this.metrics.performance.connectionSuccess++;
          this.updatePerformanceMetrics();
        }
        break;
      }
      case 'failure': {
        const stat = this.connectionStats.get(sessionId);
        if (stat) {
          stat.success = false;
          stat.endTime = new Date();
          this.metrics.performance.connectionFailures++;
          this.checkConnectionFailureRate();
        }
        break;
      }
    }
  }

  /**
   * Record an error event and log it as a warning.
   */
  recordErrorEvent(errorType: 'api' | 'network' | 'authentication' | 'configuration', error: Error): void {
    switch (errorType) {
      case 'api':
        this.metrics.errors.azureApiErrors++;
        break;
      case 'network':
        this.metrics.errors.networkErrors++;
        break;
      case 'authentication':
        this.metrics.errors.authenticationErrors++;
        break;
      case 'configuration':
        this.metrics.errors.configurationErrors++;
        break;
    }
    // The error text is part of the message itself: a bare string passed as a
    // second argument is not reliably rendered by the logger.
    this.logger.warn(`Azure ${errorType} error recorded: ${error.message}`);
  }

  /**
   * Record the quota utilization of a resource.
   *
   * @param resource Name of the quota-limited resource.
   * @param utilization Utilization as a fraction (0.8 means 80%). Non-finite
   *   values are ignored.
   * Emits 'quota-warning' when the value exceeds `quotaUtilizationThreshold`.
   */
  recordQuotaUtilization(resource: string, utilization: number): void {
    if (!Number.isFinite(utilization)) {
      this.logger.warn(`Ignoring non-finite quota utilization for Azure resource ${resource}`);
      return;
    }
    this.metrics.resources.quotaUtilization[resource] = utilization;
    if (utilization > this.alertConfig.quotaUtilizationThreshold) {
      this.emit('quota-warning', resource, utilization);
    }
  }

  /**
   * Update the estimated cost of a session and re-total all estimates.
   *
   * Non-finite estimates are ignored so that a single bad value cannot turn
   * the total into NaN and silently disable cost alerts. Emits 'cost-alert'
   * whenever the total exceeds `costThresholdDaily`.
   */
  updateCostEstimates(sessionId: string, costEstimate: number): void {
    if (!Number.isFinite(costEstimate)) {
      this.logger.warn(`Ignoring non-finite cost estimate for Azure session ${sessionId}`);
      return;
    }

    const costs = this.metrics.costs;
    costs.estimatedSessionCosts[sessionId] = costEstimate;
    costs.totalEstimatedCost = Object.values(costs.estimatedSessionCosts)
      .reduce((sum, cost) => sum + cost, 0);

    if (costs.totalEstimatedCost > this.alertConfig.costThresholdDaily) {
      this.emit('cost-alert', costs.totalEstimatedCost);
    }
  }

  /**
   * Get a snapshot of the current metrics.
   *
   * Every nested record, set, array and date is copied, so callers can
   * neither observe later updates through the returned object nor mutate the
   * monitor's internal state.
   */
  getMetrics(): AzureMonitoringMetrics {
    const current = this.metrics;
    const lastRefresh = current.authentication.lastTokenRefresh;
    return {
      sessionCount: {
        total: current.sessionCount.total,
        byType: { ...current.sessionCount.byType },
        byRegion: { ...current.sessionCount.byRegion },
        byStatus: { ...current.sessionCount.byStatus }
      },
      authentication: {
        ...current.authentication,
        lastTokenRefresh: lastRefresh ? new Date(lastRefresh.getTime()) : null
      },
      performance: { ...current.performance },
      resources: {
        activeSubscriptions: new Set(current.resources.activeSubscriptions),
        activeResourceGroups: new Set(current.resources.activeResourceGroups),
        activeRegions: new Set(current.resources.activeRegions),
        quotaUtilization: { ...current.resources.quotaUtilization }
      },
      errors: { ...current.errors },
      costs: {
        estimatedSessionCosts: { ...current.costs.estimatedSessionCosts },
        totalEstimatedCost: current.costs.totalEstimatedCost,
        costOptimizationSuggestions: [...current.costs.costOptimizationSuggestions]
      }
    };
  }

  /**
   * Perform a comprehensive health check.
   *
   * @returns The per-area results, an overall verdict ('critical' if any
   *   check failed, 'warning' if any warned) and recommendations. The same
   *   result is emitted as 'health-check-completed'.
   */
  async performHealthCheck(): Promise<AzureHealthCheck> {
    const checks: AzureHealthCheck['checks'] = {
      authentication: this.checkAuthentication(),
      connectivity: this.checkConnectivity(),
      resources: this.checkResources(),
      performance: this.checkPerformance()
    };

    const statuses = Object.values(checks).map(check => check.status);
    let overall: AzureHealthCheck['overall'] = 'healthy';
    if (statuses.includes('fail')) {
      overall = 'critical';
    } else if (statuses.includes('warn')) {
      overall = 'warning';
    }

    const result: AzureHealthCheck = {
      timestamp: new Date(),
      overall,
      checks,
      recommendations: this.generateRecommendations(checks)
    };

    this.emit('health-check-completed', result);
    return result;
  }

  /**
   * Stop the background monitoring timer. Safe to call more than once.
   */
  stop(): void {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
      this.monitoringInterval = null;
    }
    this.logger.info('Azure monitoring stopped');
  }

  /**
   * Start the background timer (no-op when it is already running).
   */
  private startMonitoring(): void {
    if (this.monitoringInterval) {
      return;
    }

    this.monitoringInterval = setInterval(() => {
      // An exception escaping a timer callback would surface as an uncaught
      // exception; log it and let the next tick try again.
      try {
        this.performRoutineChecks();
      } catch (error: unknown) {
        const reason = error instanceof Error ? error.message : String(error);
        this.logger.error(`Azure routine monitoring check failed: ${reason}`);
      }
    }, AzureMonitoring.ROUTINE_CHECK_INTERVAL_MS);

    this.logger.info('Azure monitoring started');
  }

  /**
   * Routine checks: emit 'token-expiry-warning' for every session whose
   * token is inside the warning window, then refresh the cost suggestions.
   */
  private performRoutineChecks(): void {
    for (const session of this.sessions.values()) {
      const hoursUntilExpiry = AzureMonitoring.hoursUntil(session.tokenExpiry);
      if (hoursUntilExpiry <= this.alertConfig.tokenExpiryWarningHours) {
        this.emit('token-expiry-warning', session.sessionId, hoursUntilExpiry);
      }
    }

    this.updateCostOptimizationSuggestions();
  }

  /**
   * Count a refreshed token that is already inside the warning window.
   */
  private checkTokenExpiry(tokenInfo: AzureTokenInfo): void {
    if (AzureMonitoring.hoursUntil(tokenInfo.expiresOn) <= this.alertConfig.tokenExpiryWarningHours) {
      this.metrics.authentication.tokenExpirySoon++;
    }
  }

  /**
   * Emit 'connection-failure-threshold' when the failure rate is too high.
   */
  private checkConnectionFailureRate(): void {
    const { connectionSuccess, connectionFailures } = this.metrics.performance;
    const totalConnections = connectionSuccess + connectionFailures;
    if (totalConnections === 0) {
      return;
    }

    const failureRate = connectionFailures / totalConnections;
    if (failureRate > this.alertConfig.maxConnectionFailureRate) {
      this.emit('connection-failure-threshold', failureRate);
    }
  }

  /**
   * Recompute average latency and average connection time from the
   * successful connections still being tracked, and emit
   * 'performance-degradation' when the latency average exceeds the limit.
   */
  private updatePerformanceMetrics(): void {
    const successful = Array.from(this.connectionStats.values()).filter(stat => stat.success);

    const durations = successful
      .filter(stat => stat.endTime !== undefined)
      .map(stat => (stat.endTime as Date).getTime() - stat.startTime.getTime());
    if (durations.length > 0) {
      this.metrics.performance.averageConnectionTime = AzureMonitoring.mean(durations);
    }

    // Compare against undefined rather than truthiness so 0 ms still counts.
    const latencies = successful
      .map(stat => stat.latency)
      .filter((value): value is number => value !== undefined);
    if (latencies.length === 0) {
      return;
    }

    const averageLatency = AzureMonitoring.mean(latencies);
    this.metrics.performance.averageLatency = averageLatency;
    if (averageLatency > this.alertConfig.maxLatencyMs) {
      this.emit('performance-degradation', averageLatency);
    }
  }

  /**
   * Check authentication health. Excessive failures take precedence over
   * expiring tokens so a failing state is never reported as a mere warning.
   */
  private checkAuthentication(): AzureHealthCheck['checks']['authentication'] {
    const { authFailures, lastTokenRefresh } = this.metrics.authentication;
    const lastRefresh = lastTokenRefresh ?? undefined;
    const tokensExpiring = Array.from(this.sessions.values())
      .filter(session => AzureMonitoring.hoursUntil(session.tokenExpiry) <= this.alertConfig.tokenExpiryWarningHours)
      .length;

    if (authFailures > AzureMonitoring.AUTH_FAILURE_FAIL_THRESHOLD) {
      return {
        status: 'fail',
        message: `High number of authentication failures: ${authFailures}`,
        tokensExpiringSoon: tokensExpiring,
        lastRefresh
      };
    }

    if (tokensExpiring > 0) {
      return {
        status: 'warn',
        message: `${tokensExpiring} token(s) expiring soon`,
        tokensExpiringSoon: tokensExpiring,
        lastRefresh
      };
    }

    return {
      status: 'pass',
      message: 'Authentication is healthy',
      tokensExpiringSoon: 0,
      lastRefresh
    };
  }

  /**
   * Check connectivity health: fail above the configured failure rate, warn
   * above half of it.
   */
  private checkConnectivity(): AzureHealthCheck['checks']['connectivity'] {
    const { connectionSuccess, connectionFailures } = this.metrics.performance;
    const totalConnections = connectionSuccess + connectionFailures;
    const failureRate = totalConnections > 0 ? connectionFailures / totalConnections : 0;
    const activeConnections = this.sessions.size;
    const limit = this.alertConfig.maxConnectionFailureRate;

    if (failureRate > limit) {
      return {
        status: 'fail',
        message: `High connection failure rate: ${(failureRate * 100).toFixed(1)}%`,
        activeConnections,
        failureRate
      };
    }

    if (failureRate > limit * 0.5) {
      return {
        status: 'warn',
        message: `Elevated connection failure rate: ${(failureRate * 100).toFixed(1)}%`,
        activeConnections,
        failureRate
      };
    }

    return {
      status: 'pass',
      message: 'Connectivity is healthy',
      activeConnections,
      failureRate
    };
  }

  /**
   * Check resources health: warn when any recorded quota utilization is
   * above the configured threshold.
   */
  private checkResources(): AzureHealthCheck['checks']['resources'] {
    // Hand out a copy so the result cannot be used to mutate internal state.
    const quotaUtilization = { ...this.metrics.resources.quotaUtilization };
    const highUtilization = Object.entries(quotaUtilization)
      .filter(([, utilization]) => utilization > this.alertConfig.quotaUtilizationThreshold);

    if (highUtilization.length > 0) {
      return {
        status: 'warn',
        message: `High quota utilization in ${highUtilization.length} resource(s)`,
        quotaUtilization,
        recommendations: highUtilization.map(([resource, util]) =>
          `Consider increasing quota or reducing usage for ${resource} (${(util * 100).toFixed(1)}% utilized)`
        )
      };
    }

    return {
      status: 'pass',
      message: 'Resource utilization is healthy',
      quotaUtilization
    };
  }

  /**
   * Check performance health: fail above the latency limit, warn above 70%
   * of it.
   */
  private checkPerformance(): AzureHealthCheck['checks']['performance'] {
    const { connectionSuccess, connectionFailures, averageLatency } = this.metrics.performance;
    const totalConnections = connectionSuccess + connectionFailures;
    const connectionSuccessRate = totalConnections > 0 ? connectionSuccess / totalConnections : 1;
    const limit = this.alertConfig.maxLatencyMs;

    if (averageLatency > limit) {
      return {
        status: 'fail',
        message: `High average latency: ${averageLatency}ms`,
        averageLatency,
        connectionSuccessRate
      };
    }

    if (averageLatency > limit * 0.7) {
      return {
        status: 'warn',
        message: `Elevated average latency: ${averageLatency}ms`,
        averageLatency,
        connectionSuccessRate
      };
    }

    return {
      status: 'pass',
      message: 'Performance is healthy',
      averageLatency,
      connectionSuccessRate
    };
  }

  /**
   * Generate recommendations from the individual check results, including
   * the per-resource quota recommendations, plus a cost hint once the total
   * estimate passes 80% of the cost threshold.
   */
  private generateRecommendations(checks: AzureHealthCheck['checks']): string[] {
    const recommendations: string[] = [];

    if (checks.authentication.status !== 'pass') {
      recommendations.push('Consider implementing proactive token refresh before expiry');
    }
    if (checks.connectivity.status !== 'pass') {
      recommendations.push('Review network configuration and Azure service health');
    }
    if (checks.resources.status !== 'pass') {
      recommendations.push(...(checks.resources.recommendations ?? []));
    }
    if (checks.performance.status !== 'pass') {
      recommendations.push('Consider optimizing connection strategies or using regional endpoints');
    }
    if (this.metrics.costs.totalEstimatedCost > this.alertConfig.costThresholdDaily * 0.8) {
      recommendations.push('Review session usage patterns to optimize costs');
    }

    return recommendations;
  }

  /**
   * Rebuild the cost optimization suggestions from the current sessions.
   */
  private updateCostOptimizationSuggestions(): void {
    const suggestions: string[] = [];

    // Many concurrent sessions: suggest consolidation.
    if (this.sessions.size > 10) {
      suggestions.push('Consider consolidating multiple short sessions into fewer long-running sessions');
    }

    // Many regions in use: suggest region consolidation.
    if (this.metrics.resources.activeRegions.size > 3) {
      suggestions.push('Consider consolidating resources to fewer regions to reduce data transfer costs');
    }

    // Sessions without recent activity: suggest cleanup. `lastActivity` is
    // free-form metadata, so it is validated instead of assumed to be a Date.
    const now = Date.now();
    let idleCount = 0;
    for (const session of this.sessions.values()) {
      const lastActivity = AzureMonitoring.toTimestamp(session.metadata?.lastActivity);
      if (lastActivity !== undefined && now - lastActivity > AzureMonitoring.IDLE_SESSION_THRESHOLD_MS) {
        idleCount++;
      }
    }
    if (idleCount > 0) {
      suggestions.push(`Consider closing ${idleCount} idle session(s) to reduce costs`);
    }

    this.metrics.costs.costOptimizationSuggestions = suggestions;
  }

  /** Remove one session's contribution from the total, type and region counts. */
  private removeFromCounts(session: MonitoredAzureSession): void {
    const counts = this.metrics.sessionCount;
    counts.total = Math.max(0, counts.total - 1);
    AzureMonitoring.decrement(counts.byType, AzureMonitoring.resolveSessionType(session));
    if ('location' in session) {
      AzureMonitoring.decrement(counts.byRegion, session.location);
    }
  }

  /** Add a session's subscription, resource group and region to the active sets. */
  private trackResources(session: MonitoredAzureSession): void {
    if ('subscription' in session) {
      this.metrics.resources.activeSubscriptions.add(session.subscription);
    }
    if ('resourceGroup' in session) {
      this.metrics.resources.activeResourceGroups.add(session.resourceGroup);
    }
    if ('location' in session) {
      this.metrics.resources.activeRegions.add(session.location);
    }
  }

  /** Rebuild the active resource sets from the sessions currently registered. */
  private refreshActiveResources(): void {
    const resources = this.metrics.resources;
    resources.activeSubscriptions = new Set();
    resources.activeResourceGroups = new Set();
    resources.activeRegions = new Set();
    for (const session of this.sessions.values()) {
      this.trackResources(session);
    }
  }

  /** Classify a session by the distinguishing property it carries. */
  private static resolveSessionType(session: MonitoredAzureSession): string {
    if ('webSocketUrl' in session) {
      return 'cloud-shell';
    }
    if ('bastionResourceId' in session) {
      return 'bastion';
    }
    return 'arc';
  }

  /** Hours from now until `when`; negative once `when` is in the past. */
  private static hoursUntil(when: Date): number {
    return (when.getTime() - Date.now()) / AzureMonitoring.MS_PER_HOUR;
  }

  /** Arithmetic mean of a non-empty list. */
  private static mean(values: number[]): number {
    return values.reduce((sum, value) => sum + value, 0) / values.length;
  }

  /** Decrement a keyed counter without letting it go below zero. */
  private static decrement(counts: Record<string, number>, key: string): void {
    const current = counts[key] ?? 0;
    if (current > 0) {
      counts[key] = current - 1;
    }
  }

  /** Convert a Date, date string or epoch number to epoch milliseconds; undefined if not a valid time. */
  private static toTimestamp(value: unknown): number | undefined {
    let millis: number;
    if (value instanceof Date) {
      millis = value.getTime();
    } else if (typeof value === 'string' || typeof value === 'number') {
      millis = new Date(value).getTime();
    } else {
      return undefined;
    }
    return Number.isNaN(millis) ? undefined : millis;
  }

  /** Merge caller overrides over the default thresholds, skipping undefined values. */
  private static resolveAlertConfig(overrides?: Partial<AzureAlertConfig>): AzureAlertConfig {
    const resolved: AzureAlertConfig = {
      tokenExpiryWarningHours: 2, // Warn 2 hours before token expires
      maxConnectionFailureRate: 0.15, // 15% failure rate
      maxLatencyMs: 5000, // 5 seconds
      quotaUtilizationThreshold: 0.8, // 80% quota utilization
      costThresholdDaily: 100 // $100 per day
    };
    if (overrides) {
      for (const key of Object.keys(resolved) as Array<keyof AzureAlertConfig>) {
        const value = overrides[key];
        if (value !== undefined) {
          resolved[key] = value;
        }
      }
    }
    return resolved;
  }

  /** Fresh, zeroed metrics state. */
  private static createEmptyMetrics(): AzureMonitoringMetrics {
    return {
      sessionCount: {
        total: 0,
        byType: {},
        byRegion: {},
        byStatus: {}
      },
      authentication: {
        tokenRefreshCount: 0,
        tokenExpirySoon: 0,
        authFailures: 0,
        lastTokenRefresh: null
      },
      performance: {
        averageConnectionTime: 0,
        averageLatency: 0,
        connectionSuccess: 0,
        connectionFailures: 0,
        reconnectAttempts: 0
      },
      resources: {
        activeSubscriptions: new Set(),
        activeResourceGroups: new Set(),
        activeRegions: new Set(),
        quotaUtilization: {}
      },
      errors: {
        azureApiErrors: 0,
        networkErrors: 0,
        authenticationErrors: 0,
        configurationErrors: 0
      },
      costs: {
        estimatedSessionCosts: {},
        totalEstimatedCost: 0,
        costOptimizationSuggestions: []
      }
    };
  }
}