Skip to main content
Glama
ooples

MCP Console Automation Server

RetryManager.ts17.8 kB
import { EventEmitter } from 'events'; import { Logger } from '../utils/logger.js'; export interface RetryContext { sessionId: string; operation: string; attemptNumber: number; maxAttempts: number; error?: Error; startTime: number; metadata?: Record<string, any>; } export interface RetryStrategy { maxAttempts: number; baseDelay: number; maxDelay: number; backoffMultiplier: number; jitterMax: number; circuitBreakerThreshold: number; circuitBreakerWindow: number; shouldRetry: (error: Error, context: RetryContext) => boolean; } export interface CircuitBreakerState { state: 'closed' | 'open' | 'half-open'; failures: number; lastFailureTime: number; nextAttemptTime: number; successCount: number; } export class RetryManager extends EventEmitter { private logger: Logger; private circuitBreakers: Map<string, CircuitBreakerState>; private retryStrategies: Map<string, RetryStrategy>; private activeRetries: Map<string, RetryContext>; // Default strategies for different error types private static readonly DEFAULT_STRATEGIES: Record<string, RetryStrategy> = { network: { maxAttempts: 5, baseDelay: 1000, maxDelay: 16000, backoffMultiplier: 2, jitterMax: 1000, circuitBreakerThreshold: 5, circuitBreakerWindow: 60000, shouldRetry: (error: Error) => { const networkErrors = [ 'ECONNREFUSED', 'ECONNRESET', 'ETIMEDOUT', 'ENETDOWN', 'ENETUNREACH', 'EHOSTDOWN', 'EHOSTUNREACH', 'ENOTFOUND', 'EAI_AGAIN', ]; return ( networkErrors.some((code) => error.message.includes(code)) || error.message.toLowerCase().includes('network') || error.message.toLowerCase().includes('connection') ); }, }, authentication: { maxAttempts: 2, baseDelay: 2000, maxDelay: 8000, backoffMultiplier: 2, jitterMax: 500, circuitBreakerThreshold: 3, circuitBreakerWindow: 120000, shouldRetry: (error: Error) => { // Check for explicit non-retryable flag if ((error as any).nonRetryable) { return false; } const authErrors = [ 'authentication failed', 'auth failed', 'invalid credentials', 'access denied', 'permission denied', 'unauthorized', ]; const errorMsg = error.message.toLowerCase(); return ( authErrors.some((msg) => errorMsg.includes(msg)) && !errorMsg.includes('invalid password') && !errorMsg.includes('invalid username') ); // Don't retry bad credentials }, }, resource: { maxAttempts: 4, baseDelay: 2000, maxDelay: 32000, backoffMultiplier: 2, jitterMax: 2000, circuitBreakerThreshold: 5, circuitBreakerWindow: 180000, shouldRetry: (error: Error) => { const resourceErrors = [ 'out of memory', 'resource temporarily unavailable', 'too many open files', 'no space left', 'quota exceeded', 'rate limit', 'throttled', ]; return resourceErrors.some((msg) => error.message.toLowerCase().includes(msg) ); }, }, ssh: { maxAttempts: 3, baseDelay: 1500, maxDelay: 12000, backoffMultiplier: 2, jitterMax: 1000, circuitBreakerThreshold: 4, circuitBreakerWindow: 90000, shouldRetry: (error: Error) => { // Check for explicit non-retryable flag first if ((error as any).nonRetryable) { return false; } const sshErrors = [ 'connection refused', 'connection reset', 'connection timeout', 'host unreachable', 'network unreachable', 'ssh connection lost', ]; const errorMsg = error.message.toLowerCase(); return ( sshErrors.some((msg) => errorMsg.includes(msg)) && !errorMsg.includes('permission denied') && !errorMsg.includes('authentication failed') && !errorMsg.includes('ssh password authentication is not supported') ); }, }, generic: { maxAttempts: 3, baseDelay: 1000, maxDelay: 8000, backoffMultiplier: 2, jitterMax: 500, circuitBreakerThreshold: 5, circuitBreakerWindow: 60000, shouldRetry: (error: Error) => { // Check for explicit non-retryable flag if ((error as any).nonRetryable) { return false; } // Generic transient errors const transientErrors = [ 'timeout', 'temporary', 'busy', 'unavailable', 'try again', ]; return transientErrors.some((msg) => error.message.toLowerCase().includes(msg) ); }, }, }; constructor() { super(); this.logger = new Logger('RetryManager'); this.circuitBreakers = new Map(); this.retryStrategies = new Map(); this.activeRetries = new Map(); // Initialize default strategies Object.entries(RetryManager.DEFAULT_STRATEGIES).forEach( ([name, strategy]) => { this.retryStrategies.set(name, strategy); } ); } /** * Execute an operation with retry logic */ async executeWithRetry<T>( operation: () => Promise<T>, options: { sessionId: string; operationName: string; strategyName?: string; context?: Record<string, any>; onRetry?: (context: RetryContext) => void; } ): Promise<T> { const strategyName = options.strategyName || this.classifyOperation(options.operationName); const strategy = this.retryStrategies.get(strategyName) || this.retryStrategies.get('generic')!; const circuitBreakerKey = `${options.sessionId}-${strategyName}`; // Check circuit breaker if (this.isCircuitOpen(circuitBreakerKey)) { const breaker = this.circuitBreakers.get(circuitBreakerKey)!; const timeUntilNextAttempt = Math.max( 0, breaker.nextAttemptTime - Date.now() ); throw new Error( `Circuit breaker is OPEN for ${strategyName}. ` + `Please wait ${Math.ceil(timeUntilNextAttempt / 1000)}s before retrying. ` + `This typically indicates repeated failures that require manual intervention.` ); } const retryKey = `${options.sessionId}-${options.operationName}`; const context: RetryContext = { sessionId: options.sessionId, operation: options.operationName, attemptNumber: 0, maxAttempts: strategy.maxAttempts, startTime: Date.now(), metadata: options.context, }; this.activeRetries.set(retryKey, context); try { return await this.attemptOperation( operation, strategy, context, circuitBreakerKey, options.onRetry ); } finally { this.activeRetries.delete(retryKey); } } private async attemptOperation<T>( operation: () => Promise<T>, strategy: RetryStrategy, context: RetryContext, circuitBreakerKey: string, onRetry?: (context: RetryContext) => void ): Promise<T> { while (context.attemptNumber < strategy.maxAttempts) { context.attemptNumber++; try { this.logger.debug( `Attempting ${context.operation} for session ${context.sessionId} ` + `(attempt ${context.attemptNumber}/${context.maxAttempts})` ); const result = await operation(); // Success - reset circuit breaker this.recordSuccess(circuitBreakerKey); if (context.attemptNumber > 1) { this.logger.info( `${context.operation} succeeded after ${context.attemptNumber} attempts ` + `for session ${context.sessionId}` ); this.emit('retry-success', { ...context, totalDuration: Date.now() - context.startTime, }); } return result; } catch (error) { context.error = error as Error; // Record failure for circuit breaker this.recordFailure(circuitBreakerKey); // Check if we should retry this error if (!strategy.shouldRetry(context.error, context)) { this.logger.warn( `${context.operation} failed with non-retryable error for session ${context.sessionId}: ${context.error.message}` ); this.emit('retry-failed', { ...context, reason: 'non-retryable-error', totalDuration: Date.now() - context.startTime, }); throw this.enhanceError( context.error, context, 'This error type cannot be automatically retried' ); } // Check if we've exhausted retries if (context.attemptNumber >= strategy.maxAttempts) { this.logger.error( `${context.operation} failed after ${context.attemptNumber} attempts ` + `for session ${context.sessionId}: ${context.error.message}` ); this.emit('retry-exhausted', { ...context, totalDuration: Date.now() - context.startTime, }); throw this.enhanceError( context.error, context, `Operation failed after ${context.maxAttempts} retry attempts. ` + `Consider checking your configuration or waiting before retrying manually.` ); } // Calculate delay for next attempt const delay = this.calculateDelay(context.attemptNumber, strategy); this.logger.info( `${context.operation} attempt ${context.attemptNumber} failed for session ${context.sessionId}. ` + `Retrying in ${delay}ms. Error: ${context.error.message}` ); this.emit('retry-attempt', { ...context, delay, nextAttempt: context.attemptNumber + 1, }); if (onRetry) { onRetry(context); } // Wait before next attempt await this.delay(delay); } } // This should never be reached, but TypeScript needs it throw context.error!; } private classifyOperation(operationName: string): string { const networkOps = ['connect', 'send', 'receive', 'request', 'fetch']; const authOps = ['authenticate', 'login', 'auth', 'credential']; const sshOps = ['ssh', 'shell', 'terminal']; const resourceOps = ['spawn', 'create', 'allocate', 'memory']; const opLower = operationName.toLowerCase(); if (sshOps.some((op) => opLower.includes(op))) return 'ssh'; if (networkOps.some((op) => opLower.includes(op))) return 'network'; if (authOps.some((op) => opLower.includes(op))) return 'authentication'; if (resourceOps.some((op) => opLower.includes(op))) return 'resource'; return 'generic'; } private calculateDelay( attemptNumber: number, strategy: RetryStrategy ): number { // Exponential backoff: baseDelay * (backoffMultiplier ^ (attemptNumber - 1)) const exponentialDelay = Math.min( strategy.baseDelay * Math.pow(strategy.backoffMultiplier, attemptNumber - 1), strategy.maxDelay ); // Add jitter to avoid thundering herd const jitter = Math.random() * strategy.jitterMax; return Math.floor(exponentialDelay + jitter); } private delay(ms: number): Promise<void> { return new Promise((resolve) => setTimeout(resolve, ms)); } private isCircuitOpen(key: string): boolean { const breaker = this.circuitBreakers.get(key); if (!breaker) return false; const now = Date.now(); if (breaker.state === 'open') { if (now >= breaker.nextAttemptTime) { // Transition to half-open breaker.state = 'half-open'; breaker.successCount = 0; this.logger.info(`Circuit breaker ${key} transitioning to HALF-OPEN`); return false; } return true; } if (breaker.state === 'half-open') { // Allow limited attempts in half-open state return false; } return false; } private recordSuccess(key: string): void { const breaker = this.circuitBreakers.get(key); if (!breaker) return; if (breaker.state === 'half-open') { breaker.successCount++; if (breaker.successCount >= 3) { // Transition back to closed breaker.state = 'closed'; breaker.failures = 0; this.logger.info(`Circuit breaker ${key} transitioning to CLOSED`); } } else if (breaker.state === 'closed') { // Reset failure count on success breaker.failures = Math.max(0, breaker.failures - 1); } } private recordFailure(key: string): void { const strategy = this.getStrategyForKey(key); let breaker = this.circuitBreakers.get(key); if (!breaker) { breaker = { state: 'closed', failures: 0, lastFailureTime: Date.now(), nextAttemptTime: 0, successCount: 0, }; this.circuitBreakers.set(key, breaker); } breaker.failures++; breaker.lastFailureTime = Date.now(); if ( breaker.state === 'closed' && breaker.failures >= strategy.circuitBreakerThreshold ) { // Transition to open breaker.state = 'open'; breaker.nextAttemptTime = Date.now() + strategy.circuitBreakerWindow; this.logger.warn( `Circuit breaker ${key} is now OPEN due to ${breaker.failures} consecutive failures. ` + `Will remain open for ${strategy.circuitBreakerWindow / 1000}s` ); this.emit('circuit-breaker-open', { key, failures: breaker.failures, windowMs: strategy.circuitBreakerWindow, }); } else if (breaker.state === 'half-open') { // Transition back to open breaker.state = 'open'; breaker.nextAttemptTime = Date.now() + strategy.circuitBreakerWindow; this.logger.warn( `Circuit breaker ${key} returning to OPEN state after failure in half-open` ); } } private getStrategyForKey(key: string): RetryStrategy { // Extract strategy name from key format: sessionId-strategyName const parts = key.split('-'); if (parts.length >= 2) { const strategyName = parts[parts.length - 1]; return ( this.retryStrategies.get(strategyName) || this.retryStrategies.get('generic')! ); } return this.retryStrategies.get('generic')!; } private enhanceError( originalError: Error, context: RetryContext, userGuidance: string ): Error { const enhancedError = new Error( `${originalError.message}\n\n` + `Operation: ${context.operation}\n` + `Session: ${context.sessionId}\n` + `Attempts: ${context.attemptNumber}/${context.maxAttempts}\n` + `Duration: ${Date.now() - context.startTime}ms\n\n` + `Guidance: ${userGuidance}\n\n` + `To resolve this issue:\n` + `1. Check your network connection and firewall settings\n` + `2. Verify credentials and permissions\n` + `3. Check if the target system is accessible and responsive\n` + `4. Consider waiting a few minutes before retrying manually\n` + `5. Review the system logs for additional error details` ); enhancedError.name = originalError.name; enhancedError.stack = originalError.stack; return enhancedError; } /** * Register a custom retry strategy */ registerStrategy(name: string, strategy: RetryStrategy): void { this.retryStrategies.set(name, strategy); this.logger.info(`Registered retry strategy: ${name}`); } /** * Get current circuit breaker states */ getCircuitBreakerStates(): Record<string, CircuitBreakerState> { const states: Record<string, CircuitBreakerState> = {}; this.circuitBreakers.forEach((state, key) => { states[key] = { ...state }; }); return states; } /** * Get active retry operations */ getActiveRetries(): RetryContext[] { return Array.from(this.activeRetries.values()); } /** * Manually reset a circuit breaker */ resetCircuitBreaker(key: string): boolean { const breaker = this.circuitBreakers.get(key); if (breaker) { breaker.state = 'closed'; breaker.failures = 0; breaker.successCount = 0; this.logger.info(`Manually reset circuit breaker: ${key}`); return true; } return false; } /** * Reset all circuit breakers */ resetAllCircuitBreakers(): void { this.circuitBreakers.forEach((breaker, key) => { breaker.state = 'closed'; breaker.failures = 0; breaker.successCount = 0; }); this.logger.info('Reset all circuit breakers'); } /** * Get retry statistics */ getRetryStats(): { activeRetries: number; circuitBreakers: { closed: number; open: number; halfOpen: number }; strategies: string[]; } { const circuitBreakers = { closed: 0, open: 0, halfOpen: 0 }; this.circuitBreakers.forEach((breaker) => { if (breaker.state === 'closed') circuitBreakers.closed++; else if (breaker.state === 'open') circuitBreakers.open++; else circuitBreakers.halfOpen++; }); return { activeRetries: this.activeRetries.size, circuitBreakers, strategies: Array.from(this.retryStrategies.keys()), }; } /** * Clean up resources */ destroy(): void { this.removeAllListeners(); this.circuitBreakers.clear(); this.activeRetries.clear(); this.logger.info('RetryManager destroyed'); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ooples/mcp-console-automation'

If you have feedback or need assistance with the MCP directory API, please join our Discord server