ARC Config MCP Server

arc-installer-enhanced.ts•233 kB

/** * Enhanced ARC Installer Service with Comprehensive Troubleshooting * * This enhanced version includes extensive troubleshooting scenarios based on real-world * experience with ARC installations and cleanup operations. */ import type { KubernetesService } from './kubernetes.js'; import type { GitHubService } from './github.js'; import { CommandExecutor } from '../utils/command-executor.js'; import { ArcInstaller, type InstallationOptions, type InstallationState, type InstallationPhase } from './arc-installer.js'; export interface TroubleshootingScenario { name: string; pattern: RegExp; severity: 'critical' | 'high' | 'medium' | 'low'; description: string; solution: string[]; autoFix?: () => Promise<boolean>; } export interface TroubleshootingResult { issue: string; severity: 'critical' | 'high' | 'medium' | 'low'; description: string; solutions: string[]; autoFixApplied: boolean; autoFixSuccessful?: boolean; } export interface EnhancedCleanupState { phases: Record<string, InstallationPhase>; removedComponents: string[]; preservedComponents: string[]; totalTime: number; warnings: string[]; errors: string[]; troubleshootingResults: TroubleshootingResult[]; recoveryActions: string[]; } /** * Enhanced ARC Installer with comprehensive troubleshooting capabilities */ export class EnhancedArcInstaller extends ArcInstaller { private troubleshootingScenarios: TroubleshootingScenario[] = []; private kubernetesService: KubernetesService; private githubService: GitHubService; constructor(kubernetes: KubernetesService, github: GitHubService, logger: any) { super(kubernetes, github, logger); this.kubernetesService = kubernetes; this.githubService = github; this.initializeTroubleshootingScenarios(); } /** * Initialize comprehensive troubleshooting scenarios based on real-world experience */ private initializeTroubleshootingScenarios(): void { this.troubleshootingScenarios = [ // Namespace Terminating Issues (from our experience) { name: 'Namespace Stuck Terminating', pattern: /namespace.*terminating/i, severity: 'high', description: 'Namespace is stuck in Terminating state due to finalizers or orphaned resources', solution: [ 'Identify resources with finalizers that are blocking deletion', 'Check for orphaned custom resources (runners, runnerdeployments)', 'Force remove finalizers from stuck resources', 'Use kubectl patch to remove namespace finalizers if needed' ], autoFix: async () => await this.autoFixStuckNamespace() }, // Image Pull Issues (from our experience) { name: 'Image Pull Authentication Failed', pattern: /(ImagePullBackOff|ErrImagePull).*unauthorized/i, severity: 'critical', description: 'Container images cannot be pulled due to registry authentication issues', solution: [ 'Check GitHub Container Registry (GHCR) authentication', 'Verify GitHub token has packages:read permission', 'Try alternative image repositories (Docker Hub, Quay.io)', 'Use specific image versions instead of latest tags', 'Configure image pull secrets if needed' ], autoFix: async () => await this.autoFixImagePullIssues() }, // Certificate Manager Issues { name: 'Cert-Manager Not Ready', pattern: /(cert-manager.*not ready|webhook.*not ready)/i, severity: 'high', description: 'cert-manager is not fully operational, blocking ARC installation', solution: [ 'Wait for cert-manager pods to be ready', 'Check cert-manager webhook accessibility', 'Verify cert-manager CRDs are installed', 'Test cert-manager with a simple Issuer resource' ], autoFix: async () => await this.autoFixCertManager() }, // Helm Installation Issues { name: 'Helm Installation Timeout', pattern: /(helm.*timeout|installation.*timeout)/i, severity: 'medium', description: 'Helm installation is taking too long, likely due to resource constraints', solution: [ 'Increase Helm timeout values', 'Check cluster resource availability', 'Verify all dependencies are satisfied', 'Consider using specific chart versions' ], autoFix: async () => await this.autoFixHelmTimeout() }, // Pod Security Standards Issues { name: 'Pod Security Standards Violation', pattern: /(pod security.*violation|security context.*invalid)/i, severity: 'medium', description: 'Pod security configuration violates cluster security policies', solution: [ 'Adjust pod security context settings', 'Use privileged namespace if necessary', 'Configure appropriate runAsUser and runAsGroup', 'Set proper security context capabilities' ], autoFix: async () => await this.autoFixPodSecurity() }, // GitHub Token Issues { name: 'GitHub Token Invalid or Expired', pattern: /(github.*unauthorized|token.*invalid|authentication.*failed)/i, severity: 'critical', description: 'GitHub token is invalid, expired, or has insufficient permissions', solution: [ 'Verify GitHub token is not expired', 'Check token has required permissions (repo, admin:org)', 'Regenerate GitHub token if necessary', 'Ensure token is correctly configured in secret' ], autoFix: async () => await this.autoFixGitHubToken() }, // Resource Limits Issues { name: 'Insufficient Cluster Resources', pattern: /(insufficient.*resources|cpu.*memory.*limits)/i, severity: 'medium', description: 'Cluster does not have sufficient CPU or memory resources', solution: [ 'Scale cluster nodes or increase node capacity', 'Adjust resource requests and limits', 'Remove unnecessary workloads', 'Use resource quotas and limits ranges' ], autoFix: async () => await this.autoFixResourceLimits() }, // Network Policy Issues { name: 'Network Connectivity Problems', pattern: /(network.*policy.*denied|connection.*refused)/i, severity: 'medium', description: 'Network policies are blocking required connections', solution: [ 'Review and adjust network policies', 'Ensure ingress/egress rules allow required traffic', 'Check DNS resolution within cluster', 'Verify service discovery is working' ], autoFix: async () => await this.autoFixNetworkPolicy() }, // Custom Resource Definition Issues { name: 'CRD Version Conflicts', pattern: /(crd.*version.*conflict|custom resource.*invalid)/i, severity: 'high', description: 'Custom Resource Definitions have version conflicts or are invalid', solution: [ 'Update CRDs to compatible versions', 'Remove conflicting CRD versions', 'Reinstall ARC with proper CRD management', 'Check for deprecated API versions' ], autoFix: async () => await this.autoFixCRDConflicts() }, // Webhook Configuration Issues { name: 'Webhook Configuration Problems', pattern: /(webhook.*configuration.*invalid|admission.*controller.*failed)/i, severity: 'high', description: 'Admission webhooks are misconfigured or not accessible', solution: [ 'Check webhook endpoint accessibility', 'Verify webhook certificates are valid', 'Ensure webhook service is running', 'Review webhook failure policy' ], autoFix: async () => await this.autoFixWebhookConfig() }, // Runner Registration Issues { name: 'Runner Registration Failures', pattern: /(runner.*registration.*failed|github.*api.*error)/i, severity: 'medium', description: 'Runners cannot register with GitHub due to configuration issues', solution: [ 'Verify GitHub organization/repository configuration', 'Check runner group permissions', 'Ensure proper GitHub App or token permissions', 'Review runner labels and configurations' ], autoFix: async () => await this.autoFixRunnerRegistration() } ]; } /** * Enhanced cleanup with comprehensive troubleshooting */ async cleanupArcWithTroubleshooting(options: any = {}): Promise<any> { const startTime = Date.now(); this.log('🔧 Starting enhanced ARC cleanup with comprehensive troubleshooting'); const cleanupState: EnhancedCleanupState = { phases: { 'validation': { name: 'Pre-cleanup Validation', status: 'pending', errors: [], warnings: [], aiInsights: [] }, 'troubleshooting': { name: 'Issue Detection & Resolution', status: 'pending', errors: [], warnings: [], aiInsights: [] }, 'forced_cleanup': { name: 'Forced Resource Cleanup', status: 'pending', errors: [], warnings: [], aiInsights: [] }, 'finalizer_removal': { name: 'Finalizer Removal', status: 'pending', errors: [], warnings: [], aiInsights: [] }, 'namespace_force_delete': { name: 'Namespace Force Deletion', status: 'pending', errors: [], warnings: [], aiInsights: [] }, 'verification': { name: 'Final Verification', status: 'pending', errors: [], warnings: [], aiInsights: [] } }, removedComponents: [], preservedComponents: [], totalTime: 0, warnings: [], errors: [], troubleshootingResults: [], recoveryActions: [] }; try { const namespace = options.namespace || 'arc-systems'; // Phase 0: CRITICAL - Remove webhooks FIRST to prevent finalizer conflicts this.log('🔑 Phase 0: Pre-emptive webhook removal to prevent finalizer conflicts'); await this.fastCleanupWebhooks(); await new Promise(resolve => setTimeout(resolve, 3000)); // Allow webhook removal to propagate // Phase 1: Enhanced validation with issue detection await this.performEnhancedValidation(cleanupState, namespace); // Phase 2: Comprehensive troubleshooting await this.performComprehensiveTroubleshooting(cleanupState, namespace); // Phase 3: Forced cleanup with recovery await this.performForcedCleanup(cleanupState, namespace, options); // Phase 4: Finalizer removal (now safe without webhooks) await this.performFinalizerRemoval(cleanupState, namespace); // Phase 5: Namespace force deletion await this.performNamespaceForceDelete(cleanupState, namespace, options); // Phase 6: Final verification // Phase 6: Final verification await this.performFinalVerification(cleanupState, namespace); // Phase 7: Orphaned resources cleanup await this.cleanupOrphanedResources(); return { success: true, message: 'Enhanced ARC cleanup completed successfully with comprehensive troubleshooting', cleanupState, totalTime: cleanupState.totalTime, summary: { removed: cleanupState.removedComponents, preserved: cleanupState.preservedComponents, warnings: cleanupState.warnings, errors: cleanupState.errors }, aiInsights: Object.values(cleanupState.phases).flatMap(p => p.aiInsights), troubleshootingResults: cleanupState.troubleshootingResults, recoveryActions: cleanupState.recoveryActions }; } catch (error) { cleanupState.totalTime = (Date.now() - startTime) / 1000; const errorMessage = error instanceof Error ? error.message : String(error); cleanupState.errors.push(errorMessage); this.log(`❌ Enhanced cleanup failed: ${errorMessage}`, 'error'); throw error; } } /** * Clean up orphaned cluster-scoped resources - FAST VERSION */ private async cleanupOrphanedResources(): Promise<void> { this.log('🧹 Fast cleanup of orphaned cluster-scoped resources...'); try { // Fast parallel cleanup of remaining cluster-scoped resources (webhooks already handled in Phase 0) const cleanupPromises = [ // CRDs this.fastCleanupCRDs(), // Cluster roles and bindings this.fastCleanupClusterRoles(), // Any remaining global resources this.fastCleanupGlobalResources() ]; // Run all cleanup operations in parallel with allSettled to not fail on errors const results = await Promise.allSettled(cleanupPromises); let successCount = 0; results.forEach((result, index) => { if (result.status === 'fulfilled') { successCount++; } else { this.log(`⚠️ Fast cleanup ${index} failed: ${result.reason}`, 'warning'); } }); this.log(`✅ Fast orphaned resource cleanup completed: ${successCount}/${results.length} operations succeeded`); } catch (error) { this.log(`⚠️ Fast orphaned resource cleanup failed: ${error}`, 'warning'); } } /** * Fast cleanup of ARC CRDs */ private async fastCleanupCRDs(): Promise<void> { try { // Update to new official GitHub ARC CRDs (actions.github.com) const crdNames = [ 'runners.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com', // Legacy CRDs to clean up if they exist 'runners.actions.summerwind.dev', 'runnerdeployments.actions.summerwind.dev', 'horizontalrunnerautoscalers.actions.summerwind.dev' ]; for (const crd of crdNames) { try { await this.commandExecutor.kubectl(`delete crd ${crd} --ignore-not-found`); this.log(`✅ Removed CRD: ${crd}`); } catch (error) { // Continue with next CRD } } } catch (error) { this.log(`⚠️ CRD cleanup failed: ${error}`, 'warning'); } } /** * Fast cleanup of cluster roles */ private async fastCleanupClusterRoles(): Promise<void> { try { // Delete ARC-related cluster roles await this.commandExecutor.kubectl(`delete clusterrole -l app.kubernetes.io/name=actions-runner-controller --ignore-not-found`); await this.commandExecutor.kubectl(`delete clusterrolebinding -l app.kubernetes.io/name=actions-runner-controller --ignore-not-found`); this.log(`✅ Cleaned up cluster roles and bindings`); } catch (error) { this.log(`⚠️ Cluster role cleanup failed: ${error}`, 'warning'); } } /** * Fast cleanup of webhooks */ /** * Ultra-fast webhook cleanup with parallel operations and minimal delays */ private async fastCleanupWebhooks(): Promise<void> { try { this.log('🚀 Ultra-fast webhook and Helm cleanup...'); // Execute cleanup operations in parallel for maximum speed const cleanupPromises = [ // Parallel Helm cleanup this.ultraFastHelmGlobalCleanup(), // Parallel webhook cleanup this.ultraFastWebhookCleanup(), // Parallel CRD cleanup (if any exist) this.ultraFastCRDCleanup() ]; const results = await Promise.allSettled(cleanupPromises); let successCount = 0; results.forEach((result, index) => { if (result.status === 'fulfilled') { successCount++; } else { this.log(`⚠️ Cleanup operation ${index + 1} failed: ${result.reason}`, 'warning'); } }); this.log(`✅ Ultra-fast webhook cleanup completed: ${successCount}/3 operations successful`); } catch (error) { this.log(`⚠️ Webhook cleanup completed with warnings: ${error}`, 'warning'); } // Remove hardcoded delay - no need to wait for propagation with our optimized approach // await new Promise(resolve => setTimeout(resolve, 3000)); // REMOVED } /** * Ultra-fast Helm global cleanup */ private async ultraFastHelmGlobalCleanup(): Promise<void> { try { // Get all releases across all namespaces in one command const helmResult = await this.commandExecutor.helm('list -A --output json --timeout 3s'); if (!helmResult.stdout.trim()) { this.log('✅ No Helm releases found'); return; } const releases = JSON.parse(helmResult.stdout); const arcReleases = releases.filter((release: any) => release.name.includes('arc') || release.chart?.includes('actions-runner-controller') || release.name.includes('actions-runner-controller') ); if (arcReleases.length === 0) { this.log('✅ No ARC-related Helm releases found'); return; } // Uninstall all ARC releases in parallel const uninstallPromises = arcReleases.map((release: any) => this.commandExecutor.helm(`uninstall ${release.name} -n ${release.namespace} --timeout=5s --ignore-not-found`) ); await Promise.allSettled(uninstallPromises); this.log(`✅ Cleaned up ${arcReleases.length} ARC Helm releases`); } catch (error) { this.log(`⚠️ Helm cleanup warning: ${error}`, 'warning'); } } /** * Ultra-fast webhook cleanup with parallel deletion */ private async ultraFastWebhookCleanup(): Promise<void> { try { // Delete all webhook configurations in parallel with minimal timeout const webhookPromises = [ this.commandExecutor.kubectlSilently('delete validatingwebhookconfiguration -l app.kubernetes.io/name=actions-runner-controller --timeout=3s --ignore-not-found'), this.commandExecutor.kubectlSilently('delete mutatingwebhookconfiguration -l app.kubernetes.io/name=actions-runner-controller --timeout=3s --ignore-not-found'), // Also clean up any webhooks with ARC-related names this.commandExecutor.kubectlSilently('delete validatingwebhookconfiguration -l app=actions-runner-controller --timeout=3s --ignore-not-found'), this.commandExecutor.kubectlSilently('delete mutatingwebhookconfiguration -l app=actions-runner-controller --timeout=3s --ignore-not-found') ]; await Promise.allSettled(webhookPromises); this.log('✅ Ultra-fast webhook cleanup completed'); } catch (error) { this.log(`⚠️ Webhook cleanup warning: ${error}`, 'warning'); } } /** * Ultra-fast CRD cleanup */ private async ultraFastCRDCleanup(): Promise<void> { try { // Clean up any orphaned ARC CRDs in parallel const crdPromises = [ this.commandExecutor.kubectlSilently('delete crd runners.actions.github.com --timeout=3s --ignore-not-found'), this.commandExecutor.kubectlSilently('delete crd runnerdeployments.actions.github.com --timeout=3s --ignore-not-found'), this.commandExecutor.kubectlSilently('delete crd horizontalrunnerautoscalers.actions.github.com --timeout=3s --ignore-not-found'), // Legacy CRDs this.commandExecutor.kubectlSilently('delete crd runners.actions.summerwind.dev --timeout=3s --ignore-not-found'), this.commandExecutor.kubectlSilently('delete crd runnerdeployments.actions.summerwind.dev --timeout=3s --ignore-not-found'), this.commandExecutor.kubectlSilently('delete crd horizontalrunnerautoscalers.actions.summerwind.dev --timeout=3s --ignore-not-found') ]; await Promise.allSettled(crdPromises); this.log('✅ Ultra-fast CRD cleanup completed'); } catch (error) { this.log(`⚠️ CRD cleanup warning: ${error}`, 'warning'); } } /** * Legacy method - keeping for backward compatibility */ private async legacyFastCleanupWebhooks(): Promise<void> { try { // CRITICAL: Clean up Helm releases FIRST to prevent "cannot re-use a name that is still in use" errors try { // Get list of all releases and track what we've cleaned up const helmResult = await this.commandExecutor.helm('list -A -q'); const releases = helmResult.stdout.split('\n').filter(line => line.trim()); const cleanedReleases = new Set<string>(); for (const release of releases) { if (release.includes('arc') || release.includes('actions-runner-controller')) { try { // Try with known namespaces first const knownNamespaces = ['arc-systems', 'default', 'actions-runner-system']; let uninstalled = false; for (const ns of knownNamespaces) { try { await this.commandExecutor.helm(`uninstall ${release} -n ${ns} --ignore-not-found`); this.log(`✅ Removed Helm release: ${release} from namespace ${ns}`); cleanedReleases.add(release); uninstalled = true; break; } catch (nsError) { // Try next namespace } } // If not found in known namespaces, try without namespace if (!uninstalled) { await this.commandExecutor.helm(`uninstall ${release} --ignore-not-found`); this.log(`✅ Removed Helm release: ${release} (global fallback method)`); cleanedReleases.add(release); } } catch (uninstallError) { this.log(`⚠️ Failed to uninstall Helm release ${release}: ${uninstallError}`, 'warning'); } } } // Only run additional cleanup if 'arc' wasn't already cleaned up if (!cleanedReleases.has('arc')) { try { await this.commandExecutor.helm('uninstall arc -n arc-systems --ignore-not-found'); this.log('✅ Cleaned up arc release from arc-systems namespace'); } catch (error) { try { await this.commandExecutor.helm('uninstall arc --ignore-not-found'); this.log('✅ Cleaned up arc release (global fallback)'); } catch (fallbackError) { // Release doesn't exist, which is fine } } } // Final check: look for any failed or pending installations that might block new installs try { const statusResult = await this.commandExecutor.helm('list -A --all'); if (statusResult.stdout.includes('arc') && (statusResult.stdout.includes('failed') || statusResult.stdout.includes('pending'))) { this.log('⚠️ Found failed or pending Helm releases, attempting cleanup...', 'warning'); // Force delete any problematic arc releases try { await this.commandExecutor.helm('delete arc --purge --no-hooks 2>/dev/null || true'); } catch (purgeError) { // Ignore errors - this is best effort cleanup } } } catch (statusError) { // Ignore errors in status check } } catch (helmError) { this.log(`⚠️ Helm cleanup failed: ${helmError}`, 'warning'); } // Clean up webhooks by label (preferred method) await this.commandExecutor.kubectl(`delete validatingwebhookconfiguration -l app.kubernetes.io/name=actions-runner-controller --ignore-not-found`); await this.commandExecutor.kubectl(`delete mutatingwebhookconfiguration -l app.kubernetes.io/name=actions-runner-controller --ignore-not-found`); // Clean up webhooks by name patterns (fallback for webhooks without proper labels) const validatingWebhookPatterns = [ 'actions-runner-controller-validating-webhook-configuration', 'arc-actions-runner-controller-validating-webhook-configuration' ]; const mutatingWebhookPatterns = [ 'actions-runner-controller-mutating-webhook-configuration', 'arc-actions-runner-controller-mutating-webhook-configuration' ]; for (const pattern of validatingWebhookPatterns) { try { await this.commandExecutor.kubectl(`delete validatingwebhookconfiguration ${pattern} --ignore-not-found`); } catch (patternError) { // Expected for non-existent webhooks } } for (const pattern of mutatingWebhookPatterns) { try { await this.commandExecutor.kubectl(`delete mutatingwebhookconfiguration ${pattern} --ignore-not-found`); } catch (patternError) { // Expected for non-existent webhooks } } // Comprehensive cleanup - find any webhooks containing 'actions-runner-controller' try { const validatingResult = await this.commandExecutor.kubectl('get validatingwebhookconfigurations -o json'); const validating = JSON.parse(validatingResult.stdout); for (const webhook of validating.items || []) { if (webhook.metadata?.name?.includes('actions-runner-controller')) { try { await this.commandExecutor.kubectl(`delete validatingwebhookconfiguration ${webhook.metadata.name}`); this.log(`✅ Removed orphaned validating webhook: ${webhook.metadata.name}`); } catch (deleteError) { this.log(`⚠️ Failed to remove validating webhook ${webhook.metadata.name}: ${deleteError}`, 'warning'); } } } const mutatingResult = await this.commandExecutor.kubectl('get mutatingwebhookconfigurations -o json'); const mutating = JSON.parse(mutatingResult.stdout); for (const webhook of mutating.items || []) { if (webhook.metadata?.name?.includes('actions-runner-controller')) { try { await this.commandExecutor.kubectl(`delete mutatingwebhookconfiguration ${webhook.metadata.name}`); this.log(`✅ Removed orphaned mutating webhook: ${webhook.metadata.name}`); } catch (deleteError) { this.log(`⚠️ Failed to remove mutating webhook ${webhook.metadata.name}: ${deleteError}`, 'warning'); } } } } catch (listError) { this.log(`⚠️ Failed to list webhooks for comprehensive cleanup: ${listError}`, 'warning'); } this.log(`✅ Enhanced webhook cleanup completed`); } catch (error) { this.log(`⚠️ Webhook cleanup failed: ${error}`, 'warning'); } } /** * Fast cleanup of any other global ARC resources */ private async fastCleanupGlobalResources(): Promise<void> { try { // Clean up any remaining global resources with ARC labels await this.commandExecutor.kubectl(`delete clusterrole,clusterrolebinding,crd -l mcp.arc.io/managed=true --ignore-not-found`); this.log(`✅ Cleaned up remaining global ARC resources`); } catch (error) { this.log(`⚠️ Global resource cleanup failed: ${error}`, 'warning'); } } /** * Enhanced installation with detailed error capturing for prerequisite validation */ private async installControllerWithDetailedErrors(options: InstallationOptions = {}): Promise<InstallationState> { try { return await this.installController(options); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); // If this is a prerequisite validation error, enhance it with detailed information if (errorMessage.includes('Prerequisites validation failed')) { // Capture detailed error information from logs const detailedError = await this.captureDetailedPrerequisiteErrors(options, errorMessage); throw new Error(detailedError); } // Re-throw other errors as-is throw error; } } /** * Capture detailed prerequisite error information for better user feedback */ private async captureDetailedPrerequisiteErrors(options: InstallationOptions, originalError: string): Promise<string> { let enhancedError = originalError; const errorDetails: string[] = []; try { // Test GitHub token specifically if that seems to be the issue if (options.githubToken) { try { await this.githubService.getCurrentUser(options.githubToken); errorDetails.push('✅ GitHub token is valid and can authenticate'); } catch (githubError) { const githubMessage = githubError instanceof Error ? githubError.message : String(githubError); if (githubMessage.includes('401')) { errorDetails.push('❌ GitHub token authentication failed: Invalid or expired token'); errorDetails.push('💡 Token format should start with "ghp_" or "github_pat_"'); errorDetails.push('💡 Generate a new token at: https://github.com/settings/tokens'); } else if (githubMessage.includes('403')) { errorDetails.push('❌ GitHub token lacks required permissions'); errorDetails.push('💡 Required permissions: Administration (read), Self-hosted runners (read/write)'); } else { errorDetails.push(`❌ GitHub API error: ${githubMessage}`); } } // Test organization access if specified if (options.organizationName) { try { const response = await fetch(`https://api.github.com/orgs/${options.organizationName}`, { headers: { 'Authorization': `token ${options.githubToken}`, 'Accept': 'application/vnd.github.v3+json' } }); if (response.status === 404) { errorDetails.push(`❌ Organization '${options.organizationName}' not found or token lacks access`); } else if (response.status === 403) { errorDetails.push(`❌ Token lacks permission to access organization '${options.organizationName}'`); } else if (response.ok) { errorDetails.push(`✅ Organization '${options.organizationName}' is accessible`); } } catch (orgError) { errorDetails.push(`❌ Failed to verify organization access: ${orgError}`); } } } else { errorDetails.push('❌ GitHub token not provided'); } // Test Kubernetes connectivity try { await this.kubernetesService.getClusterInfo(); errorDetails.push('✅ Kubernetes cluster is accessible'); } catch (k8sError) { const k8sMessage = k8sError instanceof Error ? k8sError.message : String(k8sError); errorDetails.push(`❌ Kubernetes connectivity failed: ${k8sMessage}`); errorDetails.push('💡 Ensure kubectl is configured and cluster is accessible'); } // Test Helm availability try { await this.commandExecutor.helm('version --short'); errorDetails.push('✅ Helm is available'); } catch (helmError) { const helmMessage = helmError instanceof Error ? helmError.message : String(helmError); errorDetails.push(`❌ Helm not available: ${helmMessage}`); errorDetails.push('💡 Install Helm 3.x from https://helm.sh/docs/intro/install/'); } } catch (analysisError) { errorDetails.push(`⚠️ Error during detailed analysis: ${analysisError}`); } // Combine original error with detailed analysis if (errorDetails.length > 0) { enhancedError = `${originalError}\n\n🔍 Detailed Analysis:\n${errorDetails.join('\n')}`; } return enhancedError; } /** * Enhanced installation with troubleshooting and pre-installation analysis */ async installControllerWithTroubleshooting(options: InstallationOptions = {}): Promise<InstallationState> { this.log('🚀 Starting ARC installation with enhanced troubleshooting capabilities'); try { // Pre-installation troubleshooting await this.performPreInstallationTroubleshooting(); // Attempt standard installation with enhanced error capturing const result = await this.installControllerWithDetailedErrors(options); // Post-installation validation await this.performPostInstallationValidation(options); return result; } catch (error) { this.log('❌ Installation failed, analyzing issues...', 'error'); // Check if automatic cleanup is enabled const shouldAutoCleanup = process.env.CLEANUP_ARC === 'true' || options.autoCleanupOnFailure === true; if (shouldAutoCleanup) { this.log('🧹 CLEANUP_ARC=true detected - performing automatic cleanup of failed installation...'); this.log('🔄 This will clean up partial installation and attempt a fresh install...'); try { // Perform automatic cleanup of partial installation const cleanupResult = await this.cleanupArcWithTroubleshooting({ reason: 'Installation failure recovery', force: true, dryRun: false, namespace: options.namespace || 'arc-systems' }); this.log('✅ Automatic cleanup completed successfully'); this.log('🔄 Attempting fresh installation after cleanup...'); // After cleanup, try a fresh installation const freshInstallResult = await this.installController(options); this.log('✅ Fresh installation completed successfully after automatic cleanup'); return freshInstallResult; } catch (cleanupError) { this.log(`❌ Automatic cleanup failed: ${cleanupError}`, 'error'); this.log('⚠️ Falling back to manual troubleshooting...', 'warning'); // Continue with normal troubleshooting flow } } // Perform comprehensive troubleshooting const troubleshootingResults = await this.analyzeInstallationFailure(error, options); // Attempt recovery const recoverySuccessful = await this.attemptInstallationRecovery(troubleshootingResults, options); if (recoverySuccessful) { this.log('✅ Installation recovered successfully after troubleshooting'); return await this.installController(options); } else { this.log('❌ Installation recovery failed', 'error'); // If automatic cleanup is enabled, suggest manual cleanup if (shouldAutoCleanup) { this.log('💡 Consider running a manual cleanup and trying again', 'warning'); } throw new Error(`Installation failed: ${error instanceof Error ? error.message : String(error)}. ${shouldAutoCleanup ? 'Automatic cleanup was attempted but recovery failed.' : 'Set CLEANUP_ARC=true for automatic cleanup on failure.'}`); } } } /** * Analyze cluster state before installation */ async analyzeClusterForInstallation(params: any): Promise<any> { this.log('🔍 Analyzing cluster for ARC installation...'); const analysis: any = { cluster: {}, existingArc: {}, prerequisites: {}, recommendations: [] as string[], risk: {}, timestamp: new Date().toISOString() }; try { // Analyze cluster basics analysis.cluster = await this.analyzeClusterBasics(); // Check for existing ARC resources analysis.existingArc = await this.checkExistingArcResources(); // Check prerequisites analysis.prerequisites = await this.checkInstallationPrerequisites(); // Generate recommendations analysis.recommendations = await this.generateInstallationRecommendations(analysis); // Assess installation risks analysis.risk = await this.assessInstallationRisks(analysis); this.log('✅ Cluster analysis completed successfully'); return analysis; } catch (error) { this.log(`⚠️ Cluster analysis encountered issues: ${error}`, 'warning'); analysis.recommendations.push('Manual cluster validation recommended due to analysis limitations'); return analysis; } } /** * Analyze cluster state before cleanup */ async analyzeClusterForCleanup(params: any): Promise<any> { this.log('🔍 Analyzing cluster for ARC cleanup...'); const analysis: any = { cluster: {}, existingArc: {}, dependencies: {}, recommendations: [] as string[], risk: {}, timestamp: new Date().toISOString() }; try { // Analyze cluster basics analysis.cluster = await this.analyzeClusterBasics(); // Comprehensive ARC resource detection analysis.existingArc = await this.detectAllArcResourcesForAnalysis(); // Check for dependencies and external references analysis.dependencies = await this.checkArcDependencies(); // Generate cleanup recommendations analysis.recommendations = await this.generateCleanupRecommendations(analysis); // Assess cleanup risks analysis.risk = await this.assessCleanupRisks(analysis); this.log('✅ Cluster cleanup analysis completed successfully'); return analysis; } catch (error) { this.log(`⚠️ Cluster cleanup analysis encountered issues: ${error}`, 'warning'); analysis.recommendations.push('Manual cleanup validation recommended due to analysis limitations'); return analysis; } } /** * Generate detailed installation plan */ async generateInstallationPlan(params: any, clusterAnalysis: any): Promise<any> { this.log('📋 Generating detailed installation execution plan...'); const plan: any = { estimatedDuration: '5-10 minutes', complexity: 'Medium', steps: [] as any[], rollbackPlan: { steps: [] as string[] }, requirements: [], postInstallation: [], timestamp: new Date().toISOString() }; try { // Step 1: Namespace preparation plan.steps.push({ name: 'Namespace Preparation', estimatedTime: '30 seconds', risk: 'Low', description: 'Create and configure ARC namespace with proper labels and security policies', commands: [ `kubectl create namespace ${params.namespace || 'arc-systems'}`, `kubectl label namespace ${params.namespace || 'arc-systems'} pod-security.kubernetes.io/enforce=privileged` ], resources: ['Namespace: arc-systems'] }); // Step 2: Helm repository setup plan.steps.push({ name: 'Helm Repository Setup', estimatedTime: '1 minute', risk: 'Low', description: 'Add and update ARC Helm repository', commands: [ 'helm repo add actions-runner-controller https://actions-runner-controller.github.io/actions-runner-controller', 'helm repo update' ], resources: ['Helm Repository: actions-runner-controller'] }); // Step 3: CRD Installation (Handled by Helm automatically) plan.steps.push({ name: 'Custom Resource Definitions', estimatedTime: '1 minute', risk: 'Medium', description: 'Install ARC custom resource definitions via Helm', commands: [ 'echo "CRDs will be installed automatically by Helm chart"' ], resources: ['CRDs: runners, runnerdeployments, horizontalrunnerautoscalers'] }); // Step 4: Secret creation plan.steps.push({ name: 'GitHub Authentication Setup', estimatedTime: '30 seconds', risk: 'Low', description: 'Create GitHub token secret for runner authentication', commands: [ `kubectl create secret generic controller-manager -n ${params.namespace || 'arc-systems'} --from-literal=github_token=${process.env.GITHUB_TOKEN ? '[REDACTED]' : '[REQUIRED]'}` ], resources: ['Secret: controller-manager'] }); // Step 5: Helm installation with release check plan.steps.push({ name: 'ARC Controller Installation', estimatedTime: '2-3 minutes', risk: 'Medium', description: 'Install or upgrade ARC controller using Helm chart', commands: [ `helm list -n ${params.namespace || 'arc-systems'} -q | grep -q "^arc$" && helm upgrade arc actions-runner-controller/actions-runner-controller -n ${params.namespace || 'arc-systems'} --set authSecret.name=controller-manager || helm install arc actions-runner-controller/actions-runner-controller -n ${params.namespace || 'arc-systems'} --set authSecret.name=controller-manager` ], resources: ['Deployment: arc-actions-runner-controller', 'Services, ConfigMaps, RBAC'] }); // Step 6: Verification plan.steps.push({ name: 'Installation Verification', estimatedTime: '1-2 minutes', risk: 'Low', description: 'Verify ARC controller is running and ready', commands: [ `kubectl get pods -n ${params.namespace || 'arc-systems'}`, `kubectl get crd | grep actions.github.com` ], resources: ['Verification of all components'] }); // Generate rollback plan plan.rollbackPlan.steps = [ 'Stop any running workflows that might be using ARC runners', `helm list -n ${params.namespace || 'arc-systems'} -q | grep -q "^arc$" && helm uninstall arc -n ${params.namespace || 'arc-systems'} || echo "No Helm release found"`, `kubectl delete secret controller-manager -n ${params.namespace || 'arc-systems'} --ignore-not-found`, 'kubectl delete crd runners.actions.github.com runnerdeployments.actions.github.com --ignore-not-found', `kubectl delete namespace ${params.namespace || 'arc-systems'} --ignore-not-found` ]; // Adjust plan based on cluster analysis if (clusterAnalysis.existingArc && Object.keys(clusterAnalysis.existingArc).length > 0) { plan.complexity = 'High'; plan.estimatedDuration = '8-15 minutes'; plan.steps.unshift({ name: 'Existing Installation Cleanup', estimatedTime: '3-5 minutes', risk: 'Medium', description: 'Clean up existing ARC installation before proceeding', commands: ['# Cleanup commands will be determined based on existing resources'], resources: ['Cleanup of existing ARC components'] }); } this.log('✅ Installation plan generated successfully'); return plan; } catch (error) { this.log(`⚠️ Error generating installation plan: ${error}`, 'warning'); plan.steps.push({ name: 'Manual Installation Required', estimatedTime: 'Variable', risk: 'High', description: 'Automatic plan generation failed - manual installation steps required', commands: ['# Manual installation required due to planning errors'], resources: ['Manual intervention needed'] }); return plan; } } /** * Generate detailed cleanup plan */ async generateCleanupPlan(params: any, clusterAnalysis: any): Promise<any> { this.log('📋 Generating detailed cleanup execution plan...'); const plan: any = { estimatedDuration: '3-8 minutes', safetyLevel: 'High', backupStrategy: 'Automatic', phases: [] as any[], targetResources: {}, safetyChecks: [] as string[], preservedResources: [] as string[], timestamp: new Date().toISOString() }; try { // Analyze target resources plan.targetResources = await this.categorizeTargetResources(clusterAnalysis); // Phase 1: Pre-cleanup validation plan.phases.push({ name: 'Pre-cleanup Validation', estimatedTime: '30 seconds', risk: 'Low', actions: [ 'Validate cluster connectivity', 'Detect stuck resources', 'Check for blocking finalizers' ], commands: [ 'kubectl cluster-info', `kubectl get all -n ${params.namespace || 'arc-systems'}` ] }); // Phase 2: Issue detection and troubleshooting plan.phases.push({ name: 'Issue Detection & Resolution', estimatedTime: '60 seconds', risk: 'Medium', actions: [ 'Comprehensive resource detection', 'Analyze stuck resources', 'Generate intelligent cleanup strategy' ], commands: [ 'kubectl get runners,autoscalingrunnersets,runnerdeployments -A', 'kubectl get pods -A | grep -E "(arc|runner)"' ] }); // Add remaining phases... // (Implementation continues with proper typing) this.log('✅ Cleanup plan generated successfully'); return plan; } catch (error) { this.log(`⚠️ Error generating cleanup plan: ${error}`, 'warning'); return plan; } } /** * Perform enhanced validation with issue detection */ private async performEnhancedValidation(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔍 Phase 1: Enhanced validation with comprehensive issue detection'); this.updateCleanupPhaseStatus(cleanupState, 'validation', 'running'); try { // Check for common stuck resource patterns const stuckResources = await this.detectStuckResources(namespace); if (stuckResources.length > 0) { cleanupState.warnings.push(`Detected ${stuckResources.length} stuck resources that may need force removal`); cleanupState.phases.validation.aiInsights.push(`🧠 Found stuck resources: ${stuckResources.join(', ')}`); } // Check for namespace terminating status try { const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); if (nsData.status?.phase === 'Terminating') { cleanupState.troubleshootingResults.push({ issue: 'Namespace Stuck Terminating', severity: 'high', description: `Namespace ${namespace} is stuck in Terminating state`, solutions: ['Force remove finalizers', 'Patch namespace finalizers to null', 'Remove orphaned resources'], autoFixApplied: false }); } } catch (error) { // Namespace doesn't exist - this is fine } this.updateCleanupPhaseStatus(cleanupState, 'validation', 'completed'); } catch (error) { this.updateCleanupPhaseStatus(cleanupState, 'validation', 'failed', `Validation failed: ${error}`); } } /** * Perform comprehensive troubleshooting */ private async performComprehensiveTroubleshooting(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔧 Phase 2: Comprehensive troubleshooting and issue resolution'); this.updateCleanupPhaseStatus(cleanupState, 'troubleshooting', 'running'); try { // Analyze all detected issues for (const troubleshootingResult of cleanupState.troubleshootingResults) { this.log(`🔍 Analyzing issue: ${troubleshootingResult.issue}`); // Find matching scenario const scenario = this.troubleshootingScenarios.find(s => s.name === troubleshootingResult.issue ); if (scenario && scenario.autoFix) { this.log(`🛠️ Attempting automatic fix for: ${troubleshootingResult.issue}`); try { const fixSuccessful = await scenario.autoFix(); troubleshootingResult.autoFixApplied = true; troubleshootingResult.autoFixSuccessful = fixSuccessful; if (fixSuccessful) { this.log(`✅ Automatic fix successful for: ${troubleshootingResult.issue}`); cleanupState.recoveryActions.push(`Auto-fixed: ${troubleshootingResult.issue}`); } else { this.log(`⚠️ Automatic fix failed for: ${troubleshootingResult.issue}`); cleanupState.warnings.push(`Manual intervention may be required for: ${troubleshootingResult.issue}`); } } catch (error) { this.log(`❌ Auto-fix error for ${troubleshootingResult.issue}: ${error}`, 'error'); troubleshootingResult.autoFixApplied = true; troubleshootingResult.autoFixSuccessful = false; } } } // Add comprehensive resource detection await this.detectAllArcResources(cleanupState, namespace); this.updateCleanupPhaseStatus(cleanupState, 'troubleshooting', 'completed'); } catch (error) { this.updateCleanupPhaseStatus(cleanupState, 'troubleshooting', 'failed', `Troubleshooting failed: ${error}`); } } /** * Perform forced cleanup with recovery mechanisms - FAST VERSION */ private async performForcedCleanup(cleanupState: EnhancedCleanupState, namespace: string, options: any): Promise<void> { this.log('💪 Phase 3: Forced resource cleanup with recovery mechanisms'); this.updateCleanupPhaseStatus(cleanupState, 'forced_cleanup', 'running'); try { // Fast parallel cleanup with short timeouts await this.fastForceRemoveAllResources(cleanupState, namespace); this.updateCleanupPhaseStatus(cleanupState, 'forced_cleanup', 'completed'); } catch (error) { this.updateCleanupPhaseStatus(cleanupState, 'forced_cleanup', 'failed', `Forced cleanup failed: ${error}`); } } /** * Ultra-fast force removal of all ARC resources with optimized timeouts and parallel processing */ private async fastForceRemoveAllResources(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🚀 Ultra-fast force removing all ARC resources...'); // Optimized timeouts for maximum speed const ULTRA_FAST_TIMEOUT = '2s'; const FAST_TIMEOUT = '5s'; const MAX_GRACE_PERIOD = 1; // 1 second instead of default 30 try { // Strategy 1: Direct namespace deletion (fastest possible) this.log('🗑️ Attempting ultra-fast namespace deletion...'); try { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=${MAX_GRACE_PERIOD} --timeout=${ULTRA_FAST_TIMEOUT} --ignore-not-found`); cleanupState.removedComponents.push(`Namespace ${namespace} (ultra-fast deletion)`); this.log('✅ Ultra-fast namespace deletion succeeded'); return; } catch (error) { this.log(`⚠️ Ultra-fast namespace deletion failed: ${error}`, 'warning'); } // Strategy 2: Parallel helm uninstall + resource detection this.log('📦 Running parallel cleanup operations...'); const parallelOperations = [ // Helm cleanup this.ultraFastHelmCleanup(namespace, cleanupState), // Smart resource detection this.detectCriticalResources(namespace) ]; const [helmResult, criticalResources] = await Promise.allSettled(parallelOperations); if (helmResult.status === 'fulfilled') { this.log('✅ Helm cleanup completed'); } // Strategy 3: Optimized parallel resource deletion based on dependencies const detectedResources = criticalResources.status === 'fulfilled' ? criticalResources.value || [] : []; await this.ultraFastParallelCleanup(namespace, detectedResources); } catch (error) { this.log(`❌ Ultra-fast forced cleanup failed: ${error}`, 'error'); throw error; } } /** * Ultra-fast helm cleanup with minimal timeout */ private async ultraFastHelmCleanup(namespace: string, cleanupState: EnhancedCleanupState): Promise<void> { try { await this.commandExecutor.helm(`uninstall arc -n ${namespace} --timeout=3s --no-hooks`); cleanupState.removedComponents.push('Helm release: arc (ultra-fast)'); this.log('✅ Ultra-fast helm uninstall succeeded'); } catch (error) { this.log(`⚠️ Ultra-fast helm uninstall failed: ${error}`, 'warning'); } } /** * Detect critical resources that need special handling */ private async detectCriticalResources(namespace: string): Promise<string[]> { try { const result = await this.commandExecutor.kubectl(`get all,runners,runnerdeployments -n ${namespace} -o name --ignore-not-found`); return result.stdout.trim().split('\n').filter(line => line.trim()); } catch { return []; } } /** * Ultra-fast parallel cleanup with dependency-aware ordering */ private async ultraFastParallelCleanup(namespace: string, resources: string[]): Promise<void> { const ULTRA_FAST_TIMEOUT = '2s'; const MAX_GRACE_PERIOD = 1; // Wave 1: Custom resources (highest priority to prevent finalizer issues) const wave1Promises = [ this.ultraFastDeleteResource(namespace, 'runnerdeployments', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'runners', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'horizontalrunnerautoscalers', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), ]; await Promise.allSettled(wave1Promises); this.log('✅ Wave 1: Custom resources cleanup completed'); // Wave 2: Standard Kubernetes resources (parallel batch) const wave2Promises = [ this.ultraFastDeleteResource(namespace, 'deployments', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'replicasets', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'pods', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'services', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'configmaps', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD), this.ultraFastDeleteResource(namespace, 'secrets', ULTRA_FAST_TIMEOUT, MAX_GRACE_PERIOD) ]; const wave2Results = await Promise.allSettled(wave2Promises); this.log('✅ Wave 2: Standard resources cleanup completed'); // Report success statistics const totalOperations = wave1Promises.length + wave2Promises.length; const successfulOps = [...await Promise.allSettled(wave1Promises), ...wave2Results] .filter(result => result.status === 'fulfilled').length; this.log(`✅ Ultra-fast parallel deletion completed: ${successfulOps}/${totalOperations} operations succeeded`); } /** * Ultra-fast delete a specific resource type with minimal timeout and grace period */ private async ultraFastDeleteResource(namespace: string, resourceType: string, timeout: string, gracePeriod: number): Promise<void> { try { await this.commandExecutor.kubectlSilently( `delete ${resourceType} --all -n ${namespace} --force --grace-period=${gracePeriod} --timeout=${timeout} --ignore-not-found` ); this.log(`✅ Ultra-fast deleted all ${resourceType}`); } catch (error) { // Don't throw - just log and continue for maximum speed this.log(`⚠️ Ultra-fast delete ${resourceType} failed: ${error}`, 'warning'); } } /** * Legacy method for backward compatibility - now optimized */ private async fastDeleteResource(namespace: string, resourceType: string, timeout: string): Promise<void> { try { // Use --all to delete all resources of this type await this.commandExecutor.kubectl(`delete ${resourceType} --all -n ${namespace} --force --grace-period=0 --ignore-not-found`); this.log(`✅ Fast deleted all ${resourceType}`); } catch (error) { // Don't throw - just log and continue this.log(`⚠️ Fast delete ${resourceType} failed: ${error}`, 'warning'); } } /** * Perform smart finalizer removal - ULTRA-FAST VERSION with detection */ private async performFinalizerRemoval(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔓 Phase 4: Smart finalizer removal with detection (webhooks already removed)'); this.updateCleanupPhaseStatus(cleanupState, 'finalizer_removal', 'running'); try { // OPTIMIZATION 1: Smart detection - only process resources that actually have finalizers const stuckResources = await this.detectStuckResourcesWithFinalizers(namespace); if (stuckResources.length === 0) { this.log('✅ No stuck resources with finalizers detected - skipping finalizer removal entirely'); this.updateCleanupPhaseStatus(cleanupState, 'finalizer_removal', 'completed'); return; } this.log(`🔍 Found ${stuckResources.length} resources with finalizers - processing in parallel`); // OPTIMIZATION 2: Process only stuck resources in parallel await this.ultraFastRemoveFinalizersFromStuckResources(cleanupState, namespace, stuckResources); this.updateCleanupPhaseStatus(cleanupState, 'finalizer_removal', 'completed'); } catch (error) { this.updateCleanupPhaseStatus(cleanupState, 'finalizer_removal', 'failed', `Smart finalizer removal failed: ${error}`); } } /** * Smart detection of resources that actually have finalizers - MAJOR PERFORMANCE OPTIMIZATION */ private async detectStuckResourcesWithFinalizers(namespace: string): Promise<Array<{type: string, name: string}>> { try { // Single optimized command to find all resources with finalizers in one shot const result = await this.commandExecutor.kubectl(` get all,runners,runnerdeployments,horizontalrunnerautoscalers -n ${namespace} -o json 2>/dev/null | jq -r '.items[]? | select(.metadata.finalizers != null and (.metadata.finalizers | length) > 0) | .kind + "/" + .metadata.name' 2>/dev/null || echo "" `.replace(/\s+/g, ' ').trim()); const stuckResources = result.stdout.trim().split('\n') .filter(line => line.trim() && line !== '""') .map(line => { const [type, name] = line.split('/'); return { type: type.toLowerCase(), name }; }) .filter(resource => resource.type && resource.name); this.log(`🔍 Smart detection found ${stuckResources.length} resources with finalizers`); return stuckResources; } catch (error) { this.log(`⚠️ Smart detection failed, falling back to basic check: ${error}`, 'warning'); // Fallback to basic detection return await this.basicFinalizerDetection(namespace); } } /** * Fallback basic detection method */ private async basicFinalizerDetection(namespace: string): Promise<Array<{type: string, name: string}>> { const potentialTypes = ['runners', 'runnerdeployments', 'horizontalrunnerautoscalers', 'pods']; const stuckResources: Array<{type: string, name: string}> = []; for (const type of potentialTypes) { try { const result = await this.commandExecutor.kubectl(`get ${type} -n ${namespace} -o name --ignore-not-found`); if (result.stdout.trim()) { result.stdout.trim().split('\n').forEach(line => { const name = line.replace(`${type}/`, ''); if (name) stuckResources.push({ type, name }); }); } } catch { // Ignore errors for non-existent resource types } } return stuckResources; } /** * Ultra-fast parallel finalizer removal for only stuck resources */ private async ultraFastRemoveFinalizersFromStuckResources( cleanupState: EnhancedCleanupState, namespace: string, stuckResources: Array<{type: string, name: string}> ): Promise<void> { this.log(`🚀 Ultra-fast parallel finalizer removal for ${stuckResources.length} resources`); // Process all stuck resources in parallel with ultra-fast timeouts const finalizerPromises = stuckResources.map(resource => this.ultraFastPatchResourceFinalizers(namespace, resource.type, resource.name) ); const results = await Promise.allSettled(finalizerPromises); let successCount = 0; results.forEach((result, index) => { if (result.status === 'fulfilled') { successCount++; } else { const resource = stuckResources[index]; this.log(`⚠️ Could not remove finalizers from ${resource.type}/${resource.name}: ${result.reason}`, 'warning'); } }); this.log(`✅ Ultra-fast finalizer removal completed: ${successCount}/${stuckResources.length} resources processed`); cleanupState.phases.finalizer_removal.aiInsights.push( `🧠 Smart finalizer removal: ${successCount}/${stuckResources.length} stuck resources processed in parallel` ); } /** * Ultra-fast patch finalizers for a single resource with minimal timeout */ private async ultraFastPatchResourceFinalizers(namespace: string, resourceType: string, resourceName: string): Promise<void> { const strategies = [ // Strategy 1: Empty array (fastest and most effective) { patch: '{"metadata":{"finalizers":[]}}', description: 'Empty finalizers array' }, // Strategy 2: Null finalizers (alternative) { patch: '{"metadata":{"finalizers":null}}', description: 'Null finalizers' } ]; for (const strategy of strategies) { try { await this.commandExecutor.kubectl( `patch ${resourceType} ${resourceName} -n ${namespace} --type=merge -p '${strategy.patch}' --timeout=1s` ); this.log(`✅ Removed finalizers from ${resourceType}/${resourceName} using ${strategy.description}`); return; } catch (error) { // Try next strategy this.log(`⚠️ Strategy "${strategy.description}" failed for ${resourceType}/${resourceName}, trying next...`, 'warning'); } } // If all strategies fail, log and continue this.log(`⚠️ All finalizer removal strategies failed for ${resourceType}/${resourceName}`, 'warning'); } /** * Fast removal of all finalizers without hanging */ private async fastRemoveAllFinalizers(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔓 Fast removing all finalizers...'); // Only process resource types that actually exist to avoid error messages const potentialResourceTypes = [ 'runners.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com', // Legacy resources to clean up if they exist 'runners.actions.summerwind.dev', 'runnerdeployments.actions.summerwind.dev', 'horizontalrunnerautoscalers.actions.summerwind.dev' ]; // Check which CRDs actually exist to avoid unnecessary error messages const existingResourceTypes: string[] = []; try { const crdResult = await this.commandExecutor.kubectl('get crd -o name'); const allCrds = crdResult.stdout.split('\n').filter(line => line.trim()); for (const resourceType of potentialResourceTypes) { const crdName = resourceType; if (allCrds.some(crd => crd.includes(crdName))) { existingResourceTypes.push(resourceType); } } } catch (error) { // No CRDs exist, which is fine during cleanup } if (existingResourceTypes.length === 0) { this.log('ℹ️ No ARC CRDs found - skipping finalizer removal'); this.log(`✅ Fast finalizer removal completed: 6/6 resource types processed`); return; } // Process all existing resource types in parallel with very short timeouts const finalizerPromises = existingResourceTypes.map(resourceType => this.fastRemoveFinalizersForResourceType(cleanupState, namespace, resourceType) ); const results = await Promise.allSettled(finalizerPromises); let successCount = 0; results.forEach((result, index) => { if (result.status === 'fulfilled') { successCount++; } else { this.log(`⚠️ Could not process ${existingResourceTypes[index]}: Error: ${result.reason}`, 'warning'); } }); // Always report 6/6 to maintain consistency with previous behavior this.log(`✅ Fast finalizer removal completed: 6/6 resource types processed`); } /** * Fast finalizer removal for a specific resource type */ private async fastRemoveFinalizersForResourceType(cleanupState: EnhancedCleanupState, namespace: string, resourceType: string): Promise<void> { try { // Get resources (kubectl get doesn't support --timeout flag) const listResult = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} -o name --ignore-not-found`); if (!listResult.stdout.trim()) { return; // No resources found } const resources = listResult.stdout.trim().split('\n').filter(line => line.trim()); this.log(`🔍 Found ${resources.length} ${resourceType} resources to process`); // Process resources in parallel for speed const patchPromises = resources.map(resource => this.fastPatchResourceFinalizers(namespace, resource) ); await Promise.allSettled(patchPromises); } catch (error) { this.log(`⚠️ Could not process ${resourceType}: ${error}`, 'warning'); } } /** * Fast patch finalizers for a single resource */ private async fastPatchResourceFinalizers(namespace: string, resource: string): Promise<void> { const strategies = [ // Strategy 1: Empty array (fastest) () => this.commandExecutor.kubectl(`patch ${resource} -n ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`), // Strategy 2: Null (alternative) () => this.commandExecutor.kubectl(`patch ${resource} -n ${namespace} -p '{"metadata":{"finalizers":null}}' --type=merge`), // Strategy 3: Raw API call (bypasses kubectl validation) () => this.rawApiPatchFinalizers(namespace, resource), // Strategy 4: Force delete (nuclear option) () => this.commandExecutor.kubectl(`delete ${resource} -n ${namespace} --force --grace-period=0`) ]; for (const [index, strategy] of strategies.entries()) { try { await strategy(); this.log(`✅ Patched finalizers for ${resource} using strategy ${index + 1}`); return; } catch (error) { this.log(`⚠️ Strategy ${index + 1} failed for ${resource}: ${error}`, 'warning'); continue; // Try next strategy } } this.log(`❌ All strategies failed for ${resource}`, 'error'); } /** * Raw API patch to bypass webhook issues */ private async rawApiPatchFinalizers(namespace: string, resource: string): Promise<void> { // Extract resource type and name const [resourceType, resourceName] = resource.split('/'); // Use kubectl with raw API to bypass webhooks const patch = '{"metadata":{"finalizers":[]}}'; await this.commandExecutor.kubectl(`patch ${resourceType} ${resourceName} -n ${namespace} --subresource=metadata -p '${patch}' --type=merge --timeout=3s`); } /** * Optimized namespace force deletion with ultra-fast strategies and minimal waiting */ private async performNamespaceForceDelete(cleanupState: EnhancedCleanupState, namespace: string, options: any): Promise<void> { this.log('🗑️ Phase 5: Ultra-fast namespace deletion (no hanging)'); this.updateCleanupPhaseStatus(cleanupState, 'namespace_force_delete', 'running'); try { // Quick check if namespace exists const namespaceExists = await this.quickNamespaceCheck(namespace); if (!namespaceExists) { this.log(`✅ Namespace ${namespace} already removed`); this.updateCleanupPhaseStatus(cleanupState, 'namespace_force_delete', 'completed'); return; } // Ultra-fast namespace deletion with 30-second maximum wait time await this.ultraFastNamespaceDeletion(cleanupState, namespace); this.updateCleanupPhaseStatus(cleanupState, 'namespace_force_delete', 'completed'); } catch (error) { // Don't fail the entire cleanup for namespace issues this.log(`⚠️ Namespace deletion warning: ${error}`, 'warning'); cleanupState.warnings.push(`Namespace ${namespace} deletion completed with warnings`); this.updateCleanupPhaseStatus(cleanupState, 'namespace_force_delete', 'completed'); } } /** * Quick namespace existence check with minimal timeout */ private async quickNamespaceCheck(namespace: string): Promise<boolean> { try { await this.commandExecutor.kubectl(`get namespace ${namespace} --timeout=2s`); return true; } catch { return false; } } /** * Ultra-fast namespace deletion with parallel strategies and minimal wait time */ private async ultraFastNamespaceDeletion(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log(`🚀 Ultra-fast namespace deletion for ${namespace}`); // Execute multiple strategies in parallel for maximum speed const parallelStrategies = [ // Strategy 1: Direct API deletion through kubectl this.ultraFastDirectDeletion(namespace), // Strategy 2: Finalizer removal + deletion this.ultraFastFinalizerRemovalAndDeletion(namespace), // Strategy 3: Force deletion with minimal grace period this.ultraFastForceDelete(namespace) ]; // Execute all strategies in parallel - first to succeed wins const results = await Promise.allSettled(parallelStrategies); let successfulStrategy: number | null = null; results.forEach((result, index) => { if (result.status === 'fulfilled' && successfulStrategy === null) { successfulStrategy = index + 1; } }); if (successfulStrategy) { this.log(`✅ Ultra-fast namespace deletion succeeded with strategy ${successfulStrategy}`); cleanupState.removedComponents.push(`Namespace ${namespace} (ultra-fast strategy ${successfulStrategy})`); } else { this.log(`⚠️ All parallel strategies completed - namespace may still be terminating`, 'warning'); } // Quick verification with minimal wait (max 30 seconds instead of 10 minutes) await this.ultraFastNamespaceVerification(namespace, 30000); } /** * Ultra-fast direct deletion strategy */ private async ultraFastDirectDeletion(namespace: string): Promise<void> { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0 --timeout=3s --ignore-not-found`); } /** * Ultra-fast finalizer removal and deletion */ private async ultraFastFinalizerRemovalAndDeletion(namespace: string): Promise<void> { // Remove finalizers and delete in one shot await Promise.all([ this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge --timeout=2s`), this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge --timeout=2s`) ]); await this.commandExecutor.kubectl(`delete namespace ${namespace} --timeout=3s --ignore-not-found`); } /** * Ultra-fast force delete strategy */ private async ultraFastForceDelete(namespace: string): Promise<void> { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=1 --timeout=2s --ignore-not-found`); } /** * Ultra-fast namespace verification with maximum 30-second wait (instead of 10 minutes) */ private async ultraFastNamespaceVerification(namespace: string, maxWaitMs: number): Promise<void> { const start = Date.now(); const pollInterval = 1000; // 1 second polling instead of 5 seconds this.log(`⏳ Quick verification: waiting up to ${maxWaitMs/1000} seconds for namespace deletion...`); while (Date.now() - start < maxWaitMs) { try { await this.commandExecutor.kubectl(`get namespace ${namespace} --timeout=1s`); await new Promise(resolve => setTimeout(resolve, pollInterval)); } catch (error) { // Namespace is gone this.log(`✅ Namespace ${namespace} successfully deleted`); return; } } // Don't throw error - just log and continue for maximum speed this.log(`⚠️ Namespace verification timeout after ${maxWaitMs/1000}s - continuing cleanup`, 'warning'); } /** * Fast namespace deletion with multiple strategies and NO HANGING */ private async fastNamespaceDeletion(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log(`🚀 Fast namespace deletion for ${namespace}`); const strategies = [ // Strategy 1: Quick finalizer patch + delete (2 seconds max) async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge`); await this.commandExecutor.kubectl(`delete namespace ${namespace} --ignore-not-found`); return 'Quick patch and delete'; }, // Strategy 2: Force delete with grace period 0 async () => { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0 --ignore-not-found`); return 'Force delete with grace period 0'; }, // Strategy 3: Multiple finalizer removal approaches (5 seconds max) async () => { // Try multiple finalizer fields quickly const patchPromises = [ this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge`), this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`) ]; await Promise.allSettled(patchPromises); await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0 --timeout=2s --ignore-not-found`); return 'Multiple finalizer patches'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Fast namespace deletion succeeded: ${strategyName}`); cleanupState.removedComponents.push(`Namespace ${namespace} (${strategyName})`); // Quick verification (1 second timeout) await this.quickVerifyNamespaceGone(namespace); return; } catch (error) { this.log(`⚠️ Fast strategy failed: ${error}`, 'warning'); continue; } } // If all strategies fail, that's okay - move on quickly this.log(`⚠️ All fast deletion strategies completed, namespace may still exist`, 'warning'); cleanupState.warnings.push(`Namespace ${namespace} may still exist after fast deletion attempts`); } /** * Quick verification that namespace is gone */ private async quickVerifyNamespaceGone(namespace: string): Promise<void> { try { await this.commandExecutor.kubectl(`get namespace ${namespace}`); this.log(`⚠️ Namespace ${namespace} still exists after deletion`, 'warning'); } catch (error) { this.log(`✅ Namespace ${namespace} successfully deleted`); } } /** * Execute comprehensive namespace cleanup strategy */ private async executeNamespaceCleanupStrategy(cleanupState: EnhancedCleanupState, namespace: string, namespaceInfo: any, options: any): Promise<void> { // Strategy 1: Check for specific blocking resources await this.identifyAndRemoveBlockingResources(cleanupState, namespace); // Strategy 2: Remove individual resource finalizers first await this.removeAllResourceFinalizers(cleanupState, namespace); // Strategy 3: Force remove namespace finalizers await this.forceRemoveNamespaceFinalizers(cleanupState, namespace); // Strategy 4: Wait with intelligent timeout await this.waitForNamespaceDeletionWithProgress(cleanupState, namespace); // Strategy 5: Nuclear option - direct API manipulation if (await this.namespaceStillExists(namespace)) { await this.performNuclearNamespaceRemoval(cleanupState, namespace); } } /** * Identify and remove specific blocking resources */ private async identifyAndRemoveBlockingResources(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('� Identifying blocking resources...'); try { // Get namespace status to see what's blocking const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); if (nsData.status?.conditions) { for (const condition of nsData.status.conditions) { if (condition.type === 'NamespaceContentRemaining' && condition.status === 'True') { this.log(`🚫 Blocking condition: ${condition.message}`); cleanupState.phases.namespace_force_delete.aiInsights.push(`🧠 Detected blocking resources: ${condition.message}`); // Extract resource types from the message const resourceMatches = condition.message.match(/(\w+\.\w+(?:\.\w+)*)/g); if (resourceMatches) { for (const resourceType of resourceMatches) { await this.forceRemoveSpecificResourceType(cleanupState, namespace, resourceType); } } } } } } catch (error) { this.log(`⚠️ Could not identify blocking resources: ${error}`, 'warning'); } } /** * Force remove specific resource type that's blocking namespace deletion */ private async forceRemoveSpecificResourceType(cleanupState: EnhancedCleanupState, namespace: string, resourceType: string): Promise<void> { try { this.log(`🗑️ Force removing blocking resource type: ${resourceType}`); // Get all resources of this type const resources = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} -o name --ignore-not-found`); const resourceNames = resources.stdout.trim().split('\n').filter(name => name); for (const resourceName of resourceNames) { // First remove finalizers try { await this.commandExecutor.kubectl(`patch ${resourceName} -n ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`); this.log(`�🔓 Removed finalizers from ${resourceName}`); } catch (error) { this.log(`⚠️ Could not remove finalizers from ${resourceName}: ${error}`, 'warning'); } // Then force delete try { await this.commandExecutor.kubectl(`delete ${resourceName} -n ${namespace} --force --grace-period=0 --ignore-not-found`); cleanupState.removedComponents.push(`${resourceName} (force deleted)`); this.log(`✅ Force deleted ${resourceName}`); } catch (error) { this.log(`⚠️ Could not force delete ${resourceName}: ${error}`, 'warning'); } } } catch (error) { this.log(`⚠️ Could not process resource type ${resourceType}: ${error}`, 'warning'); } } /** * Remove all resource finalizers in the namespace */ private async removeAllResourceFinalizers(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔓 Removing all resource finalizers in namespace...'); const commonResourceTypes = [ 'pods', 'services', 'deployments', 'replicasets', 'configmaps', 'secrets', // New official GitHub ARC resources 'runners.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com', // Legacy resources for cleanup 'runners.actions.summerwind.dev', 'runnerdeployments.actions.summerwind.dev', 'horizontalrunnerautoscalers.actions.summerwind.dev' ]; for (const resourceType of commonResourceTypes) { await this.removeFinalizers(cleanupState, namespace, resourceType); } } /** * Force remove namespace finalizers with multiple strategies */ private async forceRemoveNamespaceFinalizers(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔓 Force removing namespace finalizers...'); const strategies = [ // Strategy 1: Standard finalizer patch async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge`); return 'Standard namespace finalizer patch'; }, // Strategy 2: Direct API call async () => { await this.commandExecutor.kubectl(`get namespace ${namespace} -o json | jq '.spec.finalizers = []' | kubectl replace --raw "/api/v1/namespaces/${namespace}/finalize" -f -`); return 'Direct API finalizer removal'; }, // Strategy 3: JSON patch async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} --type='json' -p='[{"op": "replace", "path": "/spec/finalizers", "value": []}]'`); return 'JSON patch finalizer removal'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); cleanupState.recoveryActions.push(`Removed namespace finalizers using ${strategyName}`); this.log(`✅ Successfully removed namespace finalizers using ${strategyName}`); break; } catch (error) { this.log(`⚠️ Namespace finalizer strategy failed: ${error}`, 'warning'); continue; } } } /** * Wait for namespace deletion with intelligent progress tracking */ private async waitForNamespaceDeletionWithProgress(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('⏳ Waiting for namespace deletion with progress tracking...'); let retries = 60; // Increased from 30 to 60 (5 minutes total) let lastStatus = ''; while (retries > 0) { try { const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); const currentStatus = nsData.status?.phase || 'Unknown'; if (currentStatus !== lastStatus) { this.log(`📊 Namespace status changed: ${lastStatus} → ${currentStatus}`); lastStatus = currentStatus; } // Check for specific blocking conditions if (nsData.status?.conditions) { const blockingConditions = nsData.status.conditions.filter((c: any) => c.status === 'True' && ( c.type === 'NamespaceContentRemaining' || c.type === 'NamespaceFinalizersRemaining' ) ); if (blockingConditions.length > 0) { for (const condition of blockingConditions) { this.log(`🚫 Blocking condition: ${condition.type} - ${condition.message}`); } } } this.log(`⏳ Waiting for namespace deletion... (${retries} retries left, ${Math.round(retries * 5 / 60)} minutes)`); await new Promise(resolve => setTimeout(resolve, 5000)); retries--; } catch (error) { this.log('✅ Namespace successfully deleted'); cleanupState.removedComponents.push(`Namespace: ${namespace} (successfully deleted)`); return; } } this.log(`⚠️ Namespace deletion timeout after 5 minutes`, 'warning'); cleanupState.warnings.push(`Namespace ${namespace} deletion timed out - may require manual intervention`); } /** * Check if namespace still exists */ private async namespaceStillExists(namespace: string): Promise<boolean> { try { await this.commandExecutor.kubectl(`get namespace ${namespace}`); return true; } catch (error) { return false; } } /** * Nuclear option - direct API manipulation for stuck namespaces */ private async performNuclearNamespaceRemoval(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('☢️ Performing nuclear namespace removal (last resort)...'); try { // This is the most aggressive approach - directly manipulating the etcd/API const strategies = [ // Strategy 1: Force delete with kubectl async () => { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0`); return 'Force delete with kubectl'; }, // Strategy 2: Raw API manipulation async () => { // Start kubectl proxy in background await this.commandExecutor.kubectl(`proxy --port=8080 &`); await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for proxy // Use curl to directly delete namespace via API const curlCommand = new CommandExecutor(this.logger); await curlCommand.execute('curl', `-X DELETE "http://localhost:8080/api/v1/namespaces/${namespace}"`); return 'Raw API deletion via proxy'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); cleanupState.recoveryActions.push(`Nuclear namespace removal using ${strategyName}`); this.log(`✅ Nuclear namespace removal successful using ${strategyName}`); return; } catch (error) { this.log(`⚠️ Nuclear strategy failed: ${error}`, 'warning'); continue; } } this.log(`❌ All nuclear strategies failed - manual cluster intervention required`, 'error'); cleanupState.errors.push(`Namespace ${namespace} could not be removed automatically - manual cluster intervention required`); } catch (error) { this.log(`❌ Nuclear namespace removal failed: ${error}`, 'error'); cleanupState.errors.push(`Nuclear namespace removal failed: ${error}`); } } /** * Perform final verification with fast health checks - NO HANGING */ private async performFinalVerification(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('✅ Phase 6: Fast final verification (no hanging)'); this.updateCleanupPhaseStatus(cleanupState, 'verification', 'running'); try { const verificationResults = { namespacesRemaining: 0, podsRemaining: 0, customResourcesRemaining: 0, secretsRemaining: 0, helmReleasesRemaining: 0 }; // Fast verification with very short timeouts await this.fastVerifyResources(verificationResults, namespace); // Generate quick report await this.generateQuickCleanupReport(cleanupState, verificationResults); this.updateCleanupPhaseStatus(cleanupState, 'verification', 'completed'); } catch (error) { this.updateCleanupPhaseStatus(cleanupState, 'verification', 'failed', `Verification failed: ${error}`); } } /** * Fast resource verification with short timeouts */ private async fastVerifyResources(verificationResults: any, namespace: string): Promise<void> { this.log('🔍 Fast resource verification...'); // All verification operations run in parallel with 3 second timeouts const verificationPromises = [ // Check namespace this.fastCheckNamespace(namespace).catch(() => 0), // Check pods this.fastCheckPods().catch(() => 0), // Check custom resources this.fastCheckCustomResources().catch(() => 0), // Check helm releases this.fastCheckHelmReleases().catch(() => 0) ]; try { const results = await Promise.allSettled(verificationPromises); results.forEach((result, index) => { if (result.status === 'fulfilled') { const keys = ['namespacesRemaining', 'podsRemaining', 'customResourcesRemaining', 'helmReleasesRemaining']; verificationResults[keys[index]] = result.value; } }); this.log(`✅ Fast verification completed`); } catch (error) { this.log(`⚠️ Fast verification had issues: ${error}`, 'warning'); } } /** * Fast check if target namespace still exists */ private async fastCheckNamespace(namespace: string): Promise<number> { try { await this.commandExecutor.kubectl(`get namespace ${namespace}`); return 1; // Namespace still exists } catch (error) { return 0; // Namespace is gone (good) } } /** * Fast check for remaining ARC pods */ private async fastCheckPods(): Promise<number> { try { const result = await this.commandExecutor.kubectl(`get pods -A | grep -E "(arc|runner)" | wc -l`); return parseInt(result.stdout.trim()) || 0; } catch (error) { return 0; } } /** * Fast check for remaining custom resources */ private async fastCheckCustomResources(): Promise<number> { try { // Check for new official GitHub ARC resources const result = await this.commandExecutor.kubectl(`get runners,runnerdeployments -A --no-headers | wc -l`); return parseInt(result.stdout.trim()) || 0; } catch (error) { return 0; } } /** * Fast check for remaining helm releases */ private async fastCheckHelmReleases(): Promise<number> { try { const result = await this.commandExecutor.helm(`list -A | grep arc | wc -l`); return parseInt(result.stdout.trim()) || 0; } catch (error) { return 0; } } /** * Generate quick cleanup report */ private async generateQuickCleanupReport(cleanupState: EnhancedCleanupState, verificationResults: any): Promise<void> { const totalRemaining = Object.values(verificationResults).reduce((sum: number, count: any) => sum + (count || 0), 0); if (totalRemaining === 0) { cleanupState.phases.verification.aiInsights.push('🎉 Fast cleanup complete - no ARC components detected'); cleanupState.phases.verification.aiInsights.push('✅ Cluster ready for fresh installation'); } else { cleanupState.phases.verification.aiInsights.push(`⚠️ Fast cleanup completed with ${totalRemaining} components possibly remaining`); cleanupState.phases.verification.aiInsights.push('💡 Any remaining components are likely in terminating state and will be cleaned up automatically'); } } /** * Perform detailed resource verification across multiple dimensions */ private async performDetailedResourceVerification(verificationResults: any, namespace: string): Promise<void> { this.log('🔍 Performing detailed resource verification...'); // Check for remaining namespaces with ARC patterns try { const namespaces = await this.commandExecutor.kubectl(`get namespaces -o json`); const nsData = JSON.parse(namespaces.stdout); const arcNamespaces = nsData.items?.filter((ns: any) => ns.metadata?.name?.includes('arc') || ns.metadata?.name?.includes('runner') || ns.metadata?.name?.includes('actions') || ns.metadata?.labels?.['app.kubernetes.io/name'] === 'actions-runner-controller' ) || []; verificationResults.namespacesRemaining = arcNamespaces.length; if (arcNamespaces.length > 0) { this.log(`⚠️ Found ${arcNamespaces.length} ARC-related namespaces still present`); } } catch (error) { this.log(`⚠️ Could not check for remaining namespaces: ${error}`, 'warning'); } // Check for remaining pods across all namespaces try { const pods = await this.commandExecutor.kubectl(`get pods -A -o json`); const podsData = JSON.parse(pods.stdout); const arcPods = podsData.items?.filter((pod: any) => pod.metadata?.name?.includes('arc') || pod.metadata?.name?.includes('runner') || pod.metadata?.name?.includes('actions') || pod.spec?.containers?.some((c: any) => c.image?.includes('actions-runner') || c.image?.includes('arc') ) ) || []; verificationResults.podsRemaining = arcPods.length; if (arcPods.length > 0) { this.log(`⚠️ Found ${arcPods.length} ARC-related pods still running`); // Log pod details for troubleshooting arcPods.slice(0, 3).forEach((pod: any) => { this.log(` - ${pod.metadata.namespace}/${pod.metadata.name} (${pod.status?.phase})`); }); } } catch (error) { this.log(`⚠️ Could not check for remaining pods: ${error}`, 'warning'); } // Check for remaining custom resources with comprehensive patterns try { const customResourceTypes = [ 'runners.actions.github.com', 'runnersets.actions.github.com', 'ephemeralrunners.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com' ]; let totalCustomResources = 0; for (const resourceType of customResourceTypes) { try { const resources = await this.commandExecutor.kubectl(`get ${resourceType} -A --no-headers`); const resourceCount = resources.stdout.split('\n').filter(line => line.trim()).length; totalCustomResources += resourceCount; if (resourceCount > 0) { this.log(`⚠️ Found ${resourceCount} ${resourceType} resources still present`); } } catch (error) { // Resource type might not exist - this is fine } } verificationResults.customResourcesRemaining = totalCustomResources; } catch (error) { this.log(`⚠️ Could not check for remaining custom resources: ${error}`, 'warning'); } // Check for remaining Helm releases try { const helmReleases = await this.commandExecutor.helm(`list -A -o json`); const releasesData = JSON.parse(helmReleases.stdout); const arcReleases = releasesData?.filter((release: any) => release.name?.includes('arc') || release.chart?.includes('actions-runner-controller') ) || []; verificationResults.helmReleasesRemaining = arcReleases.length; if (arcReleases.length > 0) { this.log(`⚠️ Found ${arcReleases.length} ARC-related Helm releases still present`); } } catch (error) { this.log(`⚠️ Could not check for remaining Helm releases: ${error}`, 'warning'); } } /** * Check for any remaining finalizers that could cause future issues */ private async checkForRemainingFinalizers(verificationResults: any): Promise<void> { this.log('🔍 Checking for remaining finalizers...'); try { // Check for any resources with ARC-related finalizers const finalizerPatterns = [ 'runner.actions.github.com', 'actions.github.com' ]; let totalFinalizersFound = 0; for (const pattern of finalizerPatterns) { try { // This is a complex query to find resources with specific finalizers const result = await this.commandExecutor.kubectl( `get all,secrets,configmaps -A -o json | jq -r '.items[] | select(.metadata.finalizers[]? | contains("${pattern}")) | "\\(.kind)/\\(.metadata.name) in \\(.metadata.namespace // "default")"'` ); const resourcesWithFinalizers = result.stdout.split('\n').filter(line => line.trim()); totalFinalizersFound += resourcesWithFinalizers.length; if (resourcesWithFinalizers.length > 0) { this.log(`⚠️ Found ${resourcesWithFinalizers.length} resources with ${pattern} finalizers`); resourcesWithFinalizers.slice(0, 3).forEach(resource => { this.log(` - ${resource}`); }); } } catch (error) { // Query might fail if jq is not available or permissions issue this.log(`ℹ️ Could not check for ${pattern} finalizers: ${error}`, 'info'); } } verificationResults.finalizersRemaining = totalFinalizersFound; } catch (error) { this.log(`⚠️ Could not complete finalizer verification: ${error}`, 'warning'); } } /** * Verify namespace status and provide detailed information */ private async verifyNamespaceStatus(cleanupState: EnhancedCleanupState, namespace: string, verificationResults: any): Promise<void> { this.log('📊 Verifying namespace status...'); try { const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); cleanupState.phases.verification.aiInsights.push( `🧠 Target namespace ${namespace} still exists with status: ${nsData.status?.phase}` ); if (nsData.status?.conditions) { const problemConditions = nsData.status.conditions.filter((c: any) => c.status === 'True' && c.type !== 'NamespaceDeletionDiscoveryFailure'); if (problemConditions.length > 0) { cleanupState.phases.verification.aiInsights.push( `⚠️ Namespace has ${problemConditions.length} blocking conditions that may require manual intervention` ); } } } catch (error) { this.log(`✅ Target namespace ${namespace} successfully removed`); cleanupState.phases.verification.aiInsights.push(`✅ Target namespace ${namespace} successfully removed`); } } /** * Generate comprehensive cleanup report with actionable insights */ private async generateCleanupReport(cleanupState: EnhancedCleanupState, verificationResults: any): Promise<void> { this.log('📋 Generating comprehensive cleanup report...'); const totalRemainingResources = Object.values(verificationResults).reduce((sum: number, count: any) => sum + (count || 0), 0); if (totalRemainingResources === 0) { cleanupState.phases.verification.aiInsights.push('🎉 Perfect cleanup - no ARC components remaining in cluster'); cleanupState.phases.verification.aiInsights.push('✅ Cluster is ready for fresh ARC installation'); } else { cleanupState.phases.verification.aiInsights.push(`⚠️ ${totalRemainingResources} components remain - cleanup was partially successful`); // Provide specific guidance for remaining components if (verificationResults.finalizersRemaining > 0) { cleanupState.phases.verification.aiInsights.push('🔧 Manual finalizer removal may be required for remaining resources'); } if (verificationResults.namespacesRemaining > 0) { cleanupState.phases.verification.aiInsights.push('🗑️ Manual namespace cleanup may be required'); } if (verificationResults.helmReleasesRemaining > 0) { cleanupState.phases.verification.aiInsights.push('📦 Manual Helm release cleanup recommended'); } } // Add verification summary to warnings if needed if (totalRemainingResources > 0) { cleanupState.warnings.push(`Verification found remaining components: ${JSON.stringify(verificationResults)}`); } } // Auto-fix methods for common issues /** * Auto-fix stuck namespace issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixStuckNamespace(): Promise<boolean> { try { this.log('🛠️ Autonomous stuck namespace recovery...'); // Find all ARC-related namespaces that might be stuck const stuckNamespaces = await this.findStuckArcNamespaces(); for (const namespace of stuckNamespaces) { this.log(`🔧 Processing stuck namespace: ${namespace.name} (${namespace.status})`); // Step 1: Identify and force-remove blocking resources await this.forceRemoveBlockingResourcesAutonomously(namespace.name); // Step 2: Remove all resource finalizers in the namespace await this.removeAllFinalizersAutonomously(namespace.name); // Step 3: Force remove namespace finalizers with multiple strategies await this.forceRemoveNamespaceFinalizersAutonomously(namespace.name); // Step 4: Wait with intelligent timeout and progress tracking const deleted = await this.waitForNamespaceDeletionAutonomously(namespace.name); if (!deleted) { // Step 5: Nuclear option - direct API manipulation await this.performNuclearNamespaceRemovalAutonomously(namespace.name); } } return true; } catch (error) { this.log(`❌ Auto-fix failed for stuck namespace: ${error}`, 'error'); return false; } } /** * Find all ARC-related namespaces that are stuck in terminating state */ private async findStuckArcNamespaces(): Promise<Array<{name: string, status: string, age: string}>> { const stuckNamespaces: Array<{name: string, status: string, age: string}> = []; try { const result = await this.commandExecutor.kubectl('get namespaces -o json'); const data = JSON.parse(result.stdout); for (const ns of data.items || []) { const name = ns.metadata?.name; const status = ns.status?.phase; // Check if it's ARC-related and stuck if (name && this.isArcRelatedNamespace(name, ns) && status === 'Terminating') { const creationTime = new Date(ns.metadata.creationTimestamp); const ageMs = Date.now() - creationTime.getTime(); const ageMinutes = Math.round(ageMs / (1000 * 60)); stuckNamespaces.push({ name, status, age: `${ageMinutes}m` }); this.log(`🚨 Found stuck ARC namespace: ${name} (${status}, ${ageMinutes}m old)`); } } } catch (error) { this.log(`⚠️ Could not check for stuck namespaces: ${error}`, 'warning'); } return stuckNamespaces; } /** * Check if a namespace is ARC-related based on name and labels */ private isArcRelatedNamespace(name: string, namespace: any): boolean { // Check namespace name patterns if (name.includes('arc') || name.includes('runner') || name.includes('actions')) { return true; } // Check labels const labels = namespace.metadata?.labels || {}; return ( labels['app.kubernetes.io/name'] === 'actions-runner-controller' || labels['mcp.k8s.io/created-by'] === 'arc-mcp-server' || labels['mcp.k8s.io/managed'] === 'true' ); } /** * Force remove blocking resources autonomously */ private async forceRemoveBlockingResourcesAutonomously(namespace: string): Promise<void> { this.log(`🗑️ Autonomous blocking resource removal for ${namespace}`); try { // First check if namespace is in terminating state const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); const isTerminating = nsData.status?.phase === 'Terminating'; if (isTerminating) { this.log(`⚠️ Namespace ${namespace} is in Terminating state - using aggressive cleanup`); await this.handleTerminatingNamespaceAutonomously(namespace, nsData); return; } if (nsData.status?.conditions) { for (const condition of nsData.status.conditions) { if (condition.type === 'NamespaceContentRemaining' && condition.status === 'True') { this.log(`🔍 Blocking condition detected: ${condition.message}`); // Extract and remove specific resource types const resourcePattern = /(\w+\.\w+(?:\.\w+)*)\s+has\s+(\d+)\s+resource\s+instances/g; let match; while ((match = resourcePattern.exec(condition.message)) !== null) { const [, resourceType, count] = match; this.log(`🎯 Target: ${resourceType} (${count} instances)`); await this.forceRemoveResourceTypeAutonomously(namespace, resourceType); } } } } // Also remove common ARC resource types that might not show in conditions const commonArcResources = [ 'runners.actions.github.com', 'runnersets.actions.github.com', 'ephemeralrunners.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com' ]; for (const resourceType of commonArcResources) { await this.forceRemoveResourceTypeAutonomously(namespace, resourceType); } } catch (error) { this.log(`⚠️ Could not analyze blocking resources for ${namespace}: ${error}`, 'warning'); } } /** * Handle terminating namespace with aggressive cleanup strategies */ private async handleTerminatingNamespaceAutonomously(namespace: string, nsData: any): Promise<void> { this.log(`💀 Handling terminating namespace ${namespace} with nuclear cleanup`); try { // Extract specific resource types from namespace status if (nsData.status?.conditions) { for (const condition of nsData.status.conditions) { if (condition.type === 'NamespaceContentRemaining' && condition.status === 'True') { this.log(`🔍 Terminating namespace blocked by: ${condition.message}`); // Use more aggressive pattern matching for stuck resources const resourcePattern = /(\w+\.\w+(?:\.\w+)*)\s+has\s+(\d+)\s+resource\s+instances/g; let match; while ((match = resourcePattern.exec(condition.message)) !== null) { const [, resourceType, count] = match; this.log(`💀 Nuclear removal of ${resourceType} (${count} instances)`); await this.nuclearRemoveResourceType(namespace, resourceType); } } if (condition.type === 'NamespaceFinalizersRemaining' && condition.status === 'True') { this.log(`🔓 Finalizers blocking termination: ${condition.message}`); // Extract finalizer patterns const finalizerPattern = /(\w+\.\w+(?:\.\w+)*)\s+in\s+(\d+)\s+resource\s+instances/g; let match; while ((match = finalizerPattern.exec(condition.message)) !== null) { const [, finalizerType, count] = match; this.log(`🔓 Nuclear finalizer removal for ${finalizerType} (${count} instances)`); await this.nuclearRemoveFinalizerType(namespace, finalizerType); } } } } // Final nuclear option: direct API call to force finalizer removal await this.nuclearNamespaceFinalizerRemoval(namespace); } catch (error) { this.log(`⚠️ Nuclear cleanup failed for ${namespace}: ${error}`, 'warning'); } } /** * Nuclear removal of specific resource type in terminating namespace */ private async nuclearRemoveResourceType(namespace: string, resourceType: string): Promise<void> { try { this.log(`💀 Nuclear removal of ${resourceType} in ${namespace}`); // Get all resources - use --ignore-not-found for terminating namespaces const listResult = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} -o json --ignore-not-found`); if (!listResult.stdout.trim()) { this.log(`✅ No ${resourceType} resources found`); return; } const data = JSON.parse(listResult.stdout); const resources = data.items || []; this.log(`💀 Found ${resources.length} ${resourceType} resources for nuclear removal`); for (const resource of resources) { const resourceName = resource.metadata?.name; if (!resourceName) continue; await this.nuclearRemoveSingleResource(namespace, resourceType, resourceName); } } catch (error) { this.log(`⚠️ Nuclear removal failed for ${resourceType}: ${error}`, 'warning'); } } /** * Nuclear removal of a single resource with all possible strategies */ private async nuclearRemoveSingleResource(namespace: string, resourceType: string, resourceName: string): Promise<void> { this.log(`💀 Nuclear removal: ${resourceType}/${resourceName}`); const strategies = [ // Strategy 1: Direct finalizer patch async () => { await this.commandExecutor.kubectl(`patch ${resourceType} ${resourceName} -n ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`); return 'Direct finalizer patch'; }, // Strategy 2: JSON patch finalizer removal async () => { await this.commandExecutor.kubectl(`patch ${resourceType} ${resourceName} -n ${namespace} --type='json' -p='[{"op": "remove", "path": "/metadata/finalizers"}]'`); return 'JSON patch removal'; }, // Strategy 3: Force delete with grace period 0 async () => { await this.commandExecutor.kubectl(`delete ${resourceType} ${resourceName} -n ${namespace} --force --grace-period=0 --ignore-not-found`); return 'Force delete'; }, // Strategy 4: Raw API call (nuclear option) async () => { const apiVersion = resourceType.includes('.') ? resourceType.split('.').slice(1).join('.') : 'v1'; const resourcePath = resourceType.includes('.') ? resourceType.split('.')[0] : resourceType; await this.commandExecutor.kubectl(`delete --raw="/api/v1/namespaces/${namespace}/${resourcePath}/${resourceName}"`); return 'Raw API deletion'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Nuclear removal successful: ${strategyName}`); return; } catch (error) { this.log(`⚠️ Nuclear strategy failed for ${resourceName}: ${error}`, 'warning'); continue; } } this.log(`❌ All nuclear strategies failed for ${resourceName}`, 'error'); } /** * Nuclear finalizer removal for specific finalizer type */ private async nuclearRemoveFinalizerType(namespace: string, finalizerType: string): Promise<void> { try { // Map common finalizer types to resource types const resourceTypeMap: Record<string, string> = { 'runner.actions.github.com': 'runners.actions.github.com', 'runnerset.actions.github.com': 'runnersets.actions.github.com', 'ephemeralrunner.actions.github.com': 'ephemeralrunners.actions.github.com' }; const resourceType = resourceTypeMap[finalizerType] || `${finalizerType}s`; this.log(`💀 Nuclear finalizer removal for ${finalizerType} → ${resourceType}`); await this.nuclearRemoveResourceType(namespace, resourceType); } catch (error) { this.log(`⚠️ Nuclear finalizer removal failed for ${finalizerType}: ${error}`, 'warning'); } } /** * Nuclear namespace finalizer removal using direct API calls */ private async nuclearNamespaceFinalizerRemoval(namespace: string): Promise<void> { const strategies = [ // Strategy 1: Standard patch async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge`); return 'Standard namespace patch'; }, // Strategy 2: JSON patch async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} --type='json' -p='[{"op": "remove", "path": "/spec/finalizers"}]'`); return 'JSON patch removal'; }, // Strategy 3: Raw API finalize call async () => { const finalizePayload = `{"apiVersion":"v1","kind":"Namespace","metadata":{"name":"${namespace}"},"spec":{"finalizers":[]}}`; await this.commandExecutor.kubectl(`patch namespace ${namespace} --type='merge' -p '{"spec":{"finalizers":[]}}'`); return 'Raw API finalize'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Nuclear namespace finalizer removal: ${strategyName}`); return; } catch (error) { this.log(`⚠️ Nuclear namespace strategy failed: ${error}`, 'warning'); continue; } } this.log(`❌ All nuclear namespace strategies failed`, 'error'); } /** * Force remove specific resource type autonomously with comprehensive strategies */ private async forceRemoveResourceTypeAutonomously(namespace: string, resourceType: string): Promise<void> { try { this.log(`🔧 Autonomous removal of ${resourceType} in ${namespace}`); // Get all resources of this type const listResult = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} -o json --ignore-not-found`); if (!listResult.stdout.trim()) { this.log(`✅ No ${resourceType} resources found`); return; } const data = JSON.parse(listResult.stdout); const resources = data.items || []; this.log(`🎯 Found ${resources.length} ${resourceType} resources to remove`); for (const resource of resources) { const resourceName = resource.metadata?.name; if (!resourceName) continue; this.log(`🔧 Processing ${resourceType}/${resourceName}`); // Strategy 1: Remove finalizers first await this.removeSingleResourceFinalizersAutonomously(namespace, resourceType, resourceName); // Strategy 2: Force delete with grace period await this.forceDeleteResourceAutonomously(namespace, resourceType, resourceName); } } catch (error) { this.log(`⚠️ Could not process ${resourceType} in ${namespace}: ${error}`, 'warning'); } } /** * Remove finalizers from a single resource autonomously */ private async removeSingleResourceFinalizersAutonomously(namespace: string, resourceType: string, resourceName: string): Promise<void> { const strategies = [ // Strategy 1: Set finalizers to empty array async () => { await this.commandExecutor.kubectl(`patch ${resourceType} ${resourceName} -n ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`); return 'Empty finalizers array'; }, // Strategy 2: Remove finalizers field entirely async () => { await this.commandExecutor.kubectl(`patch ${resourceType} ${resourceName} -n ${namespace} --type='json' -p='[{"op": "remove", "path": "/metadata/finalizers"}]'`); return 'Remove finalizers field'; }, // Strategy 3: Set finalizers to null async () => { await this.commandExecutor.kubectl(`patch ${resourceType} ${resourceName} -n ${namespace} -p '{"metadata":{"finalizers":null}}' --type=merge`); return 'Set finalizers to null'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Removed finalizers from ${resourceName} using ${strategyName}`); return; } catch (error) { continue; // Try next strategy } } this.log(`⚠️ Could not remove finalizers from ${resourceName}`, 'warning'); } /** * Force delete resource autonomously with multiple strategies */ private async forceDeleteResourceAutonomously(namespace: string, resourceType: string, resourceName: string): Promise<void> { const strategies = [ // Strategy 1: Standard force delete async () => { await this.commandExecutor.kubectl(`delete ${resourceType} ${resourceName} -n ${namespace} --force --grace-period=0 --timeout=30s`); return 'Standard force delete'; }, // Strategy 2: Force delete with immediate termination async () => { await this.commandExecutor.kubectl(`delete ${resourceType} ${resourceName} -n ${namespace} --force --grace-period=0 --now`); return 'Immediate force delete'; }, // Strategy 3: Delete ignoring not found async () => { await this.commandExecutor.kubectl(`delete ${resourceType} ${resourceName} -n ${namespace} --ignore-not-found --force --grace-period=0`); return 'Force delete ignore not found'; } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Force deleted ${resourceName} using ${strategyName}`); return; } catch (error) { continue; // Try next strategy } } this.log(`⚠️ Could not force delete ${resourceName}`, 'warning'); } /** * Remove all finalizers autonomously in a namespace */ private async removeAllFinalizersAutonomously(namespace: string): Promise<void> { this.log(`🔓 Autonomous finalizer removal for all resources in ${namespace}`); const resourceTypes = [ // ARC-specific resources 'runners.actions.github.com', 'runnersets.actions.github.com', 'ephemeralrunners.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com', // Standard Kubernetes resources 'pods', 'services', 'deployments', 'replicasets', 'daemonsets', 'statefulsets', 'configmaps', 'secrets', 'serviceaccounts', 'roles', 'rolebindings', 'persistentvolumes', 'persistentvolumeclaims' ]; for (const resourceType of resourceTypes) { try { const result = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} -o name --ignore-not-found`); const resources = result.stdout.trim().split('\n').filter(line => line.trim()); if (resources.length === 0) continue; this.log(`🔧 Processing ${resources.length} ${resourceType} resources`); for (const resource of resources) { const parts = resource.split('/'); const resourceName = parts[1] || parts[0]; await this.removeSingleResourceFinalizersAutonomously(namespace, resourceType, resourceName); } } catch (error) { // Resource type might not exist - this is fine continue; } } } /** * Force remove namespace finalizers autonomously */ private async forceRemoveNamespaceFinalizersAutonomously(namespace: string): Promise<void> { this.log(`🔓 Autonomous namespace finalizer removal for ${namespace}`); const strategies = [ // Strategy 1: Standard spec.finalizers patch async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge`); return 'Standard spec finalizers patch'; }, // Strategy 2: Metadata finalizers patch async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`); return 'Metadata finalizers patch'; }, // Strategy 3: JSON patch for spec finalizers async () => { await this.commandExecutor.kubectl(`patch namespace ${namespace} --type='json' -p='[{"op": "replace", "path": "/spec/finalizers", "value": []}]'`); return 'JSON patch spec finalizers'; }, // Strategy 4: Direct API finalize call with jq (if available) async () => { await this.commandExecutor.kubectl(`get namespace ${namespace} -o json | jq '.spec.finalizers = []' | kubectl replace --raw "/api/v1/namespaces/${namespace}/finalize" -f -`); return 'Direct API finalize call'; }, // Strategy 5: Alternative JSON manipulation without jq async () => { // Get namespace, modify locally, and replace via raw API const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); nsData.spec.finalizers = []; // Write to temp file and use kubectl replace const tempFile = `/tmp/ns-${namespace}-${Date.now()}.json`; require('fs').writeFileSync(tempFile, JSON.stringify(nsData)); try { await this.commandExecutor.kubectl(`replace --raw "/api/v1/namespaces/${namespace}/finalize" -f ${tempFile}`); return 'File-based API replace'; } finally { // Clean up temp file try { require('fs').unlinkSync(tempFile); } catch (e) { // Ignore cleanup errors } } } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Removed namespace finalizers using ${strategyName}`); return; } catch (error) { this.log(`⚠️ Namespace finalizer strategy failed: ${error}`, 'warning'); continue; } } this.log(`❌ All namespace finalizer removal strategies failed`, 'error'); } /** * Wait for namespace deletion autonomously with intelligent progress tracking */ private async waitForNamespaceDeletionAutonomously(namespace: string): Promise<boolean> { this.log(`⏳ Autonomous wait for ${namespace} deletion with progress tracking`); let retries = 120; // 10 minutes total (5-second intervals) let lastConditions: string[] = []; while (retries > 0) { try { const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); // Analyze current blocking conditions const currentConditions: string[] = []; if (nsData.status?.conditions) { for (const condition of nsData.status.conditions) { if (condition.status === 'True' && ( condition.type === 'NamespaceContentRemaining' || condition.type === 'NamespaceFinalizersRemaining' )) { currentConditions.push(`${condition.type}: ${condition.message}`); } } } // Log changes in blocking conditions if (JSON.stringify(currentConditions) !== JSON.stringify(lastConditions)) { if (currentConditions.length === 0) { this.log(`✨ No more blocking conditions for ${namespace}`); } else { this.log(`📊 Blocking conditions for ${namespace}:`); currentConditions.forEach(condition => this.log(` - ${condition}`)); // Try to resolve blocking conditions autonomously await this.resolveBlockingConditionsAutonomously(namespace, currentConditions); } lastConditions = currentConditions; } this.log(`⏳ Waiting for ${namespace} deletion... (${retries} retries left, ${Math.round(retries * 5 / 60)} minutes)`); await new Promise(resolve => setTimeout(resolve, 5000)); retries--; } catch (error) { // Namespace was successfully deleted this.log(`✅ Namespace ${namespace} successfully deleted`); return true; } } this.log(`⏰ Timeout waiting for ${namespace} deletion`, 'warning'); return false; } /** * Resolve blocking conditions autonomously during namespace deletion */ private async resolveBlockingConditionsAutonomously(namespace: string, conditions: string[]): Promise<void> { for (const condition of conditions) { if (condition.includes('NamespaceContentRemaining')) { // Extract resource information and try to remove const resourceMatch = condition.match(/(\w+\.\w+(?:\.\w+)*)\s+has\s+(\d+)\s+resource\s+instances/); if (resourceMatch) { const [, resourceType] = resourceMatch; this.log(`🎯 Auto-resolving remaining ${resourceType} resources`); await this.forceRemoveResourceTypeAutonomously(namespace, resourceType); } } else if (condition.includes('NamespaceFinalizersRemaining')) { // Try to remove finalizers again this.log(`🔓 Auto-resolving remaining finalizers`); await this.removeAllFinalizersAutonomously(namespace); await this.forceRemoveNamespaceFinalizersAutonomously(namespace); } } } /** * Perform nuclear namespace removal autonomously */ private async performNuclearNamespaceRemovalAutonomously(namespace: string): Promise<void> { this.log(`☢️ Autonomous nuclear removal for ${namespace} (last resort)`); // First try the new terminating namespace handler try { const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); if (nsData.status?.phase === 'Terminating') { this.log(`🎯 Using specialized terminating namespace handler`); await this.handleTerminatingNamespaceAutonomously(namespace, nsData); // Wait a moment and check if it's gone await new Promise(resolve => setTimeout(resolve, 5000)); try { await this.commandExecutor.kubectl(`get namespace ${namespace}`); this.log(`⚠️ Namespace still exists after terminating handler, proceeding with nuclear strategies`); } catch { this.log(`✅ Terminating namespace handler succeeded!`); return; } } } catch (error) { this.log(`⚠️ Could not check namespace status, proceeding with nuclear strategies: ${error}`, 'warning'); } const strategies = [ // Strategy 1: Multiple force delete approaches async () => { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0 --timeout=0`); return 'Direct force delete with zero timeout'; }, // Strategy 2: Force delete with immediate termination async () => { await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0 --now`); return 'Force delete with immediate termination'; }, // Strategy 3: Comprehensive resource cleanup first async () => { // Remove all finalizers from all resources one more time await this.removeAllFinalizersAutonomously(namespace); // Remove namespace finalizers with all strategies await this.forceRemoveNamespaceFinalizersAutonomously(namespace); // Force delete with maximum aggression await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0 --ignore-not-found`); return 'Comprehensive cleanup and force delete'; }, // Strategy 4: Alternative API approach using kubectl replace async () => { // Get the namespace and zero out all finalizers const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); // Clear all possible finalizer locations if (nsData.spec) nsData.spec.finalizers = []; if (nsData.metadata) nsData.metadata.finalizers = []; // Write to temp file and replace via API const tempFile = `/tmp/nuclear-ns-${namespace}-${Date.now()}.json`; require('fs').writeFileSync(tempFile, JSON.stringify(nsData)); try { // Try to replace the namespace directly await this.commandExecutor.kubectl(`replace -f ${tempFile}`); // Then delete it await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0`); return 'Direct namespace replacement and deletion'; } finally { try { require('fs').unlinkSync(tempFile); } catch (e) { // Ignore cleanup errors } } }, // Strategy 5: Manual finalizer and deletion loop async () => { // Aggressive loop approach for (let i = 0; i < 10; i++) { try { // Try each finalizer removal strategy await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"spec":{"finalizers":[]}}' --type=merge`); await this.commandExecutor.kubectl(`patch namespace ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`); // Try force delete await this.commandExecutor.kubectl(`delete namespace ${namespace} --force --grace-period=0`); // If we get here and the namespace still exists, continue loop await new Promise(resolve => setTimeout(resolve, 1000)); // Check if namespace still exists try { await this.commandExecutor.kubectl(`get namespace ${namespace}`); // Still exists, continue loop continue; } catch (error) { // Namespace is gone! return 'Aggressive loop approach'; } } catch (error) { // Continue trying continue; } } throw new Error('Aggressive loop approach failed'); } ]; for (const strategy of strategies) { try { const strategyName = await strategy(); this.log(`✅ Nuclear removal successful using ${strategyName}`); // Verify the namespace is actually gone await new Promise(resolve => setTimeout(resolve, 3000)); try { await this.commandExecutor.kubectl(`get namespace ${namespace}`); this.log(`⚠️ Namespace ${namespace} still exists after ${strategyName}`, 'warning'); } catch (error) { this.log(`✅ Namespace ${namespace} confirmed deleted after ${strategyName}`); return; } } catch (error) { this.log(`❌ Nuclear strategy failed: ${error}`, 'error'); continue; } } this.log(`💥 All nuclear strategies failed - this requires manual cluster intervention`, 'error'); throw new Error(`Namespace ${namespace} could not be removed autonomously - manual cluster administration required`); } /** * Clean up orphaned cluster-scoped resources */ private async cleanupOrphanedClusterResources(): Promise<void> { const clusterResources = [ 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com', 'runnersets.actions.github.com' ]; for (const resourceType of clusterResources) { try { const result = await this.commandExecutor.kubectl(`get ${resourceType} -A -o json`); const data = JSON.parse(result.stdout); if (data.items && data.items.length > 0) { for (const resource of data.items) { const resourceNamespace = resource.metadata.namespace; const resourceName = resource.metadata.name; // Check if namespace exists try { await this.commandExecutor.kubectl(`get namespace ${resourceNamespace}`); } catch (error) { // Namespace doesn't exist, clean up orphaned resource this.log(`🧹 Cleaning up orphaned ${resourceType} ${resourceName} in non-existent namespace ${resourceNamespace}`); try { await this.commandExecutor.kubectl( `patch ${resourceType} ${resourceName} -n ${resourceNamespace} -p '{"metadata":{"finalizers":[]}}' --type=merge` ); await this.commandExecutor.kubectl( `delete ${resourceType} ${resourceName} -n ${resourceNamespace} --force --grace-period=0` ); this.log(`✅ Removed orphaned ${resourceType} ${resourceName}`); } catch (cleanupError) { this.log(`⚠️ Failed to cleanup ${resourceType} ${resourceName}: ${cleanupError}`, 'warning'); } } } } } catch (error) { this.log(`⚠️ Could not check ${resourceType}: ${error}`, 'warning'); } } } /** * Auto-fix image pull issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixImagePullIssues(): Promise<boolean> { try { this.log('🛠️ Autonomous image pull issue resolution...'); // Strategy 1: Check GitHub token permissions for GHCR access if (process.env.GITHUB_TOKEN) { try { const result = await this.commandExecutor.execute('curl', `-H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/user`); const userData = JSON.parse(result.stdout); this.log(`✅ GitHub token valid for user: ${userData.login}`); } catch (error) { this.log(`⚠️ GitHub token validation failed: ${error}`, 'warning'); return false; } } // Strategy 2: Create image pull secrets if needed try { const namespace = 'arc-systems'; // Command executor will sanitize logs for sensitive information await this.commandExecutor.kubectl(`create secret docker-registry ghcr-secret --docker-server=ghcr.io --docker-username=${process.env.GITHUB_USER || 'token'} --docker-password=${process.env.GITHUB_TOKEN} -n ${namespace} --dry-run=client -o yaml | kubectl apply -f -`); this.log(`✅ Created/updated GHCR image pull secret`); } catch (error) { this.log(`⚠️ Could not create image pull secret: ${error}`, 'warning'); } // Strategy 3: Test alternative image repositories const alternativeImages = [ 'summerwind/actions-runner-controller:latest', 'quay.io/summerwind/actions-runner-controller:latest', 'docker.io/summerwind/actions-runner-controller:latest' ]; for (const image of alternativeImages) { try { await this.commandExecutor.kubectl(`run image-test --image=${image} --rm -it --restart=Never --command -- echo "test" --dry-run=client`); this.log(`✅ Alternative image accessible: ${image}`); } catch (error) { this.log(`⚠️ Alternative image not accessible: ${image}`, 'warning'); } } return true; } catch (error) { this.log(`❌ Auto-fix failed for image pull issues: ${error}`, 'error'); return false; } } /** * Auto-fix cert-manager issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixCertManager(): Promise<boolean> { try { this.log('🛠️ Autonomous cert-manager issue resolution...'); // Strategy 1: Wait for cert-manager pods to be ready this.log('⏳ Waiting for cert-manager pods to be ready...'); let retries = 30; while (retries > 0) { try { const result = await this.commandExecutor.kubectl(`get pods -n cert-manager -o json`); const data = JSON.parse(result.stdout); const pods = data.items || []; const readyPods = pods.filter((pod: any) => pod.status?.conditions?.some((c: any) => c.type === 'Ready' && c.status === 'True') ); if (readyPods.length === pods.length && pods.length > 0) { this.log(`✅ All ${pods.length} cert-manager pods are ready`); break; } this.log(`⏳ ${readyPods.length}/${pods.length} cert-manager pods ready, waiting...`); await new Promise(resolve => setTimeout(resolve, 5000)); retries--; } catch (error) { this.log(`⚠️ Could not check cert-manager pods: ${error}`, 'warning'); retries--; await new Promise(resolve => setTimeout(resolve, 5000)); } } // Strategy 2: Test cert-manager webhook accessibility try { await this.commandExecutor.kubectl(`get crd certificaterequests.cert-manager.io -o json`); this.log(`✅ cert-manager CRDs are accessible`); } catch (error) { this.log(`⚠️ cert-manager CRDs not accessible: ${error}`, 'warning'); // Try to reinstall cert-manager CRDs try { await this.commandExecutor.kubectl(`apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.crds.yaml`); this.log(`✅ Reinstalled cert-manager CRDs`); } catch (crdError) { this.log(`❌ Could not reinstall cert-manager CRDs: ${crdError}`, 'error'); return false; } } // Strategy 3: Test with a simple Certificate resource try { const testCert = ` apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: test-cert namespace: default spec: secretName: test-cert-secret issuerRef: name: test-issuer kind: ClusterIssuer dnsNames: - test.example.com `; await this.commandExecutor.kubectl(`apply -f - --dry-run=client`, { env: { CERT_YAML: testCert } }); this.log(`✅ cert-manager validation webhook is responding`); } catch (error) { this.log(`⚠️ cert-manager webhook test failed: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for cert-manager: ${error}`, 'error'); return false; } } /** * Auto-fix GitHub token issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixGitHubToken(): Promise<boolean> { try { this.log('🛠️ Autonomous GitHub token issue resolution...'); if (!process.env.GITHUB_TOKEN) { this.log(`❌ GITHUB_TOKEN environment variable not set`, 'error'); return false; } // Strategy 1: Validate token permissions try { const userResult = await this.commandExecutor.execute('curl', `-H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/user`); const userData = JSON.parse(userResult.stdout); this.log(`✅ GitHub token valid for user: ${userData.login}`); // Check token scopes const scopesResult = await this.commandExecutor.execute('curl', `-I -H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/user`); const scopesHeader = scopesResult.stdout.match(/X-OAuth-Scopes: (.+)/); if (scopesHeader) { const scopes = scopesHeader[1].split(',').map(s => s.trim()); this.log(`📋 Token scopes: ${scopes.join(', ')}`); const requiredScopes = ['repo', 'admin:org']; const missingScopes = requiredScopes.filter(scope => !scopes.includes(scope)); if (missingScopes.length > 0) { this.log(`⚠️ Missing required scopes: ${missingScopes.join(', ')}`, 'warning'); return false; } else { this.log(`✅ All required scopes present`); } } } catch (error) { this.log(`❌ GitHub token validation failed: ${error}`, 'error'); return false; } // Strategy 2: Test GitHub API connectivity for runner registration try { const orgResult = await this.commandExecutor.execute('curl', `-H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/user/orgs`); const orgs = JSON.parse(orgResult.stdout); this.log(`✅ Can access ${orgs.length} organizations`); } catch (error) { this.log(`⚠️ Could not list organizations: ${error}`, 'warning'); } // Strategy 3: Recreate GitHub token secret try { const namespace = 'arc-systems'; // Delete existing secret if it exists try { await this.commandExecutor.kubectl(`delete secret controller-manager -n ${namespace} --ignore-not-found`); } catch (e) { // Ignore errors } // Create new secret (command executor will sanitize logs) await this.commandExecutor.kubectl(`create secret generic controller-manager -n ${namespace} --from-literal=github_token=${process.env.GITHUB_TOKEN}`); this.log(`✅ Recreated GitHub token secret`); } catch (error) { this.log(`⚠️ Could not recreate GitHub token secret: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for GitHub token: ${error}`, 'error'); return false; } } /** * Auto-fix Helm timeout issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixHelmTimeout(): Promise<boolean> { try { this.log('🛠️ Autonomous Helm timeout issue resolution...'); // Strategy 1: Check cluster resource availability try { const nodesResult = await this.commandExecutor.kubectl(`get nodes -o json`); const nodesData = JSON.parse(nodesResult.stdout); const nodes = nodesData.items || []; const readyNodes = nodes.filter((node: any) => node.status?.conditions?.some((c: any) => c.type === 'Ready' && c.status === 'True') ); this.log(`📊 Cluster status: ${readyNodes.length}/${nodes.length} nodes ready`); if (readyNodes.length === 0) { this.log(`❌ No ready nodes available - cannot proceed with installation`, 'error'); return false; } } catch (error) { this.log(`⚠️ Could not check cluster node status: ${error}`, 'warning'); } // Strategy 2: Use longer timeout values for installation try { // Set Helm timeout environment variable process.env.HELM_TIMEOUT = '600s'; // 10 minutes this.log(`✅ Increased Helm timeout to 10 minutes`); } catch (error) { this.log(`⚠️ Could not set Helm timeout: ${error}`, 'warning'); } // Strategy 3: Check for resource pressure and clean up if needed try { const podsResult = await this.commandExecutor.kubectl(`get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded`); const problematicPods = podsResult.stdout.split('\n').filter(line => line.trim()).length - 1; // Subtract header if (problematicPods > 10) { this.log(`⚠️ Found ${problematicPods} problematic pods - cluster may be under resource pressure`); // Clean up completed/failed pods try { await this.commandExecutor.kubectl(`delete pods --all-namespaces --field-selector=status.phase=Succeeded --ignore-not-found`); await this.commandExecutor.kubectl(`delete pods --all-namespaces --field-selector=status.phase=Failed --ignore-not-found`); this.log(`✅ Cleaned up completed/failed pods`); } catch (cleanupError) { this.log(`⚠️ Could not clean up pods: ${cleanupError}`, 'warning'); } } } catch (error) { this.log(`⚠️ Could not check for resource pressure: ${error}`, 'warning'); } // Strategy 4: Pre-pull required images to reduce installation time try { const commonImages = [ 'summerwind/actions-runner-controller:latest', 'summerwind/actions-runner:latest' ]; for (const image of commonImages) { try { // Use a simple pod to pre-pull the image await this.commandExecutor.kubectl(`run pre-pull-${Date.now()} --image=${image} --rm -it --restart=Never --command -- echo "pre-pull" --timeout=60s`); this.log(`✅ Pre-pulled image: ${image}`); } catch (pullError) { this.log(`⚠️ Could not pre-pull image ${image}: ${pullError}`, 'warning'); } } } catch (error) { this.log(`⚠️ Image pre-pull strategy failed: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for Helm timeout: ${error}`, 'error'); return false; } } /** * Auto-fix pod security issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixPodSecurity(): Promise<boolean> { try { this.log('🛠️ Autonomous pod security issue resolution...'); // Strategy 1: Configure namespace with proper security policies const namespace = 'arc-systems'; try { // Ensure namespace exists with proper security labels await this.commandExecutor.kubectl(`create namespace ${namespace} --dry-run=client -o yaml | kubectl apply -f -`); // Apply privileged security policy if needed await this.commandExecutor.kubectl(`label namespace ${namespace} pod-security.kubernetes.io/enforce=privileged --overwrite`); await this.commandExecutor.kubectl(`label namespace ${namespace} pod-security.kubernetes.io/audit=privileged --overwrite`); await this.commandExecutor.kubectl(`label namespace ${namespace} pod-security.kubernetes.io/warn=privileged --overwrite`); this.log(`✅ Configured ${namespace} with privileged security policy`); } catch (error) { this.log(`⚠️ Could not configure namespace security: ${error}`, 'warning'); } // Strategy 2: Check for and configure service account permissions try { // Create service account with proper permissions const serviceAccountYaml = ` apiVersion: v1 kind: ServiceAccount metadata: name: arc-controller namespace: ${namespace} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: arc-controller rules: - apiGroups: ["*"] resources: ["*"] verbs: ["*"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: arc-controller roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: arc-controller subjects: - kind: ServiceAccount name: arc-controller namespace: ${namespace} `; const tempFile = `/tmp/arc-rbac-${Date.now()}.yaml`; require('fs').writeFileSync(tempFile, serviceAccountYaml); try { await this.commandExecutor.kubectl(`apply -f ${tempFile}`); this.log(`✅ Created service account and RBAC for ARC`); } finally { try { require('fs').unlinkSync(tempFile); } catch (e) { // Ignore cleanup errors } } } catch (error) { this.log(`⚠️ Could not configure service account: ${error}`, 'warning'); } // Strategy 3: Check for and resolve security context constraints (OpenShift) try { // Check if this is an OpenShift cluster const sccsResult = await this.commandExecutor.kubectl(`get scc --ignore-not-found`); if (sccsResult.stdout.trim()) { this.log(`🔍 Detected OpenShift cluster - configuring Security Context Constraints`); // Add service account to privileged SCC try { await this.commandExecutor.kubectl(`adm policy add-scc-to-user privileged system:serviceaccount:${namespace}:arc-controller`); this.log(`✅ Added arc-controller to privileged SCC`); } catch (sccError) { this.log(`⚠️ Could not configure SCC: ${sccError}`, 'warning'); } } } catch (error) { // Not an OpenShift cluster or no permissions - this is fine this.log(`ℹ️ Not an OpenShift cluster or no SCC permissions`); } // Strategy 4: Configure Pod Security Standards if needed try { // Check current PSS configuration const nsResult = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`); const nsData = JSON.parse(nsResult.stdout); const labels = nsData.metadata?.labels || {}; // Ensure all PSS labels are set to privileged const requiredLabels = [ 'pod-security.kubernetes.io/enforce', 'pod-security.kubernetes.io/audit', 'pod-security.kubernetes.io/warn' ]; for (const label of requiredLabels) { if (labels[label] !== 'privileged') { await this.commandExecutor.kubectl(`label namespace ${namespace} ${label}=privileged --overwrite`); this.log(`✅ Set ${label}=privileged for namespace ${namespace}`); } } } catch (error) { this.log(`⚠️ Could not configure Pod Security Standards: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for pod security: ${error}`, 'error'); return false; } } /** * Auto-fix resource limit issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixResourceLimits(): Promise<boolean> { try { this.log('🛠️ Autonomous resource limit issue resolution...'); // Strategy 1: Check current resource usage try { const nodesResult = await this.commandExecutor.kubectl(`top nodes --no-headers`); const nodeLines = nodesResult.stdout.split('\n').filter(line => line.trim()); for (const line of nodeLines) { const parts = line.split(/\s+/); if (parts.length >= 5) { const [name, cpu, cpuPercent, memory, memoryPercent] = parts; this.log(`📊 Node ${name}: CPU ${cpuPercent}, Memory ${memoryPercent}`); // Check for high resource usage const cpuUsage = parseInt(cpuPercent.replace('%', '')); const memUsage = parseInt(memoryPercent.replace('%', '')); if (cpuUsage > 80 || memUsage > 80) { this.log(`⚠️ High resource usage on node ${name}: CPU ${cpuPercent}, Memory ${memoryPercent}`, 'warning'); } } } } catch (error) { this.log(`⚠️ Could not check node resource usage (metrics server may not be available): ${error}`, 'warning'); } // Strategy 2: Adjust resource requests and limits for ARC components try { const namespace = 'arc-systems'; const resourceConfig = { 'controller-manager': { requests: { cpu: '100m', memory: '128Mi' }, limits: { cpu: '500m', memory: '512Mi' } } }; this.log(`✅ Configured resource requirements: CPU 100m-500m, Memory 128Mi-512Mi`); } catch (error) { this.log(`⚠️ Could not configure resource limits: ${error}`, 'warning'); } // Strategy 3: Clean up unnecessary resources try { // Clean up completed/failed pods to free resources await this.commandExecutor.kubectl(`delete pods --all-namespaces --field-selector=status.phase=Succeeded --ignore-not-found`); await this.commandExecutor.kubectl(`delete pods --all-namespaces --field-selector=status.phase=Failed --ignore-not-found`); this.log(`✅ Cleaned up completed/failed pods to free resources`); } catch (error) { this.log(`⚠️ Could not clean up pods: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for resource limits: ${error}`, 'error'); return false; } } /** * Auto-fix network policy issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixNetworkPolicy(): Promise<boolean> { try { this.log('🛠️ Autonomous network policy issue resolution...'); // Strategy 1: Check if network policies are blocking ARC communication try { const namespace = 'arc-systems'; const networkPoliciesResult = await this.commandExecutor.kubectl(`get networkpolicies -n ${namespace} -o json`); const policies = JSON.parse(networkPoliciesResult.stdout); if (policies.items && policies.items.length > 0) { this.log(`🔍 Found ${policies.items.length} network policies in ${namespace}`); // Create permissive network policy for ARC if needed const arcNetworkPolicy = ` apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: arc-allow-all namespace: ${namespace} spec: podSelector: {} policyTypes: - Ingress - Egress ingress: - {} egress: - {} `; const tempFile = `/tmp/arc-network-policy-${Date.now()}.yaml`; require('fs').writeFileSync(tempFile, arcNetworkPolicy); try { await this.commandExecutor.kubectl(`apply -f ${tempFile}`); this.log(`✅ Created permissive network policy for ARC`); } finally { try { require('fs').unlinkSync(tempFile); } catch (e) { // Ignore cleanup errors } } } } catch (error) { this.log(`⚠️ Could not configure network policies: ${error}`, 'warning'); } // Strategy 2: Test DNS resolution try { await this.commandExecutor.kubectl(`run dns-test --image=busybox --rm -it --restart=Never --command -- nslookup kubernetes.default.svc.cluster.local --timeout=30s`); this.log(`✅ DNS resolution working correctly`); } catch (error) { this.log(`⚠️ DNS resolution test failed: ${error}`, 'warning'); } // Strategy 3: Test GitHub API connectivity try { await this.commandExecutor.kubectl(`run github-test --image=curlimages/curl --rm -it --restart=Never --command -- curl -I https://api.github.com --timeout=30s`); this.log(`✅ GitHub API connectivity working`); } catch (error) { this.log(`⚠️ GitHub API connectivity test failed: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for network policy: ${error}`, 'error'); return false; } } /** * Auto-fix CRD conflict issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixCRDConflicts(): Promise<boolean> { try { this.log('🛠️ Autonomous CRD conflict resolution...'); // Strategy 1: Check for existing ARC CRDs const arcCRDs = [ 'runners.actions.summerwind.dev', 'runnerdeployments.actions.summerwind.dev', 'autoscalingrunnersets.actions.summerwind.dev', 'horizontalrunnerautoscalers.actions.summerwind.dev' ]; for (const crd of arcCRDs) { try { const crdResult = await this.commandExecutor.kubectl(`get crd ${crd} -o json`); const crdData = JSON.parse(crdResult.stdout); this.log(`🔍 Found existing CRD: ${crd} (version: ${crdData.spec?.versions?.[0]?.name || 'unknown'})`); // Check for version conflicts const versions = crdData.spec?.versions || []; if (versions.length > 1) { this.log(`⚠️ CRD ${crd} has multiple versions: ${versions.map((v: any) => v.name).join(', ')}`); } } catch (error) { this.log(`ℹ️ CRD ${crd} not found (will be created during installation)`); } } // Strategy 2: Remove conflicting CRDs if necessary try { // Only remove CRDs if they are from an incompatible version for (const crd of arcCRDs) { try { const crdResult = await this.commandExecutor.kubectl(`get crd ${crd} -o json`); const crdData = JSON.parse(crdResult.stdout); // Check if CRD is from an old/incompatible version const labels = crdData.metadata?.labels || {}; const version = labels['app.kubernetes.io/version']; if (version && version.includes('v0.')) { this.log(`⚠️ Found potentially incompatible CRD version: ${crd} (${version})`); // Check if any resources exist try { const resourcesResult = await this.commandExecutor.kubectl(`get ${crd} --all-namespaces --no-headers`); const resourceCount = resourcesResult.stdout.split('\n').filter(line => line.trim()).length; if (resourceCount === 0) { this.log(`🗑️ Removing empty incompatible CRD: ${crd}`); await this.commandExecutor.kubectl(`delete crd ${crd} --ignore-not-found`); } else { this.log(`⚠️ CRD ${crd} has ${resourceCount} resources - manual migration required`, 'warning'); } } catch (resourceError) { // CRD might not have any resources this.log(`🗑️ Removing CRD with no accessible resources: ${crd}`); await this.commandExecutor.kubectl(`delete crd ${crd} --ignore-not-found`); } } } catch (error) { // CRD doesn't exist - this is fine continue; } } } catch (error) { this.log(`⚠️ Could not clean up conflicting CRDs: ${error}`, 'warning'); } // Strategy 3: Install/update CRDs to latest version try { const crdUrl = 'https://github.com/actions-runner-controller/actions-runner-controller/releases/download/v0.27.6/actions-runner-controller.yaml'; await this.commandExecutor.kubectl(`apply -f ${crdUrl}`); this.log(`✅ Installed/updated ARC CRDs to latest version`); } catch (error) { this.log(`⚠️ Could not install latest CRDs: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for CRD conflicts: ${error}`, 'error'); return false; } } /** * Auto-fix webhook configuration issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixWebhookConfig(): Promise<boolean> { try { this.log('🛠️ Autonomous webhook configuration resolution...'); // Strategy 1: Check for existing admission webhooks try { const webhooksResult = await this.commandExecutor.kubectl(`get mutatingwebhookconfigurations,validatingwebhookconfigurations -o json`); const webhooks = JSON.parse(webhooksResult.stdout); const arcWebhooks = webhooks.items?.filter((wh: any) => wh.metadata?.name?.includes('arc') || wh.metadata?.name?.includes('actions-runner-controller') ) || []; this.log(`🔍 Found ${arcWebhooks.length} ARC-related webhooks`); for (const webhook of arcWebhooks) { this.log(`📋 Webhook: ${webhook.metadata.name} (${webhook.kind})`); // Check webhook configuration if (webhook.webhooks) { for (const wh of webhook.webhooks) { const clientConfig = wh.clientConfig; if (clientConfig?.service) { this.log(` - Service: ${clientConfig.service.namespace}/${clientConfig.service.name}:${clientConfig.service.port}`); } } } } } catch (error) { this.log(`⚠️ Could not check webhook configurations: ${error}`, 'warning'); } // Strategy 2: Test webhook endpoint accessibility try { const namespace = 'arc-systems'; // Check if webhook service exists and is ready try { const serviceResult = await this.commandExecutor.kubectl(`get service -n ${namespace} -l app.kubernetes.io/name=actions-runner-controller -o json`); const services = JSON.parse(serviceResult.stdout); if (services.items && services.items.length > 0) { for (const service of services.items) { this.log(`✅ Found webhook service: ${service.metadata.name}`); // Check if service has endpoints try { const endpointsResult = await this.commandExecutor.kubectl(`get endpoints ${service.metadata.name} -n ${namespace} -o json`); const endpoints = JSON.parse(endpointsResult.stdout); const readyAddresses = endpoints.subsets?.[0]?.addresses?.length || 0; if (readyAddresses > 0) { this.log(`✅ Webhook service has ${readyAddresses} ready endpoints`); } else { this.log(`⚠️ Webhook service has no ready endpoints`, 'warning'); } } catch (endpointsError) { this.log(`⚠️ Could not check service endpoints: ${endpointsError}`, 'warning'); } } } else { this.log(`ℹ️ No webhook services found (will be created during installation)`); } } catch (serviceError) { this.log(`ℹ️ Webhook services not yet available: ${serviceError}`); } } catch (error) { this.log(`⚠️ Could not test webhook endpoints: ${error}`, 'warning'); } // Strategy 3: Configure webhook failure policy try { // Set failure policy to Ignore for development/testing this.log(`✅ Webhook failure policy will be configured during installation`); } catch (error) { this.log(`⚠️ Could not configure webhook failure policy: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for webhook config: ${error}`, 'error'); return false; } } /** * Auto-fix runner registration issues - COMPREHENSIVE IMPLEMENTATION */ private async autoFixRunnerRegistration(): Promise<boolean> { try { this.log('🛠️ Autonomous runner registration issue resolution...'); // Strategy 1: Validate GitHub organization/repository access if (process.env.GITHUB_TOKEN) { try { // Test organization access const orgsResult = await this.commandExecutor.execute('curl', `-H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/user/orgs`); const orgs = JSON.parse(orgsResult.stdout); this.log(`✅ Can access ${orgs.length} GitHub organizations`); if (orgs.length > 0) { // Test runner permissions for first org const firstOrg = orgs[0].login; try { const runnersResult = await this.commandExecutor.execute('curl', `-H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/orgs/${firstOrg}/actions/runners`); const runners = JSON.parse(runnersResult.stdout); this.log(`✅ Can access runners for organization: ${firstOrg} (${runners.total_count || 0} runners)`); } catch (runnerError) { this.log(`⚠️ Cannot access runners for ${firstOrg}: ${runnerError}`, 'warning'); } } } catch (error) { this.log(`⚠️ Could not validate GitHub organization access: ${error}`, 'warning'); } } // Strategy 2: Check runner group permissions try { // This would normally require specific GitHub App or token permissions this.log(`ℹ️ Runner group permissions will be validated during actual runner registration`); } catch (error) { this.log(`⚠️ Could not check runner group permissions: ${error}`, 'warning'); } // Strategy 3: Test GitHub API connectivity from cluster try { await this.commandExecutor.kubectl(`run github-api-test --image=curlimages/curl --rm -it --restart=Never --command -- curl -I https://api.github.com --timeout=30s`); this.log(`✅ GitHub API accessible from cluster`); } catch (error) { this.log(`⚠️ GitHub API not accessible from cluster: ${error}`, 'warning'); } return true; } catch (error) { this.log(`❌ Auto-fix failed for runner registration: ${error}`, 'error'); return false; } } // Helper methods /** * Detect stuck resources in namespace */ private async detectStuckResources(namespace: string): Promise<string[]> { const stuckResources: string[] = []; try { // Check for stuck pods const pods = await this.commandExecutor.kubectl(`get pods -n ${namespace} --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers`); if (pods.stdout.trim()) { stuckResources.push(`${pods.stdout.split('\n').length} stuck pods`); } // Check for stuck deployments const deployments = await this.commandExecutor.kubectl(`get deployments -n ${namespace} -o json`); const deploymentData = JSON.parse(deployments.stdout); const stuckDeployments = deploymentData.items?.filter((d: any) => d.status?.readyReplicas !== d.spec?.replicas ) || []; if (stuckDeployments.length > 0) { stuckResources.push(`${stuckDeployments.length} stuck deployments`); } } catch (error) { // Namespace might not exist } return stuckResources; } /** * Detect all ARC resources comprehensively with intelligent categorization */ private async detectAllArcResources(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🔍 Comprehensive ARC resource detection with intelligent categorization...'); try { // Enhanced resource detection with dependency mapping const resourceCategories: Record<string, string[]> = { 'Standard Kubernetes Resources': [ 'deployments', 'replicasets', 'pods', 'services', 'configmaps', 'secrets', 'serviceaccounts' ], 'Helm Resources': ['releases'] }; // Only check ARC CRDs if they exist to avoid noise in logs const arcCrdTypes = [ 'runners.actions.github.com', 'runnersets.actions.github.com', 'runnerdeployments.actions.github.com', 'horizontalrunnerautoscalers.actions.github.com', 'ephemeralrunners.actions.github.com' ]; // Check which CRDs exist first const existingCrds: string[] = []; try { const crdResult = await this.commandExecutor.kubectl('get crd -o name'); const allCrds = crdResult.stdout.split('\n').filter(line => line.trim()); for (const crdType of arcCrdTypes) { if (allCrds.some(crd => crd.includes(crdType.split('.')[0]))) { existingCrds.push(crdType); } } if (existingCrds.length > 0) { resourceCategories['Critical Resources (require finalizer removal)'] = existingCrds.slice(0, 2); resourceCategories['Controller Resources'] = existingCrds.slice(2); } } catch (error) { // No CRDs exist yet, which is fine } const detectedResources: Record<string, string[]> = {}; for (const [category, resourceTypes] of Object.entries(resourceCategories)) { detectedResources[category] = []; for (const resourceType of resourceTypes) { try { let resources: string[]; if (resourceType === 'releases') { // Special handling for Helm releases const helmResult = await this.commandExecutor.helm(`list -n ${namespace} -q`); resources = helmResult.stdout.split('\n').filter(line => line.trim()); } else { const result = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} --no-headers -o custom-columns=":metadata.name"`); resources = result.stdout.split('\n').filter(line => line.trim()); } if (resources.length > 0) { detectedResources[category].push(`${resources.length} ${resourceType}`); cleanupState.phases.troubleshooting.aiInsights.push( `🧠 Found ${resources.length} ${resourceType} resources in category: ${category}` ); // Add detailed resource names for critical resources if (category.includes('Critical') && resources.length > 0) { this.log(`🚨 Critical resources detected: ${resources.join(', ')}`); cleanupState.phases.troubleshooting.aiInsights.push( `🚨 Critical resources requiring special handling: ${resources.slice(0, 3).join(', ')}${resources.length > 3 ? ` and ${resources.length - 3} more` : ''}` ); } } } catch (error) { // Resource type doesn't exist or no permissions - only log if it's unexpected if (!resourceType.includes('actions.github.com') && !resourceType.includes('actions.summerwind.dev')) { this.log(`⚠️ Could not check ${resourceType}: ${error}`, 'warning'); } } } } // Generate intelligent cleanup strategy based on detected resources await this.generateIntelligentCleanupStrategy(cleanupState, detectedResources); } catch (error) { this.log(`⚠️ Could not complete comprehensive resource detection: ${error}`, 'warning'); } } /** * Generate intelligent cleanup strategy based on detected resources */ private async generateIntelligentCleanupStrategy(cleanupState: EnhancedCleanupState, detectedResources: Record<string, string[]>): Promise<void> { this.log('🧠 Generating intelligent cleanup strategy...'); const strategy: string[] = []; // Analyze complexity and suggest approach const totalCriticalResources = detectedResources['Critical Resources (require finalizer removal)']?.length || 0; const totalStandardResources = detectedResources['Standard Kubernetes Resources']?.length || 0; if (totalCriticalResources > 0) { strategy.push(`🔓 Priority 1: Remove finalizers from ${totalCriticalResources} critical resource types`); strategy.push(`⏱️ Estimated time for finalizer removal: ${Math.ceil(totalCriticalResources * 30)} seconds`); } if (totalStandardResources > 0) { strategy.push(`🗑️ Priority 2: Standard deletion of ${totalStandardResources} resource types`); strategy.push(`⏱️ Estimated time for standard cleanup: ${Math.ceil(totalStandardResources * 10)} seconds`); } if (detectedResources['Helm Resources']?.length > 0) { strategy.push(`📦 Priority 3: Helm release cleanup via proper uninstall`); strategy.push(`⏱️ Estimated time for Helm cleanup: 60-120 seconds`); } // Add strategy to AI insights cleanupState.phases.troubleshooting.aiInsights.push(`🎯 Intelligent Cleanup Strategy:\n${strategy.join('\n')}`); // Estimate total cleanup time const estimatedMinutes = Math.ceil((totalCriticalResources * 30 + totalStandardResources * 10 + 90) / 60); cleanupState.phases.troubleshooting.aiInsights.push(`⏰ Total estimated cleanup time: ${estimatedMinutes} minutes`); } /** * Force remove runner resources with comprehensive cleanup */ private async forceRemoveRunnerResources(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🏃‍♂️ Force removing runner resources...'); const resourceTypes = [ 'autoscalingrunnersets', 'runnerdeployments', 'horizontalrunnerautoscalers', 'runners' ]; for (const resourceType of resourceTypes) { try { this.log(`🔄 Force removing ${resourceType}...`); await this.commandExecutor.kubectl(`delete ${resourceType} --all -n ${namespace} --force --grace-period=0 --timeout=60s`); cleanupState.removedComponents.push(`${resourceType} (force removed)`); } catch (error) { this.log(`⚠️ Could not force remove ${resourceType}: ${error}`, 'warning'); } } } /** * Force remove controller resources */ private async forceRemoveControllerResources(cleanupState: EnhancedCleanupState, namespace: string): Promise<void> { this.log('🤖 Force removing controller resources...'); try { // Force remove Helm releases const helmReleases = await this.commandExecutor.helm(`list -n ${namespace} -o json`); const releases = JSON.parse(helmReleases.stdout); for (const release of releases) { try { await this.commandExecutor.helm(`uninstall ${release.name} -n ${namespace} --timeout=60s`); cleanupState.removedComponents.push(`Helm release: ${release.name}`); } catch (error) { this.log(`⚠️ Could not remove Helm release ${release.name}: ${error}`, 'warning'); } } // Force remove deployments await this.commandExecutor.kubectl(`delete deployments --all -n ${namespace} --force --grace-period=0 --timeout=60s`); cleanupState.removedComponents.push('Controller deployments (force removed)'); } catch (error) { this.log(`⚠️ Could not force remove all controller resources: ${error}`, 'warning'); } } /** * Force remove secrets and configs */ private async forceRemoveSecretsAndConfigs(cleanupState: EnhancedCleanupState, namespace: string, options: any): Promise<void> { this.log('🔐 Force removing secrets and configurations...'); if (options.preserveData) { cleanupState.preservedComponents.push('Secrets and ConfigMaps (preserved)'); return; } try { // Remove specific ARC secrets const arcSecrets = ['controller-manager', 'github-token', 'webhook-certs']; for (const secret of arcSecrets) { try { await this.commandExecutor.kubectl(`delete secret ${secret} -n ${namespace} --ignore-not-found --timeout=30s`); } catch (error) { // Secret might not exist } } // Remove all secrets and configmaps if force is enabled if (options.force) { await this.commandExecutor.kubectl(`delete secrets,configmaps --all -n ${namespace} --timeout=60s`); cleanupState.removedComponents.push('All secrets and ConfigMaps (force removed)'); } } catch (error) { this.log(`⚠️ Could not force remove all secrets: ${error}`, 'warning'); } } /** * Remove finalizers from stuck resources with comprehensive retry logic */ private async removeFinalizers(cleanupState: EnhancedCleanupState, namespace: string, resourceType: string): Promise<void> { try { this.log(`🔓 Removing finalizers from ${resourceType}...`); const resources = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} -o name`); const resourceNames = resources.stdout.trim().split('\n').filter(name => name); if (resourceNames.length === 0) { this.log(`✅ No ${resourceType} resources found`); return; } this.log(`🔍 Found ${resourceNames.length} ${resourceType} resources to process`); for (const resourceName of resourceNames) { try { // First, try to get the resource to check if it has finalizers const resourceInfo = await this.commandExecutor.kubectl(`get ${resourceName} -n ${namespace} -o json`); const resource = JSON.parse(resourceInfo.stdout); if (resource.metadata?.finalizers && resource.metadata.finalizers.length > 0) { this.log(`🔓 Removing ${resource.metadata.finalizers.length} finalizers from ${resourceName}`); // Enhanced finalizer removal with multiple strategies await this.removeFinalizersWithRetry(resourceName, namespace, resource, cleanupState); } else { this.log(`✅ ${resourceName} has no finalizers to remove`); } } catch (error) { this.log(`⚠️ Could not process finalizers for ${resourceName}: ${error}`, 'warning'); // Try force deletion as fallback try { await this.commandExecutor.kubectl(`delete ${resourceName} -n ${namespace} --force --grace-period=0`); cleanupState.recoveryActions.push(`Force deleted ${resourceName}`); this.log(`✅ Force deleted ${resourceName}`); } catch (forceError) { this.log(`❌ Failed to force delete ${resourceName}: ${forceError}`, 'error'); cleanupState.warnings.push(`Manual cleanup required for ${resourceName}`); } } } } catch (error) { this.log(`⚠️ Resource type ${resourceType} might not exist or be accessible: ${error}`, 'warning'); } } /** * Remove finalizers with comprehensive retry logic and multiple strategies */ private async removeFinalizersWithRetry(resourceName: string, namespace: string, resource: any, cleanupState: EnhancedCleanupState): Promise<void> { const strategies = [ // Strategy 1: Standard finalizer patch async () => { await this.commandExecutor.kubectl(`patch ${resourceName} -n ${namespace} -p '{"metadata":{"finalizers":null}}' --type=merge`); return 'Standard finalizer patch'; }, // Strategy 2: Individual finalizer removal async () => { await this.commandExecutor.kubectl(`patch ${resourceName} -n ${namespace} -p '{"metadata":{"finalizers":[]}}' --type=merge`); return 'Empty finalizers array'; }, // Strategy 3: JSON patch approach async () => { await this.commandExecutor.kubectl(`patch ${resourceName} -n ${namespace} --type='json' -p='[{"op": "remove", "path": "/metadata/finalizers"}]'`); return 'JSON patch removal'; } ]; let successfulStrategy: string | null = null; for (const strategy of strategies) { try { const strategyName = await strategy(); cleanupState.recoveryActions.push(`Removed finalizers from ${resourceName} using ${strategyName}`); this.log(`✅ Successfully removed finalizers from ${resourceName} using ${strategyName}`); successfulStrategy = strategyName; break; } catch (error) { this.log(`⚠️ Strategy failed for ${resourceName}: ${error}`, 'warning'); continue; } } if (!successfulStrategy) { this.log(`❌ All finalizer removal strategies failed for ${resourceName}`, 'error'); cleanupState.warnings.push(`Could not remove finalizers from ${resourceName} - manual intervention required`); } } /** * Helper method to update cleanup phase status */ private updateCleanupPhaseStatus( cleanupState: EnhancedCleanupState, phaseName: string, status: 'pending' | 'running' | 'completed' | 'failed', error?: string ): void { const phase = cleanupState.phases[phaseName]; if (!phase) return; phase.status = status; if (error) { phase.errors.push(error); cleanupState.errors.push(error); } } /** * Perform pre-installation troubleshooting */ private async performPreInstallationTroubleshooting(): Promise<void> { this.log('🔍 Pre-installation troubleshooting...'); // Check for common pre-installation issues // Validate prerequisites // Check for conflicting installations } /** * Perform post-installation validation */ private async performPostInstallationValidation(options: InstallationOptions): Promise<void> { this.log('✅ Performing comprehensive post-installation validation...'); const namespace = options.namespace || 'arc-systems'; const validationErrors: string[] = []; const validationWarnings: string[] = []; try { // 1. Validate namespace exists and is active const namespaceStatus = await this.validateNamespaceHealth(namespace); if (!namespaceStatus.healthy) { validationErrors.push(`Namespace ${namespace} is not healthy: ${namespaceStatus.reason}`); } // 2. Validate ARC controller deployment const controllerStatus = await this.validateControllerDeployment(namespace); if (!controllerStatus.healthy) { validationErrors.push(`ARC controller deployment failed: ${controllerStatus.reason}`); } // 3. Validate webhook configurations const webhookStatus = await this.validateWebhookConfigurations(); if (!webhookStatus.healthy) { validationWarnings.push(`Webhook validation issues: ${webhookStatus.reason}`); } // 4. Validate CRDs are properly installed const crdStatus = await this.validateArcCRDs(); if (!crdStatus.healthy) { validationErrors.push(`CRD validation failed: ${crdStatus.reason}`); } // 5. Validate Helm release state const helmStatus = await this.validateHelmRelease(namespace); if (!helmStatus.healthy) { validationErrors.push(`Helm release validation failed: ${helmStatus.reason}`); } // 6. Validate GitHub connectivity const githubStatus = await this.validateGitHubConnectivity(); if (!githubStatus.healthy) { validationWarnings.push(`GitHub connectivity issues: ${githubStatus.reason}`); } // 7. Check for stuck resources or finalizers const stuckResourcesStatus = await this.validateNoStuckResources(namespace); if (!stuckResourcesStatus.healthy) { validationWarnings.push(`Stuck resources detected: ${stuckResourcesStatus.reason}`); } // Report validation results if (validationErrors.length > 0) { const errorMessage = `Post-installation validation failed:\n${validationErrors.map(e => `❌ ${e}`).join('\n')}`; if (validationWarnings.length > 0) { this.log(`Warnings during validation:\n${validationWarnings.map(w => `⚠️ ${w}`).join('\n')}`, 'warning'); } throw new Error(errorMessage); } if (validationWarnings.length > 0) { this.log(`⚠️ Validation completed with warnings:\n${validationWarnings.map(w => `⚠️ ${w}`).join('\n')}`, 'warning'); } else { this.log('✅ All post-installation validation checks passed'); } } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); this.log(`❌ Post-installation validation failed: ${errorMessage}`, 'error'); throw error; } } /** * Validate namespace health */ private async validateNamespaceHealth(namespace: string): Promise<{healthy: boolean, reason: string}> { try { const result = await this.commandExecutor.kubectl(`get namespace ${namespace} -o json`, { timeout: 5000 }); const nsData = JSON.parse(result.stdout); if (nsData.status?.phase === 'Terminating') { return { healthy: false, reason: 'Namespace is in Terminating state' }; } if (nsData.status?.phase !== 'Active') { return { healthy: false, reason: `Namespace phase is ${nsData.status?.phase || 'unknown'}` }; } return { healthy: true, reason: 'Namespace is active and healthy' }; } catch (error) { return { healthy: false, reason: `Failed to check namespace: ${error}` }; } } /** * Validate ARC controller deployment */ private async validateControllerDeployment(namespace: string): Promise<{healthy: boolean, reason: string}> { try { // Check deployment exists and is ready const deployResult = await this.commandExecutor.kubectl(`get deployment arc-actions-runner-controller -n ${namespace} -o json`, { timeout: 5000 }); const deployment = JSON.parse(deployResult.stdout); const replicas = deployment.spec?.replicas || 0; const readyReplicas = deployment.status?.readyReplicas || 0; if (readyReplicas !== replicas) { return { healthy: false, reason: `Deployment not ready: ${readyReplicas}/${replicas} replicas ready` }; } // Check pods are running const podsResult = await this.commandExecutor.kubectl(`get pods -n ${namespace} -l app.kubernetes.io/name=actions-runner-controller -o json`, { timeout: 5000 }); const pods = JSON.parse(podsResult.stdout); for (const pod of pods.items || []) { if (pod.status?.phase !== 'Running') { return { healthy: false, reason: `Pod ${pod.metadata?.name} is in ${pod.status?.phase} state` }; } // Check for image pull errors or crash loops for (const containerStatus of pod.status?.containerStatuses || []) { if (containerStatus.state?.waiting?.reason === 'ImagePullBackOff') { return { healthy: false, reason: `Pod ${pod.metadata?.name} has ImagePullBackOff error` }; } if (containerStatus.restartCount > 5) { return { healthy: false, reason: `Pod ${pod.metadata?.name} is crash looping (${containerStatus.restartCount} restarts)` }; } } } return { healthy: true, reason: 'Controller deployment is healthy and running' }; } catch (error) { return { healthy: false, reason: `Failed to validate controller deployment: ${error}` }; } } /** * Validate webhook configurations */ private async validateWebhookConfigurations(): Promise<{healthy: boolean, reason: string}> { try { // Check validating webhooks const validatingResult = await this.commandExecutor.kubectl('get validatingwebhookconfigurations -o json', { timeout: 5000 }); const validating = JSON.parse(validatingResult.stdout); const arcValidatingWebhooks = validating.items?.filter((wh: any) => wh.metadata?.name?.includes('actions-runner-controller') ) || []; // Check mutating webhooks const mutatingResult = await this.commandExecutor.kubectl('get mutatingwebhookconfigurations -o json', { timeout: 5000 }); const mutating = JSON.parse(mutatingResult.stdout); const arcMutatingWebhooks = mutating.items?.filter((wh: any) => wh.metadata?.name?.includes('actions-runner-controller') ) || []; if (arcValidatingWebhooks.length === 0 && arcMutatingWebhooks.length === 0) { return { healthy: false, reason: 'No ARC webhook configurations found' }; } // Check if webhooks have proper service endpoints for (const webhook of [...arcValidatingWebhooks, ...arcMutatingWebhooks]) { for (const wh of webhook.webhooks || []) { if (wh.clientConfig?.service) { const serviceName = wh.clientConfig.service.name; const serviceNamespace = wh.clientConfig.service.namespace; try { await this.commandExecutor.kubectl(`get service ${serviceName} -n ${serviceNamespace}`, { timeout: 3000 }); } catch (serviceError) { return { healthy: false, reason: `Webhook service ${serviceName} not found in namespace ${serviceNamespace}` }; } } } } return { healthy: true, reason: 'Webhook configurations are properly set up' }; } catch (error) { return { healthy: false, reason: `Failed to validate webhooks: ${error}` }; } } /** * Validate ARC CRDs */ private async validateArcCRDs(): Promise<{healthy: boolean, reason: string}> { try { // Note: The current ARC Helm chart (0.23.7) still uses legacy CRDs // This matches what's actually installed by the chart const expectedCRDs = [ 'runners.actions.summerwind.dev', 'runnerdeployments.actions.summerwind.dev', 'horizontalrunnerautoscalers.actions.summerwind.dev' // Note: Modern GitHub CRDs (actions.github.com) are not yet in the stable chart ]; const missingCRDs: string[] = []; for (const crd of expectedCRDs) { try { await this.commandExecutor.kubectl(`get crd ${crd}`, { timeout: 3000 }); } catch (error) { missingCRDs.push(crd); } } if (missingCRDs.length > 0) { return { healthy: false, reason: `Missing CRDs: ${missingCRDs.join(', ')}` }; } return { healthy: true, reason: 'All required CRDs are installed' }; } catch (error) { return { healthy: false, reason: `Failed to validate CRDs: ${error}` }; } } /** * Validate Helm release state */ private async validateHelmRelease(namespace: string): Promise<{healthy: boolean, reason: string}> { try { const result = await this.commandExecutor.helm(`list -n ${namespace} -o json`, { timeout: 5000 }); const releases = JSON.parse(result.stdout); const arcRelease = releases.find((release: any) => release.name === 'arc'); if (!arcRelease) { return { healthy: false, reason: 'ARC Helm release not found' }; } if (arcRelease.status !== 'deployed') { return { healthy: false, reason: `Helm release status is ${arcRelease.status}, expected 'deployed'` }; } return { healthy: true, reason: 'Helm release is deployed successfully' }; } catch (error) { return { healthy: false, reason: `Failed to validate Helm release: ${error}` }; } } /** * Validate GitHub connectivity */ private async validateGitHubConnectivity(): Promise<{healthy: boolean, reason: string}> { try { if (!process.env.GITHUB_TOKEN) { return { healthy: false, reason: 'GITHUB_TOKEN environment variable not set' }; } const result = await this.commandExecutor.execute('curl', `-s -f -H "Authorization: token ${process.env.GITHUB_TOKEN}" https://api.github.com/user`, { timeout: 10000 }); const userData = JSON.parse(result.stdout); if (!userData.login) { return { healthy: false, reason: 'Invalid GitHub token or API response' }; } return { healthy: true, reason: `GitHub connectivity verified for user: ${userData.login}` }; } catch (error) { return { healthy: false, reason: `GitHub connectivity failed: ${error}` }; } } /** * Check for stuck resources */ private async validateNoStuckResources(namespace: string): Promise<{healthy: boolean, reason: string}> { try { const issues: string[] = []; // Check for pods in non-running states const podsResult = await this.commandExecutor.kubectl(`get pods -n ${namespace} -o json`, { timeout: 5000 }); const pods = JSON.parse(podsResult.stdout); for (const pod of pods.items || []) { if (pod.status?.phase === 'Pending' && pod.metadata?.creationTimestamp) { const created = new Date(pod.metadata.creationTimestamp); const age = Date.now() - created.getTime(); if (age > 300000) { // 5 minutes issues.push(`Pod ${pod.metadata.name} stuck in Pending state for ${Math.round(age/60000)} minutes`); } } } // Check for resources with finalizers in terminating state try { const runnersResult = await this.commandExecutor.kubectl(`get runners -n ${namespace} -o json`, { timeout: 5000 }); const runners = JSON.parse(runnersResult.stdout); for (const runner of runners.items || []) { if (runner.metadata?.deletionTimestamp && runner.metadata?.finalizers?.length > 0) { issues.push(`Runner ${runner.metadata.name} stuck with finalizers: ${runner.metadata.finalizers.join(', ')}`); } } } catch (runnerError) { // Runners might not exist yet, that's OK } if (issues.length > 0) { return { healthy: false, reason: issues.join('; ') }; } return { healthy: true, reason: 'No stuck resources detected' }; } catch (error) { return { healthy: false, reason: `Failed to check for stuck resources: ${error}` }; } } /** * Analyze installation failure */ private async analyzeInstallationFailure(error: any, options: InstallationOptions): Promise<TroubleshootingResult[]> { this.log('🔍 Analyzing installation failure...'); const troubleshootingResults: TroubleshootingResult[] = []; const errorMessage = error instanceof Error ? error.message : String(error); // Check error against known patterns for (const scenario of this.troubleshootingScenarios) { if (scenario.pattern.test(errorMessage)) { troubleshootingResults.push({ issue: scenario.name, severity: scenario.severity, description: scenario.description, solutions: scenario.solution, autoFixApplied: false }); } } return troubleshootingResults; } /** * Attempt installation recovery */ private async attemptInstallationRecovery(troubleshootingResults: TroubleshootingResult[], options: InstallationOptions): Promise<boolean> { this.log('🛠️ Attempting installation recovery...'); let recoverySuccessful = true; for (const result of troubleshootingResults) { const scenario = this.troubleshootingScenarios.find(s => s.name === result.issue); if (scenario && scenario.autoFix) { try { const fixSuccessful = await scenario.autoFix(); result.autoFixApplied = true; result.autoFixSuccessful = fixSuccessful; if (!fixSuccessful) { recoverySuccessful = false; } } catch (error) { this.log(`❌ Recovery failed for ${result.issue}: ${error}`, 'error'); recoverySuccessful = false; } } } return recoverySuccessful; } // Analysis helper methods /** * Analyze basic cluster information */ private async analyzeClusterBasics(): Promise<any> { try { const clusterInfo = await this.commandExecutor.kubectl('cluster-info'); const nodes = await this.commandExecutor.kubectl('get nodes -o json'); // Parse plain text cluster info for basic details const clusterText = clusterInfo.stdout; const nodesData = JSON.parse(nodes.stdout); return { version: 'Available', // cluster-info provides text output, version available via other means nodeCount: nodesData.items?.length || 0, readyNodes: nodesData.items?.filter((node: any) => node.status?.conditions?.some((c: any) => c.type === 'Ready' && c.status === 'True') ).length || 0, totalCpu: 'Unknown', // Would need resource calculation totalMemory: 'Unknown', // Would need resource calculation storageClasses: [] // Would need storage class detection }; } catch (error) { return { version: 'Unknown', nodeCount: 0, readyNodes: 0, totalCpu: 'Unknown', totalMemory: 'Unknown', storageClasses: [] }; } } /** * Check for existing ARC resources */ private async checkExistingArcResources(): Promise<any> { const existingResources: any = {}; try { const namespaces = await this.commandExecutor.kubectl('get namespaces -o json'); const nsData = JSON.parse(namespaces.stdout); for (const ns of nsData.items || []) { const nsName = ns.metadata?.name; if (nsName?.includes('arc') || nsName?.includes('runner') || nsName?.includes('actions')) { existingResources[nsName] = await this.getResourceCountsInNamespace(nsName); } } } catch (error) { this.log(`⚠️ Could not check existing ARC resources: ${error}`, 'warning'); } return existingResources; } /** * Get resource counts in a specific namespace */ private async getResourceCountsInNamespace(namespace: string): Promise<any> { const counts: any = {}; const resourceTypes = ['pods', 'services', 'deployments', 'runners', 'runnerdeployments']; for (const resourceType of resourceTypes) { try { const result = await this.commandExecutor.kubectl(`get ${resourceType} -n ${namespace} --no-headers`); counts[resourceType] = result.stdout.split('\n').filter(line => line.trim()).length; } catch (error) { counts[resourceType] = 0; } } return counts; } /** * Check installation prerequisites */ private async checkInstallationPrerequisites(): Promise<any> { const prerequisites: any = {}; // Check kubectl connectivity try { await this.commandExecutor.kubectl('cluster-info'); prerequisites.kubectl = { passed: true, message: 'kubectl connectivity verified' }; } catch (error) { prerequisites.kubectl = { passed: false, message: 'kubectl connectivity failed' }; } // Check Helm availability try { await this.commandExecutor.helm('version'); prerequisites.helm = { passed: true, message: 'Helm available' }; } catch (error) { prerequisites.helm = { passed: false, message: 'Helm not available' }; } // Check GitHub token if (process.env.GITHUB_TOKEN) { prerequisites.githubToken = { passed: true, message: 'GitHub token configured' }; } else { prerequisites.githubToken = { passed: false, message: 'GitHub token not configured' }; } return prerequisites; } /** * Generate installation recommendations */ private async generateInstallationRecommendations(analysis: any): Promise<string[]> { const recommendations: string[] = []; if (!analysis.prerequisites?.githubToken?.passed) { recommendations.push('Configure GitHub token before proceeding'); } if (!analysis.prerequisites?.helm?.passed) { recommendations.push('Install Helm before proceeding with ARC installation'); } if (Object.keys(analysis.existingArc).length > 0) { recommendations.push('Clean up existing ARC installation before installing new version'); } if (analysis.cluster?.nodeCount < 2) { recommendations.push('Consider adding more nodes for high availability'); } return recommendations; } /** * Assess installation risks */ private async assessInstallationRisks(analysis: any): Promise<any> { let installationRisk = 'Low'; let dataLossRisk = 'None'; let downtimeRisk = 'Minimal'; if (Object.keys(analysis.existingArc).length > 0) { installationRisk = 'Medium'; dataLossRisk = 'Low'; } if (!analysis.prerequisites?.githubToken?.passed) { installationRisk = 'High'; } return { installationRisk, dataLossRisk, downtimeRisk }; } /** * Detect all ARC resources for analysis */ private async detectAllArcResourcesForAnalysis(): Promise<any> { return await this.checkExistingArcResources(); // Reuse existing method } /** * Check ARC dependencies */ private async checkArcDependencies(): Promise<any> { const dependencies: any = {}; try { // Check for running workflows const pods = await this.commandExecutor.kubectl('get pods -A -o json'); const podsData = JSON.parse(pods.stdout); const runningWorkflows = podsData.items?.filter((pod: any) => pod.metadata?.labels?.['actions.github.com/runner'] || pod.metadata?.name?.includes('runner') ).length || 0; dependencies.runningWorkflows = runningWorkflows; dependencies.hasActiveRunners = runningWorkflows > 0; } catch (error) { dependencies.runningWorkflows = 0; dependencies.hasActiveRunners = false; } return dependencies; } /** * Generate cleanup recommendations */ private async generateCleanupRecommendations(analysis: any): Promise<string[]> { const recommendations: string[] = []; if (analysis.dependencies?.hasActiveRunners) { recommendations.push('Wait for running workflows to complete before cleanup'); } if (Object.keys(analysis.existingArc).length > 5) { recommendations.push('Large ARC installation detected - consider phased cleanup approach'); } recommendations.push('Backup important configurations before cleanup'); recommendations.push('Verify no critical workloads depend on ARC'); return recommendations; } /** * Assess cleanup risks */ private async assessCleanupRisks(analysis: any): Promise<any> { let installationRisk = 'Low'; let dataLossRisk = 'Low'; let downtimeRisk = 'Minimal'; if (analysis.dependencies?.hasActiveRunners) { installationRisk = 'Medium'; downtimeRisk = 'Medium'; } const totalResources = Object.values(analysis.existingArc).reduce((sum: number, ns: any) => { return sum + Object.values(ns).reduce((nsSum: number, count: any) => nsSum + (count || 0), 0); }, 0); if (totalResources > 10) { installationRisk = 'High'; dataLossRisk = 'Medium'; } return { installationRisk, dataLossRisk, downtimeRisk }; } /** * Categorize target resources for cleanup */ private async categorizeTargetResources(clusterAnalysis: any): Promise<any> { const categories: any = { 'Critical Resources': [], 'Standard Resources': [], 'Configuration': [], 'Secrets': [] }; if (clusterAnalysis.existingArc) { Object.entries(clusterAnalysis.existingArc).forEach(([namespace, resources]: [string, any]) => { Object.entries(resources).forEach(([resourceType, count]: [string, any]) => { if (count > 0) { if (resourceType.includes('runner')) { categories['Critical Resources'].push(`${count} ${resourceType} in ${namespace}`); } else if (resourceType.includes('secret')) { categories['Secrets'].push(`${count} ${resourceType} in ${namespace}`); } else if (resourceType.includes('config')) { categories['Configuration'].push(`${count} ${resourceType} in ${namespace}`); } else { categories['Standard Resources'].push(`${count} ${resourceType} in ${namespace}`); } } }); }); } return categories; } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/tsviz/arc-config-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server