Skip to main content
Glama
health.py (8.41 kB)
"""
Health Domain Models

This module contains all models related to Ceph cluster health,
extracted from the original ceph_models.py and enhanced.
"""

from datetime import datetime
from enum import Enum

from pydantic import BaseModel, Field

from ceph_mcp.models.base import BaseComponentInfo

# Points removed from the 0-100 health score for each check of the
# corresponding severity (see ClusterHealth.get_health_score).
_CRITICAL_PENALTY = 30
_WARNING_PENALTY = 10


class HealthStatus(str, Enum):
    """
    Standardized overall health status values.

    Using an enum restricts the model to known status strings and gives
    status-specific logic a single set of names to compare against.
    """

    OK = "HEALTH_OK"
    WARN = "HEALTH_WARN"
    ERR = "HEALTH_ERR"


class HealthCheckSeverity(str, Enum):
    """Severity levels for individual health checks."""

    INFO = "HEALTH_INFO"
    WARN = "HEALTH_WARN"
    ERR = "HEALTH_ERR"


# Sort weight per severity (higher = more urgent). Built once at import
# time rather than on every HealthCheck.get_priority_score() call.
_SEVERITY_PRIORITY = {
    HealthCheckSeverity.ERR: 100,
    HealthCheckSeverity.WARN: 50,
    HealthCheckSeverity.INFO: 10,
}

# Highest score a cluster may report for a given overall status. A
# non-OK status implies at least one issue of that severity even when
# the `checks` list is empty or incomplete, so the ceiling is "perfect
# minus one penalty of that severity".
_STATUS_SCORE_CEILING = {
    HealthStatus.OK: 100,
    HealthStatus.WARN: 100 - _WARNING_PENALTY,
    HealthStatus.ERR: 100 - _CRITICAL_PENALTY,
}


class HealthCheck(BaseModel):
    """
    Individual health check information.

    Holds the type, severity, summary, details and affected-component
    count of a single health check.
    """

    type: str = Field(..., description="Health check type")
    severity: HealthCheckSeverity = Field(description="Severity level of this check")
    summary: str = Field(description="Human-readable summary of the issue")
    details: str = Field(default="", description="Detailed information about the issue")
    count: int = Field(default=0, description="Number of affected components")

    def is_critical(self) -> bool:
        """Return True when this check has ERR severity."""
        return self.severity == HealthCheckSeverity.ERR

    def is_warning(self) -> bool:
        """Return True when this check has WARN severity."""
        return self.severity == HealthCheckSeverity.WARN

    def get_priority_score(self) -> int:
        """
        Return a numeric priority for sorting (higher = more urgent).

        ERR maps to 100, WARN to 50, INFO to 10; any other value maps to 0.
        """
        return _SEVERITY_PRIORITY.get(self.severity, 0)


class ClusterHealth(BaseComponentInfo):
    """
    Represents the overall health status of a Ceph cluster.

    Combines the overall status with the individual health checks and
    offers helpers to filter, rank, score and summarize them.
    """

    name: str = Field(
        default="cluster_health", description="Identifier for the Component"
    )
    cluster_fsid: str = Field(description="FSID of Ceph cluster")
    status: HealthStatus = Field(description="Overall cluster health status")
    checks: list[HealthCheck] = Field(
        default_factory=list, description="Individual health checks"
    )
    overall_status_description: str | None = Field(
        None, description="Detailed explanation of current status"
    )
    # NOTE: datetime.now without a tz argument yields a naive local-time
    # timestamp; kept as-is so existing comparisons keep working.
    collected_at: datetime = Field(
        default_factory=datetime.now,
        description="Timestamp when metrics were collected",
    )

    def is_healthy(self) -> bool:
        """Return True when the overall status is HEALTH_OK."""
        return self.status == HealthStatus.OK

    def has_warnings(self) -> bool:
        """Return True when the status is WARN or any check is a warning."""
        return self.status == HealthStatus.WARN or any(
            check.is_warning() for check in self.checks
        )

    def has_errors(self) -> bool:
        """Return True when the status is ERR or any check is critical."""
        return self.status == HealthStatus.ERR or any(
            check.is_critical() for check in self.checks
        )

    def get_critical_checks(self) -> list[HealthCheck]:
        """Get all critical health checks, in their original order."""
        return [check for check in self.checks if check.is_critical()]

    def get_warning_checks(self) -> list[HealthCheck]:
        """Get all warning health checks, in their original order."""
        return [check for check in self.checks if check.is_warning()]

    def get_checks_by_priority(self) -> list[HealthCheck]:
        """Get health checks sorted by priority (most urgent first)."""
        return sorted(self.checks, key=lambda x: x.get_priority_score(), reverse=True)

    def get_health_score(self) -> int:
        """
        Get a numeric health score (0-100, where 100 is perfect health).

        A HEALTH_OK cluster always scores 100. Otherwise each critical
        check costs 30 points and each warning check 10 points, the
        result is floored at 0, and it is additionally capped by the
        overall status (90 for WARN, 70 for ERR) so that a degraded
        cluster whose `checks` list is empty or incomplete can never
        report a perfect score.
        """
        if self.status == HealthStatus.OK:
            return 100

        # Deduct points based on check severity; INFO checks cost nothing.
        deductions = 0
        for check in self.checks:
            if check.is_critical():
                deductions += _CRITICAL_PENALTY
            elif check.is_warning():
                deductions += _WARNING_PENALTY

        # The cap only bites when the checks under-report the status;
        # with at least one check of the status's severity it is a no-op.
        ceiling = _STATUS_SCORE_CEILING.get(self.status, 100)
        return max(0, min(100 - deductions, ceiling))

    def get_recommendations(self) -> list[str]:
        """
        Get actionable recommendations based on current health status.

        A healthy cluster yields a single "continue monitoring" line.
        Otherwise the list contains a headline plus up to three critical
        summaries, a headline plus up to two warning summaries, and a
        closing monitoring reminder.
        """
        recommendations = []

        if self.is_healthy():
            recommendations.append("Cluster is healthy - continue regular monitoring")
            return recommendations

        critical_checks = self.get_critical_checks()
        warning_checks = self.get_warning_checks()

        if critical_checks:
            recommendations.append(
                f"🔴 Address {len(critical_checks)} critical issue(s) immediately"
            )
            # Show at most the first 3 critical issues
            recommendations.extend(
                f" - {check.summary}" for check in critical_checks[:3]
            )

        if warning_checks:
            recommendations.append(
                f"🟡 Investigate {len(warning_checks)} warning(s) when possible"
            )
            # Show at most the first 2 warnings
            recommendations.extend(
                f" - {check.summary}" for check in warning_checks[:2]
            )

        recommendations.append("📊 Monitor cluster status regularly for changes")
        return recommendations

    def get_executive_summary(self) -> str:
        """
        Get a one-line executive summary suitable for dashboards.

        The line carries a status emoji, either a "healthy" message or
        the total number of checks, and the health score.
        """
        status_emoji = {
            HealthStatus.OK: "🟢",
            HealthStatus.WARN: "🟡",
            HealthStatus.ERR: "🔴",
        }
        emoji = status_emoji.get(self.status, "⚪")
        score = self.get_health_score()

        if self.is_healthy():
            return f"{emoji} Cluster is healthy (Score: {score}/100)"

        # Counts every check regardless of severity.
        issues = len(self.checks)
        return (
            f"{emoji} Cluster has {issues} issue(s) requiring attention "
            f"(Score: {score}/100)"
        )


class ClusterCapacity(BaseModel):
    """
    Cluster capacity information.

    This model represents the overall storage capacity and usage
    statistics for the entire Ceph cluster. Byte counts are stored as
    integers; the helpers convert them using binary units
    (1 GB = 1024**3 bytes, 1 KB = 1024 bytes).
    """

    total_avail_bytes: int = Field(
        ..., description="Total available bytes in the cluster"
    )
    total_bytes: int = Field(..., description="Total bytes in the cluster")
    total_used_raw_bytes: int = Field(
        ..., description="Total raw bytes used in the cluster"
    )
    total_objects: int = Field(
        ..., description="Total number of objects in the cluster"
    )
    total_pool_bytes_used: int = Field(
        ..., description="Total bytes used across all pools"
    )
    average_object_size: int = Field(..., description="Average object size in bytes")

    def get_total_capacity_gb(self) -> float:
        """Get total capacity in GB, rounded to 2 decimals."""
        return round(self.total_bytes / (1024**3), 2)

    def get_used_capacity_gb(self) -> float:
        """Get used raw capacity in GB, rounded to 2 decimals."""
        return round(self.total_used_raw_bytes / (1024**3), 2)

    def get_available_capacity_gb(self) -> float:
        """Get available capacity in GB, rounded to 2 decimals."""
        return round(self.total_avail_bytes / (1024**3), 2)

    def get_pool_bytes_used_gb(self) -> float:
        """Get total pool bytes used in GB, rounded to 2 decimals."""
        return round(self.total_pool_bytes_used / (1024**3), 2)

    def get_usage_percentage(self) -> float:
        """
        Get cluster usage percentage, rounded to 1 decimal.

        Returns 0.0 when the total capacity is zero to avoid dividing
        by zero.
        """
        if self.total_bytes == 0:
            return 0.0
        return round((self.total_used_raw_bytes / self.total_bytes) * 100, 1)

    def get_average_object_size_kb(self) -> float:
        """Get average object size in KB, rounded to 2 decimals."""
        return round(self.average_object_size / 1024, 2)

    def get_capacity_summary(self) -> str:
        """Get a human-readable capacity summary (used, total, percent)."""
        return (
            f"{self.get_used_capacity_gb()}GB used of "
            f"{self.get_total_capacity_gb()}GB total "
            f"({self.get_usage_percentage()}% used)"
        )

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/rajmohanram/ceph-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.