cpu_memory_health
Monitors CPU, memory, and swap usage, kernel OOM-kill count over 24 hours, and load averages. Detects OOM-imminent or triggered conditions to assess health.
Instructions
CPU% + memory% + swap% snapshot, kernel OOM-kill count over 24h, load averages. Each component gets a HealthLevel; CRITICAL when OOM-imminent or already triggered.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| No arguments | | | |
Implementation Reference
- src/openclaw_health_mcp/server.py:59-67 (registration) Tool 'cpu_memory_health' is registered in the list_tools() function with its name, description, and inputSchema.
Tool( name="cpu_memory_health", description=( "CPU% + memory% + swap% snapshot, kernel OOM-kill count over 24h, " "load averages. Each component gets a HealthLevel; CRITICAL when " "OOM-imminent or already triggered." ), inputSchema={"type": "object", "properties": {}, "required": []}, ), - The call_tool() handler for 'cpu_memory_health' delegates to backend.get_resource_metrics() and serializes the result.
if name == "cpu_memory_health": return _serialize(await backend.get_resource_metrics()) - ResourceMetrics Pydantic model — the schema/return type for the cpu_memory_health tool (CPU%, memory%, swap%, OOM count, load averages, health levels).
class ResourceMetrics(BaseModel):
    """CPU + memory + swap snapshot.

    Holds one point-in-time reading of host resource usage together with
    the health levels derived from it. Every measurement field is optional
    and defaults to ``None``; the OOM counter defaults to ``0`` and every
    health level defaults to ``HealthLevel.UNKNOWN``, so a bare
    ``ResourceMetrics()`` represents "nothing could be measured".
    """

    # Pydantic config: frozen=True makes instances immutable after creation.
    model_config = ConfigDict(frozen=True)

    # Utilisation percentages and absolute sizes (sizes per the `_mb` suffix).
    cpu_percent: float | None = None
    memory_percent: float | None = None
    memory_used_mb: float | None = None
    memory_total_mb: float | None = None
    swap_percent: float | None = None
    swap_used_mb: float | None = None

    oom_events_24h: int = 0
    """Kernel OOM-killer invocations in the last 24h (read from journalctl/dmesg)."""

    # System load averages over 1, 5 and 15 minutes; None when unavailable.
    load_average_1m: float | None = None
    load_average_5m: float | None = None
    load_average_15m: float | None = None

    # Per-component and combined health classifications.
    cpu_health: HealthLevel = HealthLevel.UNKNOWN
    memory_health: HealthLevel = HealthLevel.UNKNOWN
    overall_health: HealthLevel = HealthLevel.UNKNOWN

    # Free-form human-readable remarks; a fresh list per instance.
    notes: list[str] = Field(default_factory=list)
- LinuxProcBackend.get_resource_metrics() — real implementation using psutil for CPU/memory/swap/load and _count_oom_events_24h() for OOM detection, with health classification thresholds.
async def get_resource_metrics(self) -> ResourceMetrics: if psutil is None: # pragma: no cover # type: ignore[unreachable] return ResourceMetrics( # type: ignore[unreachable] overall_health=HealthLevel.UNKNOWN, notes=["psutil not installed."], ) try: cpu = psutil.cpu_percent(interval=0.1) mem = psutil.virtual_memory() swap = psutil.swap_memory() except Exception: # noqa: BLE001 - psutil errors are platform-specific; degrade gracefully return ResourceMetrics( overall_health=HealthLevel.UNKNOWN, notes=["psutil call failed; backend cannot read metrics."], ) load_1m = load_5m = load_15m = None if hasattr(psutil, "getloadavg"): with contextlib.suppress(OSError, AttributeError): load_1m, load_5m, load_15m = psutil.getloadavg() oom = _count_oom_events_24h() cpu_h = _classify_pct(cpu, degraded=_CPU_DEGRADED_PCT, critical=_CPU_CRITICAL_PCT) mem_h = _classify_pct(mem.percent, degraded=_MEM_DEGRADED_PCT, critical=_MEM_CRITICAL_PCT) swap_h = _classify_pct(swap.percent, degraded=_SWAP_DEGRADED_PCT, critical=_SWAP_CRITICAL_PCT) oom_h = HealthLevel.CRITICAL if oom > 0 else HealthLevel.HEALTHY overall = _max_level(cpu_h, mem_h, swap_h, oom_h) notes: list[str] = [] if mem.percent >= _MEM_DEGRADED_PCT: notes.append(f"Memory at {mem.percent:.1f}% — watch for OOM under spike load.") if swap.percent >= _SWAP_DEGRADED_PCT: notes.append(f"Swap at {swap.percent:.1f}% — system may be paging heavily.") if oom > 0: notes.append( f"{oom} OOM kill event(s) detected in last 24h via journalctl/dmesg." ) return ResourceMetrics( cpu_percent=cpu, memory_percent=mem.percent, memory_used_mb=mem.used / (1024 * 1024), memory_total_mb=mem.total / (1024 * 1024), swap_percent=swap.percent, swap_used_mb=swap.used / (1024 * 1024), oom_events_24h=oom, load_average_1m=load_1m, load_average_5m=load_5m, load_average_15m=load_15m, cpu_health=cpu_h, memory_health=mem_h, overall_health=overall, notes=notes, ) - MockBackend.get_resource_metrics() — mock implementation providing sample data for development/ testing.
async def get_resource_metrics(self) -> ResourceMetrics:
    """Return a fixed sample snapshot for development and tests.

    Nothing is measured: every value below is a hard-coded constant
    describing a host with healthy CPU and degraded memory, and therefore
    a degraded overall level, with no OOM events.
    """
    memory_note = (
        "Memory at 78% — elevated for a 2GB VPS. Watch for OOM if a cron-heavy hour spikes load."
    )
    sample = ResourceMetrics(
        # Utilisation
        cpu_percent=42.7,
        memory_percent=78.3,
        memory_used_mb=1602.0,
        memory_total_mb=2048.0,
        swap_percent=12.5,
        swap_used_mb=128.0,
        # OOM history and load
        oom_events_24h=0,
        load_average_1m=1.42,
        load_average_5m=1.28,
        load_average_15m=1.12,
        # Classification
        cpu_health=HealthLevel.HEALTHY,
        memory_health=HealthLevel.DEGRADED,
        overall_health=HealthLevel.DEGRADED,
        notes=[memory_note],
    )
    return sample