check_docker_health

Monitor Docker service health and status on remote servers by checking container states, resource usage, and service availability through SSH connections.

Instructions

检查Docker服务的健康状态和基本信息

Input Schema

TableJSON Schema

Name	Required	Description	Default
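`hostname`	Yes	Host name or address of the remote server to reach over SSH.	—
`username`	Yes	SSH login user name.	—
`password`	No	SSH password.	`""`
`port`	No	SSH port.	`22`
`timeout`	No	Timeout passed to the SSH connection.	`30`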

Implementation Reference

server_monitor/tools/docker_tools.py:363-459 (handler)

Primary handler implementation for check_docker_health tool. Performs comprehensive Docker health checks via SSH: installation check, version, system info, disk usage, service status, running containers count, and tests container runtime with hello-world.

@handle_exceptions
def check_docker_health(
    hostname: str,
    username: str,
    password: str = "",
    port: int = 22,
    timeout: int = 30
) -> dict:
    """Check the health status and basic information of the Docker service.

    Opens an SSH session to the target host, verifies that the ``docker``
    binary is present, collects version / info / disk usage / systemd status /
    running-container count, and finally tries to run the ``hello-world``
    image to confirm that containers can actually be started.

    The reported ``health_status`` is:

    * ``"unhealthy"`` when ``systemctl is-active docker`` does not print
      ``active``;
    * ``"degraded"`` when the service is active but the ``hello-world``
      container did not print its greeting;
    * ``"healthy"`` otherwise.

    Args:
        hostname: Remote host, forwarded to ``SSHManager``.
        username: SSH login name, forwarded to ``SSHManager``.
        password: SSH password, forwarded to ``SSHManager``.
        port: SSH port, forwarded to ``SSHManager``.
        timeout: Timeout value forwarded to ``SSHManager`` (presumably
            seconds -- confirm against ``SSHManager``).

    Returns:
        The ``dict()`` form of an ``InspectionResult``. On success ``status``
        is ``"success"``, ``data`` holds ``docker_info`` and
        ``health_status``, and ``raw_outputs`` holds the collected command
        output. When Docker is not installed or any step raises, ``status``
        is ``"error"`` and ``error`` carries the message.
    """
    result = InspectionResult()

    try:
        with SSHManager(hostname, username, password, port, timeout) as ssh:
            # Bail out early if the docker binary is not on the remote PATH.
            stdin, stdout, stderr = ssh.exec_command("command -v docker")
            if not stdout.read().strip():
                result.status = "error"
                result.error = "Docker未安装在目标服务器上"
                return result.dict()

            # Commands whose output is collected, keyed by a short name.
            cmds = {
                "version": "docker version --format '{{.Server.Version}}'",
                "info": "docker info --format '{{.ServerVersion}}|{{.ContainersRunning}}/{{.Containers}}|{{.Images}}|{{.Driver}}|{{.MemTotal}}'",
                "system_df": "docker system df",
                "service_status": "systemctl is-active docker",
                "docker_ps": "docker ps --quiet | wc -l"
            }

            outputs = {}
            for key, cmd in cmds.items():
                stdin, stdout, stderr = ssh.exec_command(cmd)
                outputs[key] = stdout.read().decode('utf-8').strip()
                error = stderr.read().decode('utf-8').strip()
                # Surface stderr only when the command produced no stdout.
                if error and not outputs[key]:
                    outputs[key] = f"Error: {error}"

            docker_info = {}
            health_status = "healthy"

            # Version string exactly as reported by the remote command.
            docker_info["version"] = outputs["version"]

            # The "info" command emits five '|'-separated fields; anything
            # shorter (e.g. an error message) is left unparsed.
            info_parts = outputs["info"].split('|')
            if len(info_parts) >= 5:
                docker_info["server_version"] = info_parts[0]
                # Second field is "<running>/<total>". partition() keeps a
                # malformed field from raising and aborting the whole check.
                running, _, total = info_parts[1].partition('/')
                docker_info["running_containers"] = int(running) if running.isdigit() else 0
                docker_info["total_containers"] = int(total) if total.isdigit() else 0
                docker_info["images"] = int(info_parts[2]) if info_parts[2].isdigit() else 0
                docker_info["storage_driver"] = info_parts[3]
                docker_info["memory_total"] = info_parts[4]

            # Disk usage is reported as the raw "docker system df" text.
            docker_info["disk_usage"] = outputs["system_df"]

            # An inactive systemd unit is the most severe finding.
            docker_info["service_active"] = outputs["service_status"] == "active"
            if not docker_info["service_active"]:
                health_status = "unhealthy"

            # Verify that a container can actually be started.
            try:
                stdin, stdout, stderr = ssh.exec_command("docker run --rm hello-world")
                hello_output = stdout.read().decode('utf-8')
                docker_info["can_run_containers"] = "Hello from Docker!" in hello_output
            except Exception:
                docker_info["can_run_containers"] = False

            # Only downgrade a so-far healthy result: a failed test run must
            # not mask the more severe "unhealthy" state set above.
            if not docker_info["can_run_containers"] and health_status == "healthy":
                health_status = "degraded"

            result.status = "success"
            result.data = {
                "docker_info": docker_info,
                "health_status": health_status
            }
            result.raw_outputs = outputs

            if health_status == "healthy":
                result.summary = f"Docker服务健康状态良好，版本 {docker_info.get('version', 'unknown')}，{docker_info.get('running_containers', 0)} 个运行中的容器"
            elif health_status == "degraded":
                result.summary = "Docker服务状态降级，可能存在功能限制"
            else:
                result.summary = "Docker服务不健康，可能无法正常工作"

    except Exception as e:
        result.status = "error"
        result.error = f"检查Docker健康状态失败: {str(e)}"

    return result.dict()

server_monitor_sse/tools/docker_tools.py:283-338 (handler)

Secondary handler implementation of check_docker_health in the SSE variant. It checks that Docker is installed, queries the service status via systemctl, parses the `docker info` output, and reads the server version.

@handle_exceptions
def check_docker_health(
    hostname: str,
    username: str,
    password: str = "",
    port: int = 22,
    timeout: int = 30
) -> dict:
    """Check the health status of the Docker service.

    Opens an SSH session to the target host, verifies that the ``docker``
    binary is present, then collects the systemd service state, the parsed
    ``docker info`` output and the server version.

    Args:
        hostname: Remote host, forwarded to ``SSHManager``.
        username: SSH login name, forwarded to ``SSHManager``.
        password: SSH password, forwarded to ``SSHManager``.
        port: SSH port, forwarded to ``SSHManager``.
        timeout: Timeout value forwarded to ``SSHManager`` and to every
            ``exec_command`` call (presumably seconds -- confirm).

    Returns:
        The ``dict()`` form of an ``InspectionResult``. On success ``status``
        is ``"success"`` and ``data`` holds ``service_status``, ``version``
        and ``info``; ``raw_outputs`` holds the raw ``docker info`` text and,
        when the command wrote to stderr, that text under
        ``docker_info_stderr``. When Docker is not installed or any step
        raises, ``status`` is ``"error"`` and ``error`` carries the message.
    """
    result = InspectionResult()

    try:
        with SSHManager(hostname, username, password, port, timeout) as ssh:
            # Bail out early if the docker binary is not on the remote PATH.
            stdin, stdout, stderr = ssh.exec_command("command -v docker", timeout=timeout)
            if not stdout.read().strip():
                result.status = "error"
                result.error = "Docker未安装在目标服务器上"
                return result.dict()

            # State of the docker systemd unit, as printed by systemctl.
            stdin, stdout, stderr = ssh.exec_command("systemctl is-active docker", timeout=timeout)
            service_status = stdout.read().decode('utf-8').strip()

            # Full "docker info" output; stderr is kept for diagnostics.
            stdin, stdout, stderr = ssh.exec_command("docker info", timeout=timeout)
            info_output = stdout.read().decode('utf-8')
            error_output = stderr.read().decode('utf-8')

            # Turn every "key: value" line into a dict entry; later
            # duplicates of a key overwrite earlier ones.
            docker_info = {}
            for line in info_output.split('\n'):
                if ':' in line:
                    key, _, value = line.partition(':')
                    docker_info[key.strip()] = value.strip()

            # Server version as reported by the daemon.
            stdin, stdout, stderr = ssh.exec_command("docker version --format '{{.Server.Version}}'", timeout=timeout)
            version_output = stdout.read().decode('utf-8').strip()

            result.status = "success"
            result.data = {
                "service_status": service_status,
                "version": version_output,
                "info": docker_info
            }

            raw_outputs = {"docker_info": info_output}
            # Previously stderr was read and dropped, so a failing
            # "docker info" yielded an empty dict with no explanation.
            if error_output.strip():
                raw_outputs["docker_info_stderr"] = error_output
            result.raw_outputs = raw_outputs

            result.summary = f"Docker服务状态: {service_status}, 版本: {version_output}"

    except Exception as e:
        result.status = "error"
        result.error = f"检查Docker健康状态失败: {str(e)}"

    return result.dict()

server_monitor_sse/tools/utils.py:141-147 (schema)

JSON schema definition for the check_docker_health tool, listing parameters like hostname, username, password, port, timeout.

# Schema entry for the check_docker_health tool: the tool name, its
# description, and one record per parameter (name, type name, default).
# hostname and username default to None; the other parameters carry
# concrete defaults.
{"name": "check_docker_health", "description": "检查Docker服务健康状态", "parameters": [
    {"name": "hostname", "type": "str", "default": None},
    {"name": "username", "type": "str", "default": None},
    {"name": "password", "type": "str", "default": ""},
    {"name": "port", "type": "int", "default": 22},
    {"name": "timeout", "type": "int", "default": 30}
]}

server_monitor/main.py:41-72 (registration)

Tool registration in server_monitor main.py, where check_docker_health is included in the tools_dict and registered via FastMCP.tool() decorator in a loop.

# Register all tool functions: map each tool name to its handler.
tools_dict = {
    'get_memory_info': get_memory_info,
    'remote_server_inspection': remote_server_inspection,
    'get_system_load': get_system_load,
    'monitor_processes': monitor_processes,
    'check_service_status': check_service_status,
    'get_os_details': get_os_details,
    'check_ssh_risk_logins': check_ssh_risk_logins,
    'check_firewall_config': check_firewall_config,
    'security_vulnerability_scan': security_vulnerability_scan,
    'backup_critical_files': backup_critical_files,
    'inspect_network': inspect_network,
    'analyze_logs': analyze_logs,
    'list_docker_containers': list_docker_containers,
    'list_docker_images': list_docker_images,
    'list_docker_volumes': list_docker_volumes,
    'get_container_logs': get_container_logs,
    'monitor_container_stats': monitor_container_stats,
    'check_docker_health': check_docker_health
}

# Dynamically register every tool by applying the mcp.tool() decorator.
# NOTE: the dict key `name` is not used in the loop body.
for name, func in tools_dict.items():
    mcp.tool()(func)

# list_available_tools is handled separately because it needs the mcp instance.
@mcp.tool()
def _list_available_tools():
    return list_available_tools(mcp)

return mcp

server_monitor_sse/server.py:294-306 (registration)

Explicit dispatch branch in the SSE server's tool_handler (server.py): it validates that the required arguments are present and then invokes check_docker_health.

elif name == "check_docker_health":
    # hostname and username must be supplied by the caller; fail fast
    # with a ValueError naming the first missing one.
    required_args = ["hostname", "username"]
    for arg in required_args:
        if arg not in arguments:
            raise ValueError(f"Missing required argument '{arg}'")

    # Optional arguments fall back to the same defaults as the handler
    # signature (password "", port 22, timeout 30).
    result = check_docker_health(
        hostname=arguments["hostname"],
        username=arguments["username"],
        password=arguments.get("password", ""),
        port=arguments.get("port", 22),
        timeout=arguments.get("timeout", 30)
    )

OPS MCP Server