Skip to main content
Glama
health.py (5.6 kB)
"""Health, readiness, liveness, metrics and version endpoints."""

import asyncio
import time
from typing import Any, Dict

import structlog
from fastapi import APIRouter, Depends, HTTPException

from app.config import settings
from app.core.models.mcp_protocol import HealthStatus, MonitoringMetrics

logger = structlog.get_logger()
router = APIRouter()

# Wall-clock start time; kept so that anything importing it keeps working.
start_time = time.time()
# Monotonic reference used for the uptime figure, so wall-clock adjustments
# (NTP steps, manual changes) cannot skew it or make it negative.
_start_monotonic = time.monotonic()

# Placeholder probes: service key -> (label used in the warning message,
# simulated latency in seconds). Insertion order is the order of the keys
# in the dict returned by check_all_services().
_SERVICE_PROBES = {
    "redis": ("Redis", 0.01),
    "database": ("Database", 0.02),
    "external_apis": ("External APIs", 0.05),
    "llm_providers": ("LLM providers", 0.03),
}


@router.get("/", response_model=HealthStatus)
async def health_check():
    """Comprehensive health check endpoint.

    Probes every service via check_all_services() and reports "healthy"
    when all probes pass, "degraded" when only some pass and "unhealthy"
    when none pass, together with uptime and application version.

    Raises:
        HTTPException: 500 if the check itself fails unexpectedly.
    """
    try:
        # Check all critical services
        services_status = await check_all_services()

        # Determine overall health
        if all(services_status.values()):
            status = "healthy"
        elif any(services_status.values()):
            status = "degraded"
        else:
            status = "unhealthy"

        uptime = int(time.monotonic() - _start_monotonic)

        health_status = HealthStatus(
            status=status,
            services=services_status,
            uptime_seconds=uptime,
            version=settings.app_version,
        )

        logger.info(
            "Health check completed",
            status=status,
            services=services_status,
            uptime=uptime,
        )

        return health_status

    except Exception as e:
        # exc_info keeps the traceback; str(e) alone loses where it failed.
        logger.error("Health check failed", error=str(e), exc_info=True)
        raise HTTPException(status_code=500, detail="Health check failed") from e


@router.get("/ready")
async def readiness_check():
    """Readiness probe for Kubernetes.

    Returns {"status": "ready"} when every readiness component reports True.

    Raises:
        HTTPException: 503 when a component is not ready or the check fails.
    """
    try:
        # Check if the application is ready to serve traffic
        services_ready = await check_readiness()
        is_ready = all(services_ready.values())
    except Exception as e:
        logger.error("Readiness check failed", error=str(e), exc_info=True)
        raise HTTPException(status_code=503, detail="Readiness check failed") from e

    if not is_ready:
        raise HTTPException(status_code=503, detail="Service not ready")
    return {"status": "ready"}


@router.get("/live")
async def liveness_check():
    """Liveness probe for Kubernetes.

    Returns a static "alive" payload with the current wall-clock timestamp;
    answering at all is the signal that the process is running.
    """
    return {"status": "alive", "timestamp": time.time()}


@router.get("/metrics", response_model=MonitoringMetrics)
async def get_metrics():
    """Get application metrics.

    The values below are hard-coded placeholders.

    Raises:
        HTTPException: 500 if the metrics object cannot be built.
    """
    try:
        # This would typically collect real metrics from your monitoring system
        metrics = MonitoringMetrics(
            active_agents=3,
            processing_tasks=2,
            completed_tasks=156,
            failed_tasks=3,
            average_response_time=0.234,
            requests_per_second=12.5,
            error_rate=0.02,
        )
        return metrics
    except Exception as e:
        logger.error("Failed to collect metrics", error=str(e), exc_info=True)
        raise HTTPException(
            status_code=500, detail="Failed to collect metrics"
        ) from e


@router.get("/version")
async def get_version():
    """Get application version information."""
    return {
        "name": settings.app_name,
        "version": settings.app_version,
        "build_time": "2024-01-01T00:00:00Z",  # This would be set during build
        "git_commit": "abc123",  # This would be set during build
        "python_version": "3.11+",
        "environment": "production" if not settings.debug else "development",
    }


async def _probe_service(name: str) -> bool:
    """Run the (simulated) probe for one service; True on success."""
    label, simulated_latency = _SERVICE_PROBES[name]
    try:
        # This would be an actual ping / connectivity check for the service
        await asyncio.sleep(simulated_latency)
        return True
    except Exception as e:
        logger.warning(f"{label} health check failed", error=str(e))
        return False


async def check_all_services() -> Dict[str, bool]:
    """Check all external services.

    Probes run concurrently, so the total latency is that of the slowest
    probe rather than the sum of all of them.

    Returns:
        Mapping of service key to True (probe passed) or False (probe failed).
    """
    names = list(_SERVICE_PROBES)
    # gather() returns results in argument order, so zip() pairs them correctly.
    outcomes = await asyncio.gather(*(_probe_service(name) for name in names))
    return dict(zip(names, outcomes))


async def check_readiness() -> Dict[str, bool]:
    """Check if application is ready to serve traffic.

    Returns:
        Mapping of component name to readiness flag; all components share the
        outcome of the single (simulated) check.
    """
    components = ("database", "cache", "tools_loaded")
    try:
        # Simulate readiness checks
        await asyncio.sleep(0.01)
        ready = True
    except Exception as e:
        logger.warning("Readiness check failed", error=str(e))
        ready = False
    return {component: ready for component in components}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Vovchansky6/mcp_OVS'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.