monitor.sh•6.17 kB
#!/bin/bash
# monitor.sh - Monitor health and performance of MCP servers
# Exit on unhandled command failures. Bash does not apply this to commands
# that run as part of a condition (if/while tests, && and || lists, `!`).
set -e

# ANSI color escape sequences for log output. They are kept as literal
# backslash sequences and only become control characters when printed with
# escape interpretation enabled.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
NC="\033[0m" # reset / no color

# Defaults; the first, second and fourth can be overridden on the command line.
COMPOSE_FILE="docker/docker-compose.prod.yml" # compose file describing the stack
INTERVAL=60                                   # pause between monitoring cycles
LOG_DIR="logs/monitoring"                     # where metrics, errors and reports go
ALERT_THRESHOLD=80                            # percentage above which alerts fire
# Helper functions
# Print an informational message on stdout, wrapped in the green color code.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
log_info() {
  local text="$1"
  printf '%b\n' "${GREEN}[INFO] ${text}${NC}"
}
# Print a warning message on stdout, wrapped in the yellow color code.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
log_warn() {
  local text="$1"
  printf '%b\n' "${YELLOW}[WARN] ${text}${NC}"
}
# Print an error message on stdout, wrapped in the red color code.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
log_error() {
  local text="$1"
  printf '%b\n' "${RED}[ERROR] ${text}${NC}"
}
# Parse command-line arguments into the global settings.
#
# Options (each one requires a value):
#   --compose-file PATH   sets COMPOSE_FILE
#   --interval N          sets INTERVAL; a non-negative number, optionally
#                         followed by one of the suffixes s, m, h or d
#   --alert-threshold N   sets ALERT_THRESHOLD; a non-negative number
#
# Any other argument, a missing value, or a malformed number is reported via
# log_error and terminates the script with exit status 1.
parse_args() {
  local number_re='^[0-9]+([.][0-9]+)?$'
  local interval_re='^[0-9]+([.][0-9]+)?[smhd]?$'

  while [[ $# -gt 0 ]]; do
    # Reject an option that is the last word on the command line: without
    # this check `shift 2` fails and the script dies without any message.
    case "$1" in
      --compose-file | --interval | --alert-threshold)
        if [[ $# -lt 2 ]]; then
          log_error "Missing value for argument: $1"
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --compose-file)
        COMPOSE_FILE="$2"
        shift 2
        ;;
      --interval)
        if ! [[ "$2" =~ $interval_re ]]; then
          log_error "Invalid value for --interval: $2"
          exit 1
        fi
        INTERVAL="$2"
        shift 2
        ;;
      --alert-threshold)
        # The threshold is compared numerically against the collected
        # metrics, so anything that is not a plain number is refused here.
        if ! [[ "$2" =~ $number_re ]]; then
          log_error "Invalid value for --alert-threshold: $2"
          exit 1
        fi
        ALERT_THRESHOLD="$2"
        shift 2
        ;;
      *)
        log_error "Unknown argument: $1"
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# Prepare the monitoring output location.
#
# Creates LOG_DIR if needed and makes sure the metrics CSV starts with a
# header line. An existing, non-empty metrics file is left untouched so that
# restarting the monitor does not wipe previously collected samples.
#
# Globals: LOG_DIR (read)
# Returns: 0 on success, 1 if the directory or the metrics file cannot be
#          created
initialize() {
  log_info "Initializing monitoring..."

  local metrics_file="$LOG_DIR/metrics.csv"

  if ! mkdir -p "$LOG_DIR"; then
    log_error "Cannot create log directory: $LOG_DIR"
    return 1
  fi

  # The response time column holds curl's time_total, which is expressed in
  # seconds, hence the "(s)" unit in the header.
  if [[ ! -s "$metrics_file" ]]; then
    echo "Timestamp,CPU(%),Memory(%),Disk(%),Redis Memory(%),Response Time(s)" \
      > "$metrics_file" || return 1
  fi
}
# Collect one sample of resource metrics and append it to the metrics CSV.
#
# Globals written (read later by the threshold check and the report):
#   CPU_USAGE             sum of the CPU percentages reported by `docker stats`
#   MEMORY_USAGE          sum of the memory percentages reported by `docker stats`
#   DISK_USAGE            use percentage of the root filesystem
#   REDIS_MEMORY          Redis resident set size in bytes
#   REDIS_MAX_MEMORY      Redis `maxmemory` setting in bytes
#   REDIS_MEMORY_PERCENT  REDIS_MEMORY as a percentage of REDIS_MAX_MEMORY,
#                         or 0 when no positive maxmemory is configured
#   RESPONSE_TIME         curl's total request time for the health endpoint,
#                         in seconds
# Globals read: COMPOSE_FILE, LOG_DIR
check_resources() {
  # One percentage per container; "+ 0" makes the sum print as 0 instead of
  # an empty string when no container is listed.
  CPU_USAGE=$(docker stats --no-stream --format "{{.CPUPerc}}" \
    | awk '{ sub(/%/, ""); sum += $1 } END { print sum + 0 }')
  MEMORY_USAGE=$(docker stats --no-stream --format "{{.MemPerc}}" \
    | awk '{ sub(/%/, ""); sum += $1 } END { print sum + 0 }')

  # -P keeps every filesystem on a single output line, so line 2 is reliable.
  DISK_USAGE=$(df -P / | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

  # Take the byte-valued field rather than the *_human one: the latter is
  # scaled (K/M/G) and cannot be divided by maxmemory, which is in bytes.
  # tr drops everything that is not a digit (e.g. a trailing carriage return).
  REDIS_MEMORY=$(docker-compose -f "$COMPOSE_FILE" exec -T redis redis-cli info memory \
    | awk -F: '$1 == "used_memory_rss" { print $2 }' | tr -cd '0-9')
  REDIS_MAX_MEMORY=$(docker-compose -f "$COMPOSE_FILE" exec -T redis redis-cli config get maxmemory \
    | awk 'NR == 2' | tr -cd '0-9')

  # maxmemory is 0 when Redis has no limit configured; report 0% in that
  # case instead of dividing by zero. Values are passed with -v so they are
  # never interpreted as awk code.
  REDIS_MEMORY_PERCENT=$(awk -v used="${REDIS_MEMORY:-0}" -v max="${REDIS_MAX_MEMORY:-0}" \
    'BEGIN { if (max + 0 > 0) printf "%.2f\n", (used / max) * 100; else print 0 }')

  # --max-time keeps a hung endpoint from stalling the whole monitoring loop.
  RESPONSE_TIME=$(curl -o /dev/null -s --max-time 10 -w "%{time_total}\n" http://localhost:3000/health)

  printf '%s,%s,%s,%s,%s,%s\n' \
    "$(date +%Y-%m-%d_%H:%M:%S)" \
    "$CPU_USAGE" "$MEMORY_USAGE" "$DISK_USAGE" \
    "$REDIS_MEMORY_PERCENT" "$RESPONSE_TIME" \
    >> "$LOG_DIR/metrics.csv"
}
# Run the liveness checks and raise an alert for each one that fails:
#   1. any container listed as "unhealthy" by `docker-compose ps`
#   2. Redis not answering PONG to a ping
#   3. the HTTP health endpoint not returning a body containing "ok"
#
# Every check is evaluated directly as an `if` condition, so a "nothing
# found" result from grep is never treated as a script failure, regardless of
# whether `set -e` is in effect for the caller.
#
# Globals: COMPOSE_FILE (read)
check_health() {
  local ps_output unhealthy

  # Query the container list once and reuse it for both detection and
  # display, so the reported lines are exactly the ones that triggered.
  ps_output=$(docker-compose -f "$COMPOSE_FILE" ps) || true
  if unhealthy=$(printf '%s\n' "$ps_output" | grep "unhealthy"); then
    log_error "Unhealthy containers detected:"
    printf '%s\n' "$unhealthy"
    send_alert "Unhealthy containers detected"
  fi

  if ! docker-compose -f "$COMPOSE_FILE" exec -T redis redis-cli ping | grep -q "PONG"; then
    log_error "Redis connection failed"
    send_alert "Redis connection failed"
  fi

  # --max-time bounds the request so an unresponsive API counts as a failed
  # check instead of blocking the monitor.
  if ! curl -s --max-time 10 http://localhost:3000/health | grep -q "ok"; then
    log_error "API health check failed"
    send_alert "API health check failed"
  fi
}
# Scan the last 100 log lines of every compose service for the word "error"
# (case-insensitive). For each service with matches, emit a warning with the
# match count and append the matching lines to $LOG_DIR/errors.log.
#
# The logs of a service are fetched exactly once, so the reported count
# always equals the number of lines written to the error log.
#
# Globals: COMPOSE_FILE, LOG_DIR (read)
check_logs() {
  local services service matches match_count

  services=$(docker-compose -f "$COMPOSE_FILE" ps --services) || services=""

  while IFS= read -r service; do
    [[ -n "$service" ]] || continue

    # stdin is redirected so the command cannot consume the service list
    # feeding this loop. grep's status decides whether there is anything
    # to report for this service.
    matches=$(docker-compose -f "$COMPOSE_FILE" logs --tail=100 "$service" < /dev/null \
      | grep -i "error") || continue

    match_count=$(printf '%s\n' "$matches" | grep -c '')
    log_warn "Found $match_count errors in $service logs"
    printf '%s\n' "$matches" >> "$LOG_DIR/errors.log"
  done <<< "$services"
}
# Write a timestamped plain-text report into LOG_DIR containing the most
# recently collected metrics, the current container status and the last ten
# lines of the error log.
#
# Globals read:    LOG_DIR, COMPOSE_FILE, CPU_USAGE, MEMORY_USAGE, DISK_USAGE,
#                  REDIS_MEMORY_PERCENT, RESPONSE_TIME
# Globals written: REPORT_FILE (path of the generated report)
generate_report() {
  REPORT_FILE="$LOG_DIR/report_$(date +%Y%m%d_%H%M%S).txt"
  local error_log="$LOG_DIR/errors.log"

  {
    echo "MCP Servers Monitoring Report"
    echo "=============================="
    echo "Generated: $(date)"
    echo
    echo "System Resources:"
    echo "----------------"
    echo "CPU Usage: $CPU_USAGE%"
    echo "Memory Usage: $MEMORY_USAGE%"
    echo "Disk Usage: $DISK_USAGE%"
    echo "Redis Memory Usage: $REDIS_MEMORY_PERCENT%"
    # RESPONSE_TIME is curl's time_total, which is measured in seconds.
    echo "API Response Time: ${RESPONSE_TIME}s"
    echo
    echo "Container Status:"
    echo "----------------"
    docker-compose -f "$COMPOSE_FILE" ps
    echo
    echo "Recent Errors:"
    echo "-------------"
    # -s also covers an error log that exists but is empty, which would
    # otherwise leave this section blank.
    if [[ -s "$error_log" ]]; then
      tail -n 10 "$error_log"
    else
      echo "No recent errors"
    fi
  } > "$REPORT_FILE"

  log_info "Report generated: $REPORT_FILE"
}
# Raise an alert. Currently this only logs the message at error level; hook
# your own notification system in here.
# Arguments: $1 - alert text
send_alert() {
  local -r alert_text="$1"
  log_error "ALERT: ${alert_text}"
  # Example: forward the alert to a monitoring service
  # curl -X POST "https://monitoring-service/alert" -d "message=$alert_text"
}
# Succeed (status 0) when $1 is numerically greater than $2.
# Returns 1 when it is not, or when either argument is not a plain decimal
# number (for instance an empty metric after a failed collection).
_metric_exceeds() {
  local numeric_re='^-?([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  [[ "$1" =~ $numeric_re && "$2" =~ $numeric_re ]] || return 1
  # Values are handed over with -v so they are data, never awk code.
  awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}

# Compare the collected metrics with their limits and send an alert for each
# one that is exceeded. The percentage metrics are checked against
# ALERT_THRESHOLD; the API response time is checked against a fixed limit of
# 1 second. Metrics that are missing or not numeric are skipped without
# producing shell errors.
#
# Globals: CPU_USAGE, MEMORY_USAGE, DISK_USAGE, REDIS_MEMORY_PERCENT,
#          RESPONSE_TIME, ALERT_THRESHOLD (read)
check_thresholds() {
  if _metric_exceeds "$CPU_USAGE" "$ALERT_THRESHOLD"; then
    send_alert "High CPU usage: $CPU_USAGE%"
  fi
  if _metric_exceeds "$MEMORY_USAGE" "$ALERT_THRESHOLD"; then
    send_alert "High memory usage: $MEMORY_USAGE%"
  fi
  if _metric_exceeds "$DISK_USAGE" "$ALERT_THRESHOLD"; then
    send_alert "High disk usage: $DISK_USAGE%"
  fi
  if _metric_exceeds "$REDIS_MEMORY_PERCENT" "$ALERT_THRESHOLD"; then
    send_alert "High Redis memory usage: $REDIS_MEMORY_PERCENT%"
  fi
  if _metric_exceeds "$RESPONSE_TIME" 1; then
    send_alert "High API response time: ${RESPONSE_TIME}s"
  fi
}
# Delete regular files under LOG_DIR (at any depth) whose modification time
# is older than the retention period; directories are left in place.
# Globals: LOG_DIR (read)
cleanup_logs() {
  local -r retention_days=7
  find "$LOG_DIR" -type f -mtime "+${retention_days}" -delete
}
# Main monitoring loop.
#
# Initializes the log location, then repeats forever: collect metrics, run
# the health checks, scan the logs, evaluate the thresholds and sleep for
# INTERVAL. A report is generated and old logs are cleaned up once each time
# the wall-clock hour changes.
#
# Globals: INTERVAL (read)
# Returns: 1 if initialization fails; otherwise it does not return
main() {
  initialize || return 1
  log_info "Starting monitoring (interval: ${INTERVAL}s)"

  # Track the hour of the last report instead of testing for minute "00":
  # a cycle takes longer than INTERVAL, so the loop can step over minute 00
  # altogether, and with short intervals it would hit it several times.
  local last_report_hour current_hour
  last_report_hour=$(date +%Y%m%d%H)

  while true; do
    check_resources
    check_health
    check_logs
    check_thresholds

    current_hour=$(date +%Y%m%d%H)
    if [[ "$current_hour" != "$last_report_hour" ]]; then
      generate_report
      cleanup_logs
      last_report_hour="$current_hour"
    fi

    sleep "$INTERVAL"
  done
}
# Entry point: start the monitor and, should main ever come back with a
# failure status, report it and exit non-zero. Because main is invoked as
# part of a conditional list, bash does not apply `set -e` to the commands
# that run inside it.
main || {
  log_error "Monitoring failed!"
  exit 1
}