#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
监控面板
提供系统状态、性能指标和反馈信息的可视化展示
"""
import os
import time
import json
from typing import Dict, Any
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, HTML
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
# 导入监控模块
from monitoring import get_performance_monitor, get_feedback_collector, get_alert_manager
class MonitoringDashboard:
    """Render monitoring data as Markdown reports, an HTML page and JSON exports.

    The dashboard reads from three collaborators created in ``__init__``:
    a performance monitor, a feedback collector and an alert manager.

    NOTE: the multi-line templates below are deliberately written flush-left
    so that no source indentation leaks into the generated text.
    """

    # Severity -> emoji for alert listings; unknown severities fall back to "⚪".
    _ALERT_SEVERITY_EMOJI = {"critical": "🔴", "warning": "🟡", "info": "🔵"}
    # Severity -> emoji for feedback listings; unknown severities fall back to "💭".
    _FEEDBACK_SEVERITY_EMOJI = {"error": "❌", "warning": "⚠️", "info": "ℹ️", "success": "✅"}
    # Severity -> extra CSS class appended after "alert" in the HTML page.
    _ALERT_CSS_CLASS = {"critical": "alert", "warning": "warning", "info": "success"}

    def __init__(self, config: Dict[str, Any] = None):
        """Create the dashboard and resolve its three data sources.

        Args:
            config: Optional settings. Its ``'performance'``, ``'feedback'``
                and ``'alerts'`` entries (each defaulting to ``{}``) are passed
                to the matching factory function. ``None`` is treated as an
                empty configuration.
        """
        self.config = config or {}
        self.performance_monitor = get_performance_monitor(self.config.get('performance', {}))
        self.feedback_collector = get_feedback_collector(self.config.get('feedback', {}))
        self.alert_manager = get_alert_manager(self.config.get('alerts', {}))

    def generate_system_overview(self) -> str:
        """Return a Markdown overview of system stats, alerts and feedback.

        At most the first five active alerts are listed. The feedback-type
        breakdown is included only when the summary's ``'by_type'`` is truthy.
        """
        stats = self.performance_monitor.get_system_stats()
        feedback_summary = self.feedback_collector.get_feedback_summary()
        active_alerts = self.alert_manager.get_active_alerts()

        parts = [f"""
# 系统监控概览
## 📊 系统状态
- **运行时间**: {self._format_duration(stats['uptime'])}
- **总操作数**: {stats['total_operations']}
- **总错误数**: {stats['total_errors']}
- **错误率**: {stats['error_rate']:.2%}
- **平均响应时间**: {stats['avg_response_time']:.2f}秒
- **每秒操作数**: {stats['operations_per_second']:.2f}
- **唯一操作数**: {stats['unique_operations']}
## 🚨 告警状态
- **活跃告警**: {len(active_alerts)}
"""]

        if active_alerts:
            parts.append("\n### 当前告警\n")
            for alert in active_alerts[:5]:  # show only the first five alerts
                emoji = self._ALERT_SEVERITY_EMOJI.get(alert['severity'], "⚪")
                parts.append(
                    f"- {emoji} **{alert['rule_name']}**: {alert['message']} (当前值: {alert['value']:.2f})\n"
                )

        latest = feedback_summary['latest_feedback']
        latest_text = self._format_timestamp(latest['timestamp']) if latest else '无'
        parts.append(f"""
## 💬 反馈统计
- **总反馈数**: {feedback_summary['total']}
- **最新反馈**: {latest_text}
""")

        if feedback_summary['by_type']:
            parts.append("\n### 反馈类型分布\n")
            parts.extend(
                f"- **{feedback_type}**: {count}\n"
                for feedback_type, count in feedback_summary['by_type'].items()
            )

        return "".join(parts)

    def generate_performance_report(self) -> str:
        """Return a Markdown section with per-operation timing statistics.

        A short placeholder report is returned when the performance monitor
        has no operation statistics.
        """
        operation_stats = self.performance_monitor.get_operation_stats()
        if not operation_stats:
            return "## 📈 性能报告\n\n暂无操作数据。"

        parts = ["## 📈 性能报告\n\n"]
        for op_name, stats in operation_stats.items():
            parts.append(f"""
### {op_name}
- **操作次数**: {stats['count']}
- **成功率**: {stats['success_rate']:.2%}
- **平均耗时**: {stats['avg_duration']:.3f}秒
- **最小耗时**: {stats['min_duration']:.3f}秒
- **最大耗时**: {stats['max_duration']:.3f}秒
- **总耗时**: {stats['total_duration']:.3f}秒
""")
        return "".join(parts)

    def generate_feedback_report(self) -> str:
        """Return a Markdown section listing feedback entries.

        Up to 20 entries are requested from the collector and the first 10 of
        them are rendered. A placeholder report is returned when there are none.
        """
        feedback = self.feedback_collector.get_feedback(limit=20)
        if not feedback:
            return "## 💬 反馈报告\n\n暂无反馈数据。"

        parts = ["## 💬 反馈报告\n\n"]
        for item in feedback[:10]:  # show only the first ten entries
            emoji = self._FEEDBACK_SEVERITY_EMOJI.get(item['severity'], "💭")
            parts.append(f"""
### {emoji} {item['type']} ({item['severity']})
- **时间**: {self._format_timestamp(item['timestamp'])}
- **内容**: {item['message']}
""")
        return "".join(parts)

    def generate_alert_report(self) -> str:
        """Return a Markdown section describing every active alert.

        A placeholder report is returned when there are no active alerts.
        """
        active_alerts = self.alert_manager.get_active_alerts()
        if not active_alerts:
            return "## 🚨 告警报告\n\n当前无活跃告警。"

        parts = ["## 🚨 告警报告\n\n"]
        for alert in active_alerts:
            emoji = self._ALERT_SEVERITY_EMOJI.get(alert['severity'], "⚪")
            parts.append(f"""
### {emoji} {alert['rule_name']} ({alert['severity']})
- **时间**: {self._format_timestamp(alert['timestamp'])}
- **消息**: {alert['message']}
- **指标**: {alert['metric']} = {alert['value']:.2f}
- **阈值**: {alert['threshold']}
""")
        return "".join(parts)

    def generate_html_dashboard(self) -> str:
        """Return a standalone HTML page summarising the monitoring state.

        The page shows system stats, up to three active alerts, the feedback
        totals per type and up to five operations. Free-text values (alert
        rule names and messages, feedback types, operation names) are
        HTML-escaped so that markup contained in them is displayed as text
        instead of being injected into the page.
        """
        # Imported locally so the fix does not depend on the module import block.
        from html import escape

        # Gather the data first so the page reflects one consistent snapshot.
        system_stats = self.performance_monitor.get_system_stats()
        operation_stats = self.performance_monitor.get_operation_stats()
        feedback_summary = self.feedback_collector.get_feedback_summary()
        active_alerts = self.alert_manager.get_active_alerts()

        chunks = [f"""
<!DOCTYPE html>
<html>
<head>
<title>MCP服务器监控面板</title>
<meta charset="utf-8">
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }}
.dashboard {{ display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }}
.card {{ background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
.header {{ text-align: center; margin-bottom: 20px; }}
.metric {{ display: inline-block; margin: 10px; padding: 10px; background: #e3f2fd; border-radius: 4px; }}
.alert {{ background: #ffebee; border-left: 4px solid #f44336; padding: 10px; margin: 10px 0; }}
.warning {{ background: #fff8e1; border-left: 4px solid #ff9800; }}
.success {{ background: #e8f5e8; border-left: 4px solid #4caf50; }}
h1 {{ color: #1976d2; }}
h2 {{ color: #424242; border-bottom: 2px solid #e0e0e0; padding-bottom: 5px; }}
</style>
</head>
<body>
<div class="header">
<h1>MCP服务器监控面板</h1>
<p>最后更新: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
</div>
<div class="dashboard">
<div class="card">
<h2>📊 系统状态</h2>
<div class="metric">
<strong>运行时间:</strong> {self._format_duration(system_stats['uptime'])}
</div>
<div class="metric">
<strong>总操作数:</strong> {system_stats['total_operations']}
</div>
<div class="metric">
<strong>错误率:</strong> {system_stats['error_rate']:.2%}
</div>
<div class="metric">
<strong>平均响应时间:</strong> {system_stats['avg_response_time']:.3f}秒
</div>
<div class="metric">
<strong>每秒操作数:</strong> {system_stats['operations_per_second']:.2f}
</div>
</div>
<div class="card">
<h2>🚨 告警状态</h2>
<div class="metric">
<strong>活跃告警:</strong> {len(active_alerts)}
</div>
"""]

        for alert in active_alerts[:3]:  # show only the first three alerts
            css_class = self._ALERT_CSS_CLASS.get(alert['severity'], "")
            rule_name = escape(str(alert['rule_name']))
            message = escape(str(alert['message']))
            chunks.append(
                f'<div class="alert {css_class}"><strong>{rule_name}</strong>: {message}</div>'
            )

        chunks.append("""
</div>
<div class="card">
<h2>💬 反馈统计</h2>
<div class="metric">
<strong>总反馈数:</strong>""" + str(feedback_summary['total']) + """
</div>
""")

        if feedback_summary['by_type']:
            for feedback_type, count in feedback_summary['by_type'].items():
                type_label = escape(str(feedback_type))
                chunks.append(f'<div class="metric"><strong>{type_label}:</strong> {count}</div>')

        chunks.append("""
</div>
<div class="card">
<h2>📈 操作统计</h2>
""")

        if operation_stats:
            for op_name, stats in list(operation_stats.items())[:5]:  # first five operations
                op_label = escape(str(op_name))
                chunks.append(
                    f'<div class="metric"><strong>{op_label}:</strong> '
                    f'{stats["count"]}次, 平均{stats["avg_duration"]:.3f}秒</div>'
                )

        chunks.append("""
</div>
</div>
</body>
</html>
""")
        return "".join(chunks)

    def save_dashboard_html(self, file_path: str = "monitoring_dashboard.html") -> bool:
        """Write the HTML dashboard to ``file_path`` as UTF-8.

        Args:
            file_path: Destination file; it is overwritten if it exists.

        Returns:
            ``True`` on success, ``False`` if generating or writing the page
            raised an exception (the error is printed, not re-raised).
        """
        # Deliberately broad: this is a best-effort boundary that reports
        # failure through its boolean result.
        try:
            html_content = self.generate_html_dashboard()
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(html_content)
            print(f"监控面板已保存到: {file_path}")
            return True
        except Exception as e:
            print(f"保存监控面板失败: {e}")
            return False

    def export_monitoring_data(self, export_dir: str = "monitoring_exports") -> bool:
        """Export performance, feedback and alert data plus the HTML dashboard.

        The three JSON files written by one call share a single timestamp in
        their names, so they can be matched up afterwards.

        Args:
            export_dir: Target directory; created if it does not exist.

        Returns:
            ``True`` only if every artefact, including ``dashboard.html``, was
            written. ``False`` if any step failed (the error is printed, not
            re-raised).
        """
        try:
            os.makedirs(export_dir, exist_ok=True)
            # One stamp for the whole export; separate now() calls could
            # straddle a second boundary and yield mismatched file names.
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')

            # Performance data is written by the monitor itself.
            performance_file = os.path.join(export_dir, f"performance_{stamp}.json")
            self.performance_monitor.export_stats(performance_file)

            # Feedback data (up to 1000 entries).
            feedback_file = os.path.join(export_dir, f"feedback_{stamp}.json")
            feedback_data = self.feedback_collector.get_feedback(limit=1000)
            with open(feedback_file, 'w', encoding='utf-8') as f:
                json.dump(feedback_data, f, indent=2, ensure_ascii=False)

            # Alert data: currently active alerts plus the manager's history.
            alert_file = os.path.join(export_dir, f"alerts_{stamp}.json")
            alert_data = {
                'active_alerts': self.alert_manager.get_active_alerts(),
                'alert_history': self.alert_manager.alert_history
            }
            with open(alert_file, 'w', encoding='utf-8') as f:
                json.dump(alert_data, f, indent=2, ensure_ascii=False)

            # HTML dashboard; a failed save must not be reported as success.
            dashboard_file = os.path.join(export_dir, "dashboard.html")
            if not self.save_dashboard_html(dashboard_file):
                return False

            print(f"监控数据已导出到: {export_dir}")
            return True
        except Exception as e:
            print(f"导出监控数据失败: {e}")
            return False

    def _format_duration(self, seconds: float) -> str:
        """Format a duration in seconds using the largest fitting unit.

        Values below 60 are shown in seconds, below 3600 in minutes, below
        86400 in hours and anything else in days, each with one decimal.
        """
        if seconds < 60:
            return f"{seconds:.1f}秒"
        if seconds < 3600:
            return f"{seconds / 60:.1f}分钟"
        if seconds < 86400:
            return f"{seconds / 3600:.1f}小时"
        return f"{seconds / 86400:.1f}天"

    def _format_timestamp(self, timestamp: str) -> str:
        """Format an ISO-8601 timestamp as ``YYYY-MM-DD HH:MM:SS``.

        A ``Z`` in the input is rewritten to ``+00:00`` before parsing. If the
        value is not a string or cannot be parsed, it is returned unchanged.
        """
        try:
            parsed = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            return parsed.strftime('%Y-%m-%d %H:%M:%S')
        except (AttributeError, TypeError, ValueError):
            # Non-string or unparsable input: fall back to the raw value
            # without swallowing KeyboardInterrupt/SystemExit.
            return timestamp
def main():
    """Entry point: print every text report, then write the HTML and JSON exports."""
    dashboard = MonitoringDashboard()

    # Each report is preceded by an 80-character rule, in this fixed order.
    separator = "=" * 80
    report_builders = (
        dashboard.generate_system_overview,
        dashboard.generate_performance_report,
        dashboard.generate_feedback_report,
        dashboard.generate_alert_report,
    )
    for build_report in report_builders:
        print(separator)
        print(build_report())

    # Persist the HTML dashboard to its default path.
    dashboard.save_dashboard_html()
    # Export the raw monitoring data to its default directory.
    dashboard.export_monitoring_data()
# Generate the reports only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()