#!/usr/bin/env python3
import json
import os
from datetime import datetime, timedelta
from collections import Counter
import html
def analyze_ai_tool_review_chatlog(file_path: str) -> dict:
    """Aggregate statistics from an exported group-chat log.

    The file at *file_path* is read as UTF-8 JSON and iterated as a sequence
    of message dicts. The keys consulted on each message are ``senderName``,
    ``time``, ``type`` and ``content``; all of them are optional.

    Args:
        file_path: Path of the JSON export to analyze.

    Returns:
        A dict with the keys ``total_messages``, ``participants`` (sender ->
        message count), ``messages_by_day`` / ``messages_by_hour`` /
        ``message_types`` (``Counter`` objects), ``hot_keywords`` (the 30 most
        common ``(word, count)`` pairs), ``daily_activity`` (date -> list of
        short message previews), ``conversations`` (the last 100 messages),
        ``system_messages``, ``member_joins`` and ``data`` (the raw list).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Loop invariants, built once instead of once per message.
    type_map = {1: '文本', 3: '图片', 47: '表情', 49: '分享', 34: '语音', 43: '视频', 10000: '系统消息'}
    stop_words = frozenset(['https', 'http', 'com', 'www', '这个', '什么', '怎么', '没有', '可以'])
    strip_chars = ',。!?;:"()【】《》'

    total_messages = len(data)
    participants = {}
    messages_by_day = Counter()
    messages_by_hour = Counter()
    message_types = Counter()
    hot_keywords = Counter()
    daily_activity = {}
    conversations = []
    system_messages = []
    member_joins = []

    for msg in data:
        # Per-sender counts; a missing or null sender is attributed to '系统'.
        sender = msg.get('senderName', '系统')
        if sender is None:
            sender = '系统'
        participants[sender] = participants.get(sender, 0) + 1

        # Normalize the content once so that a JSON null (or a non-string
        # payload) cannot break slicing, splitting or later HTML escaping.
        content = msg.get('content', '')
        if content is None:
            content = ''
        elif not isinstance(content, str):
            content = str(content)

        # Per-day / per-hour counts. A timestamp that is not a parseable
        # ISO-8601 string is skipped on purpose (best effort), but only the
        # errors that parsing can raise are swallowed.
        time_str = msg.get('time', '')
        dt = None
        if time_str:
            try:
                # 'Z' is rewritten because older fromisoformat() rejects it.
                dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
            except (AttributeError, TypeError, ValueError):
                dt = None
        if dt is not None:
            date_key = dt.strftime('%Y-%m-%d')
            hour_key = dt.strftime('%H:00')
            messages_by_day[date_key] += 1
            messages_by_hour[hour_key] += 1
            preview = content[:50] + '...' if len(content) > 50 else content
            daily_activity.setdefault(date_key, []).append({
                'time': dt.strftime('%H:%M'),
                'sender': sender,
                'content': preview,
            })

        # Message-type histogram; unknown codes get a generated label.
        msg_type = msg.get('type', 0)
        message_types[type_map.get(msg_type, f'类型{msg_type}')] += 1

        # System messages are collected, and member-join notices singled out.
        if msg_type == 10000:
            system_messages.append({
                'time': time_str,
                'content': content,
            })
            if '加入了群聊' in content:
                member_joins.append(content)

        # Naive keyword extraction for text messages: whitespace-split tokens,
        # punctuation trimmed, lower-cased, longer than two characters.
        if msg_type == 1:
            for word in content.split():
                word = word.strip(strip_chars).lower()
                if len(word) > 2 and word not in stop_words:
                    hot_keywords[word] += 1

        # Every message is kept for the "recent conversation" listing.
        conversations.append({
            'time': time_str,
            'sender': sender,
            'content': content,
            'type': msg_type,
        })

    return {
        'total_messages': total_messages,
        'participants': participants,
        'messages_by_day': messages_by_day,
        'messages_by_hour': messages_by_hour,
        'message_types': message_types,
        'hot_keywords': hot_keywords.most_common(30),
        'daily_activity': daily_activity,
        'conversations': conversations[-100:],  # only the latest 100 messages
        'system_messages': system_messages,
        'member_joins': member_joins,
        'data': data,
    }
def generate_html_report(analysis, output_file, total_members=87,
                         date_range='2026-01-11 至 2026-01-13'):
    """Render the analysis result as a standalone HTML report.

    Args:
        analysis: Dict using the keys ``total_messages``, ``participants``,
            ``messages_by_day``, ``messages_by_hour``, ``message_types``,
            ``hot_keywords``, ``member_joins``, ``daily_activity`` and
            ``conversations``.
        output_file: Path the HTML document is written to (UTF-8).
        total_members: Group size shown in the header cards and used as the
            denominator of the activity rate. Defaults to the previously
            hard-coded value.
        date_range: Text shown after "数据时间" in the page header. Defaults
            to the previously hard-coded value.

    Returns:
        None. The report is written to *output_file* and a confirmation line
        is printed.
    """

    def esc(value):
        """HTML-escape *value* after coercing it to ``str``."""
        return html.escape(str(value))

    total_messages = analysis['total_messages']
    active_count = len(analysis['participants'])
    day_count = len(analysis['messages_by_day'])
    daily_average = total_messages // day_count if analysis['messages_by_day'] else 0
    # Guard the denominator so a zero member count cannot raise.
    activity_rate = active_count / total_members * 100 if total_members else 0.0

    # Fragments are collected and joined once at the end instead of growing a
    # string with repeated "+=".
    parts = [f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI工具测评-年度陪跑 群聊分析报告</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif;
line-height: 1.6;
color: #333;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
overflow: hidden;
}}
.header {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 40px;
text-align: center;
}}
.header h1 {{
font-size: 32px;
margin-bottom: 10px;
}}
.header p {{
font-size: 16px;
opacity: 0.9;
}}
.stats-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 20px;
padding: 40px;
background: #f8f9fa;
}}
.stat-card {{
background: white;
padding: 25px;
border-radius: 15px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
transition: transform 0.3s;
}}
.stat-card:hover {{
transform: translateY(-5px);
}}
.stat-card h3 {{
color: #667eea;
font-size: 14px;
text-transform: uppercase;
margin-bottom: 10px;
}}
.stat-card .value {{
font-size: 36px;
font-weight: bold;
color: #333;
}}
.section {{
padding: 40px;
}}
.section h2 {{
font-size: 24px;
margin-bottom: 25px;
color: #333;
border-left: 5px solid #667eea;
padding-left: 15px;
}}
.participant-list {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 15px;
}}
.participant-item {{
background: #f8f9fa;
padding: 15px;
border-radius: 10px;
display: flex;
justify-content: space-between;
align-items: center;
}}
.participant-name {{
font-weight: 500;
}}
.participant-count {{
background: #667eea;
color: white;
padding: 4px 12px;
border-radius: 20px;
font-size: 14px;
}}
.timeline {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
}}
.timeline-item {{
display: flex;
justify-content: space-between;
padding: 12px;
background: white;
margin-bottom: 10px;
border-radius: 8px;
}}
.hourly-chart {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
}}
.hourly-item {{
display: flex;
justify-content: space-between;
padding: 12px;
background: white;
margin-bottom: 10px;
border-radius: 8px;
align-items: center;
}}
.hour-bar {{
height: 20px;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
border-radius: 10px;
margin-left: 20px;
flex-grow: 1;
max-width: 300px;
}}
.keyword-cloud {{
display: flex;
flex-wrap: wrap;
gap: 10px;
}}
.keyword-tag {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 8px 16px;
border-radius: 20px;
font-size: 14px;
}}
.conversation-list {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
max-height: 800px;
overflow-y: auto;
}}
.conversation-item {{
background: white;
padding: 15px;
margin-bottom: 15px;
border-radius: 10px;
border-left: 4px solid #667eea;
}}
.conversation-header {{
display: flex;
justify-content: space-between;
margin-bottom: 10px;
font-size: 14px;
color: #666;
}}
.conversation-sender {{
font-weight: bold;
color: #667eea;
}}
.conversation-content {{
color: #333;
line-height: 1.8;
}}
.conversation-time {{
font-size: 12px;
color: #999;
}}
.type-chart {{
display: flex;
gap: 15px;
flex-wrap: wrap;
}}
.type-item {{
background: #f8f9fa;
padding: 15px 20px;
border-radius: 10px;
text-align: center;
}}
.type-name {{
font-weight: bold;
color: #667eea;
}}
.type-count {{
font-size: 24px;
margin-top: 5px;
}}
.footer {{
background: #f8f9fa;
padding: 20px;
text-align: center;
color: #666;
font-size: 14px;
}}
.activity-summary {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
}}
.activity-day {{
background: white;
padding: 15px;
margin-bottom: 10px;
border-radius: 8px;
}}
.activity-day h4 {{
color: #667eea;
margin-bottom: 10px;
}}
.activity-item {{
display: flex;
justify-content: space-between;
padding: 8px;
background: #f8f9fa;
margin-bottom: 5px;
border-radius: 5px;
font-size: 14px;
}}
.system-messages {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
}}
.system-item {{
background: white;
padding: 12px;
margin-bottom: 10px;
border-radius: 8px;
border-left: 4px solid #28a745;
}}
.member-join {{
background: #e8f5e8;
padding: 10px;
border-radius: 5px;
margin-bottom: 5px;
font-size: 14px;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🤖 AI工具测评-年度陪跑</h1>
<p>群聊分析报告 | 数据时间:{esc(date_range)}</p>
</div>
<div class="stats-grid">
<div class="stat-card">
<h3>总消息数</h3>
<div class="value">{total_messages}</div>
</div>
<div class="stat-card">
<h3>活跃人数</h3>
<div class="value">{active_count}</div>
</div>
<div class="stat-card">
<h3>群成员</h3>
<div class="value">{total_members}</div>
</div>
<div class="stat-card">
<h3>活跃度</h3>
<div class="value">{activity_rate:.1f}%</div>
</div>
<div class="stat-card">
<h3>统计天数</h3>
<div class="value">{day_count}</div>
</div>
<div class="stat-card">
<h3>日均消息</h3>
<div class="value">{daily_average}</div>
</div>
</div>
<div class="section">
<h2>👥 活跃成员 TOP 15</h2>
<div class="participant-list">
"""]

    # Top 15 senders by message count.
    sorted_participants = sorted(
        analysis['participants'].items(), key=lambda x: x[1], reverse=True
    )[:15]
    for name, count in sorted_participants:
        parts.append(f"""
<div class="participant-item">
<span class="participant-name">{esc(name)}</span>
<span class="participant-count">{count}</span>
</div>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>📅 每日消息量</h2>
<div class="timeline">
""")

    # Messages per day, in chronological (string-sorted) order.
    for date, count in sorted(analysis['messages_by_day'].items()):
        parts.append(f"""
<div class="timeline-item">
<span>{date}</span>
<strong>{count} 条消息</strong>
</div>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>⏰ 每小时消息分布</h2>
<div class="hourly-chart">
""")

    # Messages per hour; bar width is relative to the busiest hour.
    sorted_hours = sorted(analysis['messages_by_hour'].items())
    max_count = max(count for _, count in sorted_hours) if sorted_hours else 1
    for hour, count in sorted_hours:
        percentage = (count / max_count) * 100 if max_count > 0 else 0
        parts.append(f"""
<div class="hourly-item">
<span>{hour}</span>
<div class="hour-bar" style="width: {percentage}%;"></div>
<strong>{count} 条</strong>
</div>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>💬 消息类型分布</h2>
<div class="type-chart">
""")

    # Message-type histogram.
    for msg_type, count in analysis['message_types'].items():
        parts.append(f"""
<div class="type-item">
<div class="type-name">{esc(msg_type)}</div>
<div class="type-count">{count}</div>
</div>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>🔥 热词 TOP 30</h2>
<div class="keyword-cloud">
""")

    # Hot keywords as (word, count) pairs.
    for keyword, count in analysis['hot_keywords']:
        parts.append(f"""
<span class="keyword-tag">{esc(keyword)} ({count})</span>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>👋 新成员加入</h2>
<div class="system-messages">
""")

    # Member-join notices, with a placeholder row when there are none.
    for join_msg in analysis['member_joins']:
        parts.append(f"""
<div class="member-join">{esc(join_msg)}</div>
""")
    if not analysis['member_joins']:
        parts.append("""
<div class="member-join">近期无新成员加入</div>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>📊 每日活动摘要</h2>
<div class="activity-summary">
""")

    # Per-day summary: the first five previews plus a "N more" line.
    for date, activities in sorted(analysis['daily_activity'].items()):
        parts.append(f"""
<div class="activity-day">
<h4>{date} - {len(activities)} 条消息</h4>
""")
        for activity in activities[:5]:
            # The sender name comes from chat data, so it is escaped like
            # every other user-supplied string in the report.
            parts.append(f"""
<div class="activity-item">
<span>{activity['time']} {esc(activity['sender'])}</span>
<span>{esc(activity['content'])}</span>
</div>
""")
        if len(activities) > 5:
            parts.append(f"""
<div style="text-align: center; padding: 10px; color: #666;">
还有 {len(activities) - 5} 条消息...
</div>
""")
        parts.append("""
</div>
""")
    parts.append("""
</div>
</div>
<div class="section">
<h2>💭 最近聊天记录(100条)</h2>
<div class="conversation-list">
""")

    # Recent conversation list.
    for conv in analysis['conversations']:
        time_str = conv.get('time', '')
        try:
            dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
            formatted_time = dt.strftime('%m-%d %H:%M')
        except (AttributeError, TypeError, ValueError):
            # Unparseable timestamps are shown as-is, but escaped because the
            # raw value originates from the chat export.
            formatted_time = esc(time_str)
        sender = esc(conv.get('sender', '未知'))
        content = esc(conv.get('content', ''))
        # System messages (type 10000) get a green accent.
        border_color = "#28a745" if conv.get('type') == 10000 else "#667eea"
        parts.append(f"""
<div class="conversation-item" style="border-left-color: {border_color};">
<div class="conversation-header">
<span class="conversation-sender">{sender}</span>
<span class="conversation-time">{formatted_time}</span>
</div>
<div class="conversation-content">{content}</div>
</div>
""")

    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    parts.append(f"""
</div>
</div>
<div class="footer">
<p>📊 报告生成时间:{generated_at}</p>
<p>🤖 由 Claude Code 自动生成 | 群聊ID: 58253211777@chatroom</p>
</div>
</div>
</body>
</html>
""")

    # Write the assembled document.
    html_content = ''.join(parts)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"HTML 报告已生成:{output_file}")
if __name__ == '__main__':
    # Script entry point: analyze the fixed export file in the current
    # working directory and write the HTML report next to it.
    file_path = 'AI工具测评_最近3天.json'

    # Bail out early when the export is missing. SystemExit is raised
    # directly because the exit() builtin is injected by the site module for
    # interactive use and is not guaranteed to exist in every runtime.
    if not os.path.exists(file_path):
        print("未找到数据文件")
        raise SystemExit(1)

    # Run the analysis.
    print("正在分析AI工具测评群聊数据...")
    analysis = analyze_ai_tool_review_chatlog(file_path)

    # Render the HTML report.
    print("正在生成 HTML 报告...")
    generate_html_report(analysis, 'AI工具测评_年度陪跑_分析报告.html')

    # Short console summary of the headline numbers.
    print("\n分析完成!")
    print(f"总消息数:{analysis['total_messages']}")
    print(f"活跃人数:{len(analysis['participants'])}")
    print(f"统计天数:{len(analysis['messages_by_day'])}")
    print(f"消息类型数:{len(analysis['message_types'])}")
    print(f"新成员加入:{len(analysis['member_joins'])}")