#!/usr/bin/env python3
import json
import os
from datetime import datetime, timedelta
from collections import Counter
import html
# Numeric message-type codes mapped to the labels shown in the report.
_MESSAGE_TYPE_LABELS = {1: '文本', 3: '图片', 47: '表情', 49: '分享', 34: '语音', 43: '视频'}

# Characters trimmed from both ends of a token before it is counted.
# The second literal adds the full-width / curly variants (written as escapes
# so they survive editors that normalise punctuation).
_KEYWORD_STRIP_CHARS = (
    ',。!?;:"\'()【】《》'
    '\uff0c\uff01\uff1f\uff1b\uff1a\u201c\u201d\u2018\u2019\uff08\uff09'
)

# URL fragments that would otherwise dominate the keyword ranking.
_KEYWORD_STOPWORDS = frozenset({'https', 'http', 'com', 'www'})


def _message_date_key(time_str):
    """Return the ``YYYY-MM-DD`` day of an ISO-8601 timestamp, or ``None``.

    ``None`` is returned for missing, non-string or unparseable values so the
    caller can simply skip the message in the per-day statistics.
    """
    if not isinstance(time_str, str) or not time_str:
        return None
    try:
        # Replace a trailing "Z" because ``fromisoformat`` only accepts it on
        # newer Python versions.
        parsed = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
    except ValueError:
        return None
    return parsed.strftime('%Y-%m-%d')


def analyze_chatlog(file_path):
    """Load a JSON chat export and compute summary statistics.

    Args:
        file_path: Path to a UTF-8 JSON file containing a list of message
            objects. The keys read from each message are ``senderName``,
            ``time``, ``type`` and ``content``.

    Returns:
        A dict with the keys:
            ``total_messages``: number of entries in the file.
            ``participants``: dict mapping sender name to message count.
            ``messages_by_day``: ``Counter`` keyed by ``YYYY-MM-DD``.
            ``message_types``: ``Counter`` keyed by type label.
            ``hot_keywords``: list of ``(word, count)`` pairs, top 20.
            ``data``: the parsed message list itself.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    participants = {}
    messages_by_day = Counter()
    message_types = Counter()
    hot_keywords = Counter()

    for msg in data:
        # A JSON null sender is treated the same as a missing one.
        sender = msg.get('senderName')
        if sender is None:
            sender = '未知'
        participants[sender] = participants.get(sender, 0) + 1

        # Messages without a usable timestamp are left out of the daily tally
        # but still counted everywhere else.
        date_key = _message_date_key(msg.get('time'))
        if date_key is not None:
            messages_by_day[date_key] += 1

        msg_type = msg.get('type', 0)
        message_types[_MESSAGE_TYPE_LABELS.get(msg_type, f'类型{msg_type}')] += 1

        # Keywords come from text messages only (type 1); a null or
        # non-string content is skipped instead of crashing on ``.split()``.
        content = msg.get('content')
        if msg_type == 1 and isinstance(content, str):
            # Naive whitespace tokenisation; no real word segmentation.
            for token in content.split():
                word = token.strip(_KEYWORD_STRIP_CHARS).lower()
                if len(word) > 2 and word not in _KEYWORD_STOPWORDS:
                    hot_keywords[word] += 1

    return {
        'total_messages': len(data),
        'participants': participants,
        'messages_by_day': messages_by_day,
        'message_types': message_types,
        'hot_keywords': hot_keywords.most_common(20),
        'data': data,
    }
# Group name shown in the page title and header when the caller gives none.
_DEFAULT_REPORT_TITLE = '【正式班】AI编程社团-风变✖Ai产品'


def _escape_text(value):
    """Return *value* as HTML-escaped text; ``None`` becomes an empty string."""
    return html.escape('' if value is None else str(value))


def _format_message_time(time_str):
    """Return an HTML-safe ``MM-DD HH:MM`` label for a message timestamp.

    Values that cannot be parsed as ISO-8601 are shown as-is, but escaped so
    they cannot inject markup into the report.
    """
    if isinstance(time_str, str):
        try:
            parsed = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
        except ValueError:
            pass
        else:
            return parsed.strftime('%m-%d %H:%M')
    return _escape_text(time_str)


def generate_html_report(analysis, output_file, *, title=_DEFAULT_REPORT_TITLE, date_range=None):
    """Render the analysis result as a standalone HTML report file.

    Args:
        analysis: Dict with the keys ``total_messages``, ``participants``,
            ``messages_by_day``, ``message_types``, ``hot_keywords`` and
            ``data`` (the shape returned by ``analyze_chatlog``).
        output_file: Path of the HTML file to write (UTF-8, overwritten).
        title: Group name used in the ``<title>`` and the page header.
        date_range: Text shown after "数据时间" in the header. When omitted
            it is derived from the first and last key of
            ``messages_by_day``; when there are no dated messages the date
            part of the header is left out.

    Raises:
        OSError: If ``output_file`` cannot be written.
        KeyError: If ``analysis`` lacks one of the keys listed above.
    """
    total_messages = analysis['total_messages']
    participant_count = len(analysis['participants'])
    days = sorted(analysis['messages_by_day'].items())
    day_count = len(days)
    # Floor division: the card shows a whole number of messages per day.
    daily_average = total_messages // day_count if day_count else 0

    # Day keys are YYYY-MM-DD strings, so the sorted ends are the date range.
    if date_range is None and days:
        date_range = f"{days[0][0]} 至 {days[-1][0]}"
    subtitle = '群聊分析报告'
    if date_range:
        subtitle += f" | 数据时间:{_escape_text(date_range)}"

    page_title = _escape_text(title)

    # The page is collected as fragments and joined once at the end.
    # Literal CSS braces are doubled because the first fragment is an f-string.
    parts = [f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{page_title} 群聊分析报告</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif;
line-height: 1.6;
color: #333;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
}}
.container {{
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 20px 60px rgba(0,0,0,0.3);
overflow: hidden;
}}
.header {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 40px;
text-align: center;
}}
.header h1 {{
font-size: 32px;
margin-bottom: 10px;
}}
.header p {{
font-size: 16px;
opacity: 0.9;
}}
.stats-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 20px;
padding: 40px;
background: #f8f9fa;
}}
.stat-card {{
background: white;
padding: 25px;
border-radius: 15px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
transition: transform 0.3s;
}}
.stat-card:hover {{
transform: translateY(-5px);
}}
.stat-card h3 {{
color: #667eea;
font-size: 14px;
text-transform: uppercase;
margin-bottom: 10px;
}}
.stat-card .value {{
font-size: 36px;
font-weight: bold;
color: #333;
}}
.section {{
padding: 40px;
}}
.section h2 {{
font-size: 24px;
margin-bottom: 25px;
color: #333;
border-left: 5px solid #667eea;
padding-left: 15px;
}}
.participant-list {{
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 15px;
}}
.participant-item {{
background: #f8f9fa;
padding: 15px;
border-radius: 10px;
display: flex;
justify-content: space-between;
align-items: center;
}}
.participant-name {{
font-weight: 500;
}}
.participant-count {{
background: #667eea;
color: white;
padding: 4px 12px;
border-radius: 20px;
font-size: 14px;
}}
.timeline {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
}}
.timeline-item {{
display: flex;
justify-content: space-between;
padding: 12px;
background: white;
margin-bottom: 10px;
border-radius: 8px;
}}
.keyword-cloud {{
display: flex;
flex-wrap: wrap;
gap: 10px;
}}
.keyword-tag {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 8px 16px;
border-radius: 20px;
font-size: 14px;
}}
.message-list {{
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
max-height: 600px;
overflow-y: auto;
}}
.message-item {{
background: white;
padding: 15px;
margin-bottom: 15px;
border-radius: 10px;
border-left: 4px solid #667eea;
}}
.message-header {{
display: flex;
justify-content: space-between;
margin-bottom: 10px;
font-size: 14px;
color: #666;
}}
.message-sender {{
font-weight: bold;
color: #667eea;
}}
.message-content {{
color: #333;
line-height: 1.8;
}}
.message-time {{
font-size: 12px;
color: #999;
}}
.type-chart {{
display: flex;
gap: 15px;
flex-wrap: wrap;
}}
.type-item {{
background: #f8f9fa;
padding: 15px 20px;
border-radius: 10px;
text-align: center;
}}
.type-name {{
font-weight: bold;
color: #667eea;
}}
.type-count {{
font-size: 24px;
margin-top: 5px;
}}
.footer {{
background: #f8f9fa;
padding: 20px;
text-align: center;
color: #666;
font-size: 14px;
}}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>📱 {page_title}</h1>
<p>{subtitle}</p>
</div>
<div class="stats-grid">
<div class="stat-card">
<h3>总消息数</h3>
<div class="value">{total_messages}</div>
</div>
<div class="stat-card">
<h3>活跃人数</h3>
<div class="value">{participant_count}</div>
</div>
<div class="stat-card">
<h3>统计天数</h3>
<div class="value">{day_count}</div>
</div>
<div class="stat-card">
<h3>日均消息</h3>
<div class="value">{daily_average}</div>
</div>
</div>
<div class="section">
<h2>👥 活跃成员 TOP 10</h2>
<div class="participant-list">
"""]

    # Ten most active senders, highest message count first.
    top_participants = sorted(
        analysis['participants'].items(), key=lambda item: item[1], reverse=True
    )[:10]
    for name, count in top_participants:
        parts.append(f"""
<div class="participant-item">
<span class="participant-name">{_escape_text(name)}</span>
<span class="participant-count">{count}</span>
</div>
""")

    parts.append("""
</div>
</div>
<div class="section">
<h2>📅 每日消息量</h2>
<div class="timeline">
""")

    # Per-day message counts in chronological order.
    for date, count in days:
        parts.append(f"""
<div class="timeline-item">
<span>{_escape_text(date)}</span>
<strong>{count} 条消息</strong>
</div>
""")

    parts.append("""
</div>
</div>
<div class="section">
<h2>💬 消息类型分布</h2>
<div class="type-chart">
""")

    # Message-type distribution, in the order the types were first seen.
    for msg_type, count in analysis['message_types'].items():
        parts.append(f"""
<div class="type-item">
<div class="type-name">{_escape_text(msg_type)}</div>
<div class="type-count">{count}</div>
</div>
""")

    parts.append("""
</div>
</div>
<div class="section">
<h2>🔥 热词 TOP 20</h2>
<div class="keyword-cloud">
""")

    # Keyword tags; ``hot_keywords`` is already a ranked (word, count) list.
    for keyword, count in analysis['hot_keywords']:
        parts.append(f"""
<span class="keyword-tag">{_escape_text(keyword)} ({count})</span>
""")

    parts.append("""
</div>
</div>
<div class="section">
<h2>💭 聊天记录精选</h2>
<div class="message-list">
""")

    # Show the last 50 messages; slicing already copes with shorter lists.
    for msg in analysis['data'][-50:]:
        sender = msg.get('senderName')
        if sender is None:
            sender = '未知'
        parts.append(f"""
<div class="message-item">
<div class="message-header">
<span class="message-sender">{_escape_text(sender)}</span>
<span class="message-time">{_format_message_time(msg.get('time', ''))}</span>
</div>
<div class="message-content">{_escape_text(msg.get('content', ''))}</div>
</div>
""")

    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    parts.append(f"""
</div>
</div>
<div class="footer">
<p>📊 报告生成时间:{generated_at}</p>
<p>🤖 由 Claude Code 自动生成</p>
</div>
</div>
</body>
</html>
""")

    # Write the assembled page to disk.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(parts))
    print(f"HTML 报告已生成:{output_file}")
if __name__ == '__main__':
    # Step 1: load the exported chat log and compute the statistics.
    print("正在分析聊天记录...")
    report_data = analyze_chatlog('chatlog_data.json')

    # Step 2: render the statistics as a standalone HTML page.
    print("正在生成 HTML 报告...")
    generate_html_report(report_data, 'chatlog_report.html')

    # Step 3: echo the headline numbers to the console.
    print("\n分析完成!")
    summary = (
        ("总消息数", report_data['total_messages']),
        ("活跃人数", len(report_data['participants'])),
        ("统计天数", len(report_data['messages_by_day'])),
    )
    for label, value in summary:
        print(f"{label}:{value}")