#!/usr/bin/env python3
"""
Chatlog Data Analysis Example
This script demonstrates how to analyze chatlog data
"""
import json
import sys
import argparse
from datetime import datetime
from collections import Counter
from typing import Dict, List, Any
def load_chatlog(file_path: str) -> List[Dict[str, Any]]:
    """Load chatlog messages from a JSON file.

    Args:
        file_path: Path to a UTF-8 encoded JSON file whose top-level value
            is an array of message objects.

    Returns:
        The parsed list of messages.

    Note:
        This function does not raise on bad input. It prints an error and
        terminates the process with exit status 1 when the file cannot be
        read, cannot be decoded/parsed, or does not contain a JSON array.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"✗ Error loading {file_path}: {e}")
        sys.exit(1)

    # The analyzers iterate the result and call .get() on every element, so
    # anything other than a list (e.g. a wrapping JSON object) must be
    # rejected here rather than failing later with an obscure traceback.
    if not isinstance(data, list):
        print(
            f"✗ Error loading {file_path}: expected a JSON array of messages, "
            f"got {type(data).__name__}"
        )
        sys.exit(1)

    print(f"✓ Loaded {len(data)} messages from {file_path}")
    return data
def analyze_participants(messages: List[Dict[str, Any]]) -> Dict[str, int]:
    """Count how many messages each participant sent.

    Args:
        messages: Message dicts; the sender is read from the 'senderName' key.

    Returns:
        A dict mapping sender name to message count, in first-seen order.
        Messages whose 'senderName' is missing, null, or empty are all
        counted under 'Unknown'.
    """
    # `or 'Unknown'` (rather than a .get() default) also covers an explicit
    # null/empty sender, which would otherwise become a None or '' key.
    participants = Counter(
        msg.get('senderName') or 'Unknown' for msg in messages
    )
    return dict(participants)
def analyze_message_types(messages: List[Dict[str, Any]]) -> Dict[str, int]:
    """Tally messages by a human-readable type label.

    The numeric 'type' field of each message is translated through a fixed
    lookup table; codes that are not in the table are reported as
    'Type<code>'. A message without a 'type' field is treated as code 0.

    Returns:
        A dict mapping type label to message count, in first-seen order.
    """
    labels = {
        1: 'Text',
        3: 'Image',
        47: 'Emoji',
        49: 'Share',
        34: 'Voice',
        43: 'Video',
        10000: 'System',
    }

    tally = Counter()
    for entry in messages:
        code = entry.get('type', 0)
        # Fall back to a synthetic label for codes missing from the table.
        label = labels[code] if code in labels else f'Type{code}'
        tally[label] += 1
    return dict(tally)
def analyze_daily_activity(messages: List[Dict[str, Any]]) -> Dict[str, int]:
    """Count messages per calendar day.

    Each message's 'time' field is parsed as an ISO 8601 timestamp (a
    trailing 'Z' is accepted as UTC). The day is taken from the timestamp
    as written, i.e. in the timestamp's own UTC offset.

    Args:
        messages: Message dicts carrying an ISO 8601 string under 'time'.

    Returns:
        A dict mapping 'YYYY-MM-DD' to message count, sorted by date.
        Messages whose 'time' is missing, empty, not a string, or not
        parseable are skipped (best effort).
    """
    daily = Counter()
    for msg in messages:
        time_str = msg.get('time', '')
        if not time_str or not isinstance(time_str, str):
            continue
        try:
            # fromisoformat() on older Pythons rejects the 'Z' suffix.
            dt = datetime.fromisoformat(time_str.replace('Z', '+00:00'))
        except ValueError:
            # Unparseable timestamp: skip this message only.
            continue
        # date.isoformat() always yields zero-padded YYYY-MM-DD.
        daily[dt.date().isoformat()] += 1
    # ISO dates sort lexicographically in chronological order.
    return dict(sorted(daily.items()))
# Punctuation trimmed from both ends of every token: ASCII marks first,
# then their full-width / CJK counterparts.
_KEYWORD_STRIP_CHARS = (
    ",.!?;:\"'()"
    ",。!?;:“”‘’()【】《》、"
)

# URL fragments that should never be reported as keywords.
_KEYWORD_STOPWORDS = frozenset({'https', 'http', 'com', 'www'})


def analyze_keywords(messages: List[Dict[str, Any]], limit: int = 20) -> List[tuple]:
    """Extract the most frequent words from text messages.

    Only messages whose 'type' equals 1 are considered. Their 'content' is
    split on whitespace; each token has surrounding punctuation trimmed and
    is lower-cased. Tokens of two characters or fewer and URL fragments
    ('http', 'https', 'com', 'www') are ignored.

    Args:
        messages: Message dicts with 'type' and 'content' keys.
        limit: Maximum number of keywords to return.

    Returns:
        A list of (word, count) tuples, most frequent first.
    """
    keywords = Counter()
    for msg in messages:
        if msg.get('type') != 1:  # Text messages only
            continue
        content = msg.get('content', '')
        if not isinstance(content, str):
            # An explicit null (or other non-text) content has no words.
            continue
        for token in content.split():
            word = token.strip(_KEYWORD_STRIP_CHARS).lower()
            if len(word) > 2 and word not in _KEYWORD_STOPWORDS:
                keywords[word] += 1
    return keywords.most_common(limit)
def print_analysis(data: Dict[str, Any]):
    """Write the analysis report to standard output.

    Args:
        data: Report dict providing 'total_messages', 'participants',
            'date_range', 'top_participants' (name/count pairs),
            'message_types' and 'daily_activity' (label -> count mappings),
            and 'keywords' (word/count pairs).
    """
    rule = "=" * 60

    # Banner
    print("", rule, sep="\n")
    print("Chatlog Analysis Report")
    print(rule)

    # Basic stats
    print("\n📊 Basic Statistics:")
    print(" Total Messages: {}".format(data['total_messages']))
    print(" Unique Participants: {}".format(len(data['participants'])))
    print(" Date Range: {}".format(data['date_range']))

    # Top participants, ranked from 1
    print("\n👥 Top 10 Active Participants:")
    rank = 0
    for sender, sent in data['top_participants']:
        rank += 1
        print(f" {rank:2d}. {sender}: {sent} messages")

    # Message types with their share of all messages
    print("\n💬 Message Types:")
    for label, amount in data['message_types'].items():
        share = (amount / data['total_messages']) * 100
        print(f" {label}: {amount} ({share:.1f}%)")

    # Daily activity
    print("\n📅 Daily Activity:")
    for day, amount in data['daily_activity'].items():
        print(f" {day}: {amount} messages")

    # Keywords
    print("\n🔥 Top Keywords:")
    for term, hits in data['keywords']:
        print(f" {term}: {hits}")

    print("", rule, sep="\n")
def generate_html_report(data: Dict[str, Any], output_file: str):
    """Render the analysis as a standalone HTML page and write it to disk.

    Every value that originates from the chat log (participant names,
    message-type labels, dates, keywords) is HTML-escaped before being
    embedded, so markup inside messages cannot alter or script the report.

    Args:
        data: Report dict providing 'total_messages', 'participants',
            'date_range', 'top_participants' (name/count pairs),
            'message_types' and 'daily_activity' (label -> count mappings),
            and 'keywords' (word/count pairs).
        output_file: Destination path; written as UTF-8, overwriting any
            existing file.
    """
    import html  # local import keeps this function self-contained

    def esc(value: Any) -> str:
        """Return *value* as text that is safe to embed in HTML."""
        return html.escape(str(value))

    total = data['total_messages']

    def pct(count: int) -> float:
        """Return *count* as a percentage of all messages (0 if none)."""
        return (count / total) * 100 if total else 0.0

    # Collect fragments and join once instead of repeated string +=.
    parts = [f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chatlog Analysis Report</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; margin: 40px; }}
.header {{ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; border-radius: 10px; }}
.section {{ margin: 30px 0; }}
.section h2 {{ border-left: 4px solid #667eea; padding-left: 10px; }}
.stat-grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 20px; }}
.stat-card {{ background: #f8f9fa; padding: 20px; border-radius: 8px; }}
.stat-value {{ font-size: 2em; font-weight: bold; color: #667eea; }}
table {{ width: 100%; border-collapse: collapse; margin: 20px 0; }}
th, td {{ padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }}
th {{ background-color: #667eea; color: white; }}
tr:hover {{ background-color: #f5f5f5; }}
.keyword-tag {{ display: inline-block; background: #667eea; color: white; padding: 5px 10px; margin: 5px; border-radius: 15px; }}
</style>
</head>
<body>
<div class="header">
<h1>📊 Chatlog Analysis Report</h1>
<p>Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
</div>
<div class="section">
<h2>📈 Summary Statistics</h2>
<div class="stat-grid">
<div class="stat-card">
<h3>Total Messages</h3>
<div class="stat-value">{esc(total)}</div>
</div>
<div class="stat-card">
<h3>Participants</h3>
<div class="stat-value">{len(data['participants'])}</div>
</div>
<div class="stat-card">
<h3>Date Range</h3>
<div class="stat-value">{esc(data['date_range'])}</div>
</div>
</div>
</div>
<div class="section">
<h2>👥 Top Active Participants</h2>
<table>
<thead>
<tr><th>Rank</th><th>Name</th><th>Messages</th><th>Percentage</th></tr>
</thead>
<tbody>
"""]

    # Top participants table rows
    for rank, (name, count) in enumerate(data['top_participants'], 1):
        parts.append(
            "\n<tr>\n"
            f"<td>{rank}</td>\n"
            f"<td>{esc(name)}</td>\n"
            f"<td>{esc(count)}</td>\n"
            f"<td>{pct(count):.1f}%</td>\n"
            "</tr>\n"
        )

    parts.append("""
</tbody>
</table>
</div>
<div class="section">
<h2>💬 Message Types</h2>
<table>
<thead>
<tr><th>Type</th><th>Count</th><th>Percentage</th></tr>
</thead>
<tbody>
""")

    # Message type table rows
    for msg_type, count in data['message_types'].items():
        parts.append(
            "\n<tr>\n"
            f"<td>{esc(msg_type)}</td>\n"
            f"<td>{esc(count)}</td>\n"
            f"<td>{pct(count):.1f}%</td>\n"
            "</tr>\n"
        )

    parts.append("""
</tbody>
</table>
</div>
<div class="section">
<h2>📅 Daily Activity</h2>
<table>
<thead>
<tr><th>Date</th><th>Messages</th></tr>
</thead>
<tbody>
""")

    # Daily activity table rows
    for date, count in data['daily_activity'].items():
        parts.append(
            "\n<tr>\n"
            f"<td>{esc(date)}</td>\n"
            f"<td>{esc(count)}</td>\n"
            "</tr>\n"
        )

    parts.append("""
</tbody>
</table>
</div>
<div class="section">
<h2>🔥 Top Keywords</h2>
<div>
""")

    # Keyword tags
    for keyword, count in data['keywords']:
        parts.append(
            f'\n<span class="keyword-tag">{esc(keyword)} ({esc(count)})</span>\n'
        )

    parts.append("""
</div>
</div>
</body>
</html>
""")

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(parts))
    print(f"\n✓ HTML report saved to: {output_file}")
def main():
    """Command-line entry point: parse arguments, analyze, and report.

    Loads the chatlog named on the command line, runs every analyzer,
    prints the text report, and (unless --no-html is given) writes the
    HTML report. Exits with status 1 when the file holds no messages.
    """
    cli = argparse.ArgumentParser(description='Analyze chatlog data')
    cli.add_argument('input_file', help='Input JSON file path')
    cli.add_argument('--output', '-o', help='Output HTML file path')
    cli.add_argument('--keywords', '-k', type=int, default=20, help='Number of keywords to extract')
    cli.add_argument('--no-html', action='store_true', help='Skip HTML report generation')
    opts = cli.parse_args()

    # Load data; an empty log leaves nothing to analyze.
    messages = load_chatlog(opts.input_file)
    if not messages:
        print("✗ No messages found in the file")
        sys.exit(1)

    # Run each analyzer over the full message list.
    participants = analyze_participants(messages)
    message_types = analyze_message_types(messages)
    daily_activity = analyze_daily_activity(messages)
    keywords = analyze_keywords(messages, opts.keywords)

    # Ten most active senders, highest count first.
    ranked = sorted(participants.items(), key=lambda pair: pair[1], reverse=True)
    top_participants = ranked[:10]

    # Earliest and latest day seen, if any message had a usable timestamp.
    if daily_activity:
        date_range = f"{min(daily_activity)} to {max(daily_activity)}"
    else:
        date_range = 'N/A'

    report = {
        'total_messages': len(messages),
        'participants': participants,
        'top_participants': top_participants,
        'message_types': message_types,
        'daily_activity': daily_activity,
        'keywords': keywords,
        'date_range': date_range,
    }

    print_analysis(report)

    if opts.no_html:
        return
    generate_html_report(report, opts.output or 'chatlog_analysis_report.html')
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()