Agent Data Bridge

sandbox_mcp
src
sandbox_storage

script_ae039979a46e42b9a227eb7c92a7c231.py•3.92 KiB

import pandas as pd

# 读取数据
df = pd.read_parquet('df_sysaccesslogtestds_20260115T100453Z_b2d9ed105d9844c385e88baf84887285.parquet')

print("数据总行数:", len(df))
print("数据字段:", list(df.columns))
print("\n" + "="*80 + "\n")

# 1. 按ACTION统计访问量
action_counts = df['ACTION'].value_counts()
print("=== 按ACTION统计访问量 ===")
for action, count in action_counts.items():
    percent = (count / len(df) * 100)
    print(f"{action}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 2. 按LOGTYPE统计访问量
logtype_counts = df['LOGTYPE'].value_counts()
print("=== 按LOGTYPE统计访问量 ===")
for logtype, count in logtype_counts.items():
    percent = (count / len(df) * 100)
    print(f"{logtype}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 3. 按LOGTYPE和ACTION组合统计
grouped = df.groupby(['LOGTYPE', 'ACTION']).size()
print("=== 按LOGTYPE和ACTION组合统计 ===")
for (logtype, action), count in grouped.items():
    percent = (count / len(df) * 100)
    print(f"{logtype}-{action}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 4. 按IP地址统计
ip_counts = df['IP'].value_counts()
print("=== 按IP地址统计 ===")
for ip, count in ip_counts.items():
    percent = (count / len(df) * 100)
    print(f"{ip}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 5. 按用户统计
user_counts = df['LOGINID'].value_counts()
print("=== 按用户统计 ===")
for user, count in user_counts.items():
    percent = (count / len(df) * 100)
    print(f"{user}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 6. 时间分布分析
df['RECORDDATE'] = pd.to_datetime(df['RECORDDATE'])
df['日期'] = df['RECORDDATE'].dt.date
df['小时'] = df['RECORDDATE'].dt.hour

# 按日期统计
date_counts = df['日期'].value_counts().sort_index()
print("=== 按日期统计 ===")
for date, count in date_counts.items():
    print(f"{date}: {count}次")
print("\n" + "="*80 + "\n")

# 按小时统计
hour_counts = df['小时'].value_counts().sort_index()
print("=== 按小时统计 ===")
for hour, count in hour_counts.items():
    percent = (count / len(df) * 100)
    print(f"{hour}:00-{hour+1}:00: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 7. 生成详细报告
print("=== 系统访问量详细分析报告 ===")
print(f"总访问量: {len(df)} 次")
print(f"唯一ACTION数量: {df['ACTION'].nunique()} 种")
print(f"唯一LOGTYPE数量: {df['LOGTYPE'].nunique()} 种")
print(f"唯一用户数量: {df['LOGINID'].nunique()} 个")
print(f"唯一IP地址数量: {df['IP'].nunique()} 个")
print(f"数据时间范围: {df['RECORDDATE'].min()} 至 {df['RECORDDATE'].max()}")
print(f"数据天数: {df['日期'].nunique()} 天")

# 8. 关键指标
print("\n=== 关键指标 ===")
# 前3个ACTION占比
top3_actions = action_counts.head(3)
print("前3个ACTION:")
for i, (action, count) in enumerate(top3_actions.items(), 1):
    percent = (count / len(df) * 100)
    print(f"  {i}. {action}: {count}次 ({percent:.2f}%)")

# 最活跃的时间段
top_hour = hour_counts.idxmax()
top_hour_count = hour_counts.max()
top_hour_percent = (top_hour_count / len(df) * 100)
print(f"\n最活跃的时间段: {top_hour}:00-{top_hour+1}:00, {top_hour_count}次 ({top_hour_percent:.2f}%)")

# 最活跃的用户
top_user = user_counts.idxmax()
top_user_count = user_counts.max()
top_user_percent = (top_user_count / len(df) * 100)
print(f"最活跃的用户: {top_user}, {top_user_count}次 ({top_user_percent:.2f}%)")

# 帕累托分析
print("\n=== 帕累托分析 ===")
cumulative_count = 0
cumulative_percent = 0
for i, (action, count) in enumerate(action_counts.items(), 1):
    cumulative_count += count
    cumulative_percent = (cumulative_count / len(df) * 100)
    percent = (count / len(df) * 100)
    print(f"{action}: {count}次 ({percent:.2f}%), 累计: {cumulative_percent:.2f}%")
    if cumulative_percent >= 80:
        print(f"达到80%累计占比的ACTION数量: {i}个")
        break

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/AriaScarlet132/sandbox_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

script_ae039979a46e42b9a227eb7c92a7c231.py•3.92 KiB

import pandas as pd

# 读取数据
df = pd.read_parquet('df_sysaccesslogtestds_20260115T100453Z_b2d9ed105d9844c385e88baf84887285.parquet')

print("数据总行数:", len(df))
print("数据字段:", list(df.columns))
print("\n" + "="*80 + "\n")

# 1. 按ACTION统计访问量
action_counts = df['ACTION'].value_counts()
print("=== 按ACTION统计访问量 ===")
for action, count in action_counts.items():
    percent = (count / len(df) * 100)
    print(f"{action}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 2. 按LOGTYPE统计访问量
logtype_counts = df['LOGTYPE'].value_counts()
print("=== 按LOGTYPE统计访问量 ===")
for logtype, count in logtype_counts.items():
    percent = (count / len(df) * 100)
    print(f"{logtype}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 3. 按LOGTYPE和ACTION组合统计
grouped = df.groupby(['LOGTYPE', 'ACTION']).size()
print("=== 按LOGTYPE和ACTION组合统计 ===")
for (logtype, action), count in grouped.items():
    percent = (count / len(df) * 100)
    print(f"{logtype}-{action}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 4. 按IP地址统计
ip_counts = df['IP'].value_counts()
print("=== 按IP地址统计 ===")
for ip, count in ip_counts.items():
    percent = (count / len(df) * 100)
    print(f"{ip}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 5. 按用户统计
user_counts = df['LOGINID'].value_counts()
print("=== 按用户统计 ===")
for user, count in user_counts.items():
    percent = (count / len(df) * 100)
    print(f"{user}: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 6. 时间分布分析
df['RECORDDATE'] = pd.to_datetime(df['RECORDDATE'])
df['日期'] = df['RECORDDATE'].dt.date
df['小时'] = df['RECORDDATE'].dt.hour

# 按日期统计
date_counts = df['日期'].value_counts().sort_index()
print("=== 按日期统计 ===")
for date, count in date_counts.items():
    print(f"{date}: {count}次")
print("\n" + "="*80 + "\n")

# 按小时统计
hour_counts = df['小时'].value_counts().sort_index()
print("=== 按小时统计 ===")
for hour, count in hour_counts.items():
    percent = (count / len(df) * 100)
    print(f"{hour}:00-{hour+1}:00: {count}次 ({percent:.2f}%)")
print("\n" + "="*80 + "\n")

# 7. 生成详细报告
print("=== 系统访问量详细分析报告 ===")
print(f"总访问量: {len(df)} 次")
print(f"唯一ACTION数量: {df['ACTION'].nunique()} 种")
print(f"唯一LOGTYPE数量: {df['LOGTYPE'].nunique()} 种")
print(f"唯一用户数量: {df['LOGINID'].nunique()} 个")
print(f"唯一IP地址数量: {df['IP'].nunique()} 个")
print(f"数据时间范围: {df['RECORDDATE'].min()} 至 {df['RECORDDATE'].max()}")
print(f"数据天数: {df['日期'].nunique()} 天")

# 8. 关键指标
print("\n=== 关键指标 ===")
# 前3个ACTION占比
top3_actions = action_counts.head(3)
print("前3个ACTION:")
for i, (action, count) in enumerate(top3_actions.items(), 1):
    percent = (count / len(df) * 100)
    print(f"  {i}. {action}: {count}次 ({percent:.2f}%)")

# 最活跃的时间段
top_hour = hour_counts.idxmax()
top_hour_count = hour_counts.max()
top_hour_percent = (top_hour_count / len(df) * 100)
print(f"\n最活跃的时间段: {top_hour}:00-{top_hour+1}:00, {top_hour_count}次 ({top_hour_percent:.2f}%)")

# 最活跃的用户
top_user = user_counts.idxmax()
top_user_count = user_counts.max()
top_user_percent = (top_user_count / len(df) * 100)
print(f"最活跃的用户: {top_user}, {top_user_count}次 ({top_user_percent:.2f}%)")

# 帕累托分析
print("\n=== 帕累托分析 ===")
cumulative_count = 0
cumulative_percent = 0
for i, (action, count) in enumerate(action_counts.items(), 1):
    cumulative_count += count
    cumulative_percent = (cumulative_count / len(df) * 100)
    percent = (count / len(df) * 100)
    print(f"{action}: {count}次 ({percent:.2f}%), 累计: {cumulative_percent:.2f}%")
    if cumulative_percent >= 80:
        print(f"达到80%累计占比的ACTION数量: {i}个")
        break