simple_hit_rate_test.py•3.8 kB
#!/usr/bin/env python3
"""
简单的记忆命中率测试
通过MCP接口测试记忆检索效果
"""
import time
import json
from pathlib import Path
def test_memory_hit_rate():
    """Record the Feishu memory hit-rate test cases and print manual test steps.

    No retrieval is performed here: the function only prints each predefined
    query, stores the test cases as JSON under
    ``./memos_data/hit_rate_test_cases.json`` (relative to the current working
    directory), and then prints the queries again together with the
    evaluation criteria so they can be executed and scored by hand.

    Returns:
        list[dict]: One record per query with the keys ``"query"``,
        ``"category"``, ``"expected_keywords"`` and ``"timestamp"``
        (``time.time()`` at the moment the case was recorded).

    Raises:
        OSError: If the output directory or file cannot be created/written.
    """
    print("🎯 MemOS 记忆命中率测试")
    print("=" * 50)

    # Feishu-related test queries.
    test_queries = [
        {
            "query": "飞书MCP配置",
            "expected_keywords": ["MCP", "配置", "token", "权限"],
            "category": "技术配置",
        },
        {
            "query": "飞书文档管理",
            "expected_keywords": ["文档", "管理", "协作", "权限"],
            "category": "功能使用",
        },
        {
            "query": "飞书API调用",
            "expected_keywords": ["API", "调用", "token", "权限"],
            "category": "技术开发",
        },
        {
            "query": "飞书知识库搜索",
            "expected_keywords": ["知识库", "搜索", "wiki", "节点"],
            "category": "功能使用",
        },
        {
            "query": "飞书OAuth认证",
            "expected_keywords": ["OAuth", "认证", "授权", "token"],
            "category": "技术配置",
        },
    ]

    results = []
    for i, test_case in enumerate(test_queries, 1):
        print(f"\n📋 测试 {i}/{len(test_queries)}: {test_case['query']}")

        # The query itself is executed manually; here we only describe it.
        print(f" 🔍 查询: {test_case['query']}")
        print(f" 📂 类别: {test_case['category']}")
        print(f" 🎯 期望关键词: {', '.join(test_case['expected_keywords'])}")

        # Record the test case.
        results.append({
            "query": test_case["query"],
            "category": test_case["category"],
            "expected_keywords": test_case["expected_keywords"],
            "timestamp": time.time(),
        })
        print(" ✅ 测试用例已记录")

    # Save the test cases. The data directory may not exist yet (e.g. on a
    # fresh checkout), so create it first instead of failing in open().
    results_file = Path("./memos_data/hit_rate_test_cases.json")
    results_file.parent.mkdir(parents=True, exist_ok=True)
    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\n📄 测试用例已保存: {results_file}")
    print("\n🔬 现在请手动执行以下查询来验证命中率:")
    print("=" * 50)

    # Repeat the queries as a compact checklist for the manual run.
    for i, test_case in enumerate(test_queries, 1):
        print(f"\n{i}. 查询: {test_case['query']}")
        print(f" 期望关键词: {', '.join(test_case['expected_keywords'])}")
        print(f" 类别: {test_case['category']}")

    print("\n📊 评估标准:")
    print("- 🎯 命中率 = 返回结果中包含期望关键词的比例")
    print("- ⏱️ 响应时间 = 查询执行时间")
    print("- 🎚️ 相关性 = 返回内容与查询意图的匹配度")

    return results
def analyze_manual_results():
    """Print the guide for analysing the manually collected query results.

    The guide lists what to record for each query, how the metrics are
    computed from those records, and which optimisation to consider when a
    metric falls below its threshold. Nothing is computed or returned.
    """
    guide_lines = (
        # Header.
        "\n🔍 手动分析指南:",
        "=" * 30,
        # Step 1: what to record for every query.
        "\n1. 对每个查询,记录以下信息:",
        " - 返回的记忆条数",
        " - 包含期望关键词的记忆数量",
        " - 查询响应时间",
        " - 内容相关性评分 (1-5分)",
        # Step 2: how the metrics are derived.
        "\n2. 计算指标:",
        " - 命中率 = 匹配记忆数 / 总返回记忆数",
        " - 覆盖率 = 匹配关键词数 / 期望关键词数",
        " - 平均相关性 = 所有记忆相关性评分的平均值",
        # Step 3: suggested follow-ups per threshold.
        "\n3. 优化建议:",
        " - 命中率 < 60%: 考虑优化embedding模型",
        " - 响应时间 > 2s: 考虑优化索引结构",
        " - 相关性 < 3分: 考虑改进记忆标签和内容质量",
    )
    # One print of the joined text emits the same bytes as one print per line.
    print("\n".join(guide_lines))
if __name__ == "__main__":
    # Script entry point: record the test cases first, then print the
    # manual-analysis guide.
    for run_step in (test_memory_hit_rate, analyze_manual_results):
        run_step()