#!/usr/bin/env python3
"""
MemOS 记忆命中率分析器
评估记忆检索的准确性和相关性
"""
import json
import time
import statistics
from pathlib import Path
from typing import List, Dict, Any
from advanced_examples import AdvancedMemOS
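# Assumed interface (inferred from how this module uses it): AdvancedMemOS exposes
# query_memories(query, max_memories=N) and returns a list of memory dicts that
# carry at least a "content" string and a "tags" list of strings. If your wrapper
# differs, adapt analyze_single_query() and _analyze_results() accordingly.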
class MemoryHitRateAnalyzer:
"""记忆命中率分析器"""
def __init__(self, data_dir: str = "./memos_data"):
self.data_dir = Path(data_dir)
self.results_dir = self.data_dir / "analysis_results"
        self.results_dir.mkdir(parents=True, exist_ok=True)
        # Initialize the MemOS wrapper
self.memos = AdvancedMemOS(str(self.data_dir))
        # Test case configuration
self.test_cases = self._load_test_cases()
def _load_test_cases(self) -> List[Dict[str, Any]]:
"""加载测试用例"""
# 飞书相关测试用例
feishu_test_cases = [
{
"query": "飞书MCP配置",
"expected_keywords": ["MCP", "配置", "token", "权限"],
"category": "技术配置",
"difficulty": "medium"
},
{
"query": "飞书文档管理",
"expected_keywords": ["文档", "管理", "协作", "权限"],
"category": "功能使用",
"difficulty": "easy"
},
{
"query": "飞书API调用",
"expected_keywords": ["API", "调用", "token", "权限"],
"category": "技术开发",
"difficulty": "hard"
},
{
"query": "飞书知识库搜索",
"expected_keywords": ["知识库", "搜索", "wiki", "节点"],
"category": "功能使用",
"difficulty": "medium"
},
{
"query": "飞书OAuth认证",
"expected_keywords": ["OAuth", "认证", "授权", "token"],
"category": "技术配置",
"difficulty": "hard"
},
{
"query": "飞书工具集成",
"expected_keywords": ["工具", "集成", "MCP", "插件"],
"category": "技术开发",
"difficulty": "medium"
},
{
"query": "飞书文档编辑",
"expected_keywords": ["编辑", "文档", "批量", "更新"],
"category": "功能使用",
"difficulty": "easy"
},
{
"query": "飞书权限管理",
"expected_keywords": ["权限", "管理", "用户", "访问"],
"category": "管理配置",
"difficulty": "medium"
},
{
"query": "飞书问题排查",
"expected_keywords": ["问题", "排查", "错误", "解决"],
"category": "故障处理",
"difficulty": "hard"
},
{
"query": "飞书最佳实践",
"expected_keywords": ["最佳实践", "指南", "建议", "优化"],
"category": "经验总结",
"difficulty": "medium"
}
]
return feishu_test_cases
def analyze_single_query(self, test_case: Dict[str, Any]) -> Dict[str, Any]:
"""分析单个查询的命中率"""
query = test_case["query"]
expected_keywords = test_case["expected_keywords"]
print(f"🔍 测试查询: {query}")
# 记录查询时间
start_time = time.time()
# 执行查询
try:
# 使用不同的max_memories参数测试
results_3 = self.memos.query_memories(query, max_memories=3)
results_5 = self.memos.query_memories(query, max_memories=5)
results_10 = self.memos.query_memories(query, max_memories=10)
query_time = time.time() - start_time
            # Analyze the results
analysis = {
"query": query,
"category": test_case["category"],
"difficulty": test_case["difficulty"],
"query_time": query_time,
"results": {
"top_3": self._analyze_results(results_3, expected_keywords),
"top_5": self._analyze_results(results_5, expected_keywords),
"top_10": self._analyze_results(results_10, expected_keywords)
}
}
print(f" ⏱️ 查询时间: {query_time:.3f}s")
print(f" 📊 Top-3命中率: {analysis['results']['top_3']['hit_rate']:.2%}")
print(f" 📊 Top-5命中率: {analysis['results']['top_5']['hit_rate']:.2%}")
print(f" 📊 Top-10命中率: {analysis['results']['top_10']['hit_rate']:.2%}")
return analysis
except Exception as e:
print(f" ❌ 查询失败: {e}")
return {
"query": query,
"error": str(e),
"query_time": time.time() - start_time
}
def _analyze_results(self, results: List[Dict], expected_keywords: List[str]) -> Dict[str, Any]:
"""分析查询结果"""
if not results:
            return {
                "hit_rate": 0.0,
                "avg_relevance": 0.0,
                "relevance_scores": [],
                "keyword_coverage": 0.0,
                "total_memories": 0,
                "matched_keywords": []
            }
relevance_scores = []
keyword_hits = set()
for result in results:
content = result.get("content", "").lower()
tags = [tag.lower() for tag in result.get("tags", [])]
            # Keyword coverage: record which expected keywords appear in this memory
for keyword in expected_keywords:
if keyword.lower() in content or keyword.lower() in " ".join(tags):
keyword_hits.add(keyword.lower())
            # Relevance score: fraction of expected keywords matched (simple keyword matching)
keyword_matches = sum(1 for kw in expected_keywords
if kw.lower() in content or kw.lower() in " ".join(tags))
relevance_score = keyword_matches / len(expected_keywords)
relevance_scores.append(relevance_score)
        # Aggregate metrics across all returned memories
hit_rate = len(keyword_hits) / len(expected_keywords)
avg_relevance = statistics.mean(relevance_scores) if relevance_scores else 0.0
return {
"hit_rate": hit_rate,
"avg_relevance": avg_relevance,
"relevance_scores": relevance_scores,
"keyword_coverage": len(keyword_hits) / len(expected_keywords),
"total_memories": len(results),
"matched_keywords": list(keyword_hits)
}
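    # Worked example of the scoring (illustrative numbers only): with
    # expected_keywords = ["MCP", "token", "config", "auth"] and two retrieved
    # memories, where the first matches {"mcp", "token"} and the second matches
    # only {"token"}:
    #   keyword_hits     -> {"mcp", "token"}   => hit_rate = 2/4 = 0.50
    #   relevance_scores -> [0.50, 0.25]       => avg_relevance = 0.375
    # Note: hit_rate and keyword_coverage are currently the same quantity, both
    # measuring the union of keywords matched by any returned memory.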
def run_comprehensive_analysis(self) -> Dict[str, Any]:
"""运行全面的命中率分析"""
print("🚀 开始MemOS记忆命中率分析")
print("=" * 60)
all_results = []
category_stats = {}
difficulty_stats = {}
for i, test_case in enumerate(self.test_cases, 1):
print(f"\n📋 测试 {i}/{len(self.test_cases)}")
result = self.analyze_single_query(test_case)
if "error" not in result:
all_results.append(result)
                # Group by category
category = result["category"]
if category not in category_stats:
category_stats[category] = []
category_stats[category].append(result)
                # Group by difficulty
difficulty = result["difficulty"]
if difficulty not in difficulty_stats:
difficulty_stats[difficulty] = []
difficulty_stats[difficulty].append(result)
        # Build the comprehensive report
report = self._generate_comprehensive_report(all_results, category_stats, difficulty_stats)
        # Persist results to a timestamped JSON file
timestamp = int(time.time())
report_file = self.results_dir / f"hit_rate_analysis_{timestamp}.json"
with open(report_file, 'w', encoding='utf-8') as f:
json.dump(report, f, indent=2, ensure_ascii=False)
print(f"\n📄 分析报告已保存: {report_file}")
return report
def _generate_comprehensive_report(self, results: List[Dict],
category_stats: Dict,
difficulty_stats: Dict) -> Dict[str, Any]:
"""生成综合分析报告"""
if not results:
return {"error": "没有有效的测试结果"}
# 整体统计
overall_stats = {
"total_queries": len(results),
"avg_query_time": statistics.mean([r["query_time"] for r in results]),
"top_3_hit_rate": statistics.mean([r["results"]["top_3"]["hit_rate"] for r in results]),
"top_5_hit_rate": statistics.mean([r["results"]["top_5"]["hit_rate"] for r in results]),
"top_10_hit_rate": statistics.mean([r["results"]["top_10"]["hit_rate"] for r in results]),
}
        # Per-category statistics
category_analysis = {}
for category, cat_results in category_stats.items():
category_analysis[category] = {
"count": len(cat_results),
"avg_hit_rate_top_5": statistics.mean([r["results"]["top_5"]["hit_rate"] for r in cat_results]),
"avg_query_time": statistics.mean([r["query_time"] for r in cat_results])
}
        # Per-difficulty statistics
difficulty_analysis = {}
for difficulty, diff_results in difficulty_stats.items():
difficulty_analysis[difficulty] = {
"count": len(diff_results),
"avg_hit_rate_top_5": statistics.mean([r["results"]["top_5"]["hit_rate"] for r in diff_results]),
"avg_query_time": statistics.mean([r["query_time"] for r in diff_results])
}
return {
"timestamp": time.time(),
"overall_stats": overall_stats,
"category_analysis": category_analysis,
"difficulty_analysis": difficulty_analysis,
"detailed_results": results
}
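    # The report is a plain JSON-serializable dict, roughly shaped as follows
    # (placeholder values, illustrative only):
    # {
    #     "timestamp": 1700000000.0,
    #     "overall_stats": {"total_queries": 10, "avg_query_time": 0.12,
    #                       "top_3_hit_rate": 0.55, "top_5_hit_rate": 0.65,
    #                       "top_10_hit_rate": 0.75},
    #     "category_analysis": {"<category>": {"count": 3,
    #                                          "avg_hit_rate_top_5": 0.7,
    #                                          "avg_query_time": 0.1}},
    #     "difficulty_analysis": {"<difficulty>": {...}},
    #     "detailed_results": [...]
    # }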
def print_summary_report(self, report: Dict[str, Any]):
"""打印摘要报告"""
if "error" in report:
print(f"❌ 分析失败: {report['error']}")
return
stats = report["overall_stats"]
print("\n" + "=" * 60)
print("📊 MemOS 记忆命中率分析报告")
print("=" * 60)
print(f"\n🎯 整体性能指标:")
print(f" 📝 总查询数: {stats['total_queries']}")
print(f" ⏱️ 平均查询时间: {stats['avg_query_time']:.3f}s")
print(f" 🎯 Top-3 命中率: {stats['top_3_hit_rate']:.2%}")
print(f" 🎯 Top-5 命中率: {stats['top_5_hit_rate']:.2%}")
print(f" 🎯 Top-10 命中率: {stats['top_10_hit_rate']:.2%}")
print(f"\n📂 按类别分析:")
for category, data in report["category_analysis"].items():
print(f" {category}: {data['avg_hit_rate_top_5']:.2%} 命中率 ({data['count']} 查询)")
print(f"\n🎚️ 按难度分析:")
for difficulty, data in report["difficulty_analysis"].items():
print(f" {difficulty}: {data['avg_hit_rate_top_5']:.2%} 命中率 ({data['count']} 查询)")
def main():
"""主函数"""
try:
# 运行分析
print("🔬 启动MemOS记忆命中率分析器...")
analyzer = MemoryHitRateAnalyzer("./memos_data")
report = analyzer.run_comprehensive_analysis()
analyzer.print_summary_report(report)
except Exception as e:
print(f"❌ 分析失败: {e}")
import traceback
traceback.print_exc()
if __name__ == "__main__":
main()