Skip to main content
Glama

MemOS-MCP

by qinshu1109
Apache 2.0
3
  • Linux
  • Apple
test_semantic_search.py (6.42 kB)
#!/usr/bin/env python3
"""Compare semantic-search quality: hash vectors vs. real embedding vectors.

The script embeds a small fixed set of documents twice -- once with the
embedding produced by ``SimpleMemOS._get_embedding`` and once with an
embedding model served through an OpenAI-compatible API -- and prints the
top-3 matches of each approach for a handful of queries.
"""

import os
from pathlib import Path

import numpy as np
from openai import OpenAI

from usage_examples import SimpleMemOS


def load_env_file():
    """Load ``KEY=VALUE`` pairs from ``./.env`` into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Whitespace around the key and the value is removed, and one
    pair of matching single or double quotes around the value is stripped,
    so ``KEY = "value"`` sets ``KEY`` to ``value``. Variables that already
    exist in the environment are overwritten. Nothing happens when the file
    does not exist.
    """
    env_file = Path(".env")
    if not env_file.exists():
        return

    with open(env_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()
            value = value.strip()

            # Drop one pair of matching surrounding quotes: KEY="value".
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]

            # An empty variable name cannot be set in the environment.
            if key:
                os.environ[key] = value


def get_real_embedding(text, client):
    """Request an embedding vector for *text* from the embeddings API.

    Args:
        text: The string to embed.
        client: A client object exposing ``embeddings.create``.

    Returns:
        The ``embedding`` of the first item in the response, or ``None``
        when the request fails for any reason (the error is printed).
    """
    try:
        response = client.embeddings.create(
            model="BAAI/bge-large-zh-v1.5",
            input=text
        )
        return response.data[0].embedding
    except Exception as e:
        # Deliberately best-effort: callers skip the comparison on None.
        print(f"❌ 获取嵌入向量失败: {e}")
        return None


def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors as a ``float``.

    Returns ``0.0`` when either vector has zero norm, so the division is
    never performed with a zero denominator.
    """
    vec1 = np.asarray(vec1)
    vec2 = np.asarray(vec2)

    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        return 0.0

    return float(np.dot(vec1, vec2) / (norm1 * norm2))


def _rank_documents(query_vec, doc_vecs):
    """Return ``(doc_index, similarity)`` pairs, most similar first."""
    scored = [
        (idx, cosine_similarity(query_vec, vec))
        for idx, vec in enumerate(doc_vecs)
    ]
    # list.sort is stable, so ties keep their original document order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored


def _print_top_results(title, ranked, documents, top_k=3):
    """Print *title* followed by the first *top_k* ranked documents."""
    print(title)
    for rank, (idx, sim) in enumerate(ranked[:top_k], 1):
        print(f" {rank}. [{sim:.4f}] {documents[idx]}")


def test_semantic_search():
    """Print a side-by-side comparison of hash-vector and real-embedding search.

    Loads ``.env``, embeds a fixed document set with both approaches and,
    for every query, prints the top-3 documents of each approach plus the
    overlap between the two top-3 sets. The comparison is skipped (with a
    message) when the API key is missing or any document embedding cannot
    be fetched; a query whose embedding cannot be fetched is skipped on its
    own.
    """
    print("🧪 测试语义搜索效果对比")
    print("=" * 60)

    # Load environment variables from .env.
    load_env_file()

    # Initialise the API client. Without a key the client constructor would
    # raise, so bail out the same way an embedding failure does.
    api_key = os.getenv("SILICONFLOW_API_KEY")
    if not api_key:
        print("❌ 未设置 SILICONFLOW_API_KEY,跳过对比测试")
        return
    client = OpenAI(
        api_key=api_key,
        base_url=os.getenv("SILICONFLOW_BASE_URL")
    )

    # Initialise the current system.
    memos = SimpleMemOS()

    # Test document set.
    documents = [
        "今天学习了人工智能和机器学习的基础知识",
        "MemOS是一个智能记忆管理系统",
        "手动验证测试功能正常工作",
        "深度学习神经网络模型训练",
        "自然语言处理和文本分析技术",
        "数据库查询和向量搜索算法",
        "Python编程语言开发应用",
        "云计算和分布式系统架构",
    ]

    # Test queries.
    queries = [
        "AI和机器学习",
        "记忆管理",
        "测试验证",
        "深度学习",
        "文本处理",
    ]

    print("📚 测试文档集合:")
    for i, doc in enumerate(documents, 1):
        print(f" {i}. {doc}")

    print("\n🔍 测试查询:")
    for i, query in enumerate(queries, 1):
        print(f" {i}. {query}")

    # Embed every document with both approaches.
    print("\n📊 计算嵌入向量...")

    # Hash vectors (current system).
    hash_embeddings = [memos._get_embedding(doc) for doc in documents]

    # Real embedding vectors; one failure aborts the whole comparison so the
    # two embedding lists always stay aligned with `documents`.
    real_embeddings = []
    for doc in documents:
        real_vec = get_real_embedding(doc, client)
        if real_vec is None:
            print("❌ 无法获取真正的嵌入向量,跳过对比测试")
            return
        real_embeddings.append(real_vec)

    print("✅ 嵌入向量计算完成")

    # Run the search comparison for each query.
    for query in queries:
        print(f"\n🔍 查询: '{query}'")
        print("-" * 40)

        # Embed the query with both approaches.
        query_hash = memos._get_embedding(query)
        query_real = get_real_embedding(query, client)
        if query_real is None:
            continue

        # Score and sort the documents for each approach.
        hash_ranked = _rank_documents(query_hash, hash_embeddings)
        real_ranked = _rank_documents(query_real, real_embeddings)

        # Show the top-3 results.
        _print_top_results("📈 哈希向量搜索结果 (Top 3):", hash_ranked, documents)
        _print_top_results("\n🎯 真实嵌入搜索结果 (Top 3):", real_ranked, documents)

        # How many documents appear in both top-3 lists.
        hash_top3 = {idx for idx, _ in hash_ranked[:3]}
        real_top3 = {idx for idx, _ in real_ranked[:3]}
        overlap = len(hash_top3 & real_top3)
        print(f"\n📊 Top3重叠度: {overlap}/3 ({overlap/3*100:.1f}%)")


def test_current_search_quality():
    """Run a few fixed queries through ``SimpleMemOS.search_memories``.

    For each query, prints up to three results with their ``score`` and the
    first 50 characters of their ``content``, or a not-found message when
    the search returns nothing.
    """
    print("\n🧪 测试当前系统搜索质量")
    print("=" * 60)

    # Initialise the system.
    memos = SimpleMemOS()

    # Test queries.
    test_queries = [
        "MemOS功能",
        "测试验证",
        "记忆管理",
        "开发功能",
        "智能提取",
    ]

    for query in test_queries:
        print(f"\n🔍 搜索: '{query}'")
        results = memos.search_memories(query, limit=3)

        if not results:
            print("❌ 未找到相关结果")
            continue

        print(f"✅ 找到 {len(results)} 条结果:")
        for i, result in enumerate(results, 1):
            content = result.get('content', 'N/A')
            score = result.get('score', 0)
            print(f" {i}. [{score:.4f}] {content[:50]}...")


def main():
    """Run both test routines and print the closing summary."""
    print("🚀 开始语义搜索效果测试...")

    # Hash-vector vs. real-embedding comparison.
    test_semantic_search()

    # Search quality of the current system.
    test_current_search_quality()

    # NOTE: the summary below is static text; it is printed regardless of
    # whether the comparison above ran or was skipped.
    print("\n" + "=" * 60)
    print("📋 测试总结:")
    print("1. SiliconFlow嵌入API可用,提供1024维高质量向量")
    print("2. 当前系统使用384维哈希向量,搜索功能基本可用")
    print("3. 真正的嵌入向量在语义理解上明显优于哈希向量")
    print("4. 建议升级到真正的嵌入模型以提升搜索精度")
    print("5. 可以考虑安装sentence-transformers支持本地重排")


if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/qinshu1109/memos-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.