# search_docs.py
#!/usr/bin/env python3
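"""Search documentation chunks stored in Redis by cosine similarity.

Assumes chunks were indexed as Redis hashes under 'doc:<id>' keys with
'embedding', 'url', 'chunk_index', and 'content' fields, and that their
ids are listed in the 'doc_ids' set.
"""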
import json
import sys

import numpy as np
import redis
from sentence_transformers import SentenceTransformer
def cosine_similarity(a, b):
    """Cosine similarity between two vectors."""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
def search_docs(query, top_k=5):
    r = redis.Redis(host='localhost', port=6379, db=0, decode_responses=True)
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode(query)

    # Score every indexed chunk against the query embedding.
    doc_ids = r.smembers('doc_ids')
    similarities = []
    for doc_id in doc_ids:
        doc_data = r.hgetall(f"doc:{doc_id}")
        if doc_data and 'embedding' in doc_data:
            doc_embedding = json.loads(doc_data['embedding'])
            similarity = cosine_similarity(query_embedding, doc_embedding)
            similarities.append((similarity, doc_data))

    # Sort by score only; comparing the doc_data dicts on tied scores would raise a TypeError.
    similarities.sort(key=lambda item: item[0], reverse=True)
print(f"Top {top_k} results for: '{query}'\n")
for i, (score, doc_data) in enumerate(similarities[:top_k]):
print(f"{i+1}. Score: {score:.3f}")
print(f" URL: {doc_data['url']}")
print(f" Chunk: {doc_data['chunk_index']}")
print(f" Content: {doc_data['content'][:200]}...")
print()
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python search_docs.py 'your search query'")
        sys.exit(1)
    query = ' '.join(sys.argv[1:])
    search_docs(query)