test_retriever_debug.py•2.69 kB
#!/usr/bin/env python3
"""Debug retriever issue"""
import asyncio
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from personal_rag_mcp.storage.sqlite_store import SQLiteStore
from personal_rag_mcp.storage.qdrant_store import QdrantStore
from personal_rag_mcp.utils.embeddings import EmbeddingClient
async def debug(
    *,
    sqlite_path: str = "/app/data/test_e2e.db",
    qdrant_url: str = "http://qdrant:6333",
    collection_name: str = "test_e2e",
    embedding_url: str = "http://ollama:11434",
    embedding_model: str = "nomic-embed-text",
    query: str = "Docker Compose",
    limit: int = 3,
) -> None:
    """Print a step-by-step diagnostic of the retriever's storage components.

    The routine runs three independent checks and reports each on stdout:

    1. Reads the ``documents`` and ``chunks`` tables straight from the SQLite
       file (bypassing ``SQLiteStore``) and lists what is there.
    2. Fetches the first listed document and the first listed chunk through
       ``SQLiteStore`` to confirm the store can read rows that exist on disk.
    3. Embeds ``query`` and runs a vector search against Qdrant, printing the
       IDs stored in each hit's payload.

    All parameters are keyword-only and default to the values of the e2e test
    environment, so ``debug()`` with no arguments behaves as before.

    Args:
        sqlite_path: Path of the SQLite database file to inspect.
        qdrant_url: Base URL passed to ``QdrantStore``.
        collection_name: Qdrant collection to search.
        embedding_url: Base URL passed to ``EmbeddingClient``.
        embedding_model: Embedding model name passed to ``EmbeddingClient``.
        query: Text that is embedded and used for the vector search.
        limit: Maximum number of search hits requested from Qdrant.
    """
    # A regular function-scope import replaces the inline ``__import__`` call;
    # it keeps the dependency local to this debug routine.
    import aiosqlite

    print("Testing retriever components...")

    # Use the existing test database
    sqlite_store = SQLiteStore(sqlite_path)

    # List all documents and chunks with raw SQL so the result does not depend
    # on the store implementation being debugged.
    async with aiosqlite.connect(sqlite_path) as db:
        async with db.execute("SELECT id, title FROM documents") as cursor:
            docs = await cursor.fetchall()
        print(f"\nDocuments in DB: {len(docs)}")
        for doc_id, title in docs:
            print(f" - {doc_id}: {title}")

        async with db.execute(
            "SELECT id, document_id, chunk_index FROM chunks"
        ) as cursor:
            chunks = await cursor.fetchall()
        print(f"\nChunks in DB: {len(chunks)}")
        for chunk_id, parent_id, idx in chunks[:5]:  # Show first 5
            # str() keeps the preview working when IDs are not strings.
            print(f" - {chunk_id} (doc: {str(parent_id)[:8]}..., idx: {idx})")

    # Try to get a document
    if docs:
        first_doc_id = docs[0][0]
        print(f"\nTrying to get document: {first_doc_id}")
        doc = await sqlite_store.get_document(first_doc_id)
        if doc:
            print(f" ✓ Got document: {doc.metadata.title}")
            print(f" Full text preview: {doc.full_text[:100]}")
        else:
            print(" ✗ Failed to get document")

    # Try to get a chunk
    if chunks:
        first_chunk_id = chunks[0][0]
        print(f"\nTrying to get chunk: {first_chunk_id}")
        text = await sqlite_store.get_chunk_text(first_chunk_id)
        if text:
            print(f" ✓ Got chunk text: {text[:100]}")
        else:
            print(" ✗ Failed to get chunk text")

    # Test Qdrant search
    qdrant_store = QdrantStore(qdrant_url, collection_name=collection_name)
    embedding_client = EmbeddingClient(embedding_url, model=embedding_model)

    print(f"\nTesting Qdrant search for '{query}'...")
    query_vector = await embedding_client.embed_text(query)
    results = await qdrant_store.search(query_vector, limit=limit)

    print(f" Found {len(results)} Qdrant results:")
    for hit in results:
        # Tolerate a hit whose payload is null, and IDs that are not strings.
        payload = hit['payload'] or {}
        print(f" - ID: {hit['id']}, Score: {hit['score']:.4f}")
        print(f" Payload doc_id: {str(payload.get('document_id', 'N/A'))[:8]}...")
        print(f" Payload chunk_id: {payload.get('chunk_id', 'N/A')}")
# Script entry point: only runs the diagnostics when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    diagnostics = debug()
    asyncio.run(diagnostics)