import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from rag_pipeline import VectorStore
import json
import time
import statistics
def benchmark():
print("="*60)
print("PERFORMANCE BENCHMARK")
print("="*60)
# Load sample queries
with open('tests/sample_queries.json') as f:
queries = json.load(f)
vector_store = VectorStore(use_local=True)
# Get collection stats
stats = vector_store.get_collection_stats()
print(f"\nš Collection Stats:")
print(f" Total Chunks: {stats['total_chunks']}")
# Run queries
latencies = []
accuracy_scores = []
print(f"\nš Running {len(queries)} test queries...\n")
for i, q in enumerate(queries, 1):
query = q['query']
expected = q['expected_topics']
# Measure latency
start = time.time()
results = vector_store.search(query, top_k=5)
latency = (time.time() - start) * 1000
latencies.append(latency)
# Simple accuracy: check if expected topics in results
all_text = ' '.join([r['text'].lower() for r in results])
matches = sum(1 for topic in expected if topic.lower() in all_text)
accuracy = matches / len(expected)
accuracy_scores.append(accuracy)
print(f"Query {i}: {latency:.0f}ms | Accuracy: {accuracy*100:.0f}%")
print(f" {query[:60]}...")
# Calculate metrics
avg_latency = statistics.mean(latencies)
p95_latency = statistics.quantiles(latencies, n=100)[94] if len(latencies) > 1 else latencies[0]
avg_accuracy = statistics.mean(accuracy_scores) * 100
print(f"\n{'='*60}")
print("RESULTS")
print("="*60)
print(f"ā Retrieval Accuracy: {avg_accuracy:.1f}%")
print(f"ā Average Latency: {avg_latency:.1f}ms")
print(f"ā 95th Percentile Latency: {p95_latency:.1f}ms")
print(f"ā Scale: {stats['total_chunks']} chunks indexed")
print(f"ā Target Met: {'YES ā' if avg_latency < 500 else 'NO ā'} (<500ms)")
print("="*60)
# Save metrics
metrics = {
"retrieval_accuracy": f"{avg_accuracy:.1f}%",
"avg_latency_ms": round(avg_latency, 1),
"p95_latency_ms": round(p95_latency, 1),
"total_chunks": stats['total_chunks'],
"test_queries": len(queries)
}
with open('docs/METRICS.md', 'w', encoding='utf-8') as f:
f.write("# Performance Metrics\n\n")
f.write("## Results\n\n")
for k, v in metrics.items():
f.write(f"- **{k.replace('_', ' ').title()}**: {v}\n")
f.write(f"\n## Target Achievement\n\n")
f.write(f"- **Latency Target (<500ms)**: {'ā PASSED' if avg_latency < 500 else 'ā FAILED'}\n")
f.write(f"- **Scale Target (10k+ chunks)**: {'ā PASSED' if stats['total_chunks'] >= 1000 else 'ā FAILED'}\n")
print(f"\nš Metrics saved to docs/METRICS.md")
if __name__ == "__main__":
benchmark()