"""
Quick Search Quality Test - Easy to run and interpret
"""
import json
import time
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.core.search_engine_claude import SearchEngine
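# This script exercises three SearchEngine methods, as used below:
#   execute_search_task(query)           -> raw response text
#   parse_search_results(raw)            -> list of result dicts (name, url, source, location, description)
#   find_new_items(new_items, old_items) -> subset of new_items not judged duplicates of old_items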
def quick_search_test(query="AI Ethics and Safety openings fit for a PhD in Computer Science"):
"""
Run a quick test to evaluate search quality and duplicate detection.
Args:
query: Search query to test
"""
print("๐ QUICK SEARCH QUALITY TEST")
print("="*60)
print(f"Query: {query}")
print("="*60)
search_engine = SearchEngine()
# Test 1: First search (baseline)
print("\n๐ TEST 1: Initial Search")
print("-" * 40)
start_time = time.time()
raw_response1 = search_engine.execute_search_task(query)
items1 = search_engine.parse_search_results(raw_response1)
time1 = time.time() - start_time
print(f"โ Found {len(items1)} items in {time1:.2f} seconds")
print(f"โ Raw response length: {len(raw_response1)} characters")
# Show sample results
print(f"\n๐ Sample Results (showing first 5):")
for i, item in enumerate(items1[:5], 1):
name = item.get('name', 'No name')[:60]
source = item.get('source', 'No source')[:20]
location = item.get('location', 'No location')[:15]
has_url = "โ" if item.get('url') else "โ"
has_desc = "โ" if item.get('description') else "โ"
print(f" {i}. {name}")
print(f" Source: {source} | Location: {location}")
print(f" URL: {has_url} | Description: {has_desc}")
print()
# Test 2: Quality metrics
print("๐ TEST 2: Quality Metrics")
print("-" * 40)
items_with_urls = sum(1 for item in items1 if item.get('url'))
items_with_descriptions = sum(1 for item in items1 if item.get('description'))
items_with_locations = sum(1 for item in items1 if item.get('location'))
sources = [item.get('source', 'Unknown') for item in items1]
unique_sources = len(set(sources))
print(f"โ Items with URLs: {items_with_urls}/{len(items1)} ({items_with_urls/len(items1)*100:.1f}%)")
print(f"โ Items with descriptions: {items_with_descriptions}/{len(items1)} ({items_with_descriptions/len(items1)*100:.1f}%)")
print(f"โ Items with locations: {items_with_locations}/{len(items1)} ({items_with_locations/len(items1)*100:.1f}%)")
print(f"โ Unique sources: {unique_sources}")
# Show top sources
from collections import Counter
source_counts = Counter(sources)
print(f"\n๐ Top Sources:")
for source, count in source_counts.most_common(5):
print(f" โข {source}: {count} items")
# Test 3: Second search for duplicate detection
print(f"\n๐ TEST 3: Duplicate Detection")
print("-" * 40)
print("Running second search to test duplicate detection...")
start_time = time.time()
raw_response2 = search_engine.execute_search_task(query)
items2 = search_engine.parse_search_results(raw_response2)
time2 = time.time() - start_time
print(f"โ Second search found {len(items2)} items in {time2:.2f} seconds")
# Compare results
    new_items = search_engine.find_new_items(items2, items1)
    print(f"✓ AI detected {len(new_items)} genuinely NEW items")
    # Duplicate rate = share of second-run items judged to be repeats of the first run
    duplicate_rate = (len(items2) - len(new_items)) / len(items2) * 100 if items2 else 0.0
    print(f"✓ Duplicate detection rate: {duplicate_rate:.1f}%")
# Show what was detected as new
if new_items:
print(f"\n๐ Items detected as NEW:")
for i, item in enumerate(new_items[:3], 1):
name = item.get('name', 'No name')[:50]
source = item.get('source', 'No source')[:20]
print(f" {i}. {name} (from {source})")
if len(new_items) > 3:
print(f" ... and {len(new_items) - 3} more")
else:
print(f"๐ฏ Perfect! No new items detected (all were duplicates)")
# Test 4: Performance analysis
print(f"\nโก TEST 4: Performance Analysis")
print("-" * 40)
avg_time = (time1 + time2) / 2
time_consistency = abs(time1 - time2) / avg_time * 100
print(f"โ Average search time: {avg_time:.2f} seconds")
print(f"โ Time consistency: {time_consistency:.1f}% variation")
print(f"โ Items/second: {len(items1)/time1:.1f} (first run)")
# Overall assessment
print(f"\n๐ฏ OVERALL ASSESSMENT")
print("=" * 60)
score = 0
max_score = 5
# Scoring criteria
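    # Rubric: one point each for item quantity, URL completeness, source diversity,
    # duplicate detection, and search speed (five points total).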
    if len(items1) >= 5:
        score += 1
        print("✅ Item quantity: Good (5+ items found)")
    else:
        print("⚠️ Item quantity: Low (<5 items found)")
    if items_with_urls/len(items1) >= 0.8:
        score += 1
        print("✅ URL completeness: Excellent (80%+ have URLs)")
    elif items_with_urls/len(items1) >= 0.5:
        print("⚠️ URL completeness: Moderate (50-80% have URLs)")
    else:
        print("❌ URL completeness: Poor (<50% have URLs)")
    if unique_sources >= 3:
        score += 1
        print("✅ Source diversity: Good (3+ unique sources)")
    else:
        print("⚠️ Source diversity: Limited (<3 unique sources)")
    if len(new_items) <= len(items2) * 0.5:  # At most 50% of the second run detected as new
        score += 1
        print("✅ Duplicate detection: Working well")
    else:
        print("⚠️ Duplicate detection: May have false positives")
    if avg_time <= 15:
        score += 1
        print("✅ Performance: Good (15 seconds or less)")
    else:
        print("⚠️ Performance: Slow (over 15 seconds)")
print(f"\n๐ FINAL SCORE: {score}/{max_score}")
if score >= 4:
print("๐ Excellent! Search quality is high")
elif score >= 3:
print("๐ Good! Minor improvements possible")
elif score >= 2:
print("โ ๏ธ Fair! Some issues need attention")
else:
print("โ Poor! Significant improvements needed")
return {
'items_found': len(items1),
'quality_score': score,
'avg_search_time': avg_time,
        'duplicate_detection_rate': duplicate_rate,
'new_items': len(new_items)
}
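# Optional helper (a minimal sketch, not part of the original test flow): persist the
# metrics dict returned by quick_search_test() so runs can be compared over time.
# The file name "quality_results.json" is an arbitrary choice for illustration.
def save_results(results, path="quality_results.json"):
    """Append one result dict (or a list of them) to a JSON file."""
    existing = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            existing = json.load(f)
    existing.extend(results if isinstance(results, list) else [results])
    with open(path, "w", encoding="utf-8") as f:
        json.dump(existing, f, indent=2)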
def run_multiple_queries():
"""Test multiple different types of queries."""
test_queries = [
"AI Ethics and Safety openings fit for a PhD in Computer Science",
"Latest iPhone release news and specifications",
"Remote Python developer jobs at tech companies"
]
print("๐ MULTI-QUERY SEARCH QUALITY TEST")
print("=" * 80)
results = []
for i, query in enumerate(test_queries, 1):
print(f"\n\n{'='*20} QUERY {i}/{len(test_queries)} {'='*20}")
        result = quick_search_test(query)
        if result is not None:
            result['query'] = query
            results.append(result)
print(f"\nWaiting 10 seconds before next query...")
time.sleep(10)
# Summary
print(f"\n\n๐ MULTI-QUERY SUMMARY")
print("=" * 80)
avg_score = sum(r['quality_score'] for r in results) / len(results)
avg_time = sum(r['avg_search_time'] for r in results) / len(results)
avg_items = sum(r['items_found'] for r in results) / len(results)
print(f"Average quality score: {avg_score:.1f}/5")
print(f"Average search time: {avg_time:.1f} seconds")
print(f"Average items found: {avg_items:.1f}")
print(f"\n๐ Query Performance:")
for i, result in enumerate(results, 1):
query_short = result['query'][:40] + "..."
print(f"{i}. {query_short}")
print(f" Score: {result['quality_score']}/5 | Time: {result['avg_search_time']:.1f}s | Items: {result['items_found']}")
return results
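# Example (not wired into __main__): persist the multi-query summaries with the
# save_results() helper sketched above.
#   results = run_multiple_queries()
#   save_results(results)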
if __name__ == "__main__":
    # sys is already imported at module level
    if len(sys.argv) > 1:
if sys.argv[1] == "--multi":
run_multiple_queries()
else:
# Custom query
custom_query = " ".join(sys.argv[1:])
quick_search_test(custom_query)
else:
# Default single test
quick_search_test()