analyze_metrics.pyβ’4.19 kB
#!/usr/bin/env python3
"""Analyze search engine selection metrics and compare with LLM responses."""
import json
import sys
from collections import defaultdict
from datetime import datetime, timedelta
from pathlib import Path
def analyze_metrics(days=7):
    """Analyze search responses vs LLM selections."""
    metrics_file = Path("src/websearch/search-metrics.jsonl")
    
    if not metrics_file.exists():
        print("No metrics file found. Start using the search to collect data.")
        return
    
    cutoff_date = datetime.now().replace(tzinfo=None) - timedelta(days=days)
    
    search_responses = []
    url_selections = []
    
    # Parse metrics file
    with open(metrics_file, 'r') as f:
        for line in f:
            try:
                data = json.loads(line.strip())
                timestamp = datetime.fromisoformat(data['timestamp'].replace('Z', '').replace('+00:00', ''))
                
                if timestamp < cutoff_date:
                    continue
                
                if data.get('event_type') == 'search_response':
                    search_responses.append(data)
                elif data.get('event_type') == 'url_selection':
                    url_selections.append(data)
                    
            except (json.JSONDecodeError, KeyError, ValueError):
                continue
    
    print(f"\nπ Search Response vs LLM Selection Analysis (Last {days} days)")
    print("=" * 60)
    
    if not search_responses and not url_selections:
        print("No data found in the specified time range.")
        return
    
    # Analyze search responses
    if search_responses:
        print(f"\nπ€ Search Responses Sent to LLM: {len(search_responses)}")
        
        total_results_sent = sum(r['total_results'] for r in search_responses)
        engine_sent = defaultdict(int)
        
        for response in search_responses:
            for engine, count in response['engine_distribution'].items():
                engine_sent[engine] += count
        
        print(f"Total results sent to LLM: {total_results_sent}")
        print("Engine distribution in responses:")
        for engine, count in sorted(engine_sent.items()):
            percentage = (count / total_results_sent * 100) if total_results_sent > 0 else 0
            print(f"  {engine.upper():12}: {count:3d} results ({percentage:5.1f}%)")
    
    # Analyze URL selections
    if url_selections:
        print(f"\nπ₯ LLM URL Selections: {len(url_selections)}")
        
        total_selections = 0
        engine_selected = defaultdict(int)
        
        for selection in url_selections:
            for sel in selection.get('selections', []):
                total_selections += 1
                engine_selected[sel['engine']] += 1
        
        print(f"Total URLs selected by LLM: {total_selections}")
        print("Engine distribution in selections:")
        for engine, count in sorted(engine_selected.items()):
            percentage = (count / total_selections * 100) if total_selections > 0 else 0
            print(f"  {engine.upper():12}: {count:3d} selections ({percentage:5.1f}%)")
    
    # Compare if we have both
    if search_responses and url_selections:
        print(f"\nπ Response vs Selection Comparison:")
        print("-" * 40)
        
        for engine in set(list(engine_sent.keys()) + list(engine_selected.keys())):
            sent = engine_sent.get(engine, 0)
            selected = engine_selected.get(engine, 0)
            
            sent_pct = (sent / total_results_sent * 100) if total_results_sent > 0 else 0
            selected_pct = (selected / total_selections * 100) if total_selections > 0 else 0
            
            print(f"{engine.upper():12}: {sent_pct:5.1f}% sent β {selected_pct:5.1f}% selected")
        
        # Selection rate
        if total_results_sent > 0:
            selection_rate = (total_selections / total_results_sent * 100)
            print(f"\nOverall selection rate: {selection_rate:.1f}% ({total_selections}/{total_results_sent})")
if __name__ == "__main__":
    days = int(sys.argv[1]) if len(sys.argv) > 1 else 7
    analyze_metrics(days)