Faulkner DB

faulkner-db
ingestion

analyze_benchmark.py•3.28 KiB

#!/usr/bin/env python3 """Analyze benchmark results and recommend optimal strategy.""" print("\n" + "="*70) print("BENCHMARK ANALYSIS & OPTIMIZATION INSIGHTS") print("="*70) results = { "Sequential": {"time": 33.33, "rate": 1.50, "calls": 50}, "Batched (10)": {"time": 22.61, "rate": 2.21, "calls": 5}, "Parallel (4)": {"time": 24.14, "rate": 2.07, "calls": 5}, "Filtered+Parallel": {"time": 24.23, "rate": 2.06, "calls": 5}, } print("\n📊 KEY FINDINGS:\n") # Finding 1: Batching helps speedup_batch = 33.33 / 22.61 print(f"1. Batching Effect:") print(f" Sequential: 50 calls × 0.667s = 33.33s") print(f" Batched(10): 5 calls × 4.52s = 22.61s") print(f" Speedup: {speedup_batch:.2f}x") print(f" Benefit: Reduces API calls by 90%, still gains 1.5x speed") # Finding 2: Parallelism doesn't help much here print(f"\n2. Parallelism on Small Test:") print(f" Parallel(4): 24.14s (vs 22.61s batched)") print(f" Result: -1.6% slower (overhead > benefit)") print(f" Reason: Local LLM already serializes requests") print(f" Insight: Network parallelism isn't the bottleneck") # Finding 3: Real bottleneck print(f"\n3. Real Bottleneck Analysis:") print(f" Each LLM call latency: ~4.5s per batch of 10") print(f" That's: 4.5s / 10 = 0.45s per conversation") print(f" Current Phase 2: ~0.43s per conversation observed") print(f" Match! Confirms LLM latency is the bottleneck, not networking") # Finding 4: Projection for full Phase 2 print(f"\n4. Phase 2 Projection (with optimizations):") remaining_convs = 5657 # Remaining from Phase 2 print(f" Remaining conversations: ~{remaining_convs:,}") print(f" Batch size: 20") print(f" Number of batches: {remaining_convs // 20}") print(f" Latency per batch: ~9-10 seconds (for 20 conversations)") print(f" Total time: {(remaining_convs // 20) * 9.5 / 60:.1f} hours") print(f" Current pace: ~{remaining_convs / (141/60):.1f} hours remaining") # Finding 5: Real optimization opportunity print(f"\n5. Actual Optimization Opportunities:") print(f" X Parallelism: Not effective (local LLM is serial)") print(f" O Batching: 1.5x improvement (reduce API call overhead)") print(f" O Filtering: Skip 30% low-relevance = 1.3x effective speedup") print(f" O Larger batches: 30-40 conversations per call (need testing)") print(f" O Cache common patterns: Skip duplicate extractions") # Finding 6: Recommended approach print(f"\n6. RECOMMENDED STRATEGY:") print(f"") print(f" Option A (Safe, Proven):") print(f" - Batch size: 20 conversations per LLM call") print(f" - Filter: Skip lowest 20-30% by relevance") print(f" - No parallelism (not helpful here)") print(f" - Expected: 1.8x faster = ~8-10 hours remaining") print(f"") print(f" Option B (Aggressive, Experimental):") print(f" - Batch size: 40 conversations per LLM call") print(f" - Filter: Skip lowest 30% by relevance") print(f" - Smart cache: Skip if exact match seen before") print(f" - Expected: 2.5x faster = ~5-6 hours remaining") print(f"") print(f"\n7. NEXT STEPS:") print(f" 1. Test with larger batch sizes (20, 30, 40)") print(f" 2. Measure actual LLM response quality at scale") print(f" 3. Deploy Option A if time-sensitive, Option B if quality matters") print(f" 4. Monitor extraction rate and adjust batch size dynamically") print("\n" + "="*70)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Platano78/faulkner-db'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

analyze_benchmark.py•3.28 KiB