//! comprehensive_performance_suite.rs
//!
//! Comprehensive performance benchmark suite for CodeGraph.
//! Targets 50% performance improvements across all components.
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId, Throughput};
use std::time::Duration;
use tokio::runtime::Runtime;
// Import CodeGraph components for benchmarking
// Note: These imports will need to be adjusted based on actual module structure
use codegraph_core::*;
use codegraph_vector::*;
use codegraph_cache::*;
// use codegraph_graph::*;
use codegraph_parser::*;
// Performance target constants - 50% improvement from baseline
const TARGET_VECTOR_SEARCH_LATENCY_US: u64 = 500; // Sub-millisecond target
const TARGET_GRAPH_QUERY_LATENCY_MS: u64 = 25; // 50% of 50ms baseline
const TARGET_CACHE_LATENCY_US: u64 = 100; // Cache operation target
const TARGET_PARSER_THROUGHPUT_MULTIPLIER: f64 = 1.5; // 50% throughput increase
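// Illustrative helper, not wired into the benchmarks below (they keep their
// inline checks): one way the latency-target comparisons could be centralized.
#[allow(dead_code)]
fn warn_if_over_target(label: &str, measured_us: u128, target_us: u64) {
    if measured_us > target_us as u128 {
        eprintln!("⚠️ {} exceeded target: {}µs > {}µs", label, measured_us, target_us);
    }
}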
/// Vector search performance benchmarks
fn bench_vector_search_performance(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("vector_search_performance");
    group.measurement_time(Duration::from_secs(30));
    group.warm_up_time(Duration::from_secs(5));
    group.sample_size(200);
    // Test different vector dimensions and dataset sizes
    let dimensions = [128, 384, 768, 1536];
    let dataset_sizes = [1000, 10000, 50000, 100000];
    for &dim in &dimensions {
        for &size in &dataset_sizes {
            let test_name = format!("search_dim_{}_size_{}", dim, size);
            // Build the query, dataset, and engine once, outside the measured
            // loop, so only the search itself is timed. (The mock engine
            // ignores the dataset; a real engine would index it here.)
            let query_vector = generate_random_vector(dim);
            let _dataset = generate_vector_dataset(size, dim);
            let search_engine = rt.block_on(create_optimized_search_engine(dim, size));
            group.bench_function(&test_name, |b| {
                b.to_async(&rt).iter(|| async {
                    // Perform search - this should be < 500µs for the 50% improvement target
                    let start = std::time::Instant::now();
                    let _results = search_engine.search_knn(black_box(&query_vector), 10).await;
                    let duration = start.elapsed();
                    // Warn when the performance target is missed
                    if duration.as_micros() > TARGET_VECTOR_SEARCH_LATENCY_US as u128 {
                        eprintln!(
                            "⚠️ Vector search exceeded target: {}µs > {}µs",
                            duration.as_micros(),
                            TARGET_VECTOR_SEARCH_LATENCY_US
                        );
                    }
                    black_box(duration)
                });
            });
        }
    }
    group.finish();
}
/// Graph query performance benchmarks
fn bench_graph_query_performance(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("graph_query_performance");
    group.measurement_time(Duration::from_secs(30));
    group.sample_size(100);
    // Test different graph sizes and query complexities
    let graph_sizes = [(1000, 2000), (10000, 20000), (100000, 200000)]; // (nodes, edges)
    let query_types = ["simple_lookup", "neighbor_traversal", "shortest_path", "complex_query"];
    for &(nodes, edges) in &graph_sizes {
        for query_type in &query_types {
            let test_name = format!("graph_{}_{}_nodes_{}_edges", query_type, nodes, edges);
            // Build the graph once so only the query itself is measured.
            let graph = rt.block_on(create_test_graph(nodes, edges));
            group.bench_function(&test_name, |b| {
                b.to_async(&rt).iter(|| async {
                    // Execute the query; the arms return different types, so
                    // each result is black_boxed in place.
                    let start = std::time::Instant::now();
                    match *query_type {
                        "simple_lookup" => {
                            black_box(graph.get_node(black_box(nodes / 2)).await);
                        }
                        "neighbor_traversal" => {
                            black_box(graph.get_neighbors(black_box(nodes / 2)).await);
                        }
                        "shortest_path" => {
                            black_box(graph.shortest_path(black_box(0), black_box(nodes - 1)).await);
                        }
                        "complex_query" => {
                            black_box(graph.complex_query(create_complex_query()).await);
                        }
                        _ => unreachable!(),
                    }
                    let duration = start.elapsed();
                    // Warn when the 25ms target (50% improvement) is missed
                    if duration.as_millis() > TARGET_GRAPH_QUERY_LATENCY_MS as u128 {
                        eprintln!(
                            "⚠️ Graph query exceeded target: {}ms > {}ms",
                            duration.as_millis(),
                            TARGET_GRAPH_QUERY_LATENCY_MS
                        );
                    }
                    black_box(duration)
                });
            });
        }
    }
    group.finish();
}
/// Cache performance benchmarks
fn bench_cache_performance(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("cache_performance");
    group.measurement_time(Duration::from_secs(20));
    group.sample_size(500);
    // Test different cache scenarios
    let cache_sizes = [1000, 10000, 100000];
    let operations = ["get_hit", "get_miss", "put", "eviction"];
    for &cache_size in &cache_sizes {
        for operation in &operations {
            let test_name = format!("cache_{}_size_{}", operation, cache_size);
            // Create the cache once so only the operation itself is measured.
            let cache = rt.block_on(create_test_cache(cache_size));
            group.bench_function(&test_name, |b| {
                b.to_async(&rt).iter(|| async {
                    // The arms return different types (Option vs bool), so
                    // each result is black_boxed in place.
                    let start = std::time::Instant::now();
                    match *operation {
                        "get_hit" => {
                            black_box(cache.get(black_box("existing_key")).await);
                        }
                        "get_miss" => {
                            black_box(cache.get(black_box("non_existing_key")).await);
                        }
                        "put" => {
                            black_box(cache.put(black_box("new_key"), black_box("value")).await);
                        }
                        "eviction" => {
                            black_box(cache.put_with_eviction(black_box("evict_key"), black_box("value")).await);
                        }
                        _ => unreachable!(),
                    }
                    let duration = start.elapsed();
                    // Warn when the 100µs cache-operation target is missed
                    if duration.as_micros() > TARGET_CACHE_LATENCY_US as u128 {
                        eprintln!(
                            "⚠️ Cache operation exceeded target: {}µs > {}µs",
                            duration.as_micros(),
                            TARGET_CACHE_LATENCY_US
                        );
                    }
                    black_box(duration)
                });
            });
        }
    }
    group.finish();
}
/// Parser performance benchmarks
fn bench_parser_performance(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("parser_performance");
    group.measurement_time(Duration::from_secs(30));
    group.sample_size(100);
    // Test different file sizes and languages
    let file_sizes = [1024, 10240, 102400, 1024000]; // bytes
    let languages = ["rust", "python", "javascript", "typescript"];
    for &file_size in &file_sizes {
        for language in &languages {
            let test_name = format!("parse_{}_{}kb", language, file_size / 1024);
            group.throughput(Throughput::Bytes(file_size as u64));
            // Generate the input and build the parser once so only parsing is measured.
            let source_code = generate_source_code(*language, file_size);
            let parser = rt.block_on(create_optimized_parser(*language));
            group.bench_function(&test_name, |b| {
                b.to_async(&rt).iter(|| async {
                    let start = std::time::Instant::now();
                    let _ast = parser.parse(black_box(&source_code)).await;
                    let duration = start.elapsed();
                    // Calculate throughput and check for the 50% improvement target
                    let throughput = file_size as f64 / duration.as_secs_f64();
                    let baseline_throughput = get_baseline_parser_throughput(*language, file_size);
                    let improvement_ratio = throughput / baseline_throughput;
                    if improvement_ratio < TARGET_PARSER_THROUGHPUT_MULTIPLIER {
                        eprintln!(
                            "⚠️ Parser throughput below target: {:.2}x < {:.2}x improvement",
                            improvement_ratio, TARGET_PARSER_THROUGHPUT_MULTIPLIER
                        );
                    }
                    black_box(duration)
                });
            });
        }
    }
    group.finish();
}
/// Memory efficiency benchmarks
fn bench_memory_efficiency(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("memory_efficiency");
    group.measurement_time(Duration::from_secs(30));
    group.sample_size(50);
    // Test memory usage patterns
    let operations = ["allocation_pattern", "deallocation_pattern", "memory_pool_usage", "zero_copy_ops"];
    let data_sizes = [1024, 10240, 102400, 1024000];
    for operation in &operations {
        for &data_size in &data_sizes {
            let test_name = format!("memory_{}_{}_bytes", operation, data_size);
            group.bench_function(&test_name, |b| {
                b.to_async(&rt).iter(|| async {
                    let initial_memory = get_memory_usage();
                    let start = std::time::Instant::now();
                    match *operation {
                        "allocation_pattern" => {
                            // Memory should be efficiently allocated
                            let _data = allocate_test_data(data_size).await;
                        }
                        "deallocation_pattern" => {
                            // Memory should be quickly reclaimed
                            let data = allocate_test_data(data_size).await;
                            drop(data);
                        }
                        "memory_pool_usage" => {
                            // Pool allocation should be faster
                            let _data = allocate_from_pool(data_size).await;
                        }
                        "zero_copy_ops" => {
                            // Should avoid unnecessary copies
                            let _result = perform_zero_copy_operation(data_size).await;
                        }
                        _ => unreachable!(),
                    }
                    let duration = start.elapsed();
                    let final_memory = get_memory_usage();
                    let memory_used = final_memory.saturating_sub(initial_memory);
                    // Check for the 50% memory usage reduction target
                    let baseline_memory = get_baseline_memory_usage(*operation, data_size);
                    if memory_used > baseline_memory / 2 {
                        eprintln!(
                            "⚠️ Memory usage above 50% reduction target: {} > {}",
                            memory_used,
                            baseline_memory / 2
                        );
                    }
                    black_box((duration, memory_used))
                });
            });
        }
    }
    group.finish();
}
/// Concurrent performance benchmarks
fn bench_concurrent_performance(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("concurrent_performance");
    group.measurement_time(Duration::from_secs(30));
    group.sample_size(100);
    // Test different concurrency levels
    let thread_counts = [1, 2, 4, 8, 16];
    let operations = ["concurrent_reads", "concurrent_writes", "mixed_operations"];
    for &thread_count in &thread_counts {
        // Destructure to a &'static str so the spawned tasks below can satisfy
        // tokio::spawn's 'static bound (a borrowed &&str would not).
        for &operation in &operations {
            let test_name = format!("concurrent_{}_{}_threads", operation, thread_count);
            group.bench_function(&test_name, |b| {
                b.to_async(&rt).iter(|| async {
                    let start = std::time::Instant::now();
                    // Spawn one task per simulated thread
                    let handles: Vec<_> = (0..thread_count)
                        .map(|i| {
                            tokio::spawn(async move {
                                match operation {
                                    "concurrent_reads" => perform_concurrent_read_operation(i).await,
                                    "concurrent_writes" => perform_concurrent_write_operation(i).await,
                                    "mixed_operations" => perform_mixed_operation(i).await,
                                    _ => unreachable!(),
                                }
                            })
                        })
                        .collect();
                    // Wait for all operations to complete
                    for handle in handles {
                        let _ = handle.await;
                    }
                    let duration = start.elapsed();
                    // Check for improved concurrent performance against the baseline
                    let baseline_duration = get_baseline_concurrent_duration(operation, thread_count);
                    let improvement_ratio = baseline_duration.as_secs_f64() / duration.as_secs_f64();
                    if improvement_ratio < TARGET_PARSER_THROUGHPUT_MULTIPLIER {
                        eprintln!(
                            "⚠️ Concurrent performance below target: {:.2}x < {:.2}x improvement",
                            improvement_ratio, TARGET_PARSER_THROUGHPUT_MULTIPLIER
                        );
                    }
                    black_box(duration)
                });
            });
        }
    }
    group.finish();
}
/// End-to-end performance benchmarks
fn bench_e2e_performance(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("e2e_performance");
    group.measurement_time(Duration::from_secs(60));
    group.sample_size(50);
    // Test complete workflows
    let workflows = [
        "code_analysis_pipeline",
        "vector_indexing_pipeline",
        "graph_construction_pipeline",
        "query_processing_pipeline",
    ];
    for workflow in &workflows {
        group.bench_function(*workflow, |b| {
            b.to_async(&rt).iter(|| async {
                let start = std::time::Instant::now();
                let _result = match *workflow {
                    "code_analysis_pipeline" => execute_code_analysis_pipeline().await,
                    "vector_indexing_pipeline" => execute_vector_indexing_pipeline().await,
                    "graph_construction_pipeline" => execute_graph_construction_pipeline().await,
                    "query_processing_pipeline" => execute_query_processing_pipeline().await,
                    _ => unreachable!(),
                };
                let duration = start.elapsed();
                // Check overall pipeline performance against the 50% improvement target
                let baseline_duration = get_baseline_pipeline_duration(*workflow);
                let improvement_ratio = baseline_duration.as_secs_f64() / duration.as_secs_f64();
                if improvement_ratio < TARGET_PARSER_THROUGHPUT_MULTIPLIER {
                    eprintln!(
                        "⚠️ Pipeline performance below target: {:.2}x < {:.2}x improvement",
                        improvement_ratio, TARGET_PARSER_THROUGHPUT_MULTIPLIER
                    );
                }
                black_box(duration)
            });
        });
    }
    group.finish();
}
// Helper functions for benchmark setup (these would need actual implementations)
fn generate_random_vector(dim: usize) -> Vec<f32> {
    use fastrand::Rng;
    let mut rng = Rng::new();
    (0..dim).map(|_| rng.f32() - 0.5).collect()
}
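// If the real search engine assumes unit-norm embeddings (common when ranking
// by cosine similarity), query vectors could be normalized first. A minimal
// sketch of such a helper (hypothetical; the mock engine does not need it):
#[allow(dead_code)]
fn normalize_vector(v: &mut [f32]) {
    // Scale to unit length; all-zero vectors are left untouched.
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        for x in v.iter_mut() {
            *x /= norm;
        }
    }
}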
fn generate_vector_dataset(size: usize, dim: usize) -> Vec<Vec<f32>> {
    (0..size).map(|_| generate_random_vector(dim)).collect()
}
async fn create_optimized_search_engine(_dim: usize, _size: usize) -> MockSearchEngine {
    // Would create an actual optimized search engine
    MockSearchEngine::new()
}
async fn create_test_graph(_nodes: usize, _edges: usize) -> MockGraph {
    // Would create an actual test graph
    MockGraph::new()
}
async fn create_test_cache(_size: usize) -> MockCache {
    // Would create an actual test cache
    MockCache::new()
}
async fn create_optimized_parser(_language: &str) -> MockParser {
    // Would create an actual optimized parser
    MockParser::new()
}
fn generate_source_code(_language: &str, size: usize) -> String {
    // Would generate realistic per-language source code; for now, repeat a
    // small snippet until the output is approximately `size` bytes.
    let snippet = "fn test() { }\n";
    snippet.repeat(size / snippet.len())
}
fn get_baseline_parser_throughput(_language: &str, _file_size: usize) -> f64 {
    // Would return actual baseline measurements (bytes per second)
    1000.0 // placeholder
}
fn get_memory_usage() -> usize {
    // Would return actual memory usage in bytes
    0 // placeholder
}
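// A minimal sketch of a real implementation, assuming Linux: read resident
// pages from /proc/self/statm and convert to bytes (4096-byte pages assumed).
#[allow(dead_code)]
#[cfg(target_os = "linux")]
fn get_memory_usage_linux() -> usize {
    std::fs::read_to_string("/proc/self/statm")
        .ok()
        .and_then(|s| s.split_whitespace().nth(1)?.parse::<usize>().ok())
        .map(|resident_pages| resident_pages * 4096)
        .unwrap_or(0)
}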
async fn allocate_test_data(size: usize) -> Vec<u8> {
    vec![0; size]
}
async fn allocate_from_pool(size: usize) -> Vec<u8> {
    // Would use an actual memory pool
    vec![0; size]
}
async fn perform_zero_copy_operation(size: usize) -> Vec<u8> {
    // Would perform an actual zero-copy operation
    vec![0; size]
}
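// A sketch of a genuinely zero-copy variant, assuming the `bytes` crate were
// added as a dependency: Bytes::slice hands out a reference-counted view of
// the same allocation rather than copying it.
#[allow(dead_code)]
async fn perform_zero_copy_operation_bytes(size: usize) -> bytes::Bytes {
    let buf = bytes::Bytes::from(vec![0u8; size]);
    // Slicing shares the underlying buffer; no memcpy occurs here.
    buf.slice(0..size / 2)
}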
fn get_baseline_memory_usage(_operation: &str, _size: usize) -> usize {
    // Would return actual baseline memory measurements
    1024 // placeholder
}
async fn perform_concurrent_read_operation(id: usize) -> usize {
    // Would perform an actual concurrent read
    id
}
async fn perform_concurrent_write_operation(id: usize) -> usize {
    // Would perform an actual concurrent write
    id
}
async fn perform_mixed_operation(id: usize) -> usize {
    // Would perform an actual mixed operation
    id
}
fn get_baseline_concurrent_duration(_operation: &str, _threads: usize) -> Duration {
    // Would return actual baseline measurements
    Duration::from_millis(100) // placeholder
}
async fn execute_code_analysis_pipeline() -> String {
    // Would execute the actual pipeline
    "result".to_string()
}
async fn execute_vector_indexing_pipeline() -> String {
    // Would execute the actual pipeline
    "result".to_string()
}
async fn execute_graph_construction_pipeline() -> String {
    // Would execute the actual pipeline
    "result".to_string()
}
async fn execute_query_processing_pipeline() -> String {
    // Would execute the actual pipeline
    "result".to_string()
}
fn get_baseline_pipeline_duration(_workflow: &str) -> Duration {
    // Would return actual baseline measurements
    Duration::from_secs(1) // placeholder
}
fn create_complex_query() -> String {
    // Would create an actual complex query
    "complex_query".to_string()
}
// Mock types for compilation (would be replaced with actual types)
struct MockSearchEngine;
impl MockSearchEngine {
    fn new() -> Self { Self }
    async fn search_knn(&self, _query: &[f32], _k: usize) -> Vec<usize> { vec![] }
}
struct MockGraph;
impl MockGraph {
    fn new() -> Self { Self }
    async fn get_node(&self, _id: usize) -> Option<String> { None }
    async fn get_neighbors(&self, _id: usize) -> Vec<usize> { vec![] }
    async fn shortest_path(&self, _from: usize, _to: usize) -> Option<Vec<usize>> { None }
    async fn complex_query(&self, _query: String) -> String { "result".to_string() }
}
struct MockCache;
impl MockCache {
    fn new() -> Self { Self }
    async fn get(&self, _key: &str) -> Option<String> { None }
    async fn put(&self, _key: &str, _value: &str) -> bool { true }
    async fn put_with_eviction(&self, _key: &str, _value: &str) -> bool { true }
}
struct MockParser;
impl MockParser {
    fn new() -> Self { Self }
    async fn parse(&self, _source: &str) -> String { "ast".to_string() }
}
// Configure Criterion for high-precision performance testing
fn performance_criterion() -> Criterion {
    Criterion::default()
        .sample_size(200)
        .measurement_time(Duration::from_secs(30))
        .warm_up_time(Duration::from_secs(5))
        .significance_level(0.01)
        .confidence_level(0.99)
}
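// To run this suite (assuming it is registered as a Criterion bench target,
// e.g. `[[bench]] name = "comprehensive_performance_suite", harness = false`
// in Cargo.toml):
//
//     cargo bench --bench comprehensive_performance_suite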
criterion_group!(
    name = performance_benches;
    config = performance_criterion();
    targets =
        bench_vector_search_performance,
        bench_graph_query_performance,
        bench_cache_performance,
        bench_parser_performance,
        bench_memory_efficiency,
        bench_concurrent_performance,
        bench_e2e_performance
);
criterion_main!(performance_benches);