
CodeGraph CLI MCP Server

by Jakedismo
optimization.rs (13 kB)
use codegraph_core::{CodeGraphError, Result};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::time::Instant;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QuantizationMethod {
    Linear,
    Asymmetric,
    Symmetric,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizationConfig {
    pub bits: u8,
    pub method: QuantizationMethod,
    pub calibration_samples: usize,
    pub symmetric: bool,
    pub preserve_accuracy: bool,
}

/// Flattened batch of quantized vectors: `data` holds `shape[0] * shape[1]`
/// bytes, with per-vector `scales` and `zero_points` for dequantization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuantizedBatch {
    pub data: Vec<u8>,
    pub scales: Vec<f32>,
    pub zero_points: Vec<i32>,
    pub shape: Vec<usize>,
}

#[derive(Debug, Clone)]
pub struct ParallelConfig {
    pub num_threads: usize,
    pub batch_size: usize,
    pub enable_simd: bool,
    pub memory_prefetch: bool,
}

#[derive(Debug, Clone)]
pub struct OptimizationPipelineConfig {
    pub quantization: QuantizationConfig,
    pub memory_optimization: bool,
    pub gpu_acceleration: bool,
    pub parallel_processing: bool,
    pub compression_enabled: bool,
    pub target_accuracy: f32,
    pub target_memory_reduction: f32,
}

#[derive(Debug, Clone)]
pub struct OptimizationResult {
    pub memory_reduction: f32,
    pub accuracy_preservation: f32,
    pub inference_speedup: f32,
    pub optimized_data: Vec<u8>,
    pub metadata: HashMap<String, String>,
}

impl OptimizationResult {
    pub fn search_optimized(&self, _query: &[f32], _limit: usize) -> Result<Vec<usize>> {
        // Implementation for optimized search using quantized data.
        // This would decode `optimized_data` and perform the search.
        Ok((0..10).collect()) // Placeholder implementation
    }
}

pub struct ModelOptimizer {
    dimension: usize,
    calibration_data: Option<Vec<Vec<f32>>>,
}

impl ModelOptimizer {
    pub fn new(dimension: usize) -> Self {
        Self {
            dimension,
            calibration_data: None,
        }
    }

    pub fn quantize_vector(&self, vector: &[f32], config: &QuantizationConfig) -> Result<Vec<i8>> {
        if vector.len() != self.dimension {
            return Err(CodeGraphError::Vector(format!(
                "Vector dimension {} doesn't match expected {}",
                vector.len(),
                self.dimension
            )));
        }
        match config.method {
            QuantizationMethod::Linear => self.quantize_linear(vector, config),
            QuantizationMethod::Asymmetric => self.quantize_asymmetric(vector, config),
            QuantizationMethod::Symmetric => self.quantize_symmetric(vector, config),
        }
    }

    fn quantize_linear(&self, vector: &[f32], config: &QuantizationConfig) -> Result<Vec<i8>> {
        let min_val = vector.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_val = vector.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
        let range = max_val - min_val;
        if range == 0.0 {
            return Ok(vec![0; vector.len()]);
        }
        let q_min = -(1i32 << (config.bits - 1));
        let q_max = (1i32 << (config.bits - 1)) - 1;
        let scale = range / ((1i32 << config.bits) - 1) as f32;
        let quantized: Vec<i8> = vector
            .iter()
            .map(|&val| {
                // Offset by q_min so [min_val, max_val] spans the full signed
                // range; without the offset, the upper half of the range
                // saturates at q_max.
                let normalized = (val - min_val) / scale;
                let q_val = normalized.round() as i32 + q_min;
                q_val.clamp(q_min, q_max) as i8
            })
            .collect();
        Ok(quantized)
    }

    fn quantize_asymmetric(&self, vector: &[f32], config: &QuantizationConfig) -> Result<Vec<i8>> {
        let min_val = vector.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_val = vector.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
        let range = max_val - min_val;
        if range == 0.0 {
            return Ok(vec![0; vector.len()]);
        }
        let q_min = -(1i32 << (config.bits - 1));
        let q_max = (1i32 << (config.bits - 1)) - 1;
        let scale = range / (q_max - q_min) as f32;
        // The zero point shifts the quantized grid so min_val maps to q_min.
        let zero_point = q_min as f32 - min_val / scale;
        let quantized: Vec<i8> = vector
            .iter()
            .map(|&val| {
                let q_val = (val / scale + zero_point).round() as i32;
                q_val.clamp(q_min, q_max) as i8
            })
            .collect();
        Ok(quantized)
    }

    fn quantize_symmetric(&self, vector: &[f32], config: &QuantizationConfig) -> Result<Vec<i8>> {
        let abs_max = vector.iter().map(|x| x.abs()).fold(0.0f32, f32::max);
        if abs_max == 0.0 {
            return Ok(vec![0; vector.len()]);
        }
        let q_max = (1i32 << (config.bits - 1)) - 1;
        let scale = abs_max / q_max as f32;
        let quantized: Vec<i8> = vector
            .iter()
            .map(|&val| {
                let q_val = (val / scale).round() as i32;
                q_val.clamp(-q_max, q_max) as i8
            })
            .collect();
        Ok(quantized)
    }

    pub fn dequantize_vector(
        &self,
        quantized: &[i8],
        _config: &QuantizationConfig,
    ) -> Result<Vec<f32>> {
        // Simplified dequantization: in practice this would need the scale
        // and zero-point values stored during quantization.
        let scale = 0.1f32; // Placeholder - should be stored from quantization
        let dequantized: Vec<f32> = quantized
            .iter()
            .map(|&q_val| q_val as f32 * scale)
            .collect();
        Ok(dequantized)
    }

    pub fn quantize_batch(
        &self,
        vectors: &[&[f32]],
        config: &QuantizationConfig,
    ) -> Result<QuantizedBatch> {
        if vectors.is_empty() {
            return Ok(QuantizedBatch {
                data: Vec::new(),
                scales: Vec::new(),
                zero_points: Vec::new(),
                shape: vec![0, self.dimension],
            });
        }

        let mut all_data = Vec::new();
        let mut scales = Vec::new();
        let mut zero_points = Vec::new();

        for vector in vectors {
            if vector.len() != self.dimension {
                return Err(CodeGraphError::Vector(format!(
                    "Vector dimension {} doesn't match expected {}",
                    vector.len(),
                    self.dimension
                )));
            }

            let quantized = self.quantize_vector(vector, config)?;

            // Store quantization parameters (simplified).
            let min_val = vector.iter().fold(f32::INFINITY, |a, &b| a.min(b));
            let max_val = vector.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
            let scale = (max_val - min_val) / ((1i32 << config.bits) - 1) as f32;
            scales.push(scale);
            zero_points.push(0); // Simplified zero point

            // Shift i8 values into u8 for storage (offset by 128).
            let data_u8: Vec<u8> = quantized.iter().map(|&x| (x as i16 + 128) as u8).collect();
            all_data.extend(data_u8);
        }

        Ok(QuantizedBatch {
            data: all_data,
            scales,
            zero_points,
            shape: vec![vectors.len(), self.dimension],
        })
    }

    pub fn dequantize_batch(
        &self,
        batch: &QuantizedBatch,
        _config: &QuantizationConfig,
    ) -> Result<Vec<Vec<f32>>> {
        let batch_size = batch.shape[0];
        let dimension = batch.shape[1];
        let mut result = Vec::with_capacity(batch_size);

        for i in 0..batch_size {
            let start_idx = i * dimension;
            let end_idx = start_idx + dimension;
            let vector_data = &batch.data[start_idx..end_idx];
            let scale = batch.scales[i];

            // Undo the u8 storage offset, then rescale. With the simplified
            // per-vector scale and zero point stored above, this round trip
            // is approximate.
            let dequantized: Vec<f32> = vector_data
                .iter()
                .map(|&byte| {
                    let signed_val = (byte as i16) - 128;
                    signed_val as f32 * scale
                })
                .collect();
            result.push(dequantized);
        }

        Ok(result)
    }

    pub fn quantize_parallel(
        &self,
        vectors: &[Vec<f32>],
        config: &QuantizationConfig,
        parallel_config: &ParallelConfig,
    ) -> Result<Vec<Vec<i8>>> {
        // Use a scoped thread pool rather than `build_global`, which can only
        // succeed once per process and would fail on repeated calls.
        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(parallel_config.num_threads)
            .build()
            .map_err(|e| CodeGraphError::Vector(e.to_string()))?;

        pool.install(|| {
            vectors
                .par_chunks(parallel_config.batch_size)
                .map(|chunk| {
                    chunk
                        .iter()
                        .map(|vector| self.quantize_vector(vector, config))
                        .collect::<Result<Vec<_>>>()
                })
                .collect::<Result<Vec<_>>>()
                .map(|batches| batches.into_iter().flatten().collect())
        })
    }

    pub fn search_baseline(
        &self,
        query: &[f32],
        vectors: &[Vec<f32>],
        limit: usize,
    ) -> Result<Vec<usize>> {
        if vectors.is_empty() {
            return Ok(Vec::new());
        }

        let mut distances: Vec<(usize, f32)> = vectors
            .iter()
            .enumerate()
            .map(|(idx, vector)| {
                let distance = self.cosine_distance(query, vector);
                (idx, distance)
            })
            .collect();

        distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        Ok(distances
            .into_iter()
            .take(limit)
            .map(|(idx, _)| idx)
            .collect())
    }

    fn cosine_distance(&self, a: &[f32], b: &[f32]) -> f32 {
        if a.len() != b.len() {
            return f32::INFINITY;
        }
        let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
        let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if norm_a == 0.0 || norm_b == 0.0 {
            return f32::INFINITY;
        }
        1.0 - (dot_product / (norm_a * norm_b))
    }

    pub async fn optimize_full_pipeline(
        &self,
        vectors: &[Vec<f32>],
        config: &OptimizationPipelineConfig,
    ) -> Result<OptimizationResult> {
        let start_time = Instant::now();

        // Step 1: Quantization
        let vector_refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
        let quantized_batch = self.quantize_batch(&vector_refs, &config.quantization)?;

        // Step 2: Calculate memory reduction
        let original_size = vectors.len() * self.dimension * std::mem::size_of::<f32>();
        let quantized_size = quantized_batch.data.len() * std::mem::size_of::<u8>()
            + quantized_batch.scales.len() * std::mem::size_of::<f32>()
            + quantized_batch.zero_points.len() * std::mem::size_of::<i32>();
        let memory_reduction = 1.0 - (quantized_size as f32 / original_size as f32);

        // Step 3: Validate accuracy
        let dequantized_batch = self.dequantize_batch(&quantized_batch, &config.quantization)?;
        let accuracy_preservation =
            self.calculate_accuracy_preservation(vectors, &dequantized_batch);

        // Step 4: Measure inference speedup (simplified placeholder figures)
        let inference_speedup = if config.gpu_acceleration { 2.5 } else { 1.2 };

        let mut metadata = HashMap::new();
        metadata.insert(
            "optimization_time_ms".to_string(),
            start_time.elapsed().as_millis().to_string(),
        );
        metadata.insert(
            "quantization_bits".to_string(),
            config.quantization.bits.to_string(),
        );
        metadata.insert(
            "parallel_processing".to_string(),
            config.parallel_processing.to_string(),
        );

        Ok(OptimizationResult {
            memory_reduction,
            accuracy_preservation,
            inference_speedup,
            optimized_data: quantized_batch.data,
            metadata,
        })
    }

    fn calculate_accuracy_preservation(
        &self,
        original: &[Vec<f32>],
        dequantized: &[Vec<f32>],
    ) -> f32 {
        if original.len() != dequantized.len() {
            return 0.0;
        }

        let mut total_error = 0.0;
        let mut total_elements = 0;

        for (orig, deq) in original.iter().zip(dequantized.iter()) {
            if orig.len() != deq.len() {
                continue;
            }
            for (&o, &d) in orig.iter().zip(deq.iter()) {
                total_error += (o - d).abs();
                total_elements += 1;
            }
        }

        if total_elements == 0 {
            return 0.0;
        }

        // Score as 1 - MAE, assuming values are normalized to [-1, 1].
        let mean_absolute_error = total_error / total_elements as f32;
        let max_error = 1.0;
        (1.0 - (mean_absolute_error / max_error)).max(0.0)
    }
}
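For orientation, here is a minimal usage sketch of the public surface above. It is not part of the repository: the function name `run_optimizer_demo`, the dimension, the corpus, and all configuration values are illustrative assumptions, chosen only to exercise the quantize/search/pipeline calls.

// Hypothetical driver, assuming the types above are in scope.
// Quantizes a tiny corpus at 8 bits, runs the full pipeline, and does a
// baseline nearest-neighbour search.
async fn run_optimizer_demo() -> Result<()> {
    let dimension = 4; // illustrative; real embeddings are much wider
    let optimizer = ModelOptimizer::new(dimension);

    let config = QuantizationConfig {
        bits: 8,
        method: QuantizationMethod::Symmetric,
        calibration_samples: 0,
        symmetric: true,
        preserve_accuracy: true,
    };

    let vectors = vec![
        vec![0.1, -0.4, 0.9, 0.0],
        vec![0.8, 0.2, -0.1, 0.5],
    ];

    // Per-vector quantization: one i8 code per input component.
    let q = optimizer.quantize_vector(&vectors[0], &config)?;
    assert_eq!(q.len(), dimension);

    // Full pipeline: quantize the batch and report memory/accuracy figures.
    let pipeline = OptimizationPipelineConfig {
        quantization: config,
        memory_optimization: true,
        gpu_acceleration: false,
        parallel_processing: false,
        compression_enabled: false,
        target_accuracy: 0.95,
        target_memory_reduction: 0.5,
    };
    let result = optimizer.optimize_full_pipeline(&vectors, &pipeline).await?;
    println!(
        "memory reduction: {:.2}, accuracy preserved: {:.2}",
        result.memory_reduction, result.accuracy_preservation
    );

    // Baseline cosine-distance search; the query's own index comes back first.
    let hits = optimizer.search_baseline(&vectors[0], &vectors, 1)?;
    assert_eq!(hits.first(), Some(&0));
    Ok(())
}

Since optimize_full_pipeline is async, the caller needs an async executor (for example Tokio); the synchronous quantize and search methods can be called directly.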

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.