Skip to main content
Glama

CodeGraph CLI MCP Server

by Jakedismo
symbol_resolver.rs8.87 kB
// ABOUTME: LSH-based symbol resolver for fast code similarity without training // ABOUTME: Provides 100-500μs symbol resolution using locality-sensitive hashing use codegraph_core::{EdgeRelationship, EdgeType, ExtractionResult}; use lsh_rs2::prelude::*; use std::collections::HashMap; use tracing::debug; /// Fast symbol resolver using Locality-Sensitive Hashing (100-500μs per query) pub struct SymbolResolver { /// LSH index for symbol similarity (optional - lazy initialized) lsh: Option<LshMem<SignRandomProjections<f32>>>, /// Symbols in insertion order (index → symbol name) symbols: Vec<String>, /// Symbol to vector mapping for lookups symbol_vectors: HashMap<String, Vec<f32>>, /// Configuration min_similarity_threshold: f32, /// Vector dimensionality dim: usize, } impl SymbolResolver { /// Create new symbol resolver pub fn new() -> Self { Self { lsh: None, symbols: Vec::new(), symbol_vectors: HashMap::new(), min_similarity_threshold: 0.7, dim: 64, // Character-based hash dimension (lighter/faster) } } /// Index symbols from extraction result for fast lookups pub fn index_symbols(&mut self, result: &ExtractionResult) { if result.nodes.is_empty() { return; } // Collect all symbol vectors let mut vectors = Vec::new(); for node in &result.nodes { let symbol = &node.name; if symbol.is_empty() { continue; } // Create feature vector from symbol let symbol_str = symbol.to_string(); let vec = Self::symbol_to_hash(&symbol_str); self.symbol_vectors.insert(symbol_str.clone(), vec.clone()); self.symbols.push(symbol_str); vectors.push(vec); } if vectors.is_empty() { return; } // Initialize LSH index if not already done (leaner params) if self.lsh.is_none() { // LSH configuration: fewer projections/tables for speed let mut lsh = LshMem::new(3, 6, self.dim) .srp() // Signed Random Projections for cosine similarity .expect("Failed to create LSH index"); // Store all vectors let _ = lsh.store_vecs(&vectors); self.lsh = Some(lsh); } else if let Some(ref mut lsh) = self.lsh { // Add new vectors to existing index let _ = lsh.store_vecs(&vectors); } debug!("Indexed {} symbols in SymbolResolver", result.nodes.len()); } /// Resolve similar symbols for unmatched references (100-500μs per query) pub fn resolve_symbols(&self, mut result: ExtractionResult) -> ExtractionResult { // Skip resolver for very small files to avoid overhead if result.nodes.len() < 2 { return result; } let mut new_edges = Vec::new(); let max_edges_per_file = 10usize; let mut added = 0usize; // Find edges pointing to symbols that don't exist in nodes let existing_symbols: HashMap<String, _> = result .nodes .iter() .map(|n| (n.name.to_string(), n.id)) .collect(); for edge in &result.edges { // Check if target symbol exists if !existing_symbols.contains_key(&edge.to) && !edge.to.is_empty() { // Try to find similar symbols using LSH if let Some(similar) = self.find_similar_symbol(&edge.to) { // Create edge to similar symbol let mut metadata = HashMap::new(); metadata.insert("original_target".to_string(), edge.to.clone()); metadata.insert("resolved_target".to_string(), similar.clone()); metadata.insert( "fast_ml_enhancement".to_string(), "lsh_resolution".to_string(), ); new_edges.push(EdgeRelationship { from: edge.from, to: similar, edge_type: EdgeType::Uses, metadata, span: None, }); added += 1; if added >= max_edges_per_file { break; } } } } let enhancement_count = new_edges.len(); if enhancement_count > 0 { debug!( "⚡ SymbolResolver: Resolved {} symbols using LSH", enhancement_count ); result.edges.extend(new_edges); } result } /// Find similar symbol using LSH (100-500μs) fn find_similar_symbol(&self, symbol: &str) -> Option<String> { let lsh = self.lsh.as_ref()?; let query_vec = Self::symbol_to_hash(symbol); // Query LSH index for similar symbols (returns Result<Vec<&Vec<f32>>>) let candidate_vectors = lsh.query_bucket(&query_vec).ok()?; if candidate_vectors.is_empty() { return None; } // Match candidate vectors back to symbols // Since we can't directly get indices, we need to search for matching vectors let mut best_match = None; let mut best_score = 0.0; for candidate_vec in candidate_vectors { // Find which symbol this vector belongs to for (sym, vec) in &self.symbol_vectors { // Compare vectors (approximate match due to floating point) if vec.len() == candidate_vec.len() && vec .iter() .zip(candidate_vec.iter()) .all(|(a, b)| (a - b).abs() < 0.001) { let score = Self::string_similarity(symbol, sym); if score > best_score && score >= self.min_similarity_threshold { best_score = score; best_match = Some(sym.clone()); } break; } } } best_match } /// Convert symbol to hash vector for LSH fn symbol_to_hash(symbol: &str) -> Vec<f32> { // Simple character-based hashing (can be enhanced with better features) let mut hash = vec![0.0; 128]; for (i, c) in symbol.chars().enumerate() { let idx = (c as usize + i) % 128; hash[idx] += 1.0; } // Normalize let sum: f32 = hash.iter().sum(); if sum > 0.0 { for val in &mut hash { *val /= sum; } } hash } /// Calculate string similarity (simple but fast) fn string_similarity(s1: &str, s2: &str) -> f32 { if s1 == s2 { return 1.0; } let s1_lower = s1.to_lowercase(); let s2_lower = s2.to_lowercase(); if s1_lower == s2_lower { return 0.95; } // Check containment if s1_lower.contains(&s2_lower) || s2_lower.contains(&s1_lower) { return 0.8; } // Simple character overlap let chars1: std::collections::HashSet<char> = s1_lower.chars().collect(); let chars2: std::collections::HashSet<char> = s2_lower.chars().collect(); let intersection = chars1.intersection(&chars2).count(); let union = chars1.union(&chars2).count(); if union == 0 { 0.0 } else { intersection as f32 / union as f32 } } } impl Default for SymbolResolver { fn default() -> Self { Self::new() } } #[cfg(test)] mod tests { use super::*; use codegraph_core::{CodeNode, NodeId}; #[test] fn test_symbol_similarity() { let sim1 = SymbolResolver::string_similarity("HashMap", "HashMap"); assert_eq!(sim1, 1.0); let sim2 = SymbolResolver::string_similarity("HashMap", "hashmap"); assert!(sim2 > 0.9); let sim3 = SymbolResolver::string_similarity("HashMap", "HashSet"); assert!(sim3 > 0.5); } #[test] fn test_symbol_resolution() { let mut resolver = SymbolResolver::new(); let result = ExtractionResult { nodes: vec![CodeNode { id: NodeId::new_v4(), name: "HashMap".to_string(), ..Default::default() }], edges: vec![], }; resolver.index_symbols(&result); // Should be able to resolve similar symbols let similar = resolver.find_similar_symbol("hashmap"); assert!(similar.is_some()); } }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server