Skip to main content
Glama

CodeGraph CLI MCP Server

by Jakedismo
pipeline.rs•16.4 kB
use crate::visitor::{AstToGraphConverter, CodeEntity, SemanticRelationship}; use crate::edge::CodeEdge; use codegraph_core::{CodeNode, Language, NodeId, EdgeType}; use tree_sitter::{Parser, Tree}; use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::sync::Arc; use dashmap::DashMap; type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; #[derive(Debug, Clone)] pub struct CrossFileReference { pub from_file: PathBuf, pub to_file: PathBuf, pub from_symbol: String, pub to_symbol: String, pub reference_type: EdgeType, } pub struct ConversionPipeline { parsers: HashMap<Language, Parser>, global_symbol_table: Arc<DashMap<String, (NodeId, PathBuf)>>, file_entities: HashMap<PathBuf, Vec<CodeEntity>>, cross_file_refs: Vec<CrossFileReference>, processed_files: HashSet<PathBuf>, } impl ConversionPipeline { pub fn new() -> Result<Self> { let mut parsers = HashMap::new(); // Rust if let Ok(mut parser) = Parser::new() { let language = tree_sitter_rust::language(); parser .set_language(language) .map_err(|e| format!("Failed to set Rust language: {:?}", e))?; parsers.insert(Language::Rust, parser); } // TypeScript if let Ok(mut parser) = Parser::new() { let language = tree_sitter_typescript::language_typescript(); parser .set_language(language) .map_err(|e| format!("Failed to set TypeScript language: {:?}", e))?; parsers.insert(Language::TypeScript, parser); } // JavaScript if let Ok(mut parser) = Parser::new() { let language = tree_sitter_javascript::language(); parser .set_language(language) .map_err(|e| format!("Failed to set JavaScript language: {:?}", e))?; parsers.insert(Language::JavaScript, parser); } // Python if let Ok(mut parser) = Parser::new() { let language = tree_sitter_python::language(); parser .set_language(language) .map_err(|e| format!("Failed to set Python language: {:?}", e))?; parsers.insert(Language::Python, parser); } // Go if let Ok(mut parser) = Parser::new() { let language = tree_sitter_go::language(); parser 
.set_language(language) .map_err(|e| format!("Failed to set Go language: {:?}", e))?; parsers.insert(Language::Go, parser); } // Java if let Ok(mut parser) = Parser::new() { let language = tree_sitter_java::language(); parser .set_language(language) .map_err(|e| format!("Failed to set Java language: {:?}", e))?; parsers.insert(Language::Java, parser); } // C++ if let Ok(mut parser) = Parser::new() { let language = tree_sitter_cpp::language(); parser .set_language(language) .map_err(|e| format!("Failed to set C++ language: {:?}", e))?; parsers.insert(Language::Cpp, parser); } Ok(Self { parsers, global_symbol_table: Arc::new(DashMap::new()), file_entities: HashMap::new(), cross_file_refs: Vec::new(), processed_files: HashSet::new(), }) } pub fn process_file(&mut self, file_path: &Path, source: String) -> Result<ConversionResult> { let language = self.detect_language(file_path)?; let file_path_buf = file_path.to_path_buf(); if self.processed_files.contains(&file_path_buf) { return Err(format!("File already processed: {}", file_path.display()).into()); } // Choose parser; use TSX grammar for .tsx files let is_tsx = file_path .extension() .and_then(|s| s.to_str()) .map(|ext| ext.eq_ignore_ascii_case("tsx")) .unwrap_or(false); let mut owned_tsx: Option<Parser> = None; if matches!(language, Language::TypeScript) && is_tsx { let mut p = Parser::new()?; p.set_language(tree_sitter_typescript::language_tsx()) .map_err(|e| format!("Failed to set TSX language: {:?}", e))?; owned_tsx = Some(p); } let parser: &mut Parser = if let Some(ref mut p) = owned_tsx { p } else { self.parsers.get_mut(&language).ok_or_else(|| { format!("No parser available for language: {:?}", language) })? 
}; let tree = parser.parse(&source, None).ok_or_else(|| { format!("Failed to parse file: {}", file_path.display()) })?; let mut converter = AstToGraphConverter::new( language.clone(), file_path.to_string_lossy().to_string(), source, ); converter.convert(tree.root_node()).map_err(|e| { format!("Conversion failed: {}", e) })?; self.register_symbols(&converter, &file_path_buf)?; self.file_entities.insert(file_path_buf.clone(), converter.entities.clone()); self.processed_files.insert(file_path_buf); Ok(ConversionResult { nodes: converter.get_nodes(), edges: converter.get_edges(), entities: converter.entities, relationships: converter.relationships, }) } pub fn resolve_cross_file_links(&mut self) -> Result<Vec<CodeEdge>> { let mut cross_file_edges = Vec::new(); for (file_path, entities) in &self.file_entities { for entity in entities { let potential_refs = self.find_potential_cross_file_references(entity, file_path)?; for (target_symbol, target_node_id, target_file) in potential_refs { let edge_type = self.infer_cross_file_edge_type(&entity.node.content, &target_symbol); cross_file_edges.push(CodeEdge::new( entity.node.id, target_node_id, edge_type, ).with_metadata("cross_file".to_string(), "true".to_string()) .with_metadata("target_file".to_string(), target_file.to_string_lossy().to_string())); } } } Ok(cross_file_edges) } pub fn build_dependency_graph(&self) -> Result<DependencyGraph> { let mut graph = DependencyGraph::new(); for file_path in &self.processed_files { graph.add_file(file_path.clone()); } for cross_ref in &self.cross_file_refs { graph.add_dependency(cross_ref.from_file.clone(), cross_ref.to_file.clone()); } graph.compute_metrics()?; Ok(graph) } pub fn optimize_memory_usage(&mut self) { self.global_symbol_table.clear(); self.file_entities.clear(); self.cross_file_refs.clear(); } fn register_symbols(&self, converter: &AstToGraphConverter, file_path: &PathBuf) -> Result<()> { for entity in &converter.entities { let global_symbol = format!("{}::{}", 
file_path.to_string_lossy(), entity.qualified_name); self.global_symbol_table.insert( global_symbol.clone(), (entity.node.id, file_path.clone()), ); self.global_symbol_table.insert( entity.symbol_name.clone(), (entity.node.id, file_path.clone()), ); } Ok(()) } fn find_potential_cross_file_references( &self, entity: &CodeEntity, current_file: &PathBuf, ) -> Result<Vec<(String, NodeId, PathBuf)>> { let mut potential_refs = Vec::new(); let empty = String::new(); let content = entity.node.content.as_ref().unwrap_or(&empty); for (symbol, (node_id, file_path)) in self.global_symbol_table.iter() { if file_path != current_file && content.contains(symbol) { potential_refs.push((symbol.clone(), *node_id, file_path.clone())); } } Ok(potential_refs) } fn infer_cross_file_edge_type(&self, content: &Option<String>, symbol: &str) -> EdgeType { let empty2 = String::new(); let content = content.as_ref().unwrap_or(&empty2); if content.contains("use ") || content.contains("import ") { EdgeType::Imports } else if content.contains(&format!("{}(", symbol)) { EdgeType::Calls } else if content.contains("extends") || content.contains("implements") { EdgeType::Implements } else { EdgeType::References } } fn detect_language(&self, file_path: &Path) -> Result<Language> { match file_path.extension().and_then(|s| s.to_str()) { Some("rs") => Ok(Language::Rust), Some("ts") | Some("tsx") => Ok(Language::TypeScript), Some("js") => Ok(Language::JavaScript), Some("py") => Ok(Language::Python), Some("go") => Ok(Language::Go), Some("java") => Ok(Language::Java), Some("cpp") | Some("cc") | Some("cxx") => Ok(Language::Cpp), _ => Ok(Language::Other("unknown".to_string())), } } } #[derive(Debug)] pub struct ConversionResult { pub nodes: Vec<CodeNode>, pub edges: Vec<CodeEdge>, pub entities: Vec<CodeEntity>, pub relationships: Vec<SemanticRelationship>, } #[derive(Debug)] pub struct DependencyGraph { files: HashSet<PathBuf>, dependencies: HashMap<PathBuf, HashSet<PathBuf>>, metrics: DependencyMetrics, } 
#[derive(Debug, Default)] pub struct DependencyMetrics { pub total_files: usize, pub total_dependencies: usize, pub cyclic_dependencies: Vec<Vec<PathBuf>>, pub fan_out: HashMap<PathBuf, usize>, pub fan_in: HashMap<PathBuf, usize>, } impl DependencyGraph { pub fn new() -> Self { Self { files: HashSet::new(), dependencies: HashMap::new(), metrics: DependencyMetrics::default(), } } pub fn add_file(&mut self, file: PathBuf) { self.files.insert(file); } pub fn add_dependency(&mut self, from: PathBuf, to: PathBuf) { self.dependencies.entry(from).or_insert_with(HashSet::new).insert(to); } pub fn compute_metrics(&mut self) -> Result<()> { self.metrics.total_files = self.files.len(); self.metrics.total_dependencies = self.dependencies.values().map(|deps| deps.len()).sum(); for (file, deps) in &self.dependencies { self.metrics.fan_out.insert(file.clone(), deps.len()); for dep in deps { *self.metrics.fan_in.entry(dep.clone()).or_insert(0) += 1; } } self.metrics.cyclic_dependencies = self.find_cycles()?; Ok(()) } fn find_cycles(&self) -> Result<Vec<Vec<PathBuf>>> { let mut cycles = Vec::new(); let mut visited = HashSet::new(); let mut rec_stack = HashSet::new(); let mut current_path = Vec::new(); for file in &self.files { if !visited.contains(file) { self.dfs_find_cycles( file, &mut visited, &mut rec_stack, &mut current_path, &mut cycles, ); } } Ok(cycles) } fn dfs_find_cycles( &self, file: &PathBuf, visited: &mut HashSet<PathBuf>, rec_stack: &mut HashSet<PathBuf>, current_path: &mut Vec<PathBuf>, cycles: &mut Vec<Vec<PathBuf>>, ) { visited.insert(file.clone()); rec_stack.insert(file.clone()); current_path.push(file.clone()); if let Some(deps) = self.dependencies.get(file) { for dep in deps { if !visited.contains(dep) { self.dfs_find_cycles(dep, visited, rec_stack, current_path, cycles); } else if rec_stack.contains(dep) { if let Some(start_idx) = current_path.iter().position(|f| f == dep) { let cycle = current_path[start_idx..].to_vec(); cycles.push(cycle); } } } } 
current_path.pop(); rec_stack.remove(file); } pub fn get_metrics(&self) -> &DependencyMetrics { &self.metrics } } pub struct ZeroCopySymbolResolver<'a> { source_bytes: &'a [u8], symbol_positions: HashMap<&'a str, Vec<(usize, usize)>>, resolved_cache: HashMap<&'a str, NodeId>, } impl<'a> ZeroCopySymbolResolver<'a> { pub fn new(source_bytes: &'a [u8]) -> Self { Self { source_bytes, symbol_positions: HashMap::new(), resolved_cache: HashMap::new(), } } pub fn index_symbols(&mut self, symbols: &[&'a str]) -> Result<()> { for symbol in symbols { let positions = self.find_symbol_positions(symbol); self.symbol_positions.insert(symbol, positions); } Ok(()) } pub fn resolve_symbol(&self, symbol: &str) -> Option<NodeId> { self.resolved_cache.get(symbol).copied() } fn find_symbol_positions(&self, symbol: &str) -> Vec<(usize, usize)> { let mut positions = Vec::new(); let symbol_bytes = symbol.as_bytes(); let mut start = 0; while let Some(pos) = self.find_next_occurrence(&self.source_bytes[start..], symbol_bytes) { let absolute_pos = start + pos; positions.push((absolute_pos, absolute_pos + symbol_bytes.len())); start = absolute_pos + symbol_bytes.len(); } positions } fn find_next_occurrence(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { haystack.windows(needle.len()).position(|window| window == needle) } } #[cfg(test)] mod tests { use super::*; use std::fs; use tempfile::TempDir; #[test] fn test_conversion_pipeline_basic() { let mut pipeline = ConversionPipeline::new().unwrap(); let rust_code = r#" use std::collections::HashMap; struct Config { name: String, } fn process_config(config: Config) -> HashMap<String, String> { let mut result = HashMap::new(); result.insert("name".to_string(), config.name); result } "#; let temp_dir = TempDir::new().unwrap(); let file_path = temp_dir.path().join("test.rs"); let result = pipeline.process_file(&file_path, rust_code.to_string()).unwrap(); assert!(!result.nodes.is_empty()); assert!(!result.edges.is_empty()); 
assert!(result.nodes.iter().any(|n| n.name == "Config")); assert!(result.nodes.iter().any(|n| n.name == "process_config")); } #[test] fn test_zero_copy_symbol_resolver() { let source = "fn hello() { world(); }"; let mut resolver = ZeroCopySymbolResolver::new(source.as_bytes()); resolver.index_symbols(&["hello", "world"]).unwrap(); let hello_positions = resolver.symbol_positions.get("hello").unwrap(); assert_eq!(hello_positions.len(), 1); assert_eq!(hello_positions[0], (3, 8)); // "hello" starts at position 3 } #[test] fn test_dependency_graph_cycles() { let mut graph = DependencyGraph::new(); let file_a = PathBuf::from("a.rs"); let file_b = PathBuf::from("b.rs"); let file_c = PathBuf::from("c.rs"); graph.add_file(file_a.clone()); graph.add_file(file_b.clone()); graph.add_file(file_c.clone()); graph.add_dependency(file_a.clone(), file_b.clone()); graph.add_dependency(file_b.clone(), file_c.clone()); graph.add_dependency(file_c.clone(), file_a.clone()); // Creates a cycle graph.compute_metrics().unwrap(); assert_eq!(graph.metrics.cyclic_dependencies.len(), 1); assert_eq!(graph.metrics.total_files, 3); } }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.