
CodeGraph CLI MCP Server

by Jakedismo
estimation.rs (9.29 kB)
// ABOUTME: Provides repository counting and embedding time estimation utilities.
// ABOUTME: Shared helpers for CLI planners and indexer symbol handling logic.

use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;

use anyhow::Result;
use codegraph_core::{CodeNode, EdgeRelationship, NodeId};
use codegraph_parser::{
    file_collect, file_collect::FileCollectionConfig, ParsingStatistics, TreeSitterParser,
};
use futures::stream::{self, StreamExt};
use serde::Serialize;
use tokio::sync::Semaphore;
use tracing::{debug, info, warn};

use crate::indexer::IndexerConfig;

#[derive(Debug, Clone, Serialize)]
pub struct RepositoryCounts {
    pub total_files: usize,
    pub parsed_files: usize,
    pub failed_files: usize,
    pub nodes: usize,
    pub edges: usize,
    pub symbols: usize,
}

#[derive(Debug, Clone, Serialize)]
pub struct ParsingSummary {
    pub total_lines: usize,
    pub duration_seconds: f64,
    pub files_per_second: f64,
    pub lines_per_second: f64,
}

impl From<&ParsingStatistics> for ParsingSummary {
    fn from(stats: &ParsingStatistics) -> Self {
        Self {
            total_lines: stats.total_lines,
            duration_seconds: stats.parsing_duration.as_secs_f64(),
            files_per_second: stats.files_per_second,
            lines_per_second: stats.lines_per_second,
        }
    }
}

#[derive(Debug, Clone, Serialize)]
pub struct TimeEstimates {
    pub jina_batches: usize,
    pub jina_batch_size: usize,
    pub jina_batch_minutes: f64,
    pub jina_minutes: f64,
    pub local_minutes: Option<f64>,
    pub local_rate_per_minute: Option<f64>,
}

impl TimeEstimates {
    pub fn from_node_count(node_count: usize, cfg: &EmbeddingThroughputConfig) -> Self {
        // Ceiling division: partial batches still cost a full batch.
        let jina_batches = if node_count == 0 {
            0
        } else {
            (node_count + cfg.jina_batch_size - 1) / cfg.jina_batch_size
        };
        let jina_minutes = jina_batches as f64 * cfg.jina_batch_minutes;
        let local_minutes = cfg.local_embeddings_per_minute.and_then(|rate| {
            if rate > 0.0 {
                Some(node_count as f64 / rate)
            } else {
                None
            }
        });
        Self {
            jina_batches,
            jina_batch_size: cfg.jina_batch_size,
            jina_batch_minutes: cfg.jina_batch_minutes,
            jina_minutes,
            local_minutes,
            local_rate_per_minute: cfg.local_embeddings_per_minute,
        }
    }
}

#[derive(Debug, Clone)]
pub struct RepositoryEstimate {
    pub counts: RepositoryCounts,
    pub parsing: ParsingSummary,
    pub parsing_duration: Duration,
    pub timings: TimeEstimates,
}

#[derive(Debug, Clone)]
pub struct EmbeddingThroughputConfig {
    pub jina_batch_size: usize,
    pub jina_batch_minutes: f64,
    pub local_embeddings_per_minute: Option<f64>,
}

impl EmbeddingThroughputConfig {
    pub fn with_local_rate(mut self, rate: Option<f64>) -> Self {
        self.local_embeddings_per_minute = rate;
        self
    }
}

pub struct RepositoryEstimator {
    parser: TreeSitterParser,
    config: IndexerConfig,
}

impl RepositoryEstimator {
    pub fn new(config: IndexerConfig) -> Self {
        Self {
            parser: TreeSitterParser::new(),
            config,
        }
    }

    pub async fn analyze(
        &self,
        path: impl AsRef<Path>,
        throughput: &EmbeddingThroughputConfig,
    ) -> Result<RepositoryEstimate> {
        let path = path.as_ref();
        let file_config: FileCollectionConfig = (&self.config).into();
        let files = file_collect::collect_source_files_with_config(path, &file_config)?;
        let total_files = files.len() as u64;
        let (nodes, edges, stats) =
            parse_files_with_unified_extraction(&self.parser, files, total_files).await?;
        let symbol_map = build_symbol_index(&nodes);
        let counts = RepositoryCounts {
            total_files: stats.total_files,
            parsed_files: stats.parsed_files,
            failed_files: stats.failed_files,
            nodes: nodes.len(),
            edges: edges.len(),
            symbols: symbol_map.len(),
        };
        let timings = TimeEstimates::from_node_count(nodes.len(), throughput);
        let parsing_duration = stats.parsing_duration;
        Ok(RepositoryEstimate {
            counts,
            parsing: ParsingSummary::from(&stats),
            parsing_duration,
            timings,
        })
    }
}

pub fn build_symbol_index(nodes: &[CodeNode]) -> HashMap<String, NodeId> {
    let mut symbol_map = HashMap::with_capacity(nodes.len().saturating_mul(4));
    for node in nodes {
        extend_symbol_index(&mut symbol_map, node);
    }
    symbol_map
}

pub(crate) fn extend_symbol_index(target: &mut HashMap<String, NodeId>, node: &CodeNode) {
    let base_name = node.name.to_string();
    target.insert(base_name.clone(), node.id);
    if let Some(qname) = node.metadata.attributes.get("qualified_name") {
        target.insert(qname.clone(), node.id);
    }
    if let Some(node_type) = &node.node_type {
        target.insert(format!("{:?}::{}", node_type, base_name), node.id);
    }
    target.insert(
        format!("{}::{}", node.location.file_path, base_name),
        node.id,
    );
    if let Some(short_name) = base_name.split("::").last() {
        target.insert(short_name.to_string(), node.id);
    }
    if let Some(method_of) = node.metadata.attributes.get("method_of") {
        target.insert(format!("{}::{}", method_of, base_name), node.id);
    }
    if let Some(trait_impl) = node.metadata.attributes.get("implements_trait") {
        target.insert(format!("{}::{}", trait_impl, base_name), node.id);
    }
}

pub(crate) async fn parse_files_with_unified_extraction(
    parser: &TreeSitterParser,
    files: Vec<(PathBuf, u64)>,
    total_files: u64,
) -> Result<(Vec<CodeNode>, Vec<EdgeRelationship>, ParsingStatistics)> {
    let mut all_nodes = Vec::new();
    let mut all_edges = Vec::new();
    // NOTE: line counts are not collected on this path, so `total_lines`
    // (and therefore `lines_per_second`) always reports 0 here.
    let total_lines = 0;
    let mut parsed_files = 0;
    let mut failed_files = 0;
    let semaphore = Arc::new(Semaphore::new(4));
    let start_time = std::time::Instant::now();
    let parser_ref = parser;

    let mut stream = stream::iter(files.into_iter().map(|(file_path, _)| {
        let semaphore = semaphore.clone();
        async move {
            let _permit = semaphore.acquire().await.unwrap();
            parser_ref
                .parse_file_with_edges(&file_path.to_string_lossy())
                .await
        }
    }))
    .buffer_unordered(4);

    while let Some(result) = stream.next().await {
        match result {
            Ok(extraction_result) => {
                let node_count = extraction_result.nodes.len();
                let edge_count = extraction_result.edges.len();
                if node_count > 0 {
                    debug!(
                        "🌳 AST extraction: {} nodes, {} edges from file",
                        node_count, edge_count
                    );
                }
                all_nodes.extend(extraction_result.nodes);
                all_edges.extend(extraction_result.edges);
                parsed_files += 1;
            }
            Err(e) => {
                failed_files += 1;
                warn!("Failed to parse file: {}", e);
            }
        }
    }

    let parsing_duration = start_time.elapsed();
    let files_per_second = if parsing_duration.as_secs_f64() > 0.0 {
        parsed_files as f64 / parsing_duration.as_secs_f64()
    } else {
        0.0
    };
    let lines_per_second = if parsing_duration.as_secs_f64() > 0.0 {
        total_lines as f64 / parsing_duration.as_secs_f64()
    } else {
        0.0
    };

    let stats = ParsingStatistics {
        total_files: total_files.try_into().unwrap_or(usize::MAX),
        parsed_files,
        failed_files,
        total_lines,
        parsing_duration,
        files_per_second,
        lines_per_second,
    };

    info!("🌳 UNIFIED AST EXTRACTION COMPLETE:");
    info!(
        " 📊 Files processed: {}/{} ({:.1}% success rate)",
        parsed_files,
        total_files,
        if total_files > 0 {
            parsed_files as f64 / total_files as f64 * 100.0
        } else {
            100.0
        }
    );
    info!(
        " 🌳 Semantic nodes extracted: {} (functions, structs, classes, imports, etc.)",
        all_nodes.len()
    );
    info!(
        " 🔗 Code relationships found: {} (function calls, imports, dependencies)",
        all_edges.len()
    );
    info!(
        " ⚡ Processing performance: {:.1} files/s | {:.0} lines/s",
        files_per_second, lines_per_second
    );
    info!(
        " 🎯 Extraction efficiency: {:.1} nodes/file | {:.1} edges/file",
        if parsed_files > 0 {
            all_nodes.len() as f64 / parsed_files as f64
        } else {
            0.0
        },
        if parsed_files > 0 {
            all_edges.len() as f64 / parsed_files as f64
        } else {
            0.0
        }
    );
    if failed_files > 0 {
        warn!(
            " ⚠️ Parse failures: {} files failed TreeSitter analysis",
            failed_files
        );
    }

    Ok((all_nodes, all_edges, stats))
}
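
For reference, here is a minimal sketch of the estimation math above, written as a test module that could sit at the bottom of this file. The throughput numbers (batch size 100, 0.5 minutes per Jina batch, 2,000 local embeddings per minute) are illustrative assumptions, not values shipped with the crate. It exercises the ceiling division in TimeEstimates::from_node_count, which deliberately bills a partial batch as a full one:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn batch_math_rounds_up() {
        // Hypothetical throughput values, for illustration only.
        let cfg = EmbeddingThroughputConfig {
            jina_batch_size: 100,
            jina_batch_minutes: 0.5,
            local_embeddings_per_minute: Some(2_000.0),
        };

        // 10_050 nodes at 100 per batch is 101 batches (the trailing 50
        // nodes still occupy a full batch), so 101 * 0.5 = 50.5 minutes.
        let est = TimeEstimates::from_node_count(10_050, &cfg);
        assert_eq!(est.jina_batches, 101);
        assert!((est.jina_minutes - 50.5).abs() < 1e-9);

        // The local path is plain rate division: 10_050 / 2_000 = 5.025 minutes.
        assert!((est.local_minutes.unwrap() - 5.025).abs() < 1e-9);

        // A zero or negative local rate yields no local estimate at all.
        let est = TimeEstimates::from_node_count(10_050, &cfg.clone().with_local_rate(Some(0.0)));
        assert!(est.local_minutes.is_none());
    }
}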
