Skip to main content
Glama

CodeGraph CLI MCP Server

by Jakedismo
file_collect.rs (7.63 kB)
use codegraph_core::Result; use globset::{Glob, GlobSet, GlobSetBuilder}; use ignore::WalkBuilder; use std::collections::HashSet; use std::path::{Path, PathBuf}; use tracing::{debug, info, warn}; /// Configuration for file collection #[derive(Debug, Clone)] pub struct FileCollectionConfig { pub recursive: bool, pub languages: Vec<String>, pub include_patterns: Vec<String>, pub exclude_patterns: Vec<String>, } impl Default for FileCollectionConfig { fn default() -> Self { Self { recursive: true, languages: vec![], include_patterns: vec![], exclude_patterns: vec![], } } } /// Fast file collector with proper language and pattern filtering pub fn collect_source_files_with_config( dir: &Path, config: &FileCollectionConfig, ) -> Result<Vec<(PathBuf, u64)>> { info!("Collecting source files from: {:?}", dir); debug!( "Collection config: recursive={}, languages={:?}", config.recursive, config.languages ); let default_excludes = vec![ "**/target/**", "**/.git/**", "**/node_modules/**", "**/dist/**", "**/build/**", "**/.next/**", "**/.nuxt/**", "**/coverage/**", "**/__pycache__/**", "**/.pytest_cache/**", "**/.codegraph/**", ]; let mut combined_excludes: Vec<String> = default_excludes.iter().map(|s| s.to_string()).collect(); combined_excludes.extend(config.exclude_patterns.clone()); let exclude_set = build_globset(&combined_excludes); let include_set = if config.include_patterns.is_empty() { None } else { build_globset(&config.include_patterns) }; // Build walker with recursive setting let mut walker_builder = WalkBuilder::new(dir); walker_builder .hidden(false) .git_ignore(true) .git_exclude(true) .ignore(true); // Set max depth based on recursive flag if !config.recursive { walker_builder.max_depth(Some(1)); debug!("Non-recursive: limited to depth 1"); } else { debug!("Recursive: scanning all subdirectories"); } let walker = walker_builder.build(); // Create set of supported file extensions let supported_extensions = get_supported_extensions(&config.languages); 
debug!("Supported extensions: {:?}", supported_extensions); let mut paths = Vec::new(); let mut total_files = 0; let mut filtered_files = 0; for dent in walker { let dent = match dent { Ok(d) => d, Err(e) => { warn!("Walker error: {}", e); continue; } }; let path = dent.path(); if !path.is_file() { continue; } total_files += 1; if let Some(ref set) = exclude_set { if set.is_match(path) { continue; } } if let Some(ref set) = include_set { if !set.is_match(path) { continue; } } // Filter by file extension if languages specified if !config.languages.is_empty() { if let Some(ext) = path.extension().and_then(|e| e.to_str()) { if !supported_extensions.contains(ext) { continue; } } else { continue; // Skip files without extensions when language filter active } } filtered_files += 1; // Size extraction (best-effort) let size = dent .metadata() .ok() .and_then(|m| Some(m.len())) .unwrap_or(0); paths.push((path.to_path_buf(), size)); } info!( "File collection complete: {} files found, {} passed filters", total_files, filtered_files ); if paths.is_empty() && total_files > 0 { warn!("No files passed language filters. 
Check --languages setting and file extensions."); warn!("Supported extensions: {:?}", supported_extensions); } Ok(paths) } fn build_globset<S, I>(patterns: I) -> Option<GlobSet> where S: AsRef<str>, I: IntoIterator<Item = S>, { let mut builder = GlobSetBuilder::new(); let mut added = false; for pattern in patterns { match Glob::new(pattern.as_ref()) { Ok(glob) => { builder.add(glob); added = true; } Err(e) => warn!("Invalid glob pattern '{}': {}", pattern.as_ref(), e), } } if added { builder.build().ok() } else { None } } /// Get supported file extensions for specified languages fn get_supported_extensions(languages: &[String]) -> HashSet<&'static str> { let mut extensions = HashSet::new(); for lang in languages { match lang.to_lowercase().as_str() { "rust" => { extensions.insert("rs"); } "typescript" => { extensions.insert("ts"); extensions.insert("tsx"); // ← Critical: .tsx support } "javascript" => { extensions.insert("js"); extensions.insert("jsx"); } "python" => { extensions.insert("py"); extensions.insert("pyi"); } "go" => { extensions.insert("go"); } "java" => { extensions.insert("java"); } "cpp" | "c++" => { extensions.insert("cpp"); extensions.insert("cxx"); extensions.insert("cc"); extensions.insert("hpp"); extensions.insert("hxx"); extensions.insert("h"); } "c" => { extensions.insert("c"); extensions.insert("h"); } // Revolutionary universal language support "swift" => { extensions.insert("swift"); } "csharp" | "c#" => { extensions.insert("cs"); } "ruby" => { extensions.insert("rb"); extensions.insert("rake"); extensions.insert("gemspec"); } "php" => { extensions.insert("php"); extensions.insert("phtml"); extensions.insert("php3"); extensions.insert("php4"); extensions.insert("php5"); } "kotlin" => { extensions.insert("kt"); extensions.insert("kts"); } "dart" => { extensions.insert("dart"); } _ => { warn!("Unknown language: {}", lang); } } } // If no languages specified, support all known extensions (universal auto-detection) if extensions.is_empty() { 
extensions.extend(&[ "rs", "ts", "tsx", "js", "jsx", "py", "pyi", "go", "java", "cpp", "cxx", "cc", "hpp", "hxx", "h", "c", // Revolutionary universal language support "swift", "cs", "rb", "rake", "gemspec", "php", "phtml", "php3", "php4", "php5", "kt", "kts", "dart", ]); } extensions } /// Legacy function for backward compatibility pub fn collect_source_files(dir: &Path) -> Result<Vec<(PathBuf, u64)>> { collect_source_files_with_config(dir, &FileCollectionConfig::default()) }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.