//! CodeGraph CLI: MCP server management and project indexing.
use anyhow::Result;
use atty::Stream;
use clap::{Parser, Subcommand};
use codegraph_core::GraphStore;
use codegraph_mcp::{IndexerConfig, ProcessManager, ProjectIndexer};
use colored::*;
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use rmcp::ServiceExt;
use std::path::PathBuf;
use tracing::info;
use tracing_subscriber::{layer::SubscriberExt, Registry};
#[derive(Parser)]
#[command(
name = "codegraph",
version,
author,
about = "CodeGraph CLI - MCP server management and project indexing",
long_about = "CodeGraph provides a unified interface for managing MCP servers and indexing projects with the codegraph system."
)]
#[command(propagate_version = true)]
struct Cli {
#[command(subcommand)]
command: Commands,
#[arg(short, long, global = true, help = "Enable verbose logging")]
verbose: bool,
#[arg(long, global = true, help = "Configuration file path")]
config: Option<PathBuf>,
}
#[derive(Subcommand)]
enum Commands {
#[command(about = "Start MCP server with specified transport")]
Start {
#[command(subcommand)]
transport: TransportType,
#[arg(short, long, help = "Server configuration file")]
config: Option<PathBuf>,
#[arg(long, help = "Run server in background")]
daemon: bool,
#[arg(long, help = "PID file location for daemon mode")]
pid_file: Option<PathBuf>,
},
#[command(about = "Stop running MCP server")]
Stop {
#[arg(long, help = "PID file location")]
pid_file: Option<PathBuf>,
#[arg(short, long, help = "Force stop without graceful shutdown")]
force: bool,
},
#[command(about = "Check status of MCP server")]
Status {
#[arg(long, help = "PID file location")]
pid_file: Option<PathBuf>,
#[arg(short, long, help = "Show detailed status information")]
detailed: bool,
},
#[command(about = "Index a project or directory")]
Index {
#[arg(help = "Path to project directory")]
path: PathBuf,
#[arg(short, long, help = "Languages to index", value_delimiter = ',')]
languages: Option<Vec<String>>,
#[arg(long, help = "Exclude patterns (gitignore format)")]
exclude: Vec<String>,
#[arg(long, help = "Include only these patterns")]
include: Vec<String>,
#[arg(short, long, help = "Recursively index subdirectories")]
recursive: bool,
#[arg(long, help = "Force reindex even if already indexed")]
force: bool,
#[arg(long, help = "Watch for changes and auto-reindex")]
watch: bool,
#[arg(long, help = "Number of parallel workers", default_value = "4")]
workers: usize,
#[arg(long, help = "Embedding batch size", default_value = "100")]
batch_size: usize,
#[arg(long, help = "Local embedding device: cpu | metal | cuda:<id>")]
device: Option<String>,
#[arg(
long,
help = "Max sequence length for local embeddings",
default_value = "512"
)]
max_seq_len: usize,
},
#[command(about = "Search indexed code")]
Search {
#[arg(help = "Search query")]
query: String,
#[arg(short, long, help = "Search type", default_value = "semantic")]
search_type: SearchType,
#[arg(short, long, help = "Maximum results", default_value = "10")]
limit: usize,
#[arg(long, help = "Similarity threshold (0.0-1.0)", default_value = "0.7")]
threshold: f32,
#[arg(short, long, help = "Output format", default_value = "human")]
format: OutputFormat,
#[arg(
long,
help = "Restrict to path prefixes (comma-separated)",
value_delimiter = ','
)]
paths: Option<Vec<String>>,
#[arg(
long,
help = "Restrict to languages (comma-separated)",
value_delimiter = ','
)]
langs: Option<Vec<String>>,
#[arg(
long,
help = "Expand graph neighbors to this depth (0 to disable)",
default_value = "0"
)]
expand_graph: usize,
},
#[command(about = "Manage MCP server configuration")]
Config {
#[command(subcommand)]
action: ConfigAction,
},
#[command(about = "Show statistics and metrics")]
Stats {
#[arg(long, help = "Show index statistics")]
index: bool,
#[arg(long, help = "Show server statistics")]
server: bool,
#[arg(long, help = "Show performance metrics")]
performance: bool,
#[arg(short, long, help = "Output format", default_value = "table")]
format: StatsFormat,
},
#[command(about = "Initialize a new CodeGraph project")]
Init {
#[arg(help = "Project directory", default_value = ".")]
path: PathBuf,
#[arg(long, help = "Project name")]
name: Option<String>,
#[arg(long, help = "Skip interactive setup")]
non_interactive: bool,
},
#[command(about = "Clean up resources and cache")]
Clean {
#[arg(long, help = "Clean index database")]
index: bool,
#[arg(long, help = "Clean vector embeddings")]
vectors: bool,
#[arg(long, help = "Clean cache files")]
cache: bool,
#[arg(long, help = "Clean all resources")]
all: bool,
#[arg(short, long, help = "Skip confirmation prompt")]
yes: bool,
},
#[command(about = "Graph utilities")]
Graph {
#[command(subcommand)]
action: GraphAction,
},
#[command(about = "Vector operations (FAISS)")]
Vector {
#[command(subcommand)]
action: VectorAction,
},
#[command(about = "Code IO operations")]
Code {
#[command(subcommand)]
action: CodeAction,
},
#[command(about = "Test runner helpers")]
Test {
#[command(subcommand)]
action: TestAction,
},
#[command(about = "Run performance benchmarks (index + query)")]
Perf {
#[arg(help = "Path to project directory")]
path: PathBuf,
#[arg(long, value_delimiter = ',')]
langs: Option<Vec<String>>,
#[arg(long, default_value = "3", help = "Warmup runs before timing")]
warmup: usize,
#[arg(long, default_value = "10", help = "Timed query trials")]
trials: usize,
#[arg(long, value_delimiter = ',', help = "Queries to benchmark")]
queries: Option<Vec<String>>,
#[arg(long, default_value = "4")]
workers: usize,
#[arg(long, default_value = "100")]
batch_size: usize,
#[arg(long, help = "Local embedding device: cpu | metal | cuda:<id>")]
device: Option<String>,
#[arg(long, default_value = "512")]
max_seq_len: usize,
#[arg(long, help = "Remove existing .codegraph before indexing")]
clean: bool,
#[arg(long, default_value = "json", value_parser = clap::builder::PossibleValuesParser::new(["human","json"]))]
format: String,
#[arg(long, help = "Open graph in read-only mode for perf queries")]
graph_readonly: bool,
},
#[command(about = "Serve HTTP MCP endpoint")]
#[cfg(feature = "server-http")]
ServeHttp {
#[arg(long, default_value = "127.0.0.1")]
host: String,
#[arg(long, default_value = "3000")]
port: u16,
},
#[command(about = "Serve STDIO MCP endpoint")]
ServeStdio {
#[arg(long, default_value = "8192")]
buffer_size: usize,
},
}
#[derive(Subcommand)]
enum GraphAction {
#[command(about = "Get neighbors of a node")]
Neighbors {
#[arg(long, help = "Node UUID")]
node: String,
#[arg(long, help = "Limit", default_value = "20")]
limit: usize,
},
#[command(about = "Traverse graph breadth-first")]
Traverse {
#[arg(long, help = "Start node UUID")]
start: String,
#[arg(long, help = "Depth", default_value = "2")]
depth: usize,
#[arg(long, help = "Limit nodes", default_value = "100")]
limit: usize,
},
}
#[derive(Subcommand)]
enum VectorAction {
#[command(about = "Semantic vector search (FAISS)")]
Search {
#[arg(help = "Query string")]
query: String,
#[arg(long, value_delimiter = ',')]
paths: Option<Vec<String>>,
#[arg(long, value_delimiter = ',')]
langs: Option<Vec<String>>,
#[arg(long, default_value = "10")]
limit: usize,
},
}
#[derive(Subcommand)]
enum CodeAction {
#[command(about = "Read a file or range")]
Read {
#[arg(help = "File path")]
path: String,
#[arg(long)]
start: Option<usize>,
#[arg(long)]
end: Option<usize>,
},
#[command(about = "Patch a file (find/replace)")]
Patch {
#[arg(help = "File path")]
path: String,
#[arg(long, help = "Find text")]
find: String,
#[arg(long, help = "Replace with")]
replace: String,
#[arg(long, help = "Dry run")]
dry_run: bool,
},
}
#[derive(Subcommand)]
enum TestAction {
#[command(about = "Run tests (cargo test)")]
Run {
#[arg(long)]
package: Option<String>,
#[arg(trailing_var_arg = true)]
args: Vec<String>,
},
}
#[derive(Subcommand)]
enum TransportType {
#[command(about = "Start with STDIO transport (default)")]
Stdio {
#[arg(long, help = "Buffer size for STDIO", default_value = "8192")]
buffer_size: usize,
},
#[command(about = "Start with HTTP streaming transport")]
Http {
#[arg(short, long, help = "Host to bind to", default_value = "127.0.0.1")]
host: String,
#[arg(short, long, help = "Port to bind to", default_value = "3000")]
port: u16,
#[arg(long, help = "Enable TLS/HTTPS")]
tls: bool,
#[arg(long, help = "TLS certificate file")]
cert: Option<PathBuf>,
#[arg(long, help = "TLS key file")]
key: Option<PathBuf>,
#[arg(long, help = "Enable CORS")]
cors: bool,
},
#[command(about = "Start with both STDIO and HTTP transports")]
Dual {
#[arg(short, long, help = "HTTP host", default_value = "127.0.0.1")]
host: String,
#[arg(short, long, help = "HTTP port", default_value = "3000")]
port: u16,
#[arg(long, help = "STDIO buffer size", default_value = "8192")]
buffer_size: usize,
},
}
#[derive(Subcommand)]
enum ConfigAction {
#[command(about = "Show current configuration")]
Show {
#[arg(long, help = "Show as JSON")]
json: bool,
},
#[command(about = "Set configuration value")]
Set {
#[arg(help = "Configuration key")]
key: String,
#[arg(help = "Configuration value")]
value: String,
},
#[command(about = "Get configuration value")]
Get {
#[arg(help = "Configuration key")]
key: String,
},
#[command(about = "Reset configuration to defaults")]
Reset {
#[arg(short, long, help = "Skip confirmation")]
yes: bool,
},
#[command(about = "Validate configuration")]
Validate,
}
#[derive(clap::ValueEnum, Clone, Debug)]
enum SearchType {
Semantic,
Exact,
Fuzzy,
Regex,
Ast,
}
#[derive(clap::ValueEnum, Clone, Debug)]
enum OutputFormat {
Human,
Json,
Yaml,
Table,
}
#[derive(clap::ValueEnum, Clone, Debug)]
enum StatsFormat {
Table,
Json,
Yaml,
Human,
}
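// Illustrative invocations (assuming the binary is installed as `codegraph`;
// flags mirror the clap definitions above):
//
//   codegraph init . --name my-project
//   codegraph index . --languages rust,python --workers 8 --batch-size 256
//   codegraph search "http server router" --format json --limit 5
//   codegraph start stdio
//   codegraph perf . --langs rust --trials 10 --format json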
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
    let log_level = if cli.verbose { "debug" } else { "info" };
    // Honor an explicit RUST_LOG; otherwise derive the default filter from --verbose.
    if std::env::var("RUST_LOG").is_err() {
        std::env::set_var("RUST_LOG", log_level);
    }
// Load configuration if provided
    if let Some(_config_path) = &cli.config {
// TODO: Load and merge configuration
}
match cli.command {
Commands::Start {
transport,
config,
daemon,
pid_file,
} => {
handle_start(transport, config, daemon, pid_file).await?;
}
Commands::Stop { pid_file, force } => {
handle_stop(pid_file, force).await?;
}
Commands::Status { pid_file, detailed } => {
handle_status(pid_file, detailed).await?;
}
Commands::Index {
path,
languages,
exclude,
include,
recursive,
force,
watch,
workers,
batch_size,
device,
max_seq_len,
} => {
handle_index(
path,
languages,
exclude,
include,
recursive,
force,
watch,
workers,
batch_size,
device,
max_seq_len,
)
.await?;
}
Commands::Search {
query,
search_type,
limit,
threshold,
format,
paths,
langs,
expand_graph,
} => {
handle_search(
query,
search_type,
limit,
threshold,
format,
paths,
langs,
expand_graph,
)
.await?;
}
Commands::Graph { action } => match action {
GraphAction::Neighbors { node, limit } => {
let id = uuid::Uuid::parse_str(&node)?;
let graph = codegraph_graph::CodeGraph::new()?;
let neighbors = graph.get_neighbors(id).await?;
println!("Neighbors ({}):", neighbors.len().min(limit));
for n in neighbors.into_iter().take(limit) {
if let Some(nn) = graph.get_node(n).await? {
println!("- {} {}", n, nn.name);
} else {
println!("- {}", n);
}
}
}
GraphAction::Traverse {
start,
depth,
limit,
} => {
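                // Bounded breadth-first traversal: expand neighbors up to
                // `depth` hops and cap the visited list at `limit` nodes.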
use std::collections::{HashSet, VecDeque};
let start_id = uuid::Uuid::parse_str(&start)?;
let graph = codegraph_graph::CodeGraph::new()?;
let mut seen: HashSet<codegraph_core::NodeId> = HashSet::new();
let mut q: VecDeque<(codegraph_core::NodeId, usize)> = VecDeque::new();
q.push_back((start_id, 0));
seen.insert(start_id);
let mut out = Vec::new();
while let Some((nid, d)) = q.pop_front() {
out.push((nid, d));
if out.len() >= limit {
break;
}
if d >= depth {
continue;
}
for nb in graph.get_neighbors(nid).await.unwrap_or_default() {
if seen.insert(nb) {
q.push_back((nb, d + 1));
}
}
}
println!("Traversal (visited {}):", out.len());
for (nid, d) in out {
if let Some(nn) = graph.get_node(nid).await? {
println!("{} {} {}", d, nid, nn.name);
} else {
println!("{} {}", d, nid);
}
}
}
},
Commands::Vector { action } => match action {
VectorAction::Search {
query,
paths,
langs,
limit,
} => {
handle_search(
query,
SearchType::Semantic,
limit,
0.7,
OutputFormat::Human,
paths,
langs,
0,
)
.await?;
}
},
Commands::Code { action } => match action {
CodeAction::Read { path, start, end } => {
let text = std::fs::read_to_string(&path)?;
let total = text.lines().count();
let s = start.unwrap_or(1).max(1);
let e = end.unwrap_or(total).min(total);
for (i, line) in text.lines().enumerate() {
let ln = i + 1;
if ln >= s && ln <= e {
println!("{:>6} | {}", ln, line);
}
}
}
CodeAction::Patch {
path,
find,
replace,
dry_run,
} => {
let text = std::fs::read_to_string(&path)?;
let patched = text.replace(&find, &replace);
if dry_run {
println!("--- DRY RUN: changes not written ---");
println!("Replacements: {}", text.matches(&find).count());
} else {
std::fs::write(&path, patched)?;
println!("Patched '{}'", path);
}
}
},
Commands::Test { action } => match action {
TestAction::Run { package, args } => {
let mut cmd = std::process::Command::new("cargo");
cmd.arg("test");
if let Some(p) = package {
cmd.arg("-p").arg(p);
}
if !args.is_empty() {
cmd.args(args);
}
let status = cmd.status()?;
if !status.success() {
anyhow::bail!("tests failed");
}
}
},
Commands::Perf {
path,
langs,
warmup,
trials,
queries,
workers,
batch_size,
device,
max_seq_len,
clean,
format,
graph_readonly,
} => {
handle_perf(
path,
langs,
warmup,
trials,
queries,
workers,
batch_size,
device,
max_seq_len,
clean,
format,
graph_readonly,
)
.await?;
}
Commands::Config { action } => {
handle_config(action).await?;
}
Commands::Stats {
index,
server,
performance,
format,
} => {
handle_stats(index, server, performance, format).await?;
}
Commands::Init {
path,
name,
non_interactive,
} => {
handle_init(path, name, non_interactive).await?;
}
Commands::Clean {
index,
vectors,
cache,
all,
yes,
} => {
handle_clean(index, vectors, cache, all, yes).await?;
}
#[cfg(feature = "server-http")]
Commands::ServeHttp { host, port } => {
codegraph_mcp::server::serve_http(host, port).await?;
}
Commands::ServeStdio { buffer_size } => {
codegraph_mcp::server::serve_stdio(buffer_size).await?;
}
}
Ok(())
}
async fn handle_start(
transport: TransportType,
config: Option<PathBuf>,
daemon: bool,
pid_file: Option<PathBuf>,
) -> Result<()> {
let manager = ProcessManager::new();
match transport {
TransportType::Stdio { buffer_size: _ } => {
if atty::is(Stream::Stderr) {
eprintln!(
"{}",
"Starting CodeGraph MCP Server with 100% Official SDK..."
.green()
.bold()
);
}
// Create and initialize the revolutionary CodeGraph server with official SDK
let mut server = codegraph_mcp::official_server::CodeGraphMCPServer::new();
server.initialize_qwen().await;
if atty::is(Stream::Stderr) {
eprintln!("✅ Revolutionary CodeGraph MCP server ready with 100% protocol compliance");
}
// Use official rmcp STDIO transport for perfect compliance
let service = server.serve(rmcp::transport::stdio()).await.map_err(|e| {
if atty::is(Stream::Stderr) {
eprintln!("❌ Failed to start official MCP server: {}", e);
}
anyhow::anyhow!("MCP server startup failed: {}", e)
})?;
if atty::is(Stream::Stderr) {
eprintln!("🚀 Official MCP server started with revolutionary capabilities");
}
// Wait for the server to complete
service
.waiting()
.await
.map_err(|e| anyhow::anyhow!("Server error: {}", e))?;
}
        TransportType::Http { .. } => {
            eprintln!("🚧 HTTP transport with official SDK not yet implemented");
            eprintln!("💡 Use STDIO transport for 100% official SDK compliance:");
            eprintln!("   codegraph start stdio");
            eprintln!("💡 Or, when built with the server-http feature, serve HTTP directly:");
            eprintln!("   codegraph serve-http --host 127.0.0.1 --port 3000");
            return Err(anyhow::anyhow!(
                "HTTP transport not yet implemented; use STDIO or `codegraph serve-http`"
            ));
        }
TransportType::Dual {
host,
port,
buffer_size,
} => {
info!("Starting with dual transport (STDIO + HTTP)");
let (stdio_pid, http_pid) = manager
.start_dual_transport(
host.clone(),
port,
buffer_size,
config,
daemon,
pid_file.clone(),
)
.await?;
println!("✓ MCP server started with dual transport");
println!(" STDIO: buffer size {} (PID: {})", buffer_size, stdio_pid);
println!(" HTTP: http://{}:{} (PID: {})", host, port, http_pid);
}
}
if daemon {
println!("Running in daemon mode");
if let Some(ref pid_file) = pid_file {
println!("PID file: {:?}", pid_file);
}
}
Ok(())
}
async fn handle_stop(pid_file: Option<PathBuf>, force: bool) -> Result<()> {
if atty::is(Stream::Stdout) {
println!("{}", "Stopping CodeGraph MCP Server...".yellow().bold());
}
let manager = ProcessManager::new();
    if force && atty::is(Stream::Stdout) {
        println!("Force stopping server");
    }
manager.stop_server(pid_file, force).await?;
if atty::is(Stream::Stdout) {
println!("✓ Server stopped");
}
Ok(())
}
async fn handle_status(pid_file: Option<PathBuf>, detailed: bool) -> Result<()> {
println!("{}", "CodeGraph MCP Server Status".blue().bold());
println!();
let manager = ProcessManager::new();
match manager.get_status(pid_file).await {
Ok(info) => {
println!("Server: {}", "Running".green());
println!("Transport: {}", info.transport);
println!("Process ID: {}", info.pid);
let duration = chrono::Utc::now() - info.start_time;
let hours = duration.num_hours();
let minutes = (duration.num_minutes() % 60) as u32;
println!("Uptime: {}h {}m", hours, minutes);
if detailed {
println!();
println!("Detailed Information:");
println!(
" Start Time: {}",
info.start_time.format("%Y-%m-%d %H:%M:%S UTC")
);
if let Some(config) = info.config_path {
println!(" Config File: {:?}", config);
}
// TODO: Add more detailed metrics once integrated with monitoring
}
}
Err(e) => {
println!("Server: {}", "Not Running".red());
println!("Error: {}", e);
}
}
Ok(())
}
async fn handle_index(
path: PathBuf,
languages: Option<Vec<String>>,
exclude: Vec<String>,
include: Vec<String>,
recursive: bool,
force: bool,
watch: bool,
workers: usize,
batch_size: usize,
device: Option<String>,
max_seq_len: usize,
) -> Result<()> {
let multi_progress = MultiProgress::new();
let subscriber = Registry::default()
.with(tracing_subscriber::filter::EnvFilter::from_default_env())
.with(tracing_subscriber::fmt::layer());
tracing::subscriber::set_global_default(subscriber).ok();
let h_style = ProgressStyle::with_template("{spinner:.blue} {msg}")
.unwrap()
.tick_chars("⠁⠂⠄⡀⢀⠠⠐⠈ ");
let header_pb = multi_progress.add(ProgressBar::new(1));
header_pb.set_style(h_style);
header_pb.set_message(format!("Indexing project: {}", path.to_string_lossy()));
// Memory-aware optimization for high-memory systems
let available_memory_gb = estimate_available_memory_gb();
let (optimized_batch_size, optimized_workers) =
optimize_for_memory(available_memory_gb, batch_size, workers);
if available_memory_gb >= 64 {
multi_progress.println(format!(
"🚀 High-memory system detected ({}GB) - performance optimized!",
available_memory_gb
))?;
}
// Configure indexer
let languages_list = languages.clone().unwrap_or_default();
let config = IndexerConfig {
languages: languages_list.clone(),
exclude_patterns: exclude,
include_patterns: include,
recursive,
force_reindex: force,
watch,
workers: optimized_workers,
batch_size: optimized_batch_size,
device,
max_seq_len,
        project_root: path.canonicalize().unwrap_or_else(|_| path.clone()),
..Default::default()
};
// Create indexer
let mut indexer = ProjectIndexer::new(config, multi_progress.clone()).await?;
let start_time = std::time::Instant::now();
// Perform indexing
let stats = indexer.index_project(&path).await?;
let elapsed = start_time.elapsed();
header_pb.finish_with_message("✔ Indexing complete".to_string());
println!();
println!("{}", "🎉 INDEXING COMPLETE!".green().bold());
println!();
// Performance summary with dual metrics
println!("{}", "📊 Performance Summary".cyan().bold());
println!("┌─────────────────────────────────────────────────┐");
println!(
"│ ⏱️ Total time: {:<33} │",
format!("{:.2?}", elapsed).green()
);
println!(
"│ ⚡ Throughput: {:<33} │",
format!("{:.2} files/sec", stats.files as f64 / elapsed.as_secs_f64()).yellow()
);
println!("├─────────────────────────────────────────────────┤");
println!(
"│ 📄 Files: {} indexed, {} skipped {:<13} │",
format!("{:>6}", stats.files).yellow(),
format!("{:>4}", stats.skipped).yellow(),
""
);
println!(
"│ 📝 Lines: {} processed {:<24} │",
format!("{:>6}", stats.lines).yellow(),
""
);
println!(
"│ 💾 Embeddings: {} generated {:<20} │",
format!("{:>6}", stats.embeddings).cyan(),
""
);
if stats.errors > 0 {
println!(
"│ ❌ Errors: {} encountered {:<24} │",
format!("{:>6}", stats.errors).red(),
""
);
}
println!("└─────────────────────────────────────────────────┘");
// Configuration summary
println!();
println!("{}", "⚙️ Configuration Summary".cyan().bold());
    println!(
        "Workers: {} | Batch Size: {} | Languages: {}",
        optimized_workers,
        optimized_batch_size,
        if languages_list.is_empty() {
            "auto-detect".to_string()
        } else {
            languages_list.join(", ")
        }
    );
    let provider =
        std::env::var("CODEGRAPH_EMBEDDING_PROVIDER").unwrap_or_else(|_| "default".to_string());
if provider == "ollama" {
println!(
"{}",
"🧠 Using SOTA Code-Specialized Embeddings (nomic-embed-code)".green()
);
} else if provider == "onnx" {
println!("{}", "⚡ Using Speed-Optimized Embeddings (ONNX)".yellow());
}
if available_memory_gb >= 128 {
println!(
"{}",
format!(
"🚀 Ultra-High Memory System ({}GB) - Maximum Performance!",
available_memory_gb
)
.green()
.bold()
);
} else if available_memory_gb >= 64 {
println!(
"{}",
format!(
"💪 High Memory System ({}GB) - Optimized Performance!",
available_memory_gb
)
.green()
);
}
// Success rate and recommendations
    if stats.embeddings > 0 && stats.files > 0 {
        // Rough proxy: a file can yield several embeddings, so this ratio can
        // exceed 100%; treat the thresholds below as heuristics.
        let embedding_success_rate = (stats.embeddings as f64 / stats.files as f64) * 100.0;
if embedding_success_rate >= 90.0 {
println!("{}", "✅ Excellent embedding success rate (>90%)".green());
} else if embedding_success_rate >= 75.0 {
println!("{}", "⚠️ Good embedding success rate (75-90%)".yellow());
} else {
println!(
"{}",
"❌ Low embedding success rate (<75%) - check language support".red()
);
}
}
println!();
println!(
"{}",
"🚀 Ready for Revolutionary MCP Intelligence!"
.green()
.bold()
);
println!("Next: Start MCP server with 'codegraph start stdio'");
if stats.errors > 0 {
println!(" {} Errors: {}", "⚠".yellow(), stats.errors);
}
if watch {
println!();
println!("Watching for changes... (Press Ctrl+C to stop)");
indexer.watch_for_changes(path).await?;
}
Ok(())
}
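// Search pipeline: embed the query, probe FAISS shards (or the global index),
// rank and de-duplicate hits, hydrate nodes from the graph, and optionally
// expand graph neighbors up to `expand_graph` hops before formatting output.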
async fn handle_search(
query: String,
search_type: SearchType,
limit: usize,
    // Accepted for CLI parity; score-threshold filtering is not implemented yet.
    _threshold: f32,
format: OutputFormat,
paths: Option<Vec<String>>,
langs: Option<Vec<String>>,
expand_graph: usize,
) -> Result<()> {
println!("{}", format!("Searching for: '{}'", query).magenta().bold());
println!("Search type: {:?}", search_type);
println!();
// Build a query embedding compatible with the indexer
#[cfg(feature = "embeddings")]
let emb = {
        // `gen` becomes a reserved keyword in Rust 2024, so use a longer name.
        let generator = codegraph_vector::EmbeddingGenerator::with_auto_from_env().await;
        let e = generator.generate_text_embedding(&query).await?;
codegraph_mcp::indexer::normalize(&e)
};
#[cfg(not(feature = "embeddings"))]
let emb = {
let dimension = 384; // Match EmbeddingGenerator default (all-MiniLM-L6-v2)
let e = codegraph_mcp::indexer::simple_text_embedding(&query, dimension);
codegraph_mcp::indexer::normalize(&e)
};
#[cfg(feature = "faiss")]
{
use faiss::index::io::read_index;
use faiss::index::Index as _;
use std::path::Path;
// Try to use shards when filters provided, else global index
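        // Expected on-disk layout, as written by the indexer (paths are
        // relative to the current working directory, so run searches from
        // the project root):
        //   .codegraph/faiss.index + .codegraph/faiss_ids.json            (global)
        //   .codegraph/shards/path/<top-dir>.index + <top-dir>_ids.json   (path shards)
        //   .codegraph/shards/lang/<language>.index + <language>_ids.json (language shards)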
let mut scored: Vec<(codegraph_core::NodeId, f32)> = Vec::new();
// Helper to search a specific index file + mapping file
let mut search_index = |index_path: &Path, ids_path: &Path, topk: usize| -> Result<()> {
if !index_path.exists() || !ids_path.exists() {
return Ok(());
}
let mut index = read_index(index_path.to_string_lossy())
.map_err(|e| anyhow::anyhow!(e.to_string()))?;
let mapping_raw = std::fs::read_to_string(ids_path)?;
let mapping: Vec<codegraph_core::NodeId> = serde_json::from_str(&mapping_raw)?;
let res = index
.search(&emb, topk)
.map_err(|e| anyhow::anyhow!(e.to_string()))?;
for (i, label) in res.labels.into_iter().enumerate() {
if let Some(idx_val) = label.get() {
let idx = idx_val as usize;
if idx < mapping.len() {
let score = res.distances[i];
scored.push((mapping[idx], score));
}
}
}
Ok(())
};
let mut shard_count = 0usize;
if let Some(prefs) = &paths {
for p in prefs {
let seg = p.trim_start_matches("./").split('/').next().unwrap_or("");
if seg.is_empty() {
continue;
}
let idx = Path::new(".codegraph/shards/path").join(format!("{}.index", seg));
let ids = Path::new(".codegraph/shards/path").join(format!("{}_ids.json", seg));
search_index(&idx, &ids, limit * 5)?;
shard_count += 1;
}
}
if let Some(langs) = &langs {
for l in langs {
let norm = l.to_lowercase();
let idx = Path::new(".codegraph/shards/lang").join(format!("{}.index", norm));
let ids = Path::new(".codegraph/shards/lang").join(format!("{}_ids.json", norm));
search_index(&idx, &ids, limit * 5)?;
shard_count += 1;
}
}
        let used_any_shard = shard_count > 0;
if !used_any_shard {
// Fall back to global index
let idx = Path::new(".codegraph/faiss.index");
let ids = Path::new(".codegraph/faiss_ids.json");
if !idx.exists() || !ids.exists() {
println!(
"FAISS index not found. Run 'codegraph index .' first (with --features faiss)."
);
return Ok(());
}
search_index(idx, ids, limit * 5)?;
}
// Rank and trim, keep scores for output
        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        // De-duplicate by node id, keeping the first (highest-scoring) hit.
        // Vec::dedup_by_key only drops *adjacent* duplicates, so use a set.
        let mut seen_ids = std::collections::HashSet::new();
        scored.retain(|(id, _)| seen_ids.insert(*id));
        let top: Vec<(codegraph_core::NodeId, f32)> = scored.into_iter().take(limit).collect();
// Optionally enrich results from graph; keep scores with nodes
let graph = codegraph_graph::CodeGraph::new()?;
let mut base_scored: Vec<(codegraph_core::NodeId, codegraph_core::CodeNode, f32)> =
Vec::new();
let path_filters = paths.unwrap_or_default();
for (id, score) in top {
if let Some(node) = graph.get_node(id).await? {
if path_filters.is_empty()
|| path_filters
.iter()
.any(|p| node.location.file_path.starts_with(p))
{
base_scored.push((id, node, score));
}
}
}
// Expand graph neighbors if requested (depth-weighted)
if expand_graph > 0 {
use std::collections::{HashSet, VecDeque};
let mut seen: HashSet<codegraph_core::NodeId> =
base_scored.iter().map(|(id, _, _)| *id).collect();
let mut q: VecDeque<(codegraph_core::NodeId, usize)> =
base_scored.iter().map(|(id, _, _)| (*id, 0usize)).collect();
let mut extra: Vec<(codegraph_core::NodeId, codegraph_core::CodeNode, usize)> =
Vec::new();
while let Some((nid, depth)) = q.pop_front() {
if depth >= expand_graph {
continue;
}
let neighbors = graph.get_neighbors(nid).await.unwrap_or_default();
for nb in neighbors {
if seen.insert(nb) {
if let Some(nnode) = graph.get_node(nb).await? {
if path_filters.is_empty()
|| path_filters
.iter()
.any(|p| nnode.location.file_path.starts_with(p))
{
extra.push((nb, nnode, depth + 1));
q.push_back((nb, depth + 1));
}
}
}
}
}
// Shallow neighbors first; cap to avoid explosion
extra.sort_by_key(|(_, _, d)| *d);
// Build final results with depth field
let mut results_with_depth: Vec<(
codegraph_core::NodeId,
codegraph_core::CodeNode,
usize,
)> = base_scored
.iter()
.map(|(id, node, _)| (*id, node.clone(), 0usize))
.collect();
results_with_depth.extend(extra.into_iter().take(limit.saturating_mul(5)));
match format {
OutputFormat::Human => {
println!("Results ({}):", results_with_depth.len());
for (i, (_id, node, depth)) in results_with_depth.iter().enumerate() {
let summary = node
.content
.as_deref()
.map(|s| {
let mut t = s.trim().replace('\n', " ");
if t.len() > 160 {
t.truncate(160);
t.push_str("...");
}
t
})
.unwrap_or_else(|| "".to_string());
                        println!(
                            "{}. [d={}] {}\n    {}",
                            i + 1,
                            depth,
                            node.location.file_path,
                            summary
                        );
}
}
OutputFormat::Json => {
use std::collections::HashMap;
let score_map: HashMap<codegraph_core::NodeId, f32> = base_scored
.iter()
.map(|(id, _node, score)| (*id, *score))
.collect();
let j = serde_json::json!({
"results": results_with_depth.iter().map(|(id, node, depth)| {
let score = score_map.get(id).copied();
serde_json::json!({
"id": id,
"name": node.name,
"path": node.location.file_path,
"node_type": node.node_type.as_ref().map(|t| format!("{:?}", t)).unwrap_or_else(|| "unknown".into()),
"language": node.language.as_ref().map(|l| format!("{:?}", l)).unwrap_or_else(|| "unknown".into()),
"depth": depth,
"summary": node.content.as_deref().map(|s| {
let mut t = s.trim().replace('\n', " ");
if t.len() > 160 { t.truncate(160); t.push_str("..."); }
t
}).unwrap_or_default(),
"score": score,
})
}).collect::<Vec<_>>()
});
println!("{}", serde_json::to_string_pretty(&j)?);
}
_ => {
println!("Format {:?} not yet implemented", format);
}
}
return Ok(());
}
// No graph expansion: print base results with depth=0
match format {
OutputFormat::Human => {
println!("Results ({}):", base_scored.len());
for (i, (_id, node, _score)) in base_scored.iter().enumerate() {
let summary = node
.content
.as_deref()
.map(|s| {
let mut t = s.trim().replace('\n', " ");
if t.len() > 160 {
t.truncate(160);
t.push_str("...");
}
t
})
.unwrap_or_else(|| "".to_string());
                    println!(
                        "{}. [d=0] {}\n    {}",
                        i + 1,
                        node.location.file_path,
                        summary
                    );
}
}
OutputFormat::Json => {
let j = serde_json::json!({
"results": base_scored.iter().map(|(_id, node, score)| serde_json::json!({
"name": node.name,
"path": node.location.file_path,
"node_type": node.node_type.as_ref().map(|t| format!("{:?}", t)).unwrap_or_else(|| "unknown".into()),
"language": node.language.as_ref().map(|l| format!("{:?}", l)).unwrap_or_else(|| "unknown".into()),
"depth": 0,
"summary": node.content.as_deref().map(|s| {
let mut t = s.trim().replace('\n', " ");
if t.len() > 160 { t.truncate(160); t.push_str("..."); }
t
}).unwrap_or_default(),
"score": score,
})).collect::<Vec<_>>()
});
println!("{}", serde_json::to_string_pretty(&j)?);
}
_ => {
println!("Format {:?} not yet implemented", format);
}
}
return Ok(());
}
#[cfg(not(feature = "faiss"))]
{
println!(
"Vector search requires FAISS support. Reinstall with:
cargo install --path crates/codegraph-mcp --features faiss"
);
Ok(())
}
}
async fn handle_config(action: ConfigAction) -> Result<()> {
match action {
ConfigAction::Show { json } => {
if json {
println!(r#"{{"embedding_model": "all-MiniLM-L6-v2", "vector_dimension": 384}}"#);
} else {
println!("{}", "Current Configuration:".blue().bold());
println!(" Embedding Model: all-MiniLM-L6-v2");
println!(" Vector Dimension: 384");
println!(" Database Path: ~/.codegraph/db");
}
}
ConfigAction::Set { key, value } => {
println!("Set {} = {}", key.yellow(), value.green());
}
        ConfigAction::Get { key } => {
            // TODO: Read the real value from the config store.
            println!("{}: {}", key, "value");
        }
        ConfigAction::Reset { yes } => {
            if !yes {
                println!("Reset configuration to defaults? (y/n)");
                // TODO: Read user input and abort unless confirmed.
            }
            println!("✓ Configuration reset to defaults");
        }
ConfigAction::Validate => {
println!("✓ Configuration is valid");
}
}
Ok(())
}
async fn handle_stats(
index: bool,
server: bool,
performance: bool,
_format: StatsFormat,
) -> Result<()> {
println!("{}", "CodeGraph Statistics".blue().bold());
println!();
    // NOTE: placeholder figures below until stats are wired to real backends.
    if index || (!index && !server && !performance) {
println!("Index Statistics:");
println!(" Projects: 3");
println!(" Files: 1,234");
println!(" Functions: 567");
println!(" Classes: 89");
println!(" Embeddings: 5,678");
println!(" Database Size: 125 MB");
println!();
}
if server {
println!("Server Statistics:");
println!(" Uptime: 2h 15m");
println!(" Requests: 1,234");
println!(" Errors: 2");
println!(" Avg Response Time: 45ms");
println!();
}
if performance {
println!("Performance Metrics:");
println!(" CPU Usage: 12%");
println!(" Memory: 45.2 MB");
println!(" Disk I/O: 1.2 MB/s");
println!(" Network: 0.5 MB/s");
}
Ok(())
}
async fn handle_init(path: PathBuf, name: Option<String>, _non_interactive: bool) -> Result<()> {
println!("{}", "Initializing CodeGraph project...".green().bold());
println!("Path: {:?}", path);
    if let Some(ref name) = name {
        println!("Project name: {}", name);
    }
// Create .codegraph directory structure
let codegraph_dir = path.join(".codegraph");
std::fs::create_dir_all(&codegraph_dir)?;
// Create subdirectories
std::fs::create_dir_all(codegraph_dir.join("db"))?;
std::fs::create_dir_all(codegraph_dir.join("vectors"))?;
std::fs::create_dir_all(codegraph_dir.join("cache"))?;
// Create basic config.toml
    // Use the provided project name in the generated config, defaulting otherwise.
    let config_content = format!(
        r#"# CodeGraph Project Configuration
[project]
name = "{}"
version = "1.0.0"
[indexing]
languages = ["rust", "python", "typescript", "javascript", "go", "java", "cpp"]
exclude_patterns = ["**/node_modules/**", "**/target/**", "**/.git/**", "**/build/**"]
include_patterns = ["src/**", "lib/**", "**/*.rs", "**/*.py", "**/*.ts", "**/*.js"]
[mcp]
enable_qwen_integration = true
enable_caching = true
enable_pattern_detection = true
[database]
path = "./.codegraph/db"
cache_path = "./.codegraph/cache"
vectors_path = "./.codegraph/vectors"
"#;
std::fs::write(codegraph_dir.join("config.toml"), config_content)?;
// Create .gitignore for CodeGraph files
let gitignore_content = r#"# CodeGraph generated files
.codegraph/db/
.codegraph/cache/
.codegraph/vectors/
.codegraph/logs/
"#;
std::fs::write(path.join(".gitignore.codegraph"), gitignore_content)?;
println!("✓ Created .codegraph/config.toml");
println!("✓ Created .codegraph/db/");
println!("✓ Created .codegraph/vectors/");
println!("✓ Created .codegraph/cache/");
println!("✓ Created .gitignore.codegraph");
println!();
println!("Project initialized successfully!");
println!();
println!("{}", "Next steps:".yellow().bold());
println!("1. Run 'codegraph index .' to index your codebase");
println!("2. Start MCP server: 'codegraph start stdio'");
println!("3. Configure Claude Desktop with CodeGraph MCP");
println!("4. Experience revolutionary AI codebase intelligence!");
Ok(())
}
async fn handle_clean(index: bool, vectors: bool, cache: bool, all: bool, yes: bool) -> Result<()> {
println!("{}", "Cleaning CodeGraph resources...".yellow().bold());
if all || index {
println!(" Cleaning index database...");
}
if all || vectors {
println!(" Cleaning vector embeddings...");
}
if all || cache {
println!(" Cleaning cache files...");
}
if !yes {
println!();
println!("This will permanently delete data. Continue? (y/n)");
// TODO: Read user input
}
// TODO: Implement cleanup logic
println!();
println!("✓ Cleanup complete");
Ok(())
}
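// Benchmark pipeline: index the project, time repeated vector queries, then a
// bounded BFS over the graph. Emits one JSON document (or a human summary):
//   {
//     "env": { "embedding_provider", "device", "features": { "faiss", "embeddings" } },
//     "dataset": { "path", "languages", "files", "lines" },
//     "indexing": { "total_seconds", "embeddings", "throughput_embeddings_per_sec" },
//     "vector_search": { "queries", "latency_ms": { "avg", "p50", "p95" } },
//     "graph": { "bfs_depth", "visited_nodes", "elapsed_ms" }
//   }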
async fn handle_perf(
path: PathBuf,
langs: Option<Vec<String>>,
warmup: usize,
trials: usize,
queries: Option<Vec<String>>,
workers: usize,
batch_size: usize,
device: Option<String>,
max_seq_len: usize,
clean: bool,
format: String,
graph_readonly: bool,
) -> Result<()> {
use serde_json::json;
use std::time::Instant;
    let project_root = path.canonicalize().unwrap_or_else(|_| path.clone());
if clean {
let codegraph_dir = project_root.join(".codegraph");
if codegraph_dir.exists() {
let _ = std::fs::remove_dir_all(&codegraph_dir);
}
}
let config = codegraph_mcp::IndexerConfig {
languages: langs.clone().unwrap_or_default(),
exclude_patterns: vec![],
include_patterns: vec![],
recursive: true,
force_reindex: true,
watch: false,
workers,
batch_size,
vector_dimension: 384, // Match EmbeddingGenerator default (all-MiniLM-L6-v2)
device: device.clone(),
max_seq_len,
project_root,
};
let multi_progress = MultiProgress::new();
let mut indexer = codegraph_mcp::ProjectIndexer::new(config, multi_progress).await?;
let t0 = Instant::now();
let stats = indexer.index_project(&path).await?;
let indexing_secs = t0.elapsed().as_secs_f64();
// Release RocksDB handle before running queries (which open their own graph handles)
drop(indexer);
// Give RocksDB a brief moment to release OS locks
tokio::time::sleep(std::time::Duration::from_millis(75)).await;
let qset = if let Some(q) = queries {
q
} else {
vec![
"main function".to_string(),
"http server router".to_string(),
"database connection".to_string(),
"graph traversal".to_string(),
"embedding generator".to_string(),
]
};
    // Always run at least one warmup round so lazy caches and indices are hot.
    for _ in 0..warmup.max(1) {
for q in &qset {
let _ = codegraph_mcp::server::bin_search_with_scores(q.clone(), None, None, 10).await;
}
}
let mut latencies_ms: Vec<f64> = Vec::new();
for _ in 0..trials {
for q in &qset {
let t = Instant::now();
let _ = codegraph_mcp::server::bin_search_with_scores(q.clone(), None, None, 10).await;
latencies_ms.push(t.elapsed().as_secs_f64() * 1000.0);
}
}
    // Nearest-rank percentiles over the sorted latencies; total_cmp avoids
    // the panic path of partial_cmp().unwrap() on NaN.
    latencies_ms.sort_by(|a, b| a.total_cmp(b));
    let p50 = latencies_ms
        .get(latencies_ms.len() / 2)
        .copied()
        .unwrap_or(0.0);
    let p95 = latencies_ms
        .get(((latencies_ms.len() as f64) * 0.95).floor() as usize)
        .copied()
        .unwrap_or(0.0);
let avg = if latencies_ms.is_empty() {
0.0
} else {
latencies_ms.iter().sum::<f64>() / latencies_ms.len() as f64
};
// Open graph with small retry to avoid transient lock contention
let graph = {
use std::time::Duration;
let mut attempts = 0;
loop {
let open_res = if graph_readonly {
codegraph_graph::CodeGraph::new_read_only()
} else {
codegraph_graph::CodeGraph::new()
};
match open_res {
Ok(g) => break g,
Err(e) => {
let msg = e.to_string();
if msg.contains("LOCK") && attempts < 10 {
tokio::time::sleep(Duration::from_millis(50)).await;
attempts += 1;
continue;
}
return Err(e.into());
}
}
}
};
    let mut any_node: Option<codegraph_core::NodeId> = None;
    // Assumes the working directory is the project root, where the indexer
    // wrote .codegraph/faiss_ids.json.
    if let Ok(ids_raw) = std::fs::read_to_string(".codegraph/faiss_ids.json") {
if let Ok(ids) = serde_json::from_str::<Vec<codegraph_core::NodeId>>(&ids_raw) {
any_node = ids.into_iter().next();
}
}
let (graph_bfs_ms, bfs_nodes) = if let Some(start) = any_node {
use std::collections::{HashSet, VecDeque};
let t = Instant::now();
let mut seen: HashSet<codegraph_core::NodeId> = HashSet::new();
let mut q: VecDeque<(codegraph_core::NodeId, usize)> = VecDeque::new();
q.push_back((start, 0));
seen.insert(start);
let mut visited = 0usize;
while let Some((nid, d)) = q.pop_front() {
let _ = graph.get_node(nid).await; // load
visited += 1;
if d >= 2 {
continue;
}
for nb in graph.get_neighbors(nid).await.unwrap_or_default() {
if seen.insert(nb) {
q.push_back((nb, d + 1));
}
}
if visited >= 1000 {
break;
}
}
(t.elapsed().as_secs_f64() * 1000.0, visited)
} else {
(0.0, 0)
};
    let throughput_emb_per_sec = if indexing_secs > 0.0 {
        stats.embeddings as f64 / indexing_secs
    } else {
        0.0
    };
    let out = json!({
        "env": {
            "embedding_provider": std::env::var("CODEGRAPH_EMBEDDING_PROVIDER").ok(),
            "device": device,
            "features": {"faiss": cfg!(feature = "faiss"), "embeddings": cfg!(feature = "embeddings")}
        },
        "dataset": {"path": path.display().to_string(), "languages": langs, "files": stats.files, "lines": stats.lines},
        "indexing": {"total_seconds": indexing_secs, "embeddings": stats.embeddings,
            "throughput_embeddings_per_sec": throughput_emb_per_sec},
        "vector_search": {"queries": qset.len() * trials, "latency_ms": {"avg": avg, "p50": p50, "p95": p95}},
        "graph": {"bfs_depth": 2, "visited_nodes": bfs_nodes, "elapsed_ms": graph_bfs_ms}
    });
    if format == "human" {
        // Print from locals rather than the JSON tree: format specifiers such
        // as {:.2} are ignored by serde_json::Value's Display implementation.
        println!("Performance Results\n====================");
        println!(
            "Dataset: {} ({} files, {} lines)",
            path.display(),
            stats.files,
            stats.lines
        );
        println!(
            "Indexing: {:.2}s ({} embeddings, {:.1} emb/s)",
            indexing_secs, stats.embeddings, throughput_emb_per_sec
        );
        println!(
            "Vector Search: avg={:.1}ms p50={:.1}ms p95={:.1}ms ({} queries)",
            avg,
            p50,
            p95,
            qset.len() * trials
        );
        println!(
            "Graph BFS depth=2: visited {} nodes in {:.1}ms",
            bfs_nodes, graph_bfs_ms
        );
    } else {
        println!("{}", serde_json::to_string_pretty(&out)?);
    }
Ok(())
}
/// Estimate available system memory in GB
fn estimate_available_memory_gb() -> usize {
#[cfg(target_os = "macos")]
{
if let Ok(output) = std::process::Command::new("sysctl")
.args(["-n", "hw.memsize"])
.output()
{
if let Ok(memsize_str) = String::from_utf8(output.stdout) {
if let Ok(memsize) = memsize_str.trim().parse::<u64>() {
return (memsize / 1024 / 1024 / 1024) as usize;
}
}
}
}
#[cfg(target_os = "linux")]
{
if let Ok(content) = std::fs::read_to_string("/proc/meminfo") {
for line in content.lines() {
if line.starts_with("MemTotal:") {
if let Some(kb_str) = line.split_whitespace().nth(1) {
if let Ok(kb) = kb_str.parse::<u64>() {
return (kb / 1024 / 1024) as usize;
}
}
}
}
}
}
16 // Default assumption if detection fails
}
/// Optimize batch size and workers based on available memory and embedding provider
fn optimize_for_memory(
memory_gb: usize,
default_batch_size: usize,
default_workers: usize,
) -> (usize, usize) {
let embedding_provider = std::env::var("CODEGRAPH_EMBEDDING_PROVIDER").unwrap_or_default();
let optimized_batch_size = if default_batch_size == 100 {
// Default value
if embedding_provider == "ollama" {
// Ollama models work better with smaller batches for stability
match memory_gb {
128.. => 1024, // 128GB+: Large but stable batch size
96..=127 => 768, // 96-127GB: Medium-large batch
64..=95 => 512, // 64-95GB: Medium batch
32..=63 => 256, // 32-63GB: Small batch
16..=31 => 128, // 16-31GB: Very small batch
_ => 64, // <16GB: Minimal batch
}
} else {
// ONNX/OpenAI can handle much larger batches
match memory_gb {
128.. => 20480, // 128GB+: Ultra-high batch size
96..=127 => 15360, // 96-127GB: Very high batch size
64..=95 => 10240, // 64-95GB: High batch size
48..=63 => 5120, // 48-63GB: Medium-high batch size
32..=47 => 2048, // 32-47GB: Medium batch size
16..=31 => 512, // 16-31GB: Conservative batch size
_ => 100, // <16GB: Keep default
}
}
} else {
default_batch_size // User specified - respect their choice
};
let optimized_workers = if default_workers == 4 {
// Default value
match memory_gb {
128.. => 16, // 128GB+: Maximum parallelism
96..=127 => 14, // 96-127GB: Very high parallelism
64..=95 => 12, // 64-95GB: High parallelism
48..=63 => 10, // 48-63GB: Medium-high parallelism
32..=47 => 8, // 32-47GB: Medium parallelism
16..=31 => 6, // 16-31GB: Conservative parallelism
_ => 4, // <16GB: Keep default
}
} else {
default_workers // User specified - respect their choice
};
(optimized_batch_size, optimized_workers)
}
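#[cfg(test)]
mod tests {
    use super::*;

    // Minimal sketch of unit tests for the tuning heuristics above. Only
    // env-independent paths are asserted, since the batch-size branch for
    // default inputs depends on CODEGRAPH_EMBEDDING_PROVIDER.

    #[test]
    fn user_overrides_are_respected() {
        // Non-default batch size and worker count pass through untouched.
        assert_eq!(optimize_for_memory(64, 256, 8), (256, 8));
    }

    #[test]
    fn worker_count_scales_with_memory() {
        // Worker scaling is independent of the embedding provider.
        assert_eq!(optimize_for_memory(8, 100, 4).1, 4); // <16GB keeps default
        assert_eq!(optimize_for_memory(40, 100, 4).1, 8); // 32-47GB
        assert_eq!(optimize_for_memory(200, 100, 4).1, 16); // 128GB+
    }

    #[test]
    fn memory_estimate_does_not_panic() {
        // Detection may fail in sandboxed CI; the fallback path returns 16.
        let _gb = estimate_available_memory_gb();
    }
}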