
CodeGraph CLI MCP Server

by Jakedismo
insights_generator.rs (11 kB)
use crate::reranker::{ReRankingPipeline, RerankedResult, RerankerConfig};
use crate::EmbeddingGenerator;
use codegraph_core::{CodeNode, NodeId, Result};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::{info, warn};

/// Mode for insights generation
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum InsightsMode {
    /// Fast mode: Return context only, no LLM processing
    /// Best for agent-based workflows (Claude, GPT-4, etc.)
    ContextOnly,
    /// Balanced mode: Use reranking + lightweight LLM
    /// Good for local processing with speed requirements
    Balanced,
    /// Deep mode: Use full LLM processing
    /// Best for comprehensive analysis, slower
    Deep,
}

impl Default for InsightsMode {
    fn default() -> Self {
        Self::ContextOnly // Default to fastest mode
    }
}

/// Configuration for insights generation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InsightsConfig {
    pub mode: InsightsMode,
    pub reranker_config: RerankerConfig,
    pub max_context_length: usize,
    pub include_metadata: bool,
}

impl Default for InsightsConfig {
    fn default() -> Self {
        Self {
            mode: InsightsMode::ContextOnly,
            reranker_config: RerankerConfig::default(),
            max_context_length: 8000, // Tokens
            include_metadata: true,
        }
    }
}

/// Result from insights generation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InsightsResult {
    pub query: String,
    pub mode: InsightsMode,
    pub reranked_files: Vec<RerankedResult>,
    pub context: String,
    pub llm_insights: Option<String>,
    pub metrics: InsightsMetrics,
}

/// Performance metrics for insights generation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InsightsMetrics {
    pub total_candidates: usize,
    pub files_analyzed: usize,
    pub reranking_duration_ms: f64,
    pub llm_duration_ms: f64,
    pub total_duration_ms: f64,
    pub speedup_ratio: f64, // vs processing all files
}

/// High-performance insights generator with reranking pipeline
pub struct InsightsGenerator {
    config: InsightsConfig,
    reranking_pipeline: ReRankingPipeline,
}

impl InsightsGenerator {
    pub fn new(config: InsightsConfig, embedding_generator: Arc<EmbeddingGenerator>) -> Self {
        let reranking_pipeline = ReRankingPipeline::new(
            config.reranker_config.clone(),
            embedding_generator,
        );
        Self {
            config,
            reranking_pipeline,
        }
    }

    /// Generate insights with automatic mode selection
    pub async fn generate_insights(
        &self,
        query: &str,
        candidates: Vec<(NodeId, CodeNode)>,
    ) -> Result<InsightsResult> {
        let total_start = std::time::Instant::now();

        info!(
            "🚀 Generating insights in {:?} mode for {} candidates",
            self.config.mode,
            candidates.len()
        );

        // Stage 1 & 2: Reranking pipeline (always runs)
        let reranking_start = std::time::Instant::now();
        let reranked_results = self
            .reranking_pipeline
            .rerank_pipeline(query, candidates.clone())
            .await?;
        let reranking_duration = reranking_start.elapsed().as_secs_f64() * 1000.0;

        info!(
            "✅ Reranking complete: {} -> {} files ({:.1}% reduction)",
            candidates.len(),
            reranked_results.len(),
            (1.0 - reranked_results.len() as f64 / candidates.len() as f64) * 100.0
        );

        // Build context from reranked results
        let context = self.build_context(&reranked_results);

        // Stage 3: Optional LLM processing
        let (llm_insights, llm_duration) = match self.config.mode {
            InsightsMode::ContextOnly => {
                info!("📋 Context-only mode: Skipping LLM processing (returning context for agent)");
                (None, 0.0)
            }
            InsightsMode::Balanced => {
                self.generate_llm_insights_lightweight(query, &reranked_results)
                    .await?
            }
            InsightsMode::Deep => {
                self.generate_llm_insights_deep(query, &reranked_results)
                    .await?
            }
        };

        let total_duration = total_start.elapsed().as_secs_f64() * 1000.0;

        // Calculate speedup vs processing all files
        let estimated_full_llm_time = candidates.len() as f64 * 500.0; // Estimate 500ms per file
        let speedup_ratio = estimated_full_llm_time / total_duration;

        let metrics = InsightsMetrics {
            total_candidates: candidates.len(),
            files_analyzed: reranked_results.len(),
            reranking_duration_ms: reranking_duration,
            llm_duration_ms: llm_duration,
            total_duration_ms: total_duration,
            speedup_ratio,
        };

        info!(
            "🎉 Insights generation complete in {:.2}ms ({:.1}x faster than processing all files)",
            total_duration, speedup_ratio
        );

        Ok(InsightsResult {
            query: query.to_string(),
            mode: self.config.mode,
            reranked_files: reranked_results,
            context,
            llm_insights,
            metrics,
        })
    }

    /// Build formatted context from reranked results
    fn build_context(&self, results: &[RerankedResult]) -> String {
        let mut context = String::new();
        context.push_str(&format!("# Retrieved Context ({} files)\n\n", results.len()));

        for (idx, result) in results.iter().enumerate() {
            if let Some(ref node) = result.node {
                context.push_str(&format!(
                    "## File {} (Score: {:.3})\n",
                    idx + 1,
                    result.relevance_score
                ));

                if self.config.include_metadata {
                    context.push_str(&format!("**Path**: {}\n", node.location.file_path));
                    context.push_str(&format!("**Name**: {}\n", node.name));
                    if let Some(ref lang) = node.language {
                        context.push_str(&format!("**Language**: {:?}\n", lang));
                    }
                    if let Some(ref node_type) = node.node_type {
                        context.push_str(&format!("**Type**: {:?}\n", node_type));
                    }
                    context.push_str("\n");
                }

                context.push_str("**Content**:\n```\n");
                if let Some(ref content) = node.content {
                    // Truncate to max context length
                    let truncated = if content.len() > self.config.max_context_length {
                        format!("{}... [truncated]", &content[..self.config.max_context_length])
                    } else {
                        content.to_string()
                    };
                    context.push_str(&truncated);
                } else {
                    context.push_str(&node.name);
                }
                context.push_str("\n```\n\n");
            }
        }

        context
    }

    /// Generate lightweight LLM insights (balanced mode)
    async fn generate_llm_insights_lightweight(
        &self,
        query: &str,
        results: &[RerankedResult],
    ) -> Result<(Option<String>, f64)> {
        let start = std::time::Instant::now();

        // Get top K files for LLM processing
        let llm_candidates = self.reranking_pipeline.get_llm_candidates(results);

        if llm_candidates.is_empty() {
            warn!("No candidates for LLM processing in balanced mode");
            return Ok((None, 0.0));
        }

        info!("🤖 Running lightweight LLM on {} files", llm_candidates.len());

        // In production, this would call the local LLM (Qwen2.5-Coder)
        // For now, return a placeholder
        let insights = format!(
            "Lightweight analysis of {} files for query: '{}'\n\
             This would contain quick insights from Qwen2.5-Coder.",
            llm_candidates.len(),
            query
        );

        let duration = start.elapsed().as_secs_f64() * 1000.0;
        Ok((Some(insights), duration))
    }

    /// Generate deep LLM insights (deep mode)
    async fn generate_llm_insights_deep(
        &self,
        query: &str,
        results: &[RerankedResult],
    ) -> Result<(Option<String>, f64)> {
        let start = std::time::Instant::now();

        info!("🔬 Running deep LLM analysis on {} files", results.len());

        // In production, this would call the local LLM with more context
        let insights = format!(
            "Deep analysis of {} files for query: '{}'\n\
             This would contain comprehensive insights from Qwen2.5-Coder.",
            results.len(),
            query
        );

        let duration = start.elapsed().as_secs_f64() * 1000.0;
        Ok((Some(insights), duration))
    }

    /// Create a preset for fast agent-based workflows
    pub fn for_agent_workflow(embedding_generator: Arc<EmbeddingGenerator>) -> Self {
        let config = InsightsConfig {
            mode: InsightsMode::ContextOnly,
            reranker_config: RerankerConfig {
                embedding_top_k: 50, // More aggressive filtering
                embedding_threshold: 0.4,
                enable_cross_encoder: true,
                cross_encoder_top_k: 15,
                cross_encoder_threshold: 0.6,
                enable_llm_insights: false, // No local LLM
                llm_top_k: 0,
                enable_batch_processing: true,
                batch_size: 64,
                max_concurrent_requests: 8,
            },
            max_context_length: 4000,
            include_metadata: true,
        };
        Self::new(config, embedding_generator)
    }

    /// Create a preset for local LLM processing
    pub fn for_local_llm(embedding_generator: Arc<EmbeddingGenerator>) -> Self {
        let config = InsightsConfig {
            mode: InsightsMode::Balanced,
            reranker_config: RerankerConfig {
                embedding_top_k: 100,
                embedding_threshold: 0.3,
                enable_cross_encoder: true,
                cross_encoder_top_k: 20,
                cross_encoder_threshold: 0.5,
                enable_llm_insights: true,
                llm_top_k: 10, // Only top 10 to LLM
                enable_batch_processing: true,
                batch_size: 32,
                max_concurrent_requests: 4,
            },
            max_context_length: 8000,
            include_metadata: true,
        };
        Self::new(config, embedding_generator)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_insights_modes() {
        // Test that different modes produce different results
        let embedding_gen = Arc::new(EmbeddingGenerator::default());

        let context_only = InsightsGenerator::for_agent_workflow(embedding_gen.clone());
        let local_llm = InsightsGenerator::for_local_llm(embedding_gen.clone());

        assert_eq!(context_only.config.mode, InsightsMode::ContextOnly);
        assert_eq!(local_llm.config.mode, InsightsMode::Balanced);
    }
}
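Usage note (not part of the file above): a minimal sketch of how the `for_agent_workflow` preset could be driven from an async caller. It assumes `EmbeddingGenerator::default()` is available (as the test module above suggests), that the module is reachable as `crate::insights_generator`, and that the `(NodeId, CodeNode)` candidates come from an earlier vector-search stage; the empty candidate list and the function name `context_for_agent` are placeholders for illustration only.

use std::sync::Arc;

use codegraph_core::{CodeNode, NodeId, Result};

// Hypothetical caller module; these paths mirror the file above and may
// differ in the real crate layout.
use crate::insights_generator::{InsightsGenerator, InsightsMode};
use crate::EmbeddingGenerator;

/// Run a context-only insights pass and return the formatted context
/// for a downstream agent.
pub async fn context_for_agent(query: &str) -> Result<String> {
    // Preset: aggressive reranking, no local LLM (InsightsMode::ContextOnly).
    let embeddings = Arc::new(EmbeddingGenerator::default());
    let generator = InsightsGenerator::for_agent_workflow(embeddings);

    // Placeholder: in a real run these pairs come from the vector-search stage.
    let candidates: Vec<(NodeId, CodeNode)> = Vec::new();

    let result = generator.generate_insights(query, candidates).await?;

    // ContextOnly skips stage 3, so `llm_insights` stays None and only the
    // reranked, formatted context is returned.
    debug_assert_eq!(result.mode, InsightsMode::ContextOnly);
    debug_assert!(result.llm_insights.is_none());

    Ok(result.context)
}

Swapping in `for_local_llm` instead of `for_agent_workflow` would select the Balanced preset, where the top reranked files are also passed to the local LLM stage.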

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'
