use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::env;
use std::fmt;
/// Result type for LLM operations
pub type LLMResult<T> = anyhow::Result<T>;
/// Performance characteristics of an LLM provider
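///
/// # Example
///
/// An illustrative set of characteristics for a hypothetical hosted model;
/// the numbers below are placeholders, not taken from any real provider:
///
/// ```ignore
/// let characteristics = ProviderCharacteristics {
///     max_tokens: 128_000,
///     avg_latency_ms: 800,
///     rpm_limit: Some(500),
///     tpm_limit: Some(200_000),
///     supports_streaming: true,
///     supports_functions: true,
/// };
/// ```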
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderCharacteristics {
/// Maximum number of tokens the model can handle in a single request (the context window)
pub max_tokens: usize,
/// Typical latency in milliseconds (for estimation)
pub avg_latency_ms: u64,
/// Requests per minute limit (for rate limiting)
pub rpm_limit: Option<u64>,
/// Tokens per minute limit (for rate limiting)
pub tpm_limit: Option<u64>,
/// Whether the provider supports streaming responses
pub supports_streaming: bool,
/// Whether the provider supports function calling
pub supports_functions: bool,
}
/// Configuration for generation parameters
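///
/// # Example
///
/// A minimal sketch of a config aimed at a reasoning model, built on top of
/// the defaults; the sampling fields are left at their defaults because
/// reasoning models ignore them:
///
/// ```ignore
/// let config = GenerationConfig {
///     reasoning_effort: Some("high".to_string()),
///     max_completion_token: Some(2048),
///     ..GenerationConfig::default()
/// };
/// ```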
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerationConfig {
/// Temperature for sampling (0.0 to 2.0) - Not supported by reasoning models
pub temperature: f32,
/// Maximum tokens to generate (legacy parameter for Chat Completions API)
pub max_tokens: Option<usize>,
/// Maximum output tokens (for the Responses API and reasoning models); falls back to `max_tokens` when unset
pub max_completion_token: Option<usize>,
/// Reasoning effort for reasoning models: "minimal", "medium", "high"
pub reasoning_effort: Option<String>,
/// Top-p nucleus sampling parameter - Not supported by reasoning models
pub top_p: Option<f32>,
/// Frequency penalty (-2.0 to 2.0) - Not supported by reasoning models
pub frequency_penalty: Option<f32>,
/// Presence penalty (-2.0 to 2.0) - Not supported by reasoning models
pub presence_penalty: Option<f32>,
/// Stop sequences
pub stop: Option<Vec<String>>,
}
impl Default for GenerationConfig {
fn default() -> Self {
Self {
temperature: 0.1,
max_tokens: Some(4096),
max_completion_token: None, // Will use max_tokens if not set
reasoning_effort: default_reasoning_effort(),
top_p: None,
frequency_penalty: None,
presence_penalty: None,
stop: None,
}
}
}
/// Resolve the default reasoning effort from the `CODEGRAPH_REASONING_EFFORT`
/// environment variable, accepting only "minimal", "medium", or "high" and
/// falling back to "medium" otherwise.
fn default_reasoning_effort() -> Option<String> {
env::var("CODEGRAPH_REASONING_EFFORT")
.ok()
.filter(|v| matches!(v.as_str(), "minimal" | "medium" | "high"))
.or_else(|| Some("medium".to_string()))
}
/// A message in the conversation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub role: MessageRole,
pub content: String,
}
/// Role of a message in the conversation
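///
/// # Example
///
/// Roles serialize to lowercase strings, matching the wire format of
/// chat-style APIs (illustrative check, assuming `serde_json` is available):
///
/// ```ignore
/// assert_eq!(
///     serde_json::to_string(&MessageRole::Assistant).unwrap(),
///     "\"assistant\""
/// );
/// ```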
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MessageRole {
System,
User,
Assistant,
}
impl fmt::Display for MessageRole {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MessageRole::System => write!(f, "system"),
MessageRole::User => write!(f, "user"),
MessageRole::Assistant => write!(f, "assistant"),
}
}
}
/// Response from the LLM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMResponse {
/// Generated text content
pub content: String,
/// Total tokens used in the request
pub total_tokens: Option<usize>,
/// Tokens used in the prompt
pub prompt_tokens: Option<usize>,
/// Tokens generated in the completion
pub completion_tokens: Option<usize>,
/// Finish reason (e.g., "stop", "length", "function_call")
pub finish_reason: Option<String>,
/// Model used for generation
pub model: String,
}
/// Metrics for LLM operations
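///
/// # Example
///
/// An illustrative record with made-up numbers, showing how the derived
/// throughput relates to the raw measurements:
///
/// ```ignore
/// let metrics = LLMMetrics {
///     duration_ms: 2_000,
///     total_tokens: 500,
///     // 500 tokens over 2 seconds
///     tokens_per_second: 500.0 / 2.0,
///     cached: false,
/// };
/// ```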
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMMetrics {
/// Total time taken for the request in milliseconds
pub duration_ms: u64,
/// Total tokens used
pub total_tokens: usize,
/// Tokens per second
pub tokens_per_second: f64,
/// Whether the request was cached
pub cached: bool,
}
/// Main trait for LLM providers
#[async_trait]
pub trait LLMProvider: Send + Sync {
/// Generate a completion for a single prompt
async fn generate(&self, prompt: &str) -> LLMResult<LLMResponse> {
let messages = vec![Message {
role: MessageRole::User,
content: prompt.to_string(),
}];
self.generate_chat(&messages, &GenerationConfig::default())
.await
}
/// Generate a completion with custom configuration
async fn generate_with_config(
&self,
prompt: &str,
config: &GenerationConfig,
) -> LLMResult<LLMResponse> {
let messages = vec![Message {
role: MessageRole::User,
content: prompt.to_string(),
}];
self.generate_chat(&messages, config).await
}
/// Generate a chat completion with message history
async fn generate_chat(
&self,
messages: &[Message],
config: &GenerationConfig,
) -> LLMResult<LLMResponse>;
/// Check if the provider is available and ready
async fn is_available(&self) -> bool;
/// Get the name of this provider
fn provider_name(&self) -> &str;
/// Get the model identifier
fn model_name(&self) -> &str;
/// Get performance characteristics
fn characteristics(&self) -> ProviderCharacteristics;
/// Get the maximum context window size
fn context_window(&self) -> usize {
self.characteristics().max_tokens
}
}
/// Code intelligence capabilities (specialized for code analysis)
#[async_trait]
pub trait CodeIntelligenceProvider: LLMProvider {
/// Analyze semantic context of code
async fn analyze_semantic_context(&self, query: &str, context: &str) -> LLMResult<String>;
/// Detect patterns in code samples
async fn detect_patterns(&self, code_samples: &[String]) -> LLMResult<String>;
/// Analyze impact of changes
async fn analyze_impact(&self, target_code: &str, dependencies: &str) -> LLMResult<String>;
}
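// A minimal sketch of how a backend plugs into `LLMProvider`, kept as an
// in-file test so the default `generate` wiring stays exercised. The echo
// provider, its canned metadata, and the use of `#[tokio::test]` (assumed to
// be available as a dev-dependency) are illustrative only, not a real
// provider integration.
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;

    /// Toy provider that echoes the last message back as the completion.
    struct EchoProvider;

    #[async_trait]
    impl LLMProvider for EchoProvider {
        async fn generate_chat(
            &self,
            messages: &[Message],
            _config: &GenerationConfig,
        ) -> LLMResult<LLMResponse> {
            let content = messages
                .last()
                .map(|m| m.content.clone())
                .unwrap_or_default();
            Ok(LLMResponse {
                content,
                total_tokens: None,
                prompt_tokens: None,
                completion_tokens: None,
                finish_reason: Some("stop".to_string()),
                model: "echo-test".to_string(),
            })
        }

        async fn is_available(&self) -> bool {
            true
        }

        fn provider_name(&self) -> &str {
            "echo"
        }

        fn model_name(&self) -> &str {
            "echo-test"
        }

        fn characteristics(&self) -> ProviderCharacteristics {
            ProviderCharacteristics {
                max_tokens: 8192,
                avg_latency_ms: 1,
                rpm_limit: None,
                tpm_limit: None,
                supports_streaming: false,
                supports_functions: false,
            }
        }
    }

    // The default `generate` should wrap the prompt in a single user message
    // and forward it to `generate_chat`.
    #[tokio::test]
    async fn default_generate_delegates_to_generate_chat() {
        let provider = EchoProvider;
        let response = provider.generate("hello").await.unwrap();
        assert_eq!(response.content, "hello");
        assert_eq!(provider.context_window(), 8192);
    }
}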