
CodeGraph CLI MCP Server

by Jakedismo
openai_compatible_provider.rs (29.1 kB)
use crate::llm_provider::*;
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;

/// Configuration for OpenAI-compatible providers (LM Studio, Ollama, etc.)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenAICompatibleConfig {
    /// Base URL for the API (e.g., "http://localhost:1234/v1")
    pub base_url: String,
    /// Model to use
    pub model: String,
    /// Maximum context window
    pub context_window: usize,
    /// Request timeout in seconds
    pub timeout_secs: u64,
    /// Maximum retries for failed requests
    pub max_retries: u32,
    /// Optional API key (some providers require it, some don't)
    pub api_key: Option<String>,
    /// Provider name for display purposes
    pub provider_name: String,
    /// Whether to use Responses API (true) or Chat Completions API (false)
    pub use_responses_api: bool,
}

impl Default for OpenAICompatibleConfig {
    fn default() -> Self {
        Self {
            base_url: "http://localhost:1234/v1".to_string(),
            model: "local-model".to_string(),
            context_window: 128_000,
            timeout_secs: 120,
            max_retries: 3,
            api_key: None,
            provider_name: "openai-compatible".to_string(),
            use_responses_api: true, // Responses API is now the default (99% provider support)
        }
    }
}

impl OpenAICompatibleConfig {
    /// Create config for LM Studio
    pub fn lm_studio(model: String) -> Self {
        Self {
            base_url: "http://localhost:1234/v1".to_string(),
            model,
            context_window: 128_000,
            provider_name: "lmstudio".to_string(),
            use_responses_api: true, // LM Studio supports Responses API (set CODEGRAPH_USE_COMPLETIONS_API=true for legacy)
            ..Default::default()
        }
    }

    /// Create config for Ollama (OpenAI-compatible endpoint)
    pub fn ollama(model: String) -> Self {
        Self {
            base_url: "http://localhost:11434/v1".to_string(),
            model,
            context_window: 128_000,
            provider_name: "ollama".to_string(),
            use_responses_api: false, // Ollama uses native API, not OpenAI-compatible protocol
            ..Default::default()
        }
    }

    /// Create config for custom endpoint
    pub fn custom(base_url: String, model: String, provider_name: String) -> Self {
        Self {
            base_url,
            model,
            provider_name,
            use_responses_api: true, // Default to Responses API (99% of providers support it)
            ..Default::default()
        }
    }
}

/// OpenAI-compatible LLM provider
pub struct OpenAICompatibleProvider {
    config: OpenAICompatibleConfig,
    client: Client,
}

impl OpenAICompatibleProvider {
    /// Create a new OpenAI-compatible provider
    pub fn new(config: OpenAICompatibleConfig) -> Result<Self> {
        let client = Client::builder()
            .timeout(Duration::from_secs(config.timeout_secs))
            .build()
            .context("Failed to create HTTP client")?;

        Ok(Self { config, client })
    }

    /// Create for LM Studio
    pub fn lm_studio(model: String) -> Result<Self> {
        Self::new(OpenAICompatibleConfig::lm_studio(model))
    }

    /// Create for Ollama
    pub fn ollama(model: String) -> Result<Self> {
        Self::new(OpenAICompatibleConfig::ollama(model))
    }

    /// Send a request with retry logic
    async fn send_request(
        &self,
        messages: &[Message],
        config: &GenerationConfig,
    ) -> Result<ResponseAPIResponse> {
        let mut last_error = None;

        for attempt in 0..=self.config.max_retries {
            if attempt > 0 {
                // Exponential backoff: 1s, 2s, 4s
                let delay = Duration::from_secs(2u64.pow(attempt - 1));
                tokio::time::sleep(delay).await;
            }

            match self.try_request(messages, config).await {
                Ok(response) => return Ok(response),
                Err(e) => {
                    last_error = Some(e);
                    if attempt < self.config.max_retries {
                        tracing::warn!(
                            "{} request failed (attempt {}/{}), retrying...",
                            self.config.provider_name,
                            attempt + 1,
                            self.config.max_retries + 1
                        );
                    }
                }
            }
        }

        Err(last_error.unwrap_or_else(|| anyhow!("All retry attempts failed")))
    }

    /// Try a single request using Responses API format
    async fn try_request(
        &self,
        messages: &[Message],
        config: &GenerationConfig,
    ) -> Result<ResponseAPIResponse> {
        if self.config.use_responses_api {
            self.try_responses_api_request(messages, config).await
        } else {
            self.try_chat_completions_request(messages, config).await
        }
    }

    /// Try request using Responses API
    async fn try_responses_api_request(
        &self,
        messages: &[Message],
        config: &GenerationConfig,
    ) -> Result<ResponseAPIResponse> {
        self.try_responses_api_request_with_tools(messages, None, config)
            .await
            .map(|(response, _)| response)
    }

    /// Try request using Responses API with optional tool calling support
    async fn try_responses_api_request_with_tools(
        &self,
        messages: &[Message],
        tools: Option<&[crate::llm_provider::ToolDefinition]>,
        config: &GenerationConfig,
    ) -> Result<(
        ResponseAPIResponse,
        Option<Vec<crate::llm_provider::ToolCall>>,
    )> {
        // Extract system instructions and user input
        let instructions = messages
            .iter()
            .find(|m| matches!(m.role, MessageRole::System))
            .map(|m| m.content.clone());

        let input = messages
            .iter()
            .filter(|m| !matches!(m.role, MessageRole::System))
            .map(|m| format!("{}: {}", m.role, m.content))
            .collect::<Vec<_>>()
            .join("\n\n");

        // Convert CodeGraph ToolDefinition to Responses API format
        let responses_tools = tools.map(|t| {
            t.iter()
                .map(|tool| ResponsesAPITool {
                    tool_type: "function".to_string(),
                    name: tool.function.name.clone(),
                    description: tool.function.description.clone(),
                    parameters: tool.function.parameters.clone(),
                })
                .collect()
        });

        let request = ResponsesAPIRequest {
            model: self.config.model.clone(),
            input,
            instructions,
            max_completion_tokens: config.max_completion_token.or(config.max_tokens),
            reasoning_effort: config.reasoning_effort.clone(),
            top_p: config.top_p,
            stop: config.stop.clone(),
            response_format: config.response_format.clone(),
            tools: responses_tools,
            parallel_tool_calls: config.parallel_tool_calls,
        };

        let mut request_builder = self
            .client
            .post(format!("{}/responses", self.config.base_url))
            .header("Content-Type", "application/json")
            .json(&request);

        // Add API key if provided
        if let Some(api_key) = &self.config.api_key {
            request_builder =
                request_builder.header("Authorization", format!("Bearer {}", api_key));
        }

        let response = request_builder.send().await.context(format!(
            "Failed to send request to {} Responses API at {}",
            self.config.provider_name, self.config.base_url
        ))?;

        let status = response.status();
        if !status.is_success() {
            let error_text = response
                .text()
                .await
                .unwrap_or_else(|_| "Unknown error".to_string());
            return Err(anyhow!(
                "{} Responses API error ({}): {}. \
                 If your provider doesn't support Responses API, \
                 set CODEGRAPH_USE_COMPLETIONS_API=true to use Chat Completions API instead.",
                self.config.provider_name,
                status,
                error_text
            ));
        }

        // Get raw response text for debugging
        let response_text = response.text().await.context(format!(
            "Failed to read {} Responses API response body",
            self.config.provider_name
        ))?;

        // Log the raw response for debugging
        tracing::debug!(
            provider = %self.config.provider_name,
            response = %response_text,
            "Raw Responses API response"
        );

        // Parse the response
        let parsed: ResponseAPIResponse =
            serde_json::from_str::<ResponseAPIResponse>(&response_text).context(format!(
                "Failed to parse {} Responses API response. Raw response: {}. \
                 If your provider doesn't support Responses API, \
                 set CODEGRAPH_USE_COMPLETIONS_API=true to use Chat Completions API instead.",
                self.config.provider_name, response_text
            ))?;

        // Extract tool calls from function_call output items
        let tool_calls: Option<Vec<crate::llm_provider::ToolCall>> = {
            let calls: Vec<crate::llm_provider::ToolCall> = parsed
                .output
                .iter()
                .filter(|item| item.output_type == "function_call")
                .filter_map(|item| {
                    let call_id = item.call_id.as_ref()?;
                    let name = item.name.as_ref()?;
                    let arguments = item.arguments.as_ref()?;
                    Some(crate::llm_provider::ToolCall {
                        id: call_id.clone(),
                        call_type: "function".to_string(),
                        function: crate::llm_provider::FunctionCall {
                            name: name.clone(),
                            arguments: arguments.clone(),
                        },
                    })
                })
                .collect();

            if calls.is_empty() {
                None
            } else {
                Some(calls)
            }
        };

        Ok((parsed, tool_calls))
    }

    /// Try request using Chat Completions API (fallback for older systems)
    async fn try_chat_completions_request(
        &self,
        messages: &[Message],
        config: &GenerationConfig,
    ) -> Result<ResponseAPIResponse> {
        self.try_chat_completions_request_with_tools(messages, None, config)
            .await
            .map(|(response, _)| response)
    }

    /// Try request using Chat Completions API with optional tool calling support
    async fn try_chat_completions_request_with_tools(
        &self,
        messages: &[Message],
        tools: Option<&[crate::llm_provider::ToolDefinition]>,
        config: &GenerationConfig,
    ) -> Result<(
        ResponseAPIResponse,
        Option<Vec<crate::llm_provider::ToolCall>>,
    )> {
        // Extract Ollama-native format from response_format for compatibility
        let ollama_format = config.response_format.as_ref().and_then(|rf| {
            match rf {
                crate::llm_provider::ResponseFormat::JsonSchema { json_schema } => {
                    // For Ollama: Use just the schema object, not the nested structure
                    Some(json_schema.schema.clone())
                }
                crate::llm_provider::ResponseFormat::JsonObject => {
                    // For basic JSON mode
                    Some(serde_json::json!("json"))
                }
                crate::llm_provider::ResponseFormat::Text => None,
            }
        });

        // Skip reasoning_effort for Ollama - it doesn't support this parameter
        // and may interpret it incorrectly as "think"
        let reasoning_effort = if self.config.provider_name == "ollama" {
            None
        } else {
            config.reasoning_effort.clone()
        };

        let request = ChatCompletionsRequest {
            model: self.config.model.clone(),
            messages: messages
                .iter()
                .map(|m| ChatMessageRequest {
                    role: m.role.to_string(),
                    content: m.content.clone(),
                })
                .collect(),
            max_tokens: config.max_tokens,
            max_completion_tokens: config.max_completion_token.or(config.max_tokens),
            reasoning_effort,
            top_p: config.top_p,
            stop: config.stop.clone(),
            response_format: config.response_format.clone(),
            format: ollama_format,
            tools: tools.map(|t| t.to_vec()),
            parallel_tool_calls: config.parallel_tool_calls,
        };

        let mut request_builder = self
            .client
            .post(format!("{}/chat/completions", self.config.base_url))
            .header("Content-Type", "application/json")
            .json(&request);

        // Add API key if provided
        if let Some(api_key) = &self.config.api_key {
            request_builder =
                request_builder.header("Authorization", format!("Bearer {}", api_key));
        }

        let response = request_builder.send().await.context(format!(
            "Failed to send request to {} Chat Completions API at {}",
            self.config.provider_name, self.config.base_url
        ))?;

        let status = response.status();
        if !status.is_success() {
            let error_text = response
                .text()
                .await
                .unwrap_or_else(|_| "Unknown error".to_string());
            return Err(anyhow!(
                "{} API error ({}): {}",
                self.config.provider_name,
                status,
                error_text
            ));
        }

        let chat_response: ChatCompletionsResponse = response.json().await.context(format!(
            "Failed to parse {} Chat Completions API response",
            self.config.provider_name
        ))?;

        // Convert Chat Completions response to Responses API format
        let choice = chat_response
            .choices
            .first()
            .ok_or_else(|| anyhow!("No choices in response"))?;

        // Extract tool calls if present
        let tool_calls: Option<Vec<crate::llm_provider::ToolCall>> =
            choice.message.tool_calls.as_ref().map(|api_calls| {
                api_calls
                    .iter()
                    .map(|tc| crate::llm_provider::ToolCall {
                        id: tc.id.clone(),
                        call_type: tc.call_type.clone(),
                        function: crate::llm_provider::FunctionCall {
                            name: tc.function.name.clone(),
                            arguments: tc.function.arguments.clone(),
                        },
                    })
                    .collect()
            });

        // Create output array structure matching GPT-5.1 format
        let output = vec![ResponseOutputItem {
            output_type: "message".to_string(),
            content: vec![ResponseOutputContent {
                content_type: "output_text".to_string(),
                text: choice.message.content.clone().unwrap_or_default(),
            }],
            call_id: None,
            name: None,
            arguments: None,
        }];

        Ok((
            ResponseAPIResponse {
                id: chat_response.id,
                object: "response".to_string(),
                status: choice.finish_reason.clone(),
                output,
                usage: chat_response.usage.map(|u| ResponseUsage {
                    input_tokens: u.prompt_tokens,
                    output_tokens: u.completion_tokens,
                    total_tokens: u.total_tokens,
                }),
            },
            tool_calls,
        ))
    }
}

#[async_trait]
impl LLMProvider for OpenAICompatibleProvider {
    async fn generate_chat(
        &self,
        messages: &[Message],
        config: &GenerationConfig,
    ) -> LLMResult<LLMResponse> {
        let response = self.send_request(messages, config).await?;

        // Extract text from output array (matches OpenAI GPT-5.1 structure)
        // Response format: output[{type: "message", content: [{type: "output_text", text: "..."}]}]
        let content = response
            .output
            .iter()
            .filter(|item| item.output_type == "message")
            .flat_map(|item| &item.content)
            .filter(|c| c.content_type == "output_text")
            .map(|c| c.text.as_str())
            .collect::<Vec<_>>()
            .join("\n");

        let content = if content.is_empty() {
            serde_json::to_string(&response.output).unwrap_or_default()
        } else {
            content
        };

        Ok(LLMResponse {
            answer: content.clone(),
            content,
            total_tokens: response.usage.as_ref().map(|u| u.total_tokens),
            prompt_tokens: response.usage.as_ref().map(|u| u.input_tokens),
            completion_tokens: response.usage.as_ref().map(|u| u.output_tokens),
            finish_reason: response.status.clone(),
            model: self.config.model.clone(),
            tool_calls: None,
        })
    }

    async fn generate_chat_with_tools(
        &self,
        messages: &[Message],
        tools: Option<&[ToolDefinition]>,
        config: &GenerationConfig,
    ) -> LLMResult<LLMResponse> {
        // Use appropriate API based on configuration
        let (response, tool_calls) = if self.config.use_responses_api {
            // Use Responses API with native tool calling
            self.try_responses_api_request_with_tools(messages, tools, config)
                .await?
        } else {
            // Use Chat Completions API with native tool calling
            self.try_chat_completions_request_with_tools(messages, tools, config)
                .await?
        };

        // Extract text from output array
        let content = response
            .output
            .iter()
            .filter(|item| item.output_type == "message")
            .flat_map(|item| &item.content)
            .filter(|c| c.content_type == "output_text")
            .map(|c| c.text.as_str())
            .collect::<Vec<_>>()
            .join("\n");

        let content = if content.is_empty() {
            serde_json::to_string(&response.output).unwrap_or_default()
        } else {
            content
        };

        // Determine finish_reason - if we have tool calls, it should be "tool_calls"
        let finish_reason = if tool_calls.is_some() {
            Some("tool_calls".to_string())
        } else {
            response.status.clone()
        };

        tracing::info!(
            "generate_chat_with_tools: tool_calls={}, finish_reason={:?}",
            tool_calls.as_ref().map_or(0, |tc| tc.len()),
            finish_reason
        );

        Ok(LLMResponse {
            answer: content.clone(),
            content,
            total_tokens: response.usage.as_ref().map(|u| u.total_tokens),
            prompt_tokens: response.usage.as_ref().map(|u| u.input_tokens),
            completion_tokens: response.usage.as_ref().map(|u| u.output_tokens),
            finish_reason,
            model: self.config.model.clone(),
            tool_calls,
        })
    }

    async fn is_available(&self) -> bool {
        // Check if the endpoint is reachable
        let result = self
            .client
            .get(format!("{}/models", self.config.base_url))
            .send()
            .await;

        result.is_ok()
    }

    fn provider_name(&self) -> &str {
        &self.config.provider_name
    }

    fn model_name(&self) -> &str {
        &self.config.model
    }

    fn characteristics(&self) -> ProviderCharacteristics {
        ProviderCharacteristics {
            max_tokens: self.config.context_window,
            avg_latency_ms: 1500, // Local models are typically slower
            rpm_limit: None,      // No rate limits for local providers
            tpm_limit: None,
            supports_streaming: true,
            supports_functions: true, // Most modern providers support function calling
        }
    }
}

#[async_trait]
impl CodeIntelligenceProvider for OpenAICompatibleProvider {
    async fn analyze_semantic_context(&self, query: &str, context: &str) -> LLMResult<String> {
        let messages = vec![
            Message {
                role: MessageRole::System,
                content: "You are an expert code analyst. Analyze code context and answer queries with detailed, accurate information.".to_string(),
            },
            Message {
                role: MessageRole::User,
                content: format!(
                    "Context:\n{}\n\nQuery: {}\n\nProvide a detailed analysis:",
                    context, query
                ),
            },
        ];

        let response = self
            .generate_chat(&messages, &GenerationConfig::default())
            .await?;
        Ok(response.content)
    }

    async fn detect_patterns(&self, code_samples: &[String]) -> LLMResult<String> {
        let samples = code_samples.join("\n\n---\n\n");
        let messages = vec![
            Message {
                role: MessageRole::System,
                content: "You are an expert at identifying code patterns and best practices."
                    .to_string(),
            },
            Message {
                role: MessageRole::User,
                content: format!(
                    "Analyze these code samples and identify common patterns:\n\n{}",
                    samples
                ),
            },
        ];

        let response = self
            .generate_chat(&messages, &GenerationConfig::default())
            .await?;
        Ok(response.content)
    }

    async fn analyze_impact(&self, target_code: &str, dependencies: &str) -> LLMResult<String> {
        let messages = vec![
            Message {
                role: MessageRole::System,
                content: "You are an expert at analyzing code dependencies and change impact."
                    .to_string(),
            },
            Message {
                role: MessageRole::User,
                content: format!(
                    "Target Code:\n{}\n\nDependencies:\n{}\n\nAnalyze the impact of changes:",
                    target_code, dependencies
                ),
            },
        ];

        let response = self
            .generate_chat(&messages, &GenerationConfig::default())
            .await?;
        Ok(response.content)
    }
}

// API request/response types for Responses API

#[derive(Debug, Serialize)]
struct ResponsesAPIRequest {
    model: String,
    input: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    instructions: Option<String>,
    #[serde(rename = "max_output_tokens", skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    top_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    response_format: Option<crate::llm_provider::ResponseFormat>,
    /// Tools for function calling (Responses API format)
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<ResponsesAPITool>>,
    /// Whether to allow parallel tool calls (default: true for most models)
    #[serde(skip_serializing_if = "Option::is_none")]
    parallel_tool_calls: Option<bool>,
}

/// Tool definition for Responses API (different from Chat Completions API format)
#[derive(Debug, Serialize)]
struct ResponsesAPITool {
    #[serde(rename = "type")]
    tool_type: String,
    name: String,
    description: String,
    parameters: serde_json::Value,
}

#[derive(Debug, Deserialize)]
struct ResponseAPIResponse {
    #[allow(dead_code)]
    id: String,
    #[allow(dead_code)]
    object: String,
    #[serde(default)]
    status: Option<String>,
    #[serde(default)]
    output: Vec<ResponseOutputItem>,
    #[serde(default)]
    usage: Option<ResponseUsage>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ResponseOutputItem {
    #[serde(rename = "type")]
    output_type: String,
    #[serde(default)]
    content: Vec<ResponseOutputContent>,
    /// Function call ID (for type: "function_call")
    #[serde(default)]
    call_id: Option<String>,
    /// Function name (for type: "function_call")
    #[serde(default)]
    name: Option<String>,
    /// Function arguments as JSON string (for type: "function_call")
    #[serde(default)]
    arguments: Option<String>,
}

#[derive(Debug, Serialize, Deserialize)]
struct ResponseOutputContent {
    #[serde(rename = "type")]
    content_type: String,
    #[serde(default)]
    text: String,
}

#[derive(Debug, Deserialize)]
struct ResponseUsage {
    input_tokens: usize,
    output_tokens: usize,
    total_tokens: usize,
}

// API request/response types for Chat Completions API (fallback)

#[derive(Debug, Serialize)]
struct ChatCompletionsRequest {
    model: String,
    messages: Vec<ChatMessageRequest>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_completion_tokens: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    reasoning_effort: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    top_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    response_format: Option<crate::llm_provider::ResponseFormat>,
    /// Ollama-native format field (extracted from response_format for Ollama compatibility)
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<serde_json::Value>,
    /// Tools for function calling (OpenAI Chat Completions API)
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<crate::llm_provider::ToolDefinition>>,
    /// Whether to allow parallel tool calls (default: true for most models)
    #[serde(skip_serializing_if = "Option::is_none")]
    parallel_tool_calls: Option<bool>,
}

#[derive(Debug, Serialize)]
struct ChatMessageRequest {
    role: String,
    content: String,
}

#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct ChatMessageResponse {
    role: String,
    #[serde(default)]
    content: Option<String>,
    /// Tool calls from the LLM (OpenAI native tool calling)
    #[serde(default)]
    tool_calls: Option<Vec<ApiToolCall>>,
}

/// OpenAI API tool call structure
#[derive(Debug, Deserialize)]
struct ApiToolCall {
    id: String,
    #[serde(rename = "type")]
    call_type: String,
    function: ApiFunctionCall,
}

/// OpenAI API function call structure
#[derive(Debug, Deserialize)]
struct ApiFunctionCall {
    name: String,
    arguments: String,
}

#[derive(Debug, Deserialize)]
struct ChatCompletionsResponse {
    id: String,
    #[allow(dead_code)]
    object: String,
    choices: Vec<ChatChoice>,
    usage: Option<ChatUsage>,
}

#[derive(Debug, Deserialize)]
struct ChatChoice {
    message: ChatMessageResponse,
    finish_reason: Option<String>,
}

#[derive(Debug, Deserialize)]
struct ChatUsage {
    prompt_tokens: usize,
    completion_tokens: usize,
    total_tokens: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lm_studio_config() {
        let config = OpenAICompatibleConfig::lm_studio("test-model".to_string());
        assert_eq!(config.base_url, "http://localhost:1234/v1");
        assert_eq!(config.provider_name, "lmstudio");
        assert!(config.use_responses_api); // LM Studio now defaults to Responses API
    }

    #[test]
    fn test_ollama_config() {
        let config = OpenAICompatibleConfig::ollama("llama3".to_string());
        assert_eq!(config.base_url, "http://localhost:11434/v1");
        assert_eq!(config.provider_name, "ollama");
        assert!(!config.use_responses_api); // Ollama uses native API
    }

    #[test]
    fn test_default_config() {
        let config = OpenAICompatibleConfig::default();
        assert!(config.use_responses_api); // Responses API is now the default
    }

    #[test]
    fn test_custom_config() {
        let config = OpenAICompatibleConfig::custom(
            "http://localhost:8080/v1".to_string(),
            "custom-model".to_string(),
            "custom-provider".to_string(),
        );
        assert!(config.use_responses_api); // Custom configs default to Responses API
    }
}
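For orientation, here is a minimal usage sketch, not part of the file above. It assumes the crate's llm_provider types seen in the imports (Message, MessageRole, GenerationConfig, LLMProvider) are in scope, a Tokio runtime is running, and an LM Studio server is listening on the default http://localhost:1234/v1. The model name "qwen2.5-coder" is a hypothetical placeholder.

// Sketch: build an LM Studio provider and send one chat request.
// ask_local_model and the model name are illustrative, not part of the crate's API.
async fn ask_local_model() -> anyhow::Result<()> {
    // Uses the lm_studio() constructor above; defaults to the Responses API.
    let provider = OpenAICompatibleProvider::lm_studio("qwen2.5-coder".to_string())?;

    let messages = vec![
        Message {
            role: MessageRole::System,
            content: "You are a concise code reviewer.".to_string(),
        },
        Message {
            role: MessageRole::User,
            content: "Summarize what OpenAICompatibleProvider does.".to_string(),
        },
    ];

    // GenerationConfig::default() mirrors how the CodeIntelligenceProvider impl calls generate_chat.
    match provider
        .generate_chat(&messages, &GenerationConfig::default())
        .await
    {
        Ok(response) => println!("{}", response.content),
        Err(e) => eprintln!("request failed: {e}"),
    }
    Ok(())
}

If the local server only speaks the Chat Completions protocol, the same call path works with use_responses_api set to false (or, per the error messages in the file, by setting CODEGRAPH_USE_COMPLETIONS_API=true).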


MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Jakedismo/codegraph-rust'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.