openaiService.ts
/**
 * @fileOverview: OpenAI service with provider-specific configurations and compatibility handling
 * @module: OpenAIService
 * @keyFunctions:
 *   - createChatCompletion(): Unified interface for chat completions across providers
 *   - getProviderConfig(): Get provider-specific configuration
 *   - normalizeParameters(): Normalize parameters for each provider
 * @dependencies:
 *   - openai: Official OpenAI SDK
 *   - logger: Logging utilities
 * @context: Handles differences between OpenAI-compatible providers (Qwen, Azure, etc.) with specific parameter requirements
 */

import OpenAI from 'openai';
import type { Response as ResponsesResponse } from 'openai/resources/responses/responses';
import type { ChatCompletionMessageParam } from 'openai/resources/chat/completions';
import { logger } from '../utils/logger';
import { validateDynamicSignal, Message, ValidationError } from './validation';

// Provider types
export type ProviderType =
  | 'openai'
  | 'qwen'
  | 'azure'
  | 'anthropic'
  | 'together'
  | 'openrouter'
  | 'grok'
  | 'groq'
  | 'custom';

// Provider-specific configurations
interface ProviderConfig {
  name: string;
  requiresTemperatureOne?: boolean;
  maxTokensParam?: string; // 'max_tokens' for most, could be 'maxTokens' for some
  supportsStreaming?: boolean;
  defaultModel: string;
  defaultMiniModel: string;
  defaultEmbeddingsModel?: string;
  baseUrl?: string;
}

// Provider-specific API key environment variable priority
export const PROVIDER_API_KEY_ENV: Record<ProviderType, string[]> = {
  openai: ['OPENAI_API_KEY'],
  qwen: ['QWEN_API_KEY', 'DASHSCOPE_API_KEY', 'OPENAI_API_KEY'],
  azure: ['AZURE_OPENAI_API_KEY', 'OPENAI_API_KEY'],
  anthropic: ['ANTHROPIC_API_KEY', 'OPENAI_API_KEY'],
  together: ['TOGETHER_API_KEY', 'OPENAI_API_KEY'],
  openrouter: ['OPENROUTER_API_KEY', 'OPENAI_API_KEY'],
  grok: ['XAI_API_KEY', 'GROK_API_KEY', 'OPENAI_API_KEY'],
  groq: ['GROQ_API_KEY', 'OPENAI_API_KEY'],
  custom: ['OPENAI_API_KEY'],
};

export function resolveProviderApiKey(provider: ProviderType): string | undefined {
  const envKeys = PROVIDER_API_KEY_ENV[provider] || PROVIDER_API_KEY_ENV.openai;
  for (const key of envKeys) {
    const value = process.env[key];
    if (value && value.trim().length > 0) {
      return value;
    }
  }
  return undefined;
}

// Model configurations
interface ModelConfig {
  maxTokensLimit: number;
  supportsFunctions?: boolean;
  supportsTools?: boolean;
  disallowMaxTokens?: boolean;
  enforceTemperature?: number;
}

// Service configuration
interface OpenAIServiceConfig {
  apiKey: string;
  provider: ProviderType;
  model?: string;
  miniModel?: string;
  embeddingsModel?: string;
  baseUrl?: string;
  organization?: string;
}

export class OpenAIService {
  private client: OpenAI;
  private config: OpenAIServiceConfig;
  private providerConfig: ProviderConfig;
  private modelConfig: ModelConfig;

  constructor(config: OpenAIServiceConfig) {
    this.config = config;

    // Get provider-specific configuration
    this.providerConfig = this.getProviderConfig(config.provider);

    // Set base URL if provided, or use OPENAI_BASE_URL env var, or use provider default, or use OpenAI default
    const baseUrl =
      config.baseUrl ||
      process.env.OPENAI_BASE_URL ||
      this.providerConfig.baseUrl ||
      'https://api.openai.com/v1';

    // Resolve provider by base URL if using a custom endpoint
    const resolvedProvider = this.resolveProvider(config.provider, baseUrl);
    if (resolvedProvider !== config.provider) {
      this.providerConfig = this.getProviderConfig(resolvedProvider);
    }

    // Initialize OpenAI client
    this.client = new OpenAI({
      apiKey: config.apiKey,
      baseURL: baseUrl,
      organization: config.organization,
      dangerouslyAllowBrowser: true, // For browser environments if needed
    });

    // Get model configuration
    const modelName = config.model || this.providerConfig.defaultModel;
    this.modelConfig = this.getModelConfig(modelName);

    logger.info('OpenAI Service initialized', {
      provider: this.providerConfig.name,
      model: modelName,
      miniModel: config.miniModel || this.providerConfig.defaultMiniModel,
      embeddingsModel: this.getEmbeddingsModel(),
      baseUrl,
    });
  }

  /**
   * Perform a minimal live connectivity probe to verify the API key and endpoint work.
   * Tries to list models; falls back to a tiny completion if listing isn't available.
   * Returns true on success within timeout, false on error/timeout.
   */
  async quickProbe(timeoutMs: number = 3000): Promise<boolean> {
    const timeout = new Promise<never>((_, reject) => {
      const id = setTimeout(
        () => {
          clearTimeout(id);
          reject(new Error('probe-timeout'));
        },
        Math.max(500, timeoutMs)
      );
    });

    const tryList = async () => {
      try {
        // Some OpenAI-compatible providers may not support listing models
        // If it throws 404/401, treat as failure and try completion fallback
        const res = await this.client.models.list();
        return !!res && Array.isArray((res as any).data);
      } catch {
        return false;
      }
    };

    const tryTinyCompletion = async () => {
      const probeModel = this.getModelForTask('mini');
      try {
        if (this.isReasoningModel(probeModel)) {
          await this.client.responses.create({
            model: probeModel,
            input: [
              {
                role: 'user',
                content: [{ type: 'input_text', text: 'ping' }],
              },
            ],
            reasoning: { effort: 'low' },
            max_output_tokens: 1,
          });
        } else {
          await this.client.chat.completions.create({
            model: probeModel,
            messages: [{ role: 'system', content: 'ping' }],
            max_tokens: 1,
            temperature: 1,
          } as any);
        }
        return true;
      } catch {
        return false;
      }
    };

    try {
      const ok = await Promise.race([
        (async () => (await tryList()) || (await tryTinyCompletion()))(),
        timeout,
      ] as [Promise<boolean>, Promise<never>]);
      return !!ok;
    } catch {
      return false;
    }
  }

  /**
   * Get provider-specific configuration
   */
  private getProviderConfig(provider: ProviderType): ProviderConfig {
    const configs: Record<ProviderType, ProviderConfig> = {
      openai: {
        name: 'OpenAI',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'gpt-5',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'gpt-5-mini',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL || 'text-embedding-3-large',
        maxTokensParam: 'max_tokens',
        baseUrl: 'https://api.openai.com/v1',
      },
      qwen: {
        name: 'Qwen',
        requiresTemperatureOne: true,
        maxTokensParam: 'max_tokens',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'qwen-plus',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'qwen-turbo',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL, // override via env when using Qwen-compatible embeddings
        baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
      },
      azure: {
        name: 'Azure OpenAI',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'gpt-5',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'gpt-5-mini',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL || 'text-embedding-3-large',
        baseUrl: process.env.AZURE_OPENAI_ENDPOINT,
      },
      anthropic: {
        name: 'Anthropic',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'claude-4-sonnet-latest',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'claude-4-haiku-20240307',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL, // Anthropic does not provide embeddings; rely on env if proxying
        baseUrl:
          'https://api.anthropic.com/v1',
      },
      together: {
        name: 'Together AI',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'mistralai/Mixtral-8x7B-Instruct-v0.1',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'mistralai/Mistral-7B-Instruct-v0.2',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL, // set for Together if applicable
        baseUrl: 'https://api.together.xyz/v1',
      },
      openrouter: {
        name: 'OpenRouter',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'openrouter/auto',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'openrouter/auto-mini',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL,
        baseUrl: 'https://openrouter.ai/api/v1',
      },
      grok: {
        name: 'xAI Grok',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'grok-2-latest',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'grok-2-mini',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL,
        baseUrl: 'https://api.x.ai/v1',
      },
      groq: {
        name: 'Groq',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'llama-3.1-70b-versatile',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'llama-3.1-8b-instant',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL,
        baseUrl: 'https://api.groq.com/openai/v1',
      },
      custom: {
        name: 'Custom Provider',
        supportsStreaming: true,
        defaultModel: process.env.OPENAI_BASE_MODEL || 'gpt-5',
        defaultMiniModel: process.env.OPENAI_MINI_MODEL || 'gpt-5-mini',
        defaultEmbeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL || 'text-embedding-3-large',
      },
    };

    return configs[provider];
  }

  /**
   * Resolve provider by base URL for common OpenAI-compatible services
   */
  private resolveProvider(provider: ProviderType, baseUrl?: string): ProviderType {
    if (!baseUrl) return provider;
    try {
      const host = new URL(baseUrl).host.toLowerCase();
      const shouldResolve = provider === 'custom' || provider === 'openai';
      if (!shouldResolve) {
        return provider;
      }
      if (host.includes('openrouter.ai')) return 'openrouter';
      if (host.includes('api.x.ai') || host.endsWith('.x.ai')) return 'grok';
      if (host.includes('groq.com')) return 'groq';
      if (host.includes('together.xyz')) return 'together';
      if (host.includes('aliyuncs.com') || host.includes('qwen')) return 'qwen';
      if (host.includes('anthropic.com')) return 'anthropic';
      if (host.includes('azure')) return 'azure';
      if (host.includes('openai.com')) return 'openai';
      return provider;
    } catch {
      return provider;
    }
  }

  /**
   * Get model-specific configuration
   */
  private getModelConfig(model: string): ModelConfig {
    // This would typically be more comprehensive
    const modelConfigs: Record<string, ModelConfig> = {
      'gpt-5': {
        maxTokensLimit: 16384,
        supportsFunctions: true,
        supportsTools: true,
        disallowMaxTokens: true,
        enforceTemperature: 1,
      },
      'gpt-5-mini': {
        maxTokensLimit: 16384,
        supportsFunctions: true,
        supportsTools: true,
        disallowMaxTokens: true,
        enforceTemperature: 1,
      },
      'gpt-5-nano': {
        maxTokensLimit: 16384,
        supportsFunctions: true,
        supportsTools: true,
        disallowMaxTokens: true,
        enforceTemperature: 1,
      },
      'gpt-4.1': {
        maxTokensLimit: 128000,
        supportsFunctions: true,
        supportsTools: true,
      },
      'gpt-4.1-mini': {
        maxTokensLimit: 128000,
        supportsFunctions: true,
        supportsTools: true,
      },
      'gpt-4': { maxTokensLimit: 8192, supportsFunctions: true, supportsTools: true },
      'gpt-4o': { maxTokensLimit: 16384, supportsFunctions: true, supportsTools: true },
      'gpt-4o-mini': { maxTokensLimit: 16384, supportsFunctions: true, supportsTools: true },
      'gpt-3.5-turbo': {
        maxTokensLimit: 4096,
        supportsFunctions: true,
        supportsTools: true,
      },
      'qwen-plus': { maxTokensLimit: 32768, supportsFunctions: true, supportsTools: true },
      'qwen-turbo': { maxTokensLimit: 16384, supportsFunctions: true, supportsTools: true },
      'qwen-max': { maxTokensLimit: 32768, supportsFunctions: true, supportsTools: true },
      'claude-3-5-sonnet-latest': {
        maxTokensLimit: 200000,
        supportsFunctions: false,
        supportsTools: true,
      },
      'claude-3-haiku-20240307': {
        maxTokensLimit: 200000,
        supportsFunctions: false,
        supportsTools: true,
      },
      'mistralai/Mixtral-8x7B-Instruct-v0.1': {
        maxTokensLimit: 32768,
        supportsFunctions: true,
        supportsTools: true,
      },
      'mistralai/Mistral-7B-Instruct-v0.2': {
        maxTokensLimit: 32768,
        supportsFunctions: true,
        supportsTools: true,
      },
      'openrouter/auto': {
        maxTokensLimit: 128000,
        supportsFunctions: true,
        supportsTools: true,
      },
      'openrouter/auto-mini': {
        maxTokensLimit: 64000,
        supportsFunctions: true,
        supportsTools: true,
      },
      'grok-2-latest': {
        maxTokensLimit: 32768,
        supportsFunctions: true,
        supportsTools: true,
      },
      'grok-2-mini': {
        maxTokensLimit: 16384,
        supportsFunctions: true,
        supportsTools: true,
      },
      'llama-3.1-70b-versatile': {
        maxTokensLimit: 8192,
        supportsFunctions: true,
        supportsTools: true,
      },
      'llama-3.1-8b-instant': {
        maxTokensLimit: 8192,
        supportsFunctions: true,
        supportsTools: true,
      },
    };

    // Return specific config or default
    return (
      modelConfigs[model] || { maxTokensLimit: 4096, supportsFunctions: true, supportsTools: true }
    );
  }

  /**
   * Detect whether a model should use the Responses API (reasoning models like GPT-5)
   */
  private isReasoningModel(model?: string): boolean {
    if (!model) return false;
    const normalized = model.toLowerCase();
    return normalized.startsWith('gpt-5');
  }

  /**
   * Normalize chat message content into Responses API content blocks
   */
  private normalizeResponseContent(
    content: ChatCompletionMessageParam['content']
  ): Array<Record<string, any>> {
    if (content === null || content === undefined) {
      return [{ type: 'input_text', text: '' }];
    }
    if (typeof content === 'string') {
      return [{ type: 'input_text', text: content }];
    }
    if (Array.isArray(content)) {
      const parts = content
        .map(part => {
          if (typeof part === 'string') {
            return { type: 'input_text', text: part };
          }
          if (part && typeof part === 'object') {
            if ('type' in part) {
              if ((part as any).type === 'input_text' && 'text' in part) {
                return { type: 'input_text', text: (part as any).text ?? '' };
              }
              if ((part as any).type === 'text' && 'text' in part) {
                return { type: 'input_text', text: (part as any).text ?? '' };
              }
              if (
                ['input_text', 'input_image', 'input_file'].includes((part as any).type as string)
              ) {
                return { ...part };
              }
              return { type: 'input_text', text: JSON.stringify(part) };
            }
            if ('text' in part) {
              return { type: 'input_text', text: (part as any).text ?? '' };
            }
            return { type: 'input_text', text: JSON.stringify(part) };
          }
          return null;
        })
        .filter(Boolean) as Array<Record<string, any>>;
      return parts.length ? parts : [{ type: 'input_text', text: '' }];
    }
    if (content && typeof content === 'object') {
      const block = content as Record<string, any>;
      if ('type' in block) {
        if (block.type === 'input_text') {
          return [{ type: 'input_text', text: block.text ?? '' }];
        }
        if (block.type === 'text') {
          return [{ type: 'input_text', text: block.text ?? '' }];
        }
        if (['input_text', 'input_image', 'input_file'].includes(block.type)) {
          return [{ ...block }];
        }
        return [{ type: 'input_text', text: JSON.stringify(block) }];
      }
      if ('text' in block) {
        return [{ type: 'input_text', text: block.text ??
          '' }];
      }
      return [{ type: 'input_text', text: JSON.stringify(block) }];
    }
    return [{ type: 'input_text', text: String(content) }];
  }

  /**
   * Convert Chat Completions message array into Responses API input format
   */
  private transformMessagesToResponseInput(
    messages: ChatCompletionMessageParam[]
  ): Array<Record<string, any>> {
    return messages.map(message => {
      const converted: Record<string, any> = {
        role: (message as any).role,
        content: this.normalizeResponseContent(message.content ?? ''),
      };
      if ((message as any).name) {
        converted.name = (message as any).name;
      }
      if ((message as any).tool_call_id) {
        converted.tool_call_id = (message as any).tool_call_id;
      }
      if ((message as any).metadata) {
        converted.metadata = (message as any).metadata;
      }
      if (!converted.content || converted.content.length === 0) {
        converted.content = [{ type: 'input_text', text: '' }];
      }
      return converted;
    });
  }

  /**
   * Derive a reasoning effort value, preferring explicit settings and falling back to temperature
   */
  private resolveReasoningEffort(
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
  ): 'low' | 'medium' | 'high' {
    const explicit = (params as any).reasoning;
    if (explicit && typeof explicit === 'object' && typeof explicit.effort === 'string') {
      return explicit.effort as 'low' | 'medium' | 'high';
    }
    const temperature = typeof params.temperature === 'number' ? params.temperature : null;
    if (temperature !== null) {
      if (temperature <= 0.2) return 'low';
      if (temperature >= 0.8) return 'high';
    }
    return 'medium';
  }

  /**
   * Extract and cap max_output_tokens for reasoning models
   */
  private extractMaxOutputTokens(
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
    modelConfig: ModelConfig
  ): number | undefined {
    const candidateValues = [
      (params as any).max_output_tokens,
      (params as any).max_completion_tokens,
      params.max_tokens,
    ];
    const firstDefined = candidateValues.find(
      value => typeof value === 'number' && Number.isFinite(value)
    ) as number | undefined;
    if (firstDefined === undefined) {
      return undefined;
    }
    const capped = Math.min(firstDefined, modelConfig.maxTokensLimit);
    if (capped !== firstDefined) {
      logger.warn(
        `max_output_tokens (${firstDefined}) exceeds model limit (${modelConfig.maxTokensLimit}), capping value`
      );
    }
    return capped;
  }

  /**
   * Convert a Responses API result into a Chat Completions-compatible payload
   */
  private adaptResponseToChatCompletion(
    response: ResponsesResponse
  ): OpenAI.Chat.Completions.ChatCompletion {
    const textSegments: string[] = [];
    const outputItems = Array.isArray((response as any).output)
      ? ((response as any).output as Array<Record<string, any>>)
      : [];

    for (const item of outputItems) {
      if (!item || typeof item !== 'object') continue;
      if (item.type !== 'message') continue;
      const contentBlocks = Array.isArray(item.content) ? item.content : [];
      for (const block of contentBlocks) {
        if (block && typeof block === 'object' && 'text' in block) {
          const textValue = (block as any).text;
          if (typeof textValue === 'string') {
            textSegments.push(textValue);
          }
        } else if (typeof block === 'string') {
          textSegments.push(block);
        }
      }
    }

    const aggregatedText =
      typeof (response as any).output_text === 'string' ?
        (response as any).output_text : textSegments.join('');

    const responseStatus = (response as any).status;
    const incompleteReason = (response as any).incomplete_details?.reason;
    const originalFinishReason =
      outputItems.find(item => item?.stop_reason)?.stop_reason ||
      outputItems.find(item => item?.finish_reason)?.finish_reason ||
      null;

    let finishReason = originalFinishReason || 'stop';
    let truncated = false;

    if (incompleteReason === 'max_output_tokens') {
      truncated = true;
      finishReason = 'length';
    } else if (incompleteReason === 'content_filter') {
      finishReason = 'content_filter';
    } else if (finishReason === 'max_output_tokens') {
      truncated = true;
      finishReason = 'length';
    } else if (finishReason === 'length') {
      truncated = true;
    }

    if (responseStatus === 'incomplete' && !truncated && finishReason === 'stop') {
      truncated = true;
      finishReason = 'length';
    }

    if (truncated) {
      logger.warn('Reasoning response truncated', {
        model: response.model,
        status: responseStatus,
        incompleteReason,
        originalFinishReason,
      });
    } else if (incompleteReason === 'content_filter') {
      logger.warn('Reasoning response blocked by content filter', {
        model: response.model,
        status: responseStatus,
      });
    }

    const promptTokens =
      (response as any).usage?.prompt_tokens ?? (response as any).usage?.input_tokens ?? 0;
    const completionTokens =
      (response as any).usage?.completion_tokens ?? (response as any).usage?.output_tokens ?? 0;
    const totalTokens =
      (response as any).usage?.total_tokens ?? (promptTokens || 0) + (completionTokens || 0);

    const completion: OpenAI.Chat.Completions.ChatCompletion = {
      id: response.id,
      object: 'chat.completion',
      created: (response as any).created ?? Math.floor(Date.now() / 1000),
      model: response.model,
      choices: [
        {
          index: 0,
          finish_reason: finishReason || 'stop',
          logprobs: null,
          message: {
            role: 'assistant',
            content: aggregatedText,
            refusal: null,
          },
        },
      ],
      usage: (response as any).usage
        ? {
            prompt_tokens: promptTokens,
            completion_tokens: completionTokens,
            total_tokens: totalTokens,
          }
        : undefined,
    };

    (completion as any).response_metadata = {
      status: responseStatus ?? 'completed',
      incomplete_reason: incompleteReason ??
        null,
      original_finish_reason: originalFinishReason,
      truncated,
      cache_status:
        ((response as any).metadata &&
          typeof (response as any).metadata === 'object' &&
          'response_cache' in (response as any).metadata &&
          (response as any).metadata?.response_cache) ||
        (response as any).response_cache ||
        null,
    };

    const providerMetadata = (response as any).metadata;
    if (providerMetadata && typeof providerMetadata === 'object') {
      (completion as any).response_metadata.provider_metadata = providerMetadata;
    }

    return completion;
  }

  /**
   * Handle GPT-5 style reasoning models via the Responses API while returning a chat-like payload
   */
  private async createReasoningCompletion(
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
  ): Promise<OpenAI.Chat.Completions.ChatCompletion> {
    const targetModel = params.model || this.getModelForTask('base');
    const runtimeModelConfig = this.getModelConfig(targetModel);
    let maxOutputTokens = this.extractMaxOutputTokens(params, runtimeModelConfig);
    const initialMaxOutputTokens = maxOutputTokens;

    if (typeof params.temperature === 'number') {
      logger.info('Reasoning models ignore temperature; using reasoning.effort instead', {
        model: targetModel,
        providedTemperature: params.temperature,
      });
    }

    const initialReasoningEffort = this.resolveReasoningEffort(params);
    let reasoningEffort: 'low' | 'medium' | 'high' = initialReasoningEffort;

    const baseMessages = this.transformMessagesToResponseInput(
      (params.messages as ChatCompletionMessageParam[]) || []
    );
    const baseMessagesJson = JSON.stringify(baseMessages);
    const baseToolsJson = (params as any).tools ? JSON.stringify((params as any).tools) : null;
    const baseToolChoiceJson = (params as any).tool_choice
      ? JSON.stringify((params as any).tool_choice)
      : null;
    const baseReasoning =
      (params as any).reasoning && typeof (params as any).reasoning === 'object' ?
        { ...(params as any).reasoning } : {};

    const passthroughKeys: Array<keyof typeof params> = ['metadata', 'response_format', 'user'];
    const passthroughJson: Record<string, string> = {};
    for (const key of passthroughKeys) {
      const value = (params as any)[key];
      if (value !== undefined) {
        passthroughJson[key] = JSON.stringify(value);
      }
    }

    const buildRequest = (): Record<string, any> => {
      const request: Record<string, any> = {
        model: targetModel,
        input: JSON.parse(baseMessagesJson),
        reasoning: {
          ...baseReasoning,
          effort: reasoningEffort,
        },
      };
      if (maxOutputTokens !== undefined) {
        request.max_output_tokens = maxOutputTokens;
      }
      for (const key of passthroughKeys) {
        if (passthroughJson[key]) {
          request[key] = JSON.parse(passthroughJson[key]);
        }
      }
      if (baseToolsJson) {
        request.tools = JSON.parse(baseToolsJson);
      }
      if (baseToolChoiceJson) {
        request.tool_choice = JSON.parse(baseToolChoiceJson);
      }
      return request;
    };

    const maxAttempts = 3;
    let attempt = 0;
    let response: ResponsesResponse | null = null;
    let lastStatus: string | undefined;
    let lastIncompleteReason: string | undefined;
    let lastRequestMaxOutputTokens: number | undefined;
    let lastRequestReasoningEffort: 'low' | 'medium' | 'high' = reasoningEffort;
    const adjustments = {
      increasedMaxOutputTokens: false,
      loweredReasoningEffort: false,
    };
    const fallbackBaseMax = Math.min(4096, runtimeModelConfig.maxTokensLimit);

    try {
      while (attempt < maxAttempts) {
        attempt += 1;
        const request = buildRequest();
        lastRequestMaxOutputTokens = request.max_output_tokens;
        lastRequestReasoningEffort = request.reasoning?.effort as 'low' | 'medium' | 'high';

        logger.info('Creating reasoning response via Responses API', {
          model: targetModel,
          attempt,
          maxOutputTokens: request.max_output_tokens,
          reasoningEffort: request.reasoning?.effort,
        });

        response = await this.client.responses.create(request);
        lastStatus = (response as any).status;
        lastIncompleteReason = (response as any).incomplete_details?.reason;

        const truncatedByTokens =
          lastStatus === 'incomplete' && lastIncompleteReason === 'max_output_tokens';

        if (truncatedByTokens) {
          logger.warn('Reasoning response truncated by max_output_tokens', {
            model: targetModel,
            attempt,
            requestedMaxOutputTokens: request.max_output_tokens,
            reasoningEffort: request.reasoning?.effort,
          });

          if (!adjustments.increasedMaxOutputTokens) {
            const current = maxOutputTokens ?? 0;
            const candidateBase = current === 0 ? fallbackBaseMax : current;
            const boosted = Math.min(
              runtimeModelConfig.maxTokensLimit,
              Math.max(candidateBase + 512, Math.ceil(candidateBase * 1.5))
            );
            if (boosted > candidateBase) {
              maxOutputTokens = boosted;
              adjustments.increasedMaxOutputTokens = true;
              logger.info('Retrying reasoning response with increased max_output_tokens', {
                model: targetModel,
                newMaxOutputTokens: maxOutputTokens,
              });
              continue;
            }
          }

          if (!adjustments.loweredReasoningEffort && reasoningEffort !== 'low') {
            const nextEffort = reasoningEffort === 'high' ?
              'medium' : 'low';
            reasoningEffort = nextEffort;
            adjustments.loweredReasoningEffort = true;
            logger.info('Retrying reasoning response with reduced reasoning effort', {
              model: targetModel,
              reasoningEffort,
            });
            continue;
          }
        }

        // Either successful or cannot adjust further
        break;
      }

      if (!response) {
        throw new Error('Failed to obtain response from reasoning model');
      }

      logger.info('Reasoning response received', {
        model: targetModel,
        status: (response as any).status,
        usage: (response as any).usage,
        attempts: attempt,
      });

      const completion = this.adaptResponseToChatCompletion(response);
      const metadata = ((completion as any).response_metadata ?? {}) as Record<string, any>;
      metadata.attempts = attempt;
      metadata.initial_max_output_tokens = initialMaxOutputTokens ?? null;
      metadata.used_max_output_tokens = lastRequestMaxOutputTokens ?? null;
      metadata.initial_reasoning_effort = initialReasoningEffort;
      metadata.reasoning_effort = lastRequestReasoningEffort;
      metadata.adjustments = {
        increased_max_output_tokens: adjustments.increasedMaxOutputTokens,
        lowered_reasoning_effort: adjustments.loweredReasoningEffort,
      };

      if (metadata.cache_status) {
        logger.info('Reasoning response cache metadata', {
          model: targetModel,
          cache: metadata.cache_status,
        });
      }

      const truncatedResult =
        metadata.truncated === true && lastIncompleteReason === 'max_output_tokens';
      if (truncatedResult) {
        metadata.need_more_budget = true;
        metadata.partial_response = completion.choices?.[0]?.message?.content ?? '';
        if (
          lastRequestMaxOutputTokens &&
          lastRequestMaxOutputTokens < runtimeModelConfig.maxTokensLimit
        ) {
          const suggested = Math.min(
            runtimeModelConfig.maxTokensLimit,
            Math.max(lastRequestMaxOutputTokens + 512, Math.ceil(lastRequestMaxOutputTokens * 1.5))
          );
          metadata.suggested_max_output_tokens = suggested;
        }
        if (lastRequestReasoningEffort !== 'low') {
          metadata.suggested_reasoning_effort =
            lastRequestReasoningEffort === 'high' ? 'medium' : 'low';
        }
        logger.warn('Reasoning response requires additional token budget', {
          model: targetModel,
          status: lastStatus,
          incompleteReason: lastIncompleteReason,
          attempts: attempt,
        });
      }

      (completion as any).response_metadata = metadata;
      return completion;
    } catch (error) {
      logger.error('Reasoning response failed', {
        model: targetModel,
        error: error instanceof Error ?
          error.message : String(error),
      });
      throw error;
    }
  }

  /**
   * Normalize parameters for the specific provider
   */
  private normalizeParameters(
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
  ): OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming {
    const normalizedParams = { ...params };

    // Handle temperature requirement for Qwen
    if (this.providerConfig.requiresTemperatureOne && normalizedParams.temperature !== 1) {
      logger.warn(
        `Provider ${this.providerConfig.name} requires temperature=1, overriding provided value`,
        {
          originalTemperature: normalizedParams.temperature,
          forcedTemperature: 1,
        }
      );
      normalizedParams.temperature = 1;
    }

    // Map max_completion_tokens (modern) to max_tokens for legacy chat completions
    if (
      (normalizedParams as any).max_completion_tokens !== undefined &&
      (normalizedParams as any).max_tokens === undefined
    ) {
      (normalizedParams as any).max_tokens = (normalizedParams as any).max_completion_tokens;
      delete (normalizedParams as any).max_completion_tokens;
    }

    // Handle provider-specific max tokens parameter name (future-proofing)
    if (this.providerConfig.maxTokensParam && this.providerConfig.maxTokensParam !== 'max_tokens') {
      // Most providers we target accept 'max_tokens'; adapter left as-is for compatibility.
    }

    // Model-specific enforced temperature
    if (
      this.modelConfig.enforceTemperature !== undefined &&
      normalizedParams.temperature !== this.modelConfig.enforceTemperature
    ) {
      logger.warn(
        `Model ${normalizedParams.model} enforces temperature=${this.modelConfig.enforceTemperature}, overriding provided value`,
        {
          originalTemperature: normalizedParams.temperature,
          forcedTemperature: this.modelConfig.enforceTemperature,
        }
      );
      normalizedParams.temperature = this.modelConfig.enforceTemperature;
    }

    // Model-specific disallow max_tokens
    if (this.modelConfig.disallowMaxTokens && 'max_tokens' in normalizedParams) {
      logger.warn(`Model ${normalizedParams.model} does not allow max_tokens, removing parameter`);
      delete (normalizedParams as any).max_tokens;
    }

    // Ensure max_tokens doesn't exceed model limit
    if (
      normalizedParams.max_tokens &&
      normalizedParams.max_tokens > this.modelConfig.maxTokensLimit
    ) {
      logger.warn(
        `max_tokens (${normalizedParams.max_tokens}) exceeds model limit (${this.modelConfig.maxTokensLimit}), capping value`
      );
      normalizedParams.max_tokens = this.modelConfig.maxTokensLimit;
    }

    // Handle streaming support
    if (normalizedParams.stream && !this.providerConfig.supportsStreaming) {
      logger.warn(
        `Provider ${this.providerConfig.name} doesn't support streaming, disabling stream mode`
      );
      normalizedParams.stream = false;
    }

    return normalizedParams;
  }

  /**
   * Create a chat completion with provider-specific handling
   */
  async createChatCompletion(
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
  ): Promise<OpenAI.Chat.Completions.ChatCompletion> {
    const targetModel = params.model || this.getModelForTask('base');
    const paramsWithModel = {
      ...params,
      model: targetModel,
    } as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming;
    const wantsReasoningModel = this.isReasoningModel(targetModel);

    try {
      // Validate context before making API call
      try {
        validateDynamicSignal(paramsWithModel.messages as Message[]);
        logger.debug('✅ Context validation passed', {
          messageCount: paramsWithModel.messages.length,
          provider: this.providerConfig.name,
        });
      } catch (validationError) {
        if (validationError instanceof ValidationError) {
          logger.warn('🚫 Context validation failed, blocking API call', {
            error: validationError.message,
            code:
              validationError.structured.code,
            context: validationError.structured.context,
            suggestion: validationError.structured.suggestion,
          });
          throw new Error(
            `INSUFFICIENT_CONTEXT: ${validationError.message}. ${validationError.structured.suggestion || ''}`
          );
        }
        throw validationError;
      }

      if (wantsReasoningModel) {
        logger.info('Routing reasoning-capable model through Responses API', {
          provider: this.providerConfig.name,
          model: targetModel,
        });
        return await this.createReasoningCompletion(paramsWithModel);
      }

      // Normalize parameters for the provider
      const normalizedParams = this.normalizeParameters(paramsWithModel);

      logger.info('Creating chat completion', {
        provider: this.providerConfig.name,
        model: normalizedParams.model,
        messagesCount: normalizedParams.messages.length,
        maxTokens: normalizedParams.max_tokens,
        temperature: normalizedParams.temperature,
      });

      // Make the API call
      const response = await this.client.chat.completions.create(normalizedParams);

      logger.info('Chat completion successful', {
        provider: this.providerConfig.name,
        model: normalizedParams.model,
        usage: 'usage' in response ? response.usage : undefined,
      });

      const truncatedChoices = response.choices.filter(choice => choice.finish_reason === 'length');
      if (truncatedChoices.length > 0) {
        const usedMaxTokens = (normalizedParams as any).max_tokens ?? null;
        const modelConfig = this.getModelConfig(normalizedParams.model);
        const suggestedMaxTokens =
          usedMaxTokens && modelConfig?.maxTokensLimit
            ? Math.min(
                modelConfig.maxTokensLimit,
                Math.max(usedMaxTokens + 512, Math.ceil(usedMaxTokens * 1.5))
              )
            : null;

        logger.warn('Chat completion truncated by max_tokens', {
          provider: this.providerConfig.name,
          model: normalizedParams.model,
          usedMaxTokens,
          suggestedMaxTokens,
        });

        const metadata = ((response as any).response_metadata ?? {}) as Record<string, any>;
        metadata.status = metadata.status ?? 'completed';
        metadata.truncated = true;
        metadata.incomplete_reason = 'max_tokens';
        metadata.original_finish_reasons = response.choices.map(choice => choice.finish_reason);
        metadata.need_more_budget = true;
        metadata.partial_response = response.choices[0]?.message?.content ?? '';
        metadata.used_max_tokens = usedMaxTokens;
        metadata.suggested_max_tokens = suggestedMaxTokens;
        (response as any).response_metadata = metadata;
      }

      return response;
    } catch (error) {
      logger.error('Chat completion failed', {
        provider: this.providerConfig.name,
        model: targetModel,
        error: error instanceof Error ?
          error.message : String(error),
      });
      throw error;
    }
  }

  /**
   * Create a streaming chat completion
   */
  async createChatCompletionStream(
    params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
  ): Promise<AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk>> {
    const targetModel = params.model || this.getModelForTask('base');
    const paramsWithModel = {
      ...params,
      model: targetModel,
      stream: true,
    } as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming;
    const wantsReasoningModel = this.isReasoningModel(targetModel);

    try {
      if (wantsReasoningModel) {
        logger.info('Routing reasoning-capable model through Responses API (stream fallback)', {
          provider: this.providerConfig.name,
          model: targetModel,
        });
      } else {
        logger.info('Creating streaming chat completion', {
          provider: this.providerConfig.name,
          model: paramsWithModel.model,
          messagesCount: paramsWithModel.messages.length,
        });
      }

      // Validate context before making streaming API call
      try {
        validateDynamicSignal(paramsWithModel.messages as Message[]);
        logger.debug('✅ Streaming context validation passed', {
          messageCount: paramsWithModel.messages.length,
          provider: this.providerConfig.name,
        });
      } catch (validationError) {
        if (validationError instanceof ValidationError) {
          logger.warn('🚫 Streaming context validation failed, blocking API call', {
            error: validationError.message,
            code: validationError.structured.code,
            context: validationError.structured.context,
            suggestion: validationError.structured.suggestion,
          });
          throw new Error(
            `INSUFFICIENT_CONTEXT: ${validationError.message}. ${validationError.structured.suggestion || ''}`
          );
        }
        throw validationError;
      }

      if (wantsReasoningModel) {
        const nonStreamingParams = {
          ...paramsWithModel,
          stream: false,
        } as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming;
        const completion = await this.createReasoningCompletion(nonStreamingParams);

        async function* singleChunkStream(): AsyncIterable<OpenAI.Chat.Completions.ChatCompletionChunk> {
          for (const choice of completion.choices) {
            yield {
              id: `${completion.id}-chunk-${choice.index}`,
              object: 'chat.completion.chunk',
              created: completion.created,
              model: completion.model,
              choices: [
                {
                  index: choice.index,
                  delta: choice.message,
                  finish_reason: choice.finish_reason,
                },
              ],
            } as OpenAI.Chat.Completions.ChatCompletionChunk;
          }
        }

        return singleChunkStream();
      }

      // Make the streaming API call for standard chat models
      return await this.client.chat.completions.create(paramsWithModel);
    } catch (error) {
      logger.error('Streaming chat completion failed', {
        provider: this.providerConfig.name,
        model: targetModel,
        error: error instanceof Error ?
          error.message : String(error),
      });
      throw error;
    }
  }

  /**
   * Get current provider information
   */
  getProviderInfo(): {
    provider: string;
    model: string;
    miniModel: string;
    supportsStreaming: boolean;
  } {
    return {
      provider: this.providerConfig.name,
      model: this.config.model || this.providerConfig.defaultModel,
      miniModel: this.config.miniModel || this.providerConfig.defaultMiniModel,
      supportsStreaming: this.providerConfig.supportsStreaming || false,
    };
  }

  /**
   * Get the appropriate model based on complexity needs
   */
  getModelForTask(task: 'base' | 'mini' = 'base'): string {
    if (task === 'base') {
      return this.config.model || this.providerConfig.defaultModel;
    } else {
      return this.config.miniModel || this.providerConfig.defaultMiniModel;
    }
  }

  /**
   * Get embeddings model name
   */
  getEmbeddingsModel(): string {
    return (
      this.config.embeddingsModel ||
      process.env.OPENAI_EMBEDDINGS_MODEL ||
      this.providerConfig.defaultEmbeddingsModel ||
      'text-embedding-3-large'
    );
  }

  /**
   * Validate if the current configuration is ready for use
   */
  isReady(): boolean {
    return !!this.client && !!this.config.apiKey;
  }

  /**
   * Get the underlying OpenAI client
   */
  getClient(): OpenAI | null {
    return this.isReady() ? this.client : null;
  }

  /**
   * Dispose of resources
   */
  async dispose(): Promise<void> {
    // Close any open connections
    (this.client as any) = undefined;
    logger.info('OpenAI Service disposed');
  }
}

// Factory function for easier instantiation
export function createOpenAIService(config: OpenAIServiceConfig): OpenAIService {
  return new OpenAIService(config);
}

// Lazy-loaded singleton instance
let _openaiServiceInstance: OpenAIService | null = null;

/**
 * Get the OpenAI service instance, creating it lazily if needed
 */
export function getOpenAIService(): OpenAIService {
  if (!_openaiServiceInstance) {
    _openaiServiceInstance = new OpenAIService({
      apiKey: process.env.OPENAI_API_KEY || '',
      provider: (process.env.OPENAI_PROVIDER as ProviderType) || 'openai',
      model: process.env.OPENAI_BASE_MODEL,
      miniModel: process.env.OPENAI_MINI_MODEL,
      embeddingsModel: process.env.OPENAI_EMBEDDINGS_MODEL,
      baseUrl: process.env.OPENAI_BASE_URL,
    });
  }
  return _openaiServiceInstance;
}

/**
 * Reset the OpenAI service instance (useful for testing)
 */
export function resetOpenAIService(): void {
  _openaiServiceInstance = null;
}

// Export the service getter for backward compatibility
export const openaiService = {
  get instance() {
    return getOpenAIService();
  },
  // Delegate all methods to the lazy-loaded instance
  createChatCompletion: (...args: Parameters<OpenAIService['createChatCompletion']>) =>
    getOpenAIService().createChatCompletion(...args),
  createChatCompletionStream: (...args: Parameters<OpenAIService['createChatCompletionStream']>) =>
    getOpenAIService().createChatCompletionStream(...args),
  getProviderInfo: () => getOpenAIService().getProviderInfo(),
  getModelForTask: (...args: Parameters<OpenAIService['getModelForTask']>) =>
    getOpenAIService().getModelForTask(...args),
  getEmbeddingsModel: () => getOpenAIService().getEmbeddingsModel(),
  isReady: () => getOpenAIService().isReady(),
  getClient: () => getOpenAIService().getClient(),
  quickProbe: (...args: Parameters<OpenAIService['quickProbe']>) =>
    getOpenAIService().quickProbe(...args),
  dispose: () => getOpenAIService().dispose(),
};

export default OpenAIService;
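
Below is a minimal usage sketch, not part of the listing above. It assumes the module is importable from './openaiService' relative to the caller and that an API key is reachable through the env-var priority list (e.g. OPENAI_API_KEY); note that validateDynamicSignal may reject very small message arrays, so the final call is illustrative rather than guaranteed to pass validation.

// usage-sketch.ts (hypothetical file, not part of openaiService.ts)
import { createOpenAIService, resolveProviderApiKey } from './openaiService';

async function demo(): Promise<void> {
  // Build a service for the default OpenAI provider; resolveProviderApiKey
  // walks the PROVIDER_API_KEY_ENV fallback list defined in the service.
  const service = createOpenAIService({
    apiKey: resolveProviderApiKey('openai') ?? '',
    provider: 'openai',
  });

  // Cheap connectivity check (models.list, then a tiny completion fallback).
  if (!(await service.quickProbe())) {
    throw new Error('Provider endpoint not reachable');
  }

  // Unified entry point: GPT-5-style reasoning models are routed to the
  // Responses API internally; other models use Chat Completions.
  const completion = await service.createChatCompletion({
    model: service.getModelForTask('mini'),
    messages: [{ role: 'user', content: 'Summarize what this service does.' }],
  });

  console.log(completion.choices[0]?.message?.content);
}

demo().catch(err => console.error(err));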
