vercel-provider.ts (26.6 kB)
/**
 * Vercel AI Provider Implementation
 *
 * Implements AIProvider interface using Vercel AI SDK.
 * Supports OpenAI, Google Gemini, Anthropic, xAI, Moonshot Kimi, Amazon Bedrock,
 * OpenRouter, and custom OpenAI-compatible endpoints through a unified interface.
 */

import { generateText, jsonSchema, tool, stepCountIs } from 'ai';
import { createOpenAI } from '@ai-sdk/openai';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createAnthropic } from '@ai-sdk/anthropic';
import { createXai } from '@ai-sdk/xai';
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { createOpenRouter } from '@openrouter/ai-sdk-provider';
import {
  AIProvider,
  AIResponse,
  AIProviderConfig,
  ToolLoopConfig,
  AgenticResult,
} from '../ai-provider.interface';
import {
  generateDebugId,
  debugLogInteraction,
  debugLogPromptOnly,
  createAndLogAgenticResult,
  logEvaluationDataset,
  EvaluationMetrics,
} from './provider-debug-utils';
import { CURRENT_MODELS } from '../model-config';
import { withAITracing } from '../tracing/ai-tracing';

type SupportedProvider = keyof typeof CURRENT_MODELS;

// Get all supported provider keys dynamically from CURRENT_MODELS
const SUPPORTED_PROVIDERS = Object.keys(CURRENT_MODELS) as SupportedProvider[];

export class VercelProvider implements AIProvider {
  private providerType: SupportedProvider;
  private model: string;
  private apiKey: string;
  private debugMode: boolean;
  private baseURL?: string; // PRD #194: Custom endpoint URL for OpenAI-compatible APIs
  private modelInstance: any; // Vercel AI SDK model instance

  constructor(config: AIProviderConfig) {
    this.apiKey = config.apiKey;
    this.providerType = config.provider as SupportedProvider;
    this.model = config.model || this.getDefaultModel();
    this.debugMode = config.debugMode ?? process.env.DEBUG_DOT_AI === 'true';
    this.baseURL = config.baseURL; // PRD #194: Store custom endpoint URL

    this.validateConfiguration();
    this.initializeModel();
  }

  private validateConfiguration(): void {
    if (!this.apiKey) {
      throw new Error(`API key is required for ${this.providerType} provider`);
    }

    if (!SUPPORTED_PROVIDERS.includes(this.providerType)) {
      throw new Error(
        `Unsupported provider: ${this.providerType}. Must be one of: ${SUPPORTED_PROVIDERS.join(', ')}`
      );
    }
  }

  private initializeModel(): void {
    try {
      let provider: any;

      switch (this.providerType) {
        case 'openai':
          provider = createOpenAI({
            apiKey: this.apiKey,
          });
          break;

        case 'google':
          provider = createGoogleGenerativeAI({ apiKey: this.apiKey });
          break;

        case 'anthropic':
        case 'anthropic_opus':
        case 'anthropic_haiku':
          provider = createAnthropic({
            apiKey: this.apiKey,
            // Enable 1M token context window for Claude Sonnet 4 (5x increase from 200K)
            // Required for models like claude-sonnet-4-5-20250929
            headers: {
              'anthropic-beta': 'context-1m-2025-08-07',
            },
          });
          break;

        case 'xai':
          provider = createXai({ apiKey: this.apiKey });
          break;

        case 'kimi':
        case 'kimi_thinking':
          // PRD #237: Moonshot AI Kimi K2 - uses OpenAI-compatible API
          // Use .chat() explicitly to use /chat/completions instead of /responses
          // Use global endpoint (api.moonshot.ai) - China endpoint (api.moonshot.cn) requires China-specific API keys
          provider = createOpenAI({
            apiKey: this.apiKey,
            baseURL: 'https://api.moonshot.ai/v1',
          });
          this.modelInstance = provider.chat(this.model);
          return; // Early return - model instance already set

        case 'amazon_bedrock':
          // PRD #175: Amazon Bedrock provider
          // AWS SDK automatically uses credential chain:
          // 1. Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
          // 2. ~/.aws/credentials file
          // 3. IAM roles (EC2 instance profiles, ECS roles, EKS service accounts)
          provider = createAmazonBedrock({
            region: process.env.AWS_REGION || 'us-east-1',
          });
          break;

        case 'openrouter':
          // PRD #194: OpenRouter custom endpoint support
          // Use dedicated OpenRouter provider for proper format conversion
          provider = createOpenRouter({
            apiKey: this.apiKey,
          });
          break;

        case 'custom':
          // PRD #194: Generic custom endpoint support for OpenAI-compatible APIs
          // For non-OpenRouter custom endpoints (Ollama, vLLM, LiteLLM, etc.)
          if (!this.baseURL) {
            throw new Error(
              'Custom endpoint requires CUSTOM_LLM_BASE_URL to be set'
            );
          }
          provider = createOpenAI({
            apiKey: this.apiKey,
            baseURL: this.baseURL,
          });
          // Use .chat() explicitly for custom endpoints to use /chat/completions instead of /responses
          this.modelInstance = provider.chat(this.model);
          return; // Early return - model instance already set

        default:
          throw new Error(
            `Cannot initialize model for provider: ${this.providerType}`
          );
      }

      this.modelInstance = provider(this.model);
    } catch (error) {
      throw new Error(
        `Failed to initialize ${this.providerType} model: ${error}`
      );
    }
  }

  getProviderType(): string {
    return this.providerType;
  }

  getDefaultModel(): string {
    return CURRENT_MODELS[this.providerType];
  }

  getModelName(): string {
    return this.model;
  }

  isInitialized(): boolean {
    return this.modelInstance !== undefined;
  }

  private logDebugIfEnabled(
    operation: string,
    prompt: string,
    response: AIResponse
  ): { promptFile: string; responseFile: string } | null {
    if (!this.debugMode) return null;

    const debugId = generateDebugId(operation);
    debugLogInteraction(
      debugId,
      prompt,
      response,
      operation,
      this.getProviderType(),
      this.model,
      this.debugMode
    );

    // Return the actual debug file names created
    return {
      promptFile: `${debugId}_prompt.md`,
      responseFile: `${debugId}_response.md`,
    };
  }

  async sendMessage(
    message: string,
    operation: string = 'generic',
    evaluationContext?: {
      user_intent?: string;
      interaction_id?: string;
    }
  ): Promise<AIResponse> {
    if (!this.isInitialized()) {
      throw new Error(`${this.providerType} provider not initialized`);
    }

    return await withAITracing(
      {
        provider: this.providerType,
        model: this.model,
        operation: 'chat',
      },
      async () => {
        const startTime = Date.now();

        try {
          // Use Vercel AI SDK generateText
          // Note: maxOutputTokens not specified - provider will use model's natural maximum
          const result = await generateText({
            model: this.modelInstance,
            prompt: message,
          });

          const response: AIResponse = {
            content: result.text,
            usage: {
              input_tokens: (result.totalUsage || result.usage).inputTokens || 0,
              output_tokens: (result.totalUsage || result.usage).outputTokens || 0,
            },
          };

          const durationMs = Date.now() - startTime;

          // Debug log the interaction if enabled
          if (this.debugMode) {
            const debugId = generateDebugId(operation);
            debugLogInteraction(
              debugId,
              message,
              response,
              operation,
              this.getProviderType(),
              this.model,
              this.debugMode
            );

            // PRD #154: Always use new evaluation dataset system
            const evaluationMetrics: EvaluationMetrics = {
              // Core execution data
              operation,
              sdk: this.getProviderType(),
              inputTokens: response.usage.input_tokens,
              outputTokens: response.usage.output_tokens,
              durationMs,
              // Required fields
              iterationCount: 1,
              toolCallCount: 0,
              status: 'completed',
              completionReason: 'stop',
              modelVersion: this.model,
              // Required evaluation context - NO DEFAULTS, must be provided
              test_scenario: operation,
              ai_response_summary: response.content,
              user_intent: evaluationContext?.user_intent || '',
              interaction_id: evaluationContext?.interaction_id || '',
              // Optional performance data
              ...(response.usage.cache_creation_input_tokens && {
                cacheCreationTokens: response.usage.cache_creation_input_tokens,
              }),
              ...(response.usage.cache_read_input_tokens && {
                cacheReadTokens: response.usage.cache_read_input_tokens,
              }),
            };

            // Calculate cache hit rate if applicable
            if (
              response.usage.cache_read_input_tokens &&
              response.usage.input_tokens > 0
            ) {
              evaluationMetrics.cacheHitRate = Math.round(
                (response.usage.cache_read_input_tokens /
                  response.usage.input_tokens) *
                  100
              );
            }

            logEvaluationDataset(evaluationMetrics, this.debugMode);
          }

          return response;
        } catch (error) {
          // Log the prompt that caused the error for debugging
          if (this.debugMode) {
            const debugId = generateDebugId(operation);
            debugLogPromptOnly(
              debugId,
              message,
              operation,
              this.getProviderType(),
              this.model,
              this.debugMode
            );
          }

          // Generate dataset for failed AI interaction
          if (this.debugMode && evaluationContext) {
            const failureMetrics: EvaluationMetrics = {
              operation,
              user_intent: evaluationContext.user_intent || '',
              ai_response_summary: `Error: ${error instanceof Error ? error.message : String(error)}`,
              durationMs: Date.now() - startTime,
              inputTokens: 0,
              outputTokens: 0,
              iterationCount: 0,
              toolCallCount: 0,
              status: 'failed',
              completionReason: 'error',
              sdk: this.getProviderType(),
              modelVersion: this.model,
              test_scenario: operation,
              interaction_id:
                evaluationContext.interaction_id || generateDebugId(operation),
              failure_analysis: {
                failure_type: 'error',
                failure_reason: `${this.providerType} API error: ${error instanceof Error ? error.message : String(error)}`,
                time_to_failure: Date.now() - startTime,
              },
            };
            logEvaluationDataset(failureMetrics, this.debugMode);
          }

          throw new Error(`${this.providerType} API error: ${error}`);
        }
      },
      (response: AIResponse) => ({
        inputTokens: response.usage.input_tokens,
        outputTokens: response.usage.output_tokens,
        cacheReadTokens: response.usage.cache_read_input_tokens,
        cacheCreationTokens: response.usage.cache_creation_input_tokens,
      })
    );
  }

  /**
   * Agentic tool loop using Vercel AI SDK
   *
   * Implements multi-turn tool calling using generateText with maxSteps.
   * The Vercel AI SDK handles the conversation loop automatically.
   *
   * Provider-specific caching:
   * - Anthropic: Manual cache control via providerOptions
   * - OpenAI: Automatic caching (no code changes needed)
   * - Google: Check Gemini caching capabilities
   *
   * See PRD #143 Milestone 2.5 for Vercel provider implementation details.
   */
  async toolLoop(config: ToolLoopConfig): Promise<AgenticResult> {
    if (!this.isInitialized()) {
      throw new Error(`${this.providerType} provider not initialized`);
    }

    return await withAITracing(
      {
        provider: this.providerType,
        model: this.model,
        operation: 'tool_loop',
      },
      async () => {
        const startTime = Date.now();
        const maxIterations = config.maxIterations || 20;
        const operation = config.operation || 'tool-loop';

        // Convert AITool[] to Vercel AI SDK tool format
        const tools: Record<string, any> = {};
        for (let i = 0; i < config.tools.length; i++) {
          const aiTool = config.tools[i];
          const isLastTool = i === config.tools.length - 1;

          const toolDef = tool({
            description: aiTool.description,
            inputSchema: jsonSchema(aiTool.inputSchema),
            execute: async (input: any) => {
              return await config.toolExecutor(aiTool.name, input);
            },
          });

          // Add cache control ONLY to last tool for Anthropic (max 4 cache breakpoints)
          // This caches the system prompt + all tools together
          if (
            (this.providerType === 'anthropic' ||
              this.providerType === 'anthropic_opus' ||
              this.providerType === 'anthropic_haiku') &&
            isLastTool
          ) {
            (toolDef as any).providerOptions = {
              anthropic: {
                cacheControl: { type: 'ephemeral' },
              },
            };
          }

          // TODO: Check if Google Gemini supports caching in future SDK versions
          // Google Gemini may have caching capabilities - research providerOptions.google syntax
          // if (this.providerType === 'google' && isLastTool) {
          //   (toolDef as any).providerOptions = {
          //     google: { /* caching config if available */ }
          //   };
          // }

          tools[aiTool.name] = toolDef;
        }

        // Build messages array with system prompt caching for Anthropic
        // Anthropic caching requires system messages in messages array with providerOptions
        const messages: any[] = [];
        let systemParam: string | undefined;

        if (
          this.providerType === 'anthropic' ||
          this.providerType === 'anthropic_opus' ||
          this.providerType === 'anthropic_haiku'
        ) {
          // For Anthropic: Put system in messages array with cacheControl
          messages.push({
            role: 'system',
            content: config.systemPrompt,
            providerOptions: {
              anthropic: {
                cacheControl: { type: 'ephemeral' },
              },
            },
          });
          // Don't use system parameter for Anthropic when caching
          systemParam = undefined;
        } else {
          // For OpenAI/Google: Use system parameter (string)
          systemParam = config.systemPrompt;
        }

        // Add user message
        messages.push({
          role: 'user',
          content: config.userMessage,
        });

        // TODO: Check if Google Gemini supports system prompt caching in future SDK versions
        // if (this.providerType === 'google') {
        //   messages.unshift({
        //     role: 'system',
        //     content: config.systemPrompt,
        //     providerOptions: {
        //       google: { /* caching config if available */ }
        //     }
        //   });
        //   systemParam = undefined;
        // }

        try {
          // Use Vercel AI SDK's generateText with stopWhen for automatic loop
          // Default is stepCountIs(1) - we need to increase for multi-step investigation
          // Note: maxOutputTokens not specified - provider will use model's natural maximum
          const generateConfig: any = {
            model: this.modelInstance,
            messages,
            tools,
            stopWhen: stepCountIs(maxIterations),
          };

          // Add system parameter for non-Anthropic providers
          if (systemParam) {
            generateConfig.system = systemParam;
          }

          const result = await generateText(generateConfig);

          // Log raw response immediately after generation (before any processing)
          let debugFiles: { promptFile: string; responseFile: string } | null =
            null;
          if (this.debugMode) {
            // Build the full conversation context like Anthropic provider does
            let finalPrompt = `System: ${config.systemPrompt}\n\n`;

            // Always include the original user intent first
            finalPrompt += `user: ${config.userMessage}\n\n`;

            // Then add the conversation history if available
            if (result.response?.messages) {
              finalPrompt += result.response.messages
                .map(msg => {
                  if (typeof msg.content === 'string') {
                    return `${msg.role}: ${msg.content}`;
                  } else if (Array.isArray(msg.content)) {
                    const contentParts = msg.content
                      .map(part => {
                        if (part.type === 'text') {
                          return (part as any).text;
                        } else if (part.type === 'tool-call') {
                          return `[TOOL_USE: ${(part as any).toolName}]`;
                        } else if (part.type === 'tool-result') {
                          const resultData =
                            (part as any).output ||
                            (part as any).result ||
                            (part as any).content;
                          if (typeof resultData === 'string') {
                            return `[TOOL_RESULT: ${(part as any).toolName}]\n${resultData}`;
                          } else if (resultData) {
                            return `[TOOL_RESULT: ${(part as any).toolName}]\n${JSON.stringify(resultData, null, 2)}`;
                          }
                          return `[TOOL_RESULT: ${(part as any).toolName}]`;
                        }
                        return `[${part.type}]`;
                      })
                      .join(' ');
                    return `${msg.role}: ${contentParts}`;
                  }
                  return `${msg.role}: [complex_content]`;
                })
                .join('\n\n');
            }

            // Create raw response content that includes ALL data from result
            let rawResponseContent = `# RAW RESPONSE DATA\n\n`;
            rawResponseContent += `**result.text**: ${result.text || '[EMPTY]'}\n\n`;

            if (result.steps && result.steps.length > 0) {
              rawResponseContent += `**Steps (${result.steps.length})**:\n`;
              result.steps.forEach((step, i) => {
                rawResponseContent += `\nStep ${i + 1}:\n`;
                rawResponseContent += `- text: ${step.text || '[EMPTY]'}\n`;
                if (step.toolCalls) {
                  rawResponseContent += `- toolCalls: ${step.toolCalls.length}\n`;
                }
                if (step.toolResults) {
                  rawResponseContent += `- toolResults: ${step.toolResults.length}\n`;
                }
              });
              rawResponseContent += '\n';
            }

            // Add the last step's text for easy access
            let lastStepText = '';
            if (result.steps && result.steps.length > 0) {
              for (let i = result.steps.length - 1; i >= 0; i--) {
                if (result.steps[i].text && result.steps[i].text.trim()) {
                  lastStepText = result.steps[i].text;
                  break;
                }
              }
            }
            rawResponseContent += `**Last step with text**: ${lastStepText || '[NONE]'}\n\n`;

            const usage = result.totalUsage || result.usage;
            const rawAiResponse = {
              content: rawResponseContent,
              usage: {
                input_tokens: usage.inputTokens || 0,
                output_tokens: usage.outputTokens || 0,
                cache_creation_input_tokens: 0,
                cache_read_input_tokens: 0,
              },
            };

            debugFiles = this.logDebugIfEnabled(
              `${operation}-raw`,
              finalPrompt,
              rawAiResponse
            );
          }

          // Extract tool call history from steps
          const toolCallsExecuted: Array<{
            tool: string;
            input: any;
            output: any;
          }> = [];

          for (const step of result.steps || []) {
            for (const toolCall of step.toolCalls || []) {
              const toolResult = step.toolResults?.find(
                (tr: any) => tr.toolCallId === toolCall.toolCallId
              );
              toolCallsExecuted.push({
                tool: (toolCall as any).toolName,
                input: (toolCall as any).args,
                output: (toolResult as any)?.result,
              });
            }
          }

          // Normalize token metrics across providers
          // NOTE: Vercel AI SDK had token reporting bugs that were fixed in PR #8945 (merged Sept 26, 2025)
          // - GitHub Issue #8349: cache tokens only reflected last step, not summed across all steps
          // - GitHub Issue #8795: Token reporting issues with Anthropic provider (streaming)
          // Our version (5.0.60, released Oct 2, 2025) includes these fixes.
          // However, testing still shows ~70% fewer tokens reported vs Anthropic native SDK.
          // Root cause: We were using result.usage (final step only) instead of result.totalUsage (sum of all steps)!
          const usage = result.totalUsage || result.usage;

          let cacheReadTokens = 0;
          let cacheCreationTokens = 0;

          // Anthropic via Vercel uses cachedInputTokens (confirmed in AI SDK 5+)
          if ((usage as any).cachedInputTokens) {
            cacheReadTokens = (usage as any).cachedInputTokens;
          }

          // OpenAI uses cached_tokens or cachedTokens (automatic caching, no config needed)
          if ('cachedTokens' in usage || (usage as any).cached_tokens) {
            cacheReadTokens =
              (usage as any).cachedTokens || (usage as any).cached_tokens || 0;
          }

          // Anthropic native SDK uses separate cache_creation and cache_read fields
          if ((usage as any).cache_creation_input_tokens) {
            cacheCreationTokens = (usage as any).cache_creation_input_tokens;
          }
          if ((usage as any).cache_read_input_tokens) {
            cacheReadTokens = (usage as any).cache_read_input_tokens;
          }

          // TODO: Check if Google Gemini reports cache metrics in future SDK versions
          // Google Gemini may return cache-related metrics - check usage object structure
          // Possible fields: cachedTokens, cacheHits, or provider-specific naming
          // Add normalization logic here when Gemini caching is confirmed

          // Extract final text from the last step (result.text might be empty if last step had tool calls)
          let finalText = result.text;
          if (!finalText || finalText.trim().length === 0) {
            // If result.text is empty, find the last text response from steps
            for (let i = (result.steps || []).length - 1; i >= 0; i--) {
              const step = result.steps![i];
              if (step.text && step.text.trim().length > 0) {
                finalText = step.text;
                break;
              }
            }
          }

          // Log processed summary response (keep existing functionality)
          if (this.debugMode && debugFiles === null) {
            // Only log summary if we haven't already logged raw response
            let finalPrompt = `System: ${config.systemPrompt}\n\nuser: ${config.userMessage}`;

            const aiResponse: AIResponse = {
              content: finalText || '',
              usage: {
                input_tokens: usage.inputTokens || 0,
                output_tokens: usage.outputTokens || 0,
                cache_creation_input_tokens: cacheCreationTokens,
                cache_read_input_tokens: cacheReadTokens,
              },
            };

            debugFiles = this.logDebugIfEnabled(
              `${operation}-summary`,
              finalPrompt,
              aiResponse
            );
          }

          return createAndLogAgenticResult({
            finalMessage: finalText || '',
            iterations: result.steps?.length || 1,
            toolCallsExecuted,
            totalTokens: {
              input: usage.inputTokens || 0,
              output: usage.outputTokens || 0,
              cacheCreation: cacheCreationTokens,
              cacheRead: cacheReadTokens,
            },
            status: 'success',
            completionReason: 'investigation_complete',
            modelVersion: this.model,
            operation: `${operation}-summary`,
            sdk: this.getProviderType(),
            startTime,
            debugMode: this.debugMode,
            debugFiles,
            evaluationContext: config.evaluationContext,
            interaction_id: config.interaction_id,
          });
        } catch (error) {
          // Return error result with extended metrics
          return createAndLogAgenticResult({
            finalMessage: `Error during investigation: ${error instanceof Error ? error.message : String(error)}`,
            iterations: 0,
            toolCallsExecuted: [],
            totalTokens: {
              input: 0,
              output: 0,
              cacheCreation: 0,
              cacheRead: 0,
            },
            status: 'failed',
            completionReason: 'error',
            modelVersion: this.model,
            operation: `${operation}-error`,
            sdk: this.getProviderType(),
            startTime,
            debugMode: this.debugMode,
            evaluationContext: config.evaluationContext,
            interaction_id: config.interaction_id,
          });
        }
      },
      (result: AgenticResult) => ({
        inputTokens: result.totalTokens.input,
        outputTokens: result.totalTokens.output,
        cacheReadTokens: result.totalTokens.cacheRead,
        cacheCreationTokens: result.totalTokens.cacheCreation,
      })
    );
  }
}
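
For orientation, here is a minimal usage sketch of the class above. The config and result shapes are inferred from the code (the constructor's `AIProviderConfig` fields, the `tools`/`toolExecutor`/`systemPrompt`/`userMessage` fields read from `ToolLoopConfig`, and the `finalMessage`/`totalTokens` fields of `AgenticResult`); the provider key, model, tool name, prompts, and environment variable used here are illustrative assumptions, not part of the file.

// usage-sketch.ts - minimal sketch, assuming the config shapes implied by vercel-provider.ts
import { VercelProvider } from './vercel-provider';

async function main() {
  // 'anthropic' is one of the CURRENT_MODELS keys used by the class; the env var is an assumption.
  const provider = new VercelProvider({
    provider: 'anthropic',
    apiKey: process.env.ANTHROPIC_API_KEY || '',
    debugMode: true,
  });

  // Single-turn call via sendMessage
  const reply = await provider.sendMessage('Summarize the cluster state.', 'diagnostics');
  console.log(reply.content, reply.usage);

  // Agentic loop with one hypothetical tool; field names follow how toolLoop reads its config
  const result = await provider.toolLoop({
    systemPrompt: 'You are a Kubernetes troubleshooting assistant.',
    userMessage: 'Why is the checkout deployment crash-looping?',
    maxIterations: 10,
    tools: [
      {
        name: 'kubectl_get',
        description: 'Read a Kubernetes resource',
        inputSchema: {
          type: 'object',
          properties: { kind: { type: 'string' }, name: { type: 'string' } },
          required: ['kind'],
        },
      },
    ],
    toolExecutor: async (toolName, input) => {
      // Hypothetical executor: dispatch to kubectl, an API client, etc.
      return { toolName, input, output: 'stubbed result' };
    },
  });
  console.log(result.finalMessage, result.totalTokens);
}

main().catch(console.error);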


MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/vfarcic/dot-ai'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.