llm.ts (7.51 kB)
import { GoogleGenerativeAI } from '@google/generative-ai';
import { SummaryOptions } from './types.js';
import { LLMError } from '../error/index.js';

// Retry configuration for LLM calls
export interface RetryOptions {
  maxRetries: number;
  initialDelayMs: number;
  maxDelayMs: number;
  backoffFactor: number;
  retryableStatusCodes: number[];
}

const DEFAULT_RETRY_OPTIONS: RetryOptions = {
  maxRetries: 3,
  initialDelayMs: 1000,
  maxDelayMs: 10000,
  backoffFactor: 2,
  retryableStatusCodes: [429, 500, 502, 503, 504]
};

// Interface for LLM implementations
export interface LLM {
  summarize(code: string, language: string, options?: SummaryOptions): Promise<string>;
}

// Implementation of async retry with exponential backoff
async function withRetry<T>(
  fn: () => Promise<T>,
  retryOptions: RetryOptions = DEFAULT_RETRY_OPTIONS,
  retryCount = 0
): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    // Check if we should retry
    if (retryCount >= retryOptions.maxRetries) {
      console.log(`[RETRY] Maximum retries (${retryOptions.maxRetries}) reached, giving up.`);
      throw error; // Max retries reached, propagate the error
    }

    // Determine if the error is retryable
    let isRetryable = false;

    if (error instanceof LLMError) {
      isRetryable = error.isRetryable;
      console.log(`[RETRY] LLMError retryable status: ${isRetryable}`);
    } else if (error instanceof Error) {
      // Check for network errors or status code errors
      const statusCodeMatch = error.message.match(/status code (\d+)/i);
      if (statusCodeMatch) {
        const statusCode = parseInt(statusCodeMatch[1], 10);
        isRetryable = retryOptions.retryableStatusCodes.includes(statusCode);
        console.log(`[RETRY] Status code ${statusCode}, retryable: ${isRetryable}`);
      } else {
        // Network errors are generally retryable
        const isNetworkError = error.message.includes('network') ||
          error.message.includes('timeout') ||
          error.message.includes('connection');

        // Content/token errors are not retryable
        const isContentError = error.message.includes('exceeds maximum') ||
          error.message.includes('too large') ||
          error.message.includes('token limit');

        isRetryable = isNetworkError && !isContentError;
        console.log(`[RETRY] Error type: ${isNetworkError ? 'Network' : (isContentError ? 'Content' : 'Other')}, retryable: ${isRetryable}`);
      }
    }

    if (!isRetryable) {
      console.log(`[RETRY] Error not retryable, propagating.`);
      throw error; // Not retryable, propagate the error
    }

    // Calculate delay with exponential backoff and jitter
    const delay = Math.min(
      retryOptions.initialDelayMs * Math.pow(retryOptions.backoffFactor, retryCount),
      retryOptions.maxDelayMs
    );

    // Add jitter (±20%)
    const jitter = 0.8 + Math.random() * 0.4;
    const delayWithJitter = Math.floor(delay * jitter);

    // Log retry attempt
    console.warn(`[RETRY] Retrying LLM call (${retryCount + 1}/${retryOptions.maxRetries}) after ${delayWithJitter}ms delay`);

    // Wait and retry
    await new Promise(resolve => setTimeout(resolve, delayWithJitter));
    return withRetry(fn, retryOptions, retryCount + 1);
  }
}

// Gemini Flash 2.0 implementation
export class GeminiLLM implements LLM {
  private apiKey: string;
  private retryOptions: RetryOptions;

  constructor(apiKey: string, retryOptions?: Partial<RetryOptions>) {
    this.apiKey = apiKey;
    this.retryOptions = { ...DEFAULT_RETRY_OPTIONS, ...retryOptions };
  }

  async summarize(code: string, language: string, options?: SummaryOptions): Promise<string> {
    // Create a unique ID for this request for tracing
    const requestId = `req_${Date.now()}_${Math.random().toString(36).substring(2, 9)}`;

    try {
      // Default options
      const summaryOptions: SummaryOptions = {
        detailLevel: options?.detailLevel || 'medium',
        maxLength: options?.maxLength || 500
      };

      // Customize prompt based on detail level using a mapping object
      const detailPromptMap = {
        'low': 'Keep it very brief, focusing only on the main purpose.',
        'medium': '',
        'high': 'Provide a detailed analysis including functions, methods, and how they interact.'
      };
      const detailPrompt = detailPromptMap[summaryOptions.detailLevel];

      const prompt = `Provide an overview summary of the code in this ${language} file. ${detailPrompt} Keep the summary under ${summaryOptions.maxLength} characters.
${code}`;

      // Calculate token estimate (approximate)
      const totalChars = prompt.length;
      const estimatedTokens = Math.ceil(totalChars / 4);

      console.log(`[${requestId}] Summarizing ${language} code, detail level: ${summaryOptions.detailLevel}`);
      console.log(`[${requestId}] Estimated prompt tokens: ~${estimatedTokens}`);

      // Check if we're likely approaching token limits (32k is common for many models)
      if (estimatedTokens > 30000) {
        console.warn(`[${requestId}] WARNING: Prompt may exceed token limits (~${estimatedTokens} tokens)`);
      }

      // Use retry mechanism for the actual API call
      return await withRetry(async () => {
        console.log(`[${requestId}] Initializing Gemini API call`);

        // Initialize the Google Generative AI client
        const genAI = new GoogleGenerativeAI(this.apiKey);
        const model = genAI.getGenerativeModel({ model: "gemini-2.0-flash" });

        const startTime = Date.now();
        console.log(`[${requestId}] Sending request to Gemini API`);

        const result = await model.generateContent(prompt);
        const elapsed = Date.now() - startTime;

        console.log(`[${requestId}] Gemini response received in ${elapsed}ms`);

        // Check if response is empty or malformed
        const responseText = result.response.text();
        if (!responseText) {
          console.warn(`[${requestId}] Received empty response from Gemini API`);
          return "Failed to generate summary.";
        }

        return responseText;
      }, this.retryOptions);
    } catch (error) {
      // Extract API-specific error information if available
      const errorDetails = error.response?.data || {};
      const statusCode = error.response?.status;

      console.error(`[${requestId}] Error summarizing with Gemini:`);
      console.error(` Message: ${error instanceof Error ? error.message : String(error)}`);
      if (statusCode) {
        console.error(` Status: ${statusCode}`);
      }
      if (error.response) {
        console.error(` Response: ${JSON.stringify(errorDetails)}`);
      }

      // Determine if this is a token limit error
      const isTokenLimitError = (error instanceof Error &&
        (error.message.includes('exceeds maximum') ||
          error.message.includes('too large') ||
          error.message.includes('token limit')));

      throw new LLMError(
        `Failed to generate summary: ${error instanceof Error ? error.message : String(error)}`,
        {
          isRetryable: !isTokenLimitError, // Don't retry token limit errors
          context: { language, requestId, isTokenLimitError }
        }
      );
    }
  }
}
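For orientation, here is a minimal usage sketch of the class above. It is not part of the original file: the GEMINI_API_KEY environment variable name is a hypothetical choice, and SummaryOptions is assumed to be the { detailLevel: 'low' | 'medium' | 'high'; maxLength: number } shape implied by the defaults inside summarize().

// Hypothetical usage sketch (not part of llm.ts).
// Assumes a GEMINI_API_KEY environment variable and the SummaryOptions shape noted above.
import { GeminiLLM } from './llm.js';

async function main(): Promise<void> {
  // Allow a couple of extra retries on top of the defaults
  const llm = new GeminiLLM(process.env.GEMINI_API_KEY ?? '', { maxRetries: 5 });

  const source = `export const add = (a: number, b: number): number => a + b;`;

  // Request a short, low-detail summary of the snippet
  const summary = await llm.summarize(source, 'TypeScript', {
    detailLevel: 'low',
    maxLength: 200
  });

  console.log(summary);
}

main().catch(err => console.error(err));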

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/nicobailon/code-summarizer'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.