token-tracking.ts
import { logger } from '@snakagent/core';
import type { AIMessage } from '@langchain/core/messages';

/**
 * A utility class for tracking token usage across LLM calls within a session.
 * Provides static methods to track usage from various response formats and maintains session totals.
 * Offers fallback estimation when explicit token counts are unavailable.
 */
export class TokenTracker {
  // Constants for token estimation heuristic
  private static readonly TOKENS_PER_WORD = 1.3;
  private static readonly TOKENS_PER_SPECIAL_CHAR = 0.5;

  // Static session counters
  private static sessionPromptTokens: number = 0;
  private static sessionResponseTokens: number = 0;
  private static sessionTotalTokens: number = 0;

  /**
   * Resets the static session token counters to zero.
   */
  public static resetSessionCounters(): void {
    this.sessionPromptTokens = 0;
    this.sessionResponseTokens = 0;
    this.sessionTotalTokens = 0;
  }

  /**
   * Retrieves the cumulative token usage for the current session.
   * @returns An object containing the total prompt, response, and overall tokens used in the session.
   */
  public static getSessionTokenUsage(): {
    promptTokens: number;
    responseTokens: number;
    totalTokens: number;
  } {
    return {
      promptTokens: this.sessionPromptTokens,
      responseTokens: this.sessionResponseTokens,
      totalTokens: this.sessionTotalTokens,
    };
  }

  /**
   * Tracks token usage based on the result object from an LLM call.
   * It inspects the result for known metadata structures (`usage_metadata`, `response_metadata`)
   * across different formats (standard LangChain, OpenAI, Anthropic).
   * If no explicit token info is found, it falls back to estimation.
   * Updates the session counters.
   *
   * @param result - The result object from an LLM call (e.g., AIMessage, array of messages, or other structures).
   * @param modelName - The name of the model used for the call (optional, defaults to 'unknown_model').
   * @returns An object containing the prompt, response, and total tokens for this specific call.
   */
  public static trackCall(
    result: any,
    modelName: string = 'unknown_model'
  ): { promptTokens: number; responseTokens: number; totalTokens: number } {
    // Handle null or undefined result early
    if (result == null) {
      logger.warn(
        `trackCall received null or undefined result for model [${modelName}]. Returning zero tokens.`
      );
      return { promptTokens: 0, responseTokens: 0, totalTokens: 0 };
    }

    let messageToProcess: AIMessage | null = null;

    // Attempt to find an AIMessage within the result
    if (
      result &&
      typeof result === 'object' &&
      'content' in result &&
      result.content !== undefined &&
      result._getType &&
      result._getType() === 'ai'
    ) {
      messageToProcess = result as AIMessage;
    } else if (Array.isArray(result)) {
      // Check array elements for the last AIMessage
      for (let i = result.length - 1; i >= 0; i--) {
        const msg = result[i];
        if (
          msg &&
          typeof msg === 'object' &&
          msg._getType &&
          msg._getType() === 'ai'
        ) {
          messageToProcess = msg as AIMessage;
          break;
        }
      }
    }

    if (messageToProcess) {
      // Try standard `usage_metadata` first
      if (messageToProcess.usage_metadata) {
        const {
          input_tokens = 0,
          output_tokens = 0,
          total_tokens = 0,
        } = messageToProcess.usage_metadata;
        const finalTotalTokens = total_tokens || input_tokens + output_tokens;

        logger.debug(
          `Token usage for model [${modelName}]: Prompt tokens: ${input_tokens}, Response tokens: ${output_tokens}, Total tokens: ${finalTotalTokens}`
        );

        this.sessionPromptTokens += input_tokens;
        this.sessionResponseTokens += output_tokens;
        this.sessionTotalTokens += finalTotalTokens;

        return {
          promptTokens: input_tokens,
          responseTokens: output_tokens,
          totalTokens: finalTotalTokens,
        };
      }

      // Then try provider-specific `response_metadata`
      if (messageToProcess.response_metadata) {
        // OpenAI format
        if ('tokenUsage' in messageToProcess.response_metadata) {
          const {
            promptTokens = 0,
            completionTokens = 0,
            totalTokens = 0,
          } = messageToProcess.response_metadata.tokenUsage;
          const finalTotalTokens =
            totalTokens || promptTokens + completionTokens;

          logger.debug(
            `Token usage for model [${modelName}]: Prompt tokens: ${promptTokens}, Response tokens: ${completionTokens}, Total tokens: ${finalTotalTokens}`
          );

          this.sessionPromptTokens += promptTokens;
          this.sessionResponseTokens += completionTokens;
          this.sessionTotalTokens += finalTotalTokens;

          return {
            promptTokens: promptTokens,
            responseTokens: completionTokens,
            totalTokens: finalTotalTokens,
          };
        }

        // Anthropic format
        if ('usage' in messageToProcess.response_metadata) {
          const { input_tokens = 0, output_tokens = 0 } =
            messageToProcess.response_metadata.usage;
          const total_tokens = input_tokens + output_tokens;

          logger.debug(
            `Token usage for model [${modelName}]: Prompt tokens: ${input_tokens}, Response tokens: ${output_tokens}, Total tokens: ${total_tokens}`
          );

          this.sessionPromptTokens += input_tokens;
          this.sessionResponseTokens += output_tokens;
          this.sessionTotalTokens += total_tokens;

          return {
            promptTokens: input_tokens,
            responseTokens: output_tokens,
            totalTokens: total_tokens,
          };
        }
      }
    }

    logger.warn(
      `No token usage information available for model [${modelName}], using fallback estimation.`
    );

    const estimation = this.estimateTokensFromResult(result);

    this.sessionPromptTokens += estimation.promptTokens;
    this.sessionResponseTokens += estimation.responseTokens;
    this.sessionTotalTokens += estimation.totalTokens;

    logger.debug(
      `Token estimation fallback for model [${modelName}]: ` +
        `Response tokens: ~${estimation.responseTokens} (prompt unknown)`
    );

    return estimation;
  }
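  // Illustrative input shapes that trackCall recognizes (values are made up,
  // not tied to any particular provider response):
  //   - an AIMessage with `usage_metadata`, e.g.
  //       { input_tokens: 120, output_tokens: 45, total_tokens: 165 }
  //   - an OpenAI-style `response_metadata.tokenUsage`, e.g.
  //       { promptTokens: 120, completionTokens: 45, totalTokens: 165 }
  //   - an Anthropic-style `response_metadata.usage`, e.g.
  //       { input_tokens: 120, output_tokens: 45 }
  // Anything else falls through to estimateTokensFromResult below.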
  /**
   * Fallback estimation of response tokens based on the result content.
   * Attempts to extract text content from various result structures.
   * Note: This method currently only estimates *response* tokens as prompt info isn't available here.
   *
   * @param result - The result object from an LLM call.
   * @returns An object with estimated response tokens (prompt tokens set to 0).
   */
  private static estimateTokensFromResult(result: any): {
    promptTokens: number;
    responseTokens: number;
    totalTokens: number;
  } {
    let responseText = '';

    if (typeof result === 'string') {
      responseText = result;
    } else if (Array.isArray(result)) {
      responseText = result
        .map((msg) =>
          typeof msg === 'object' && msg.content
            ? typeof msg.content === 'string'
              ? msg.content
              : JSON.stringify(msg.content)
            : ''
        )
        .join(' ');
    } else if (result && typeof result === 'object') {
      if ('content' in result) {
        responseText =
          typeof result.content === 'string'
            ? result.content
            : JSON.stringify(result.content);
      } else {
        responseText = JSON.stringify(result);
      }
    }

    const responseTokens = this.estimateTokensFromText(responseText);

    // Currently, fallback only estimates response tokens
    return { promptTokens: 0, responseTokens, totalTokens: responseTokens };
  }
  /**
   * Tracks token usage using potentially more complete information, including the prompt.
   * This is often used with LangChain callbacks like `handleLLMEnd` which provide separate
   * prompt and response details. It prioritizes explicit token data (`llmOutput.tokenUsage`),
   * then checks for metadata within the result object (`generations`), and finally falls back
   * to estimating both prompt and response tokens based on text content.
   * Updates the session counters.
   *
   * @param promptText - The input prompt text or structure.
   * @param resultObj - The result object from the LLM call, potentially containing `llmOutput` or `generations`.
   * @param modelName - The name of the model used (optional, defaults to 'unknown_model').
   * @returns An object containing the prompt, response, and total tokens for this call (potentially estimated).
   */
  public static trackFullUsage(
    promptText: any,
    resultObj: any,
    modelName: string = 'unknown_model'
  ): { promptTokens: number; responseTokens: number; totalTokens: number } {
    // Prioritize explicit token usage data if available
    if (resultObj.llmOutput?.tokenUsage) {
      const {
        promptTokens = 0,
        completionTokens = 0,
        totalTokens = 0,
      } = resultObj.llmOutput.tokenUsage;
      const finalTotalTokens = totalTokens || promptTokens + completionTokens;

      logger.debug(
        `Token usage for model [${modelName}]: Prompt tokens: ${promptTokens}, Response tokens: ${completionTokens}, Total tokens: ${finalTotalTokens}`
      );

      this.sessionPromptTokens += promptTokens;
      this.sessionResponseTokens += completionTokens;
      this.sessionTotalTokens += finalTotalTokens;

      return {
        promptTokens,
        responseTokens: completionTokens,
        totalTokens: finalTotalTokens,
      };
    }

    // Check for AIMessage with metadata within `generations` structure
    if (resultObj.generations?.[0]?.[0]?.message) {
      const message = resultObj.generations[0][0].message;
      // Use trackCall to parse the message metadata
      const messageUsage = this.trackCall(message, modelName); // This also updates session counters

      // If trackCall couldn't find prompt tokens (e.g., only response metadata available),
      // but we have the promptText here, estimate the prompt tokens separately.
      if (messageUsage.promptTokens === 0 && promptText) {
        const promptString =
          typeof promptText === 'string'
            ? promptText
            : JSON.stringify(promptText);
        const estimatedPromptTokens = this.estimateTokensFromText(promptString);

        // Update session counters for the estimated prompt tokens
        // Note: sessionResponseTokens were already updated in the trackCall above
        this.sessionPromptTokens += estimatedPromptTokens;
        this.sessionTotalTokens += estimatedPromptTokens;

        logger.debug(
          `[ESTIMATED PROMPT] Token usage for model [${modelName}]: ` +
            `Prompt tokens: ~${estimatedPromptTokens}, Response tokens: ${messageUsage.responseTokens}, ` +
            `Total tokens: ~${estimatedPromptTokens + messageUsage.responseTokens}`
        );

        return {
          promptTokens: estimatedPromptTokens,
          responseTokens: messageUsage.responseTokens,
          totalTokens: estimatedPromptTokens + messageUsage.responseTokens,
        };
      }

      return messageUsage;
    }

    const promptString =
      typeof promptText === 'string' ? promptText : JSON.stringify(promptText);

    let responseString = '';
    if (resultObj.generations?.[0]?.[0]?.text) {
      responseString = resultObj.generations[0][0].text;
    } else if (resultObj.content) {
      responseString =
        typeof resultObj.content === 'string'
          ? resultObj.content
          : JSON.stringify(resultObj.content);
    } else {
      responseString = JSON.stringify(resultObj);
    }

    const promptTokens = this.estimateTokensFromText(promptString);
    const responseTokens = this.estimateTokensFromText(responseString);
    const totalTokens = promptTokens + responseTokens;

    logger.warn(
      `[FALLBACK ESTIMATE - FULL] Token usage for model [${modelName}]: ` +
        `Prompt tokens: ~${promptTokens}, Response tokens: ~${responseTokens}, Total tokens: ~${totalTokens}`
    );

    this.sessionPromptTokens += promptTokens;
    this.sessionResponseTokens += responseTokens;
    this.sessionTotalTokens += totalTokens;

    return { promptTokens, responseTokens, totalTokens };
  }
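  // Sketch of how trackFullUsage might be wired into a LangChain callback
  // handler (assumed shapes only; the prompt has to be captured in
  // `handleLLMStart`, since `handleLLMEnd` receives just the LLMResult):
  //
  //   handleLLMStart(_llm, prompts) { this.lastPrompt = prompts.join('\n'); }
  //   handleLLMEnd(output) {
  //     TokenTracker.trackFullUsage(this.lastPrompt, output, 'my-model');
  //   }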
  /**
   * Utility method to estimate token count for a given text string.
   * Uses a basic heuristic: approximately 1.3 tokens per word plus 0.5 per special character.
   * This is a rough estimate and may not be accurate for all models or languages.
   *
   * @param text - The text string to estimate tokens for.
   * @returns The estimated number of tokens.
   */
  private static estimateTokensFromText(text: string): number {
    if (!text) return 0;

    const wordCount = text.split(/\s+/).filter(Boolean).length;
    const specialChars = (text.match(/[^a-zA-Z0-9\s]/g) || []).length;

    return Math.ceil(
      wordCount * this.TOKENS_PER_WORD +
        specialChars * this.TOKENS_PER_SPECIAL_CHAR
    );
  }
}
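A minimal usage sketch follows, assuming the class is imported from a local './token-tracking' module, that the AIMessage constructor from @langchain/core/messages accepts usage_metadata in its fields, and that the model name and token counts are made-up values:

import { AIMessage } from '@langchain/core/messages';
import { TokenTracker } from './token-tracking'; // hypothetical local path

// Start a fresh session before a run
TokenTracker.resetSessionCounters();

// 1) A response carrying explicit usage_metadata is counted exactly.
const message = new AIMessage({
  content: 'The capital of France is Paris.',
  usage_metadata: { input_tokens: 12, output_tokens: 8, total_tokens: 20 },
});
const exact = TokenTracker.trackCall(message, 'example-model');
// exact => { promptTokens: 12, responseTokens: 8, totalTokens: 20 }

// 2) A bare string has no metadata, so trackCall falls back to the heuristic
//    (1.3 tokens per word + 0.5 per special character), reporting 0 prompt tokens.
const estimated = TokenTracker.trackCall('Hello, world!', 'example-model');
// "Hello, world!" -> 2 words * 1.3 + 2 special chars * 0.5 = 3.6 -> ceil = 4
// estimated => { promptTokens: 0, responseTokens: 4, totalTokens: 4 }

// Session totals accumulate across both calls.
console.log(TokenTracker.getSessionTokenUsage());
// => { promptTokens: 12, responseTokens: 12, totalTokens: 24 }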
