// tokenizer.ts
import { get_encoding, TiktokenEncoding } from 'tiktoken';

type TikTokenEncoder = ReturnType<typeof get_encoding>;

export class TokenizerManager {
  private static encoders = new Map<string, TikTokenEncoder>();

  /**
   * Resolve the tiktoken encoding name for a model
   */
  private static getEncodingName(model: string): TiktokenEncoding {
    if (model.startsWith('gpt-4') || model.startsWith('gpt-3.5')) {
      return 'cl100k_base'; // GPT-4 and GPT-3.5-turbo
    } else if (model.startsWith('text-davinci') || model.startsWith('text-curie')) {
      return 'p50k_base'; // Legacy completion models (text-davinci-002/003, Codex)
    } else if (model.startsWith('gemini')) {
      // Gemini actually uses a SentencePiece tokenizer; cl100k_base is only an approximation
      return 'cl100k_base';
    }
    // Default to GPT-4 encoding for unknown models
    return 'cl100k_base';
  }

  /**
   * Get (and cache) the tokenizer for a specific model
   */
  private static getEncoder(model: string): TikTokenEncoder {
    const encodingName = this.getEncodingName(model);
    if (!this.encoders.has(encodingName)) {
      this.encoders.set(encodingName, get_encoding(encodingName));
    }
    return this.encoders.get(encodingName)!;
  }
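
  // Hypothetical example: getEncodingName('gpt-3.5-turbo'), getEncodingName('gemini-pro'),
  // and getEncodingName('claude-3') all resolve to 'cl100k_base' (the last via the
  // unknown-model default), while getEncodingName('text-davinci-003') yields 'p50k_base'.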

  /**
   * Count tokens in text for a specific model
   */
  static countTokens(text: string, model = 'gpt-4'): number {
    try {
      const encoder = this.getEncoder(model);
      return encoder.encode(text).length;
    } catch (error) {
      // Fallback to character-based estimation (~4 characters per token) if tiktoken fails
      console.warn('Tiktoken failed, falling back to character estimation:', error);
      return Math.ceil(text.length / 4);
    }
  }
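
  // Hypothetical example (exact counts depend on the encoding/tiktoken version):
  //   TokenizerManager.countTokens('Hello, world!');               // 4 tokens under cl100k_base
  //   TokenizerManager.countTokens('Hello, world!', 'gemini-pro'); // same encoder, same count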

  /**
   * Estimate tokens for conversation messages
   */
  static countMessageTokens(
    messages: Array<{ role: string; content: string; name?: string }>,
    model = 'gpt-4'
  ): number {
    try {
      const encoder = this.getEncoder(model);
      let totalTokens = 0;
      for (const message of messages) {
        // Every message follows <|start|>{role/name}\n{content}<|end|>\n
        totalTokens += 3; // per-message overhead (start, separator, end tokens)
        if (message.name) {
          totalTokens += encoder.encode(message.name).length;
          totalTokens += 1; // per-name overhead, following OpenAI's token-counting cookbook
        }
        totalTokens += encoder.encode(message.role).length;
        totalTokens += encoder.encode(message.content).length;
      }
      totalTokens += 3; // Every reply is primed with <|start|>assistant<|message|>
      return totalTokens;
    } catch (error) {
      // Fallback estimation: ~4 characters per token, plus fixed per-message overhead
      console.warn('Tiktoken failed for message counting, falling back to estimation:', error);
      const totalChars = messages.reduce(
        (sum, msg) => sum + msg.content.length + msg.role.length + (msg.name?.length || 0),
        0
      );
      return Math.ceil(totalChars / 4) + messages.length * 4;
    }
  }
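
  // Hypothetical example: one message costs 3 overhead tokens plus its encoded
  // role and content, and the reply priming adds 3 more, so
  //   TokenizerManager.countMessageTokens([{ role: 'user', content: 'Hi' }])
  // is 3 + encode('user').length + encode('Hi').length + 3 tokens.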

  /**
   * Truncate text to fit within a token limit
   */
  static truncateToTokenLimit(text: string, maxTokens: number, model = 'gpt-4'): string {
    try {
      const encoder = this.getEncoder(model);
      const tokens = encoder.encode(text);
      if (tokens.length <= maxTokens) {
        return text;
      }
      const truncatedTokens = tokens.slice(0, maxTokens);
      // tiktoken's decode() returns a Uint8Array of UTF-8 bytes; TextDecoder turns it
      // back into a string. Cutting at a token boundary can split a multi-byte
      // character, in which case TextDecoder substitutes U+FFFD for the partial bytes.
      return new TextDecoder().decode(encoder.decode(truncatedTokens));
    } catch (error) {
      // Fallback to character-based truncation (~4 characters per token)
      console.warn('Tiktoken failed for truncation, falling back to character estimation:', error);
      const maxChars = maxTokens * 4;
      return text.length <= maxChars ? text : text.substring(0, maxChars);
    }
  }
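
  // Hypothetical example: trim a caller-supplied `longPrompt` (assumed variable)
  // to a 4096-token budget before sending it to the model:
  //   const safePrompt = TokenizerManager.truncateToTokenLimit(longPrompt, 4096);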

  /**
   * Get token usage breakdown for debugging
   */
  static getTokenBreakdown(
    text: string,
    model = 'gpt-4'
  ): { total: number; method: 'tiktoken' | 'fallback'; encoding?: string } {
    try {
      const encoder = this.getEncoder(model);
      const tokens = encoder.encode(text).length;
      return {
        total: tokens,
        method: 'tiktoken',
        encoding: this.getEncodingName(model) // report the encoding actually used
      };
    } catch (error) {
      return {
        total: Math.ceil(text.length / 4),
        method: 'fallback'
      };
    }
  }
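
  // Hypothetical example: getTokenBreakdown('some prompt text') might return
  //   { total: 3, method: 'tiktoken', encoding: 'cl100k_base' }
  // or, if the tiktoken WASM module fails to load,
  //   { total: Math.ceil('some prompt text'.length / 4), method: 'fallback' }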

  /**
   * Clean up encoders to free memory
   */
  static cleanup(): void {
    for (const encoder of this.encoders.values()) {
      encoder.free(); // release the WASM memory held by each tiktoken encoder
    }
    this.encoders.clear();
  }
}

// Cleanup on process exit
process.on('exit', () => {
  TokenizerManager.cleanup();
});

process.on('SIGINT', () => {
  TokenizerManager.cleanup();
  process.exit();
});

process.on('SIGTERM', () => {
  TokenizerManager.cleanup();
  process.exit();
});
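
// Usage sketch (hypothetical, shown as comments rather than executable code;
// `rawDoc` and the 8192-token context size are assumed values):
//
//   import { TokenizerManager } from './tokenizer';
//
//   const messages = [
//     { role: 'system', content: 'You are a helpful assistant.' },
//     { role: 'user', content: 'Summarize this document.' },
//   ];
//   const overhead = TokenizerManager.countMessageTokens(messages);
//   const doc = TokenizerManager.truncateToTokenLimit(rawDoc, 8192 - overhead);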