// Tokenization cache for performance optimization (T060)
// Simple Map-based cache to avoid re-tokenizing same content
// Memory footprint: each entry holds its cache key plus the resulting token array; the entry count is capped by maxSize
import { tokenize } from '../core/tokenize.mjs';
/**
 * Create a tokenization cache wrapper.
 *
 * Results of `tokenize` are memoized per distinct input string in a Map.
 * The Map's insertion order doubles as the LRU order: the first key is the
 * least recently used entry and is the one evicted when the cache is full.
 *
 * @param {Object} [options] - Configuration options
 * @param {number} [options.maxSize=1000] - Maximum cache entries; when this is
 *   not greater than 0 nothing is stored and every call is computed afresh
 * @param {boolean} [options.enabled=true] - Whether caching is enabled
 * @returns {{tokenize: Function, getStats: Function, clear: Function, _cache: Map}}
 *   The caching tokenizer together with its inspection/maintenance helpers
 */
export function createTokenizationCache(options = {}) {
  const { maxSize = 1000, enabled = true } = options;

  // Map-based LRU cache: key is the content string, value is its token array
  const cache = new Map();

  /**
   * Tokenize with caching.
   *
   * The full content string is the cache key. A shortened key (length plus
   * the first/last 50 characters) is not safe: two different inputs with the
   * same length, prefix and suffix would share a key and the second one would
   * receive the first one's tokens.
   *
   * On a cache hit the stored array instance itself is returned, so callers
   * must treat the result as read-only.
   *
   * @param {string} content - Text content to tokenize
   * @returns {string[]} Array of tokens
   */
  function cachedTokenize(content) {
    // Non-string input is never cached; it is passed straight to tokenize
    if (!enabled || typeof content !== 'string') {
      return tokenize(content);
    }

    // Cache hit
    if (cache.has(content)) {
      const cached = cache.get(content);
      // LRU: re-insert so this entry becomes the most recently used
      cache.delete(content);
      cache.set(content, cached);
      return cached;
    }

    // Cache miss: compute
    const tokens = tokenize(content);

    // A capacity that is not positive (0, negative, NaN) means nothing may be
    // stored; without this guard the cache would end up exceeding its limit
    if (!(maxSize > 0)) {
      return tokens;
    }

    // Evict the least recently used entry if at capacity
    if (cache.size >= maxSize) {
      const oldestKey = cache.keys().next().value;
      cache.delete(oldestKey);
    }
    cache.set(content, tokens);
    return tokens;
  }

  /**
   * Get cache statistics.
   * @returns {Object} Current entry count, the configured maxSize and enabled
   *   flag, and hitRatio (always null: hits and misses are not tracked)
   */
  function getStats() {
    return {
      size: cache.size,
      maxSize,
      enabled,
      hitRatio: null // Would need hit/miss tracking for this
    };
  }

  /**
   * Remove every cached entry.
   */
  function clear() {
    cache.clear();
  }

  return {
    tokenize: cachedTokenize,
    getStats,
    clear,
    _cache: cache // For testing
  };
}
// Shared module-level cache instance, created with the default options
export const defaultTokenizationCache = createTokenizationCache();
// Convenience export: the shared instance's caching tokenize function
export const cachedTokenize = defaultTokenizationCache.tokenize;