import { createTokenizationCache, defaultTokenizationCache, cachedTokenize } from '../../../src/utils/tokenizationCache.mjs';
import { describe, test, expect, runTests } from '../core/_harness.mjs';
describe('tokenization-cache', () => {
test('creates cache with default options', () => {
const cache = createTokenizationCache();
const stats = cache.getStats();
expect(stats.maxSize).toBe(1000);
expect(stats.enabled).toBe(true);
expect(stats.size).toBe(0);
});
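// Sketch of the complementary case: options passed to the factory should be
// reflected in getStats() (maxSize and enabled are the only options these
// tests exercise).
test('creates cache with custom options', () => {
const cache = createTokenizationCache({ maxSize: 50, enabled: true });
const stats = cache.getStats();
expect(stats.maxSize).toBe(50);
expect(stats.enabled).toBe(true);
});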
test('caches tokenization results', () => {
const cache = createTokenizationCache({ maxSize: 3 });
const content = 'database connection timeout error';
// First call - cache miss
const tokens1 = cache.tokenize(content);
expect(Array.isArray(tokens1)).toBe(true);
expect(tokens1.length > 0).toBe(true);
expect(cache.getStats().size).toBe(1);
// Second call - cache hit (should return same reference)
const tokens2 = cache.tokenize(content);
expect(tokens2).toBe(tokens1); // Same reference
expect(cache.getStats().size).toBe(1);
});
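// Distinct contents should occupy separate entries backed by separate arrays
// (grounded in the size-tracking and same-reference checks above).
test('caches distinct contents under separate entries', () => {
const cache = createTokenizationCache();
const tokensA = cache.tokenize('first distinct content');
const tokensB = cache.tokenize('second distinct content');
expect(tokensA === tokensB).toBe(false); // different arrays
expect(cache.getStats().size).toBe(2);
});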
test('evicts oldest entries when at capacity', () => {
const cache = createTokenizationCache({ maxSize: 2 });
const content1 = 'first content to tokenize';
const content2 = 'second content to tokenize';
const content3 = 'third content to tokenize';
cache.tokenize(content1);
cache.tokenize(content2);
expect(cache.getStats().size).toBe(2);
// Adding third should evict first
cache.tokenize(content3);
expect(cache.getStats().size).toBe(2);
// First content was evicted, so tokenizing it again re-populates the cache
cache.tokenize(content1);
expect(cache.getStats().size).toBe(2); // Still 2, but with different entries
// Short contents are cached under their full text (see the key test below)
expect(cache._cache.has(content1)).toBe(true);
expect(cache._cache.has(content2)).toBe(false); // content2 was evicted in turn
});
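// Hedged follow-up: once evicted, an entry must be recomputed, so the
// returned array should be a fresh allocation rather than the original
// cached reference (assumes the underlying tokenizer builds a new array
// per call, as the same-reference check above implies for cache hits).
test('evicted entries are recomputed', () => {
const cache = createTokenizationCache({ maxSize: 1 });
const first = cache.tokenize('alpha content');
cache.tokenize('beta content'); // evicts 'alpha content'
const recomputed = cache.tokenize('alpha content');
expect(Array.isArray(recomputed)).toBe(true);
expect(recomputed === first).toBe(false); // fresh array, not the cached one
});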
test('handles disabled caching', () => {
const cache = createTokenizationCache({ enabled: false });
const content = 'test content for disabled cache';
const tokens = cache.tokenize(content);
expect(Array.isArray(tokens)).toBe(true); // Tokenization still works
expect(cache.getStats().size).toBe(0); // But nothing is cached
expect(cache.getStats().enabled).toBe(false);
});
test('clears cache', () => {
const cache = createTokenizationCache();
cache.tokenize('content 1');
cache.tokenize('content 2');
expect(cache.getStats().size).toBe(2);
cache.clear();
expect(cache.getStats().size).toBe(0);
});
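// A cleared cache should accept new entries again (grounded in the
// size-tracking behavior verified above).
test('cache is reusable after clear', () => {
const cache = createTokenizationCache();
cache.tokenize('content 1');
cache.clear();
cache.tokenize('content 1');
expect(cache.getStats().size).toBe(1);
});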
test('default instance works', () => {
// Clear any existing state
defaultTokenizationCache.clear();
const content = 'test default instance';
const tokens1 = cachedTokenize(content);
const tokens2 = cachedTokenize(content);
expect(Array.isArray(tokens1)).toBe(true);
expect(tokens2).toBe(tokens1); // Should be cached
});
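// Hedged: the clear() call above implies cachedTokenize delegates to
// defaultTokenizationCache, so a call through the helper should show up in
// the default instance's stats (assumes the default instance exposes
// getStats() like caches built by createTokenizationCache).
test('cachedTokenize populates the default instance', () => {
defaultTokenizationCache.clear();
cachedTokenize('shared default state');
expect(defaultTokenizationCache.getStats().size).toBe(1);
});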
test('generates consistent cache keys', () => {
const cache = createTokenizationCache();
// Short content uses full content as key
const shortContent = 'short';
cache.tokenize(shortContent);
expect(cache._cache.has(shortContent)).toBe(true);
// Long content uses length + prefix + suffix
const longContent = 'a'.repeat(200);
cache.tokenize(longContent);
// Key should be: "200:aaaa...aaaa:aaaa...aaaa"
const expectedKey = `200:${'a'.repeat(50)}:${'a'.repeat(50)}`;
expect(cache._cache.has(expectedKey)).toBe(true);
});
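// Illustrates the apparent tradeoff of the length + prefix + suffix scheme:
// two long contents sharing all three components collide on one key, so the
// cache holds a single entry for both. This assumes key identity is the only
// lookup criterion, which the key assertions above suggest.
test('long contents sharing length, prefix, and suffix share one key', () => {
const cache = createTokenizationCache();
const contentA = 'a'.repeat(50) + 'X'.repeat(100) + 'z'.repeat(50);
const contentB = 'a'.repeat(50) + 'Y'.repeat(100) + 'z'.repeat(50);
cache.tokenize(contentA);
cache.tokenize(contentB);
expect(cache._cache.has(`200:${'a'.repeat(50)}:${'z'.repeat(50)}`)).toBe(true);
expect(cache.getStats().size).toBe(1); // both map to the same key
});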
test('maintains LRU order', () => {
const cache = createTokenizationCache({ maxSize: 2 });
cache.tokenize('first');
cache.tokenize('second');
// Access first again to make it most recent
cache.tokenize('first');
// Add third - should evict second (least recently used)
cache.tokenize('third');
// First and third should be in cache
const keys = Array.from(cache._cache.keys());
expect(keys.includes('first')).toBe(true);
expect(keys.includes('third')).toBe(true);
expect(keys.includes('second')).toBe(false);
});
});
await runTests();