import { logger } from './logger.js';
import { createHash } from 'crypto';
/**
* A single cached OCR result together with the bookkeeping the cache
* uses for expiry, eviction and statistics.
*/
interface OCRCacheEntry {
/** Cache key of this entry, as produced by OCRCache.generateImageHash. */
imageHash: string;
/** Recognized text stored for the image. */
text: string;
/** Text location records as supplied by the caller; element shape is opaque to the cache. */
textLocations: any[];
/** Date.now() value captured when the entry was stored; compared against maxAge for expiry. */
timestamp: number;
/** Starts at 1 when stored and is incremented on every cache hit. */
accessCount: number;
/** Date.now() value of the most recent hit (initially the time the entry was stored). */
lastAccessed: number;
/** Caller-supplied size label; stored as 'unknown' when the caller provides none. */
imageSize: string;
}
/**
 * Memory-efficient OCR result cache with automatic cleanup.
 *
 * Entries are keyed by a hash of the image data plus the optional region.
 * An entry expires `maxAge` milliseconds after it was stored, and when the
 * cache is full the least recently used entry is evicted to make room.
 * A background timer removes expired entries once a minute; the timer is
 * unref'd so it never keeps the Node.js process alive on its own.
 */
export class OCRCache {
  /**
   * Entries keyed by image hash. The Map is kept in recency order:
   * the first key is the least recently used, the last key the most recent.
   */
  private readonly cache = new Map<string, OCRCacheEntry>();
  private readonly maxEntries: number;
  private readonly maxAge: number;
  private cleanupInterval: NodeJS.Timeout | null = null;
  private hitCount = 0;
  private missCount = 0;
  private evictionCount = 0;

  /**
   * @param maxEntries Maximum number of entries kept before LRU eviction kicks in.
   * @param maxAge Lifetime of an entry in milliseconds (default: 5 minutes).
   */
  constructor(maxEntries = 100, maxAge = 300000) {
    this.maxEntries = maxEntries;
    this.maxAge = maxAge;
    this.startCleanup();
  }

  /**
   * Generate the cache key for an image/region pair.
   *
   * The complete image payload is hashed. Hashing only a sample of the data
   * would let two different images that share the sampled parts collide and
   * return each other's OCR text.
   *
   * @returns The first 16 hex characters of the SHA-256 digest.
   */
  private generateImageHash(imageData: string, region?: any): string {
    const regionKey = region
      ? `${region.x},${region.y},${region.width},${region.height}`
      : 'full';
    return createHash('sha256')
      .update(imageData)
      .update('|') // delimiter keeps image data and region key from running together
      .update(regionKey)
      .digest('hex')
      .substring(0, 16);
  }

  /**
   * Shared lookup used by the public getters.
   *
   * Updates hit/miss/eviction counters, removes the entry if it has expired,
   * and on a hit bumps the access statistics and marks the entry as most
   * recently used.
   *
   * @returns The live entry, or null on a miss or an expired entry.
   */
  private lookup(imageData: string, region?: any): OCRCacheEntry | null {
    const hash = this.generateImageHash(imageData, region);
    const entry = this.cache.get(hash);
    if (!entry) {
      this.missCount++;
      return null;
    }

    const now = Date.now();
    if (now - entry.timestamp > this.maxAge) {
      this.cache.delete(hash);
      this.evictionCount++;
      this.missCount++;
      return null;
    }

    entry.accessCount++;
    entry.lastAccessed = now;
    // Re-insert so the entry moves to the most-recently-used end of the Map.
    this.cache.delete(hash);
    this.cache.set(hash, entry);
    this.hitCount++;
    return entry;
  }

  /**
   * Get cached OCR text.
   *
   * @returns The cached text, or null if nothing is cached or the entry expired.
   */
  getCachedText(imageData: string, region?: any): string | null {
    const entry = this.lookup(imageData, region);
    if (!entry) {
      return null;
    }
    logger.debug('OCR cache hit', {
      hash: entry.imageHash.substring(0, 8),
      textLength: entry.text.length,
      accessCount: entry.accessCount,
      imageSize: entry.imageSize
    });
    return entry.text;
  }

  /**
   * Get cached text locations.
   *
   * The returned array is the cached instance itself, not a copy.
   *
   * @returns The cached locations, or null if nothing is cached or the entry expired.
   */
  getCachedTextLocations(imageData: string, region?: any): any[] | null {
    const entry = this.lookup(imageData, region);
    return entry ? entry.textLocations : null;
  }

  /**
   * Store OCR results for an image/region pair.
   *
   * Storing under a key that is already present replaces that entry without
   * evicting anything else. Otherwise, when the cache is full, the least
   * recently used entry is evicted first.
   */
  cacheResults(imageData: string, text: string, textLocations: any[], region?: any, imageSize?: string): void {
    const hash = this.generateImageHash(imageData, region);

    if (this.cache.has(hash)) {
      // Drop the stale entry so the replacement lands at the most-recent end.
      this.cache.delete(hash);
    } else if (this.cache.size >= this.maxEntries) {
      this.evictLeastRecentlyUsed();
    }

    const now = Date.now();
    const entry: OCRCacheEntry = {
      imageHash: hash,
      text,
      textLocations: textLocations ?? [],
      timestamp: now,
      accessCount: 1,
      lastAccessed: now,
      imageSize: imageSize || 'unknown'
    };
    this.cache.set(hash, entry);

    logger.debug('OCR results cached', {
      hash: hash.substring(0, 8),
      textLength: text.length,
      locationsCount: textLocations?.length ?? 0,
      cacheSize: this.cache.size,
      imageSize
    });
  }

  /**
   * Evict the least recently used entry.
   *
   * Relies on the Map being kept in recency order, so the first key is the
   * eviction candidate. This does not depend on timestamp resolution.
   */
  private evictLeastRecentlyUsed(): void {
    for (const [key, entry] of this.cache) {
      this.cache.delete(key);
      this.evictionCount++;
      logger.debug('Evicted LRU cache entry', {
        hash: entry.imageHash.substring(0, 8),
        age: Date.now() - entry.timestamp,
        accessCount: entry.accessCount
      });
      return; // only the first (least recently used) entry is removed
    }
  }

  /**
   * Start the periodic cleanup timer (runs once a minute).
   */
  private startCleanup(): void {
    this.cleanupInterval = setInterval(() => {
      this.cleanup();
    }, 60000);
    // Housekeeping must not keep the process alive when nothing else is pending.
    this.cleanupInterval.unref();
  }

  /**
   * Remove every entry older than `maxAge`.
   */
  private cleanup(): void {
    const now = Date.now();
    const initialSize = this.cache.size;
    let cleanedCount = 0;

    // Deleting the current key while iterating a Map is safe in JavaScript.
    for (const [key, entry] of this.cache.entries()) {
      if (now - entry.timestamp > this.maxAge) {
        this.cache.delete(key);
        this.evictionCount++;
        cleanedCount++;
      }
    }

    if (cleanedCount > 0) {
      logger.debug('OCR cache cleanup completed', {
        removedEntries: cleanedCount,
        remainingEntries: this.cache.size,
        previousSize: initialSize
      });
    }

    // Force garbage collection if available (node --expose-gc) after a large cleanup.
    if (cleanedCount > 10 && global.gc) {
      global.gc();
    }
  }

  /**
   * Get cache statistics.
   *
   * @returns Current size, hit/miss/eviction counters, the hit rate
   * (0 when no requests were made) and the configured limits.
   */
  getStats(): {
    size: number;
    hitCount: number;
    missCount: number;
    hitRate: number;
    evictionCount: number;
    totalRequests: number;
    maxEntries: number;
    maxAge: number;
  } {
    const totalRequests = this.hitCount + this.missCount;
    const hitRate = totalRequests > 0 ? this.hitCount / totalRequests : 0;
    return {
      size: this.cache.size,
      hitCount: this.hitCount,
      missCount: this.missCount,
      hitRate,
      evictionCount: this.evictionCount,
      totalRequests,
      maxEntries: this.maxEntries,
      maxAge: this.maxAge
    };
  }

  /**
   * Remove all entries and reset the hit/miss/eviction counters.
   */
  clear(): void {
    const size = this.cache.size;
    this.cache.clear();
    this.hitCount = 0;
    this.missCount = 0;
    this.evictionCount = 0;
    logger.info('OCR cache cleared', { clearedEntries: size });
  }

  /**
   * Stop the cleanup timer and clear the cache.
   */
  shutdown(): void {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval);
      this.cleanupInterval = null;
    }
    this.clear();
    logger.info('OCR cache shut down');
  }

  /**
   * Estimate the memory held by cached entries, in bytes.
   *
   * Counts two bytes per character of the text and of the JSON-serialized
   * locations, plus a flat 200 bytes of overhead per entry.
   */
  getMemoryUsage(): number {
    let totalSize = 0;
    for (const entry of this.cache.values()) {
      totalSize += entry.text.length * 2;
      totalSize += JSON.stringify(entry.textLocations).length * 2;
      totalSize += 200;
    }
    return totalSize;
  }

  /**
   * Remove low-value entries when the cache is more than 75% full.
   *
   * Drops the 25% of entries (rounded down) with the lowest access counts.
   * Does nothing when the cache is at or below 75% of `maxEntries`.
   */
  optimize(): void {
    if (this.cache.size <= this.maxEntries * 0.75) {
      return;
    }

    const removeCount = Math.floor(this.cache.size * 0.25);
    // Ascending by access count: the least-used entries come first.
    const byAccessCount = Array.from(this.cache.entries()).sort(
      ([, a], [, b]) => a.accessCount - b.accessCount
    );
    for (const [key] of byAccessCount.slice(0, removeCount)) {
      this.cache.delete(key);
      this.evictionCount++;
    }

    logger.info('OCR cache optimized', {
      removedEntries: removeCount,
      remainingEntries: this.cache.size
    });
  }
}
// Global cache instance, created lazily: null until getOCRCache() is first
// called, and reset to null again by shutdownOCRCache().
let globalOCRCache: OCRCache | null = null;
/**
 * Return the shared OCR cache, creating it with default settings the first
 * time it is requested (or the first time after a shutdown).
 */
export function getOCRCache(): OCRCache {
  if (globalOCRCache) {
    return globalOCRCache;
  }

  globalOCRCache = new OCRCache();
  logger.info('Global OCR cache initialized');
  return globalOCRCache;
}
/**
 * Shut down the shared OCR cache, if one exists, and forget the instance so
 * the next getOCRCache() call builds a fresh one.
 */
export function shutdownOCRCache(): void {
  if (!globalOCRCache) {
    return;
  }

  globalOCRCache.shutdown();
  globalOCRCache = null;
}