import { createWorker, type Worker } from 'tesseract.js';
import { Image, Region } from '@nut-tree-fork/nut-js';
import { imageToBase64 } from './image-utils.js';
import { logger } from './logger.js';
import { OCRError, TimeoutError } from './errors.js';
import { getOCRWorkerPool, initializeOCRWorkerPool, shutdownOCRWorkerPool, OCRTaskPriority } from './core/ocr-worker-pool.js';
// OCR configuration
/**
 * Tunable settings for OCR recognition, result caching, and fuzzy text matching.
 */
export interface OCRConfig {
  /** Word confidence (0-100) a recognized word must exceed to be kept. */
  minConfidence: number;
  /** Similarity threshold (0-1) for standard fuzzy text matching. */
  fuzzyMatchThreshold: number;
  /** Lower similarity threshold (0-1) used as a fallback when the standard threshold finds nothing. */
  relaxedFuzzyThreshold: number;
  /** Whether OCR results are cached by image hash. */
  cacheEnabled: boolean;
  /** Cache entry time-to-live, in milliseconds. */
  cacheTTL: number;
  /** Maximum number of cached OCR results before the oldest entries are evicted. */
  maxCacheSize: number;
  /** Timeout for a single OCR recognition, in milliseconds. */
  timeoutMs: number;
}
/** Baseline OCR configuration; restored by resetOCRConfig(). */
const DEFAULT_OCR_CONFIG: OCRConfig = {
  minConfidence: 50,
  fuzzyMatchThreshold: 0.7,
  relaxedFuzzyThreshold: 0.5,
  cacheEnabled: true,
  cacheTTL: 30000, // 30 seconds
  maxCacheSize: 100,
  timeoutMs: 30000 // 30 seconds
};
// Active configuration; mutated by configureOCR() and resetOCRConfig().
let currentConfig: OCRConfig = { ...DEFAULT_OCR_CONFIG };
// OCR result cache
/** A cached OCR recognition result, keyed by a hash of the image's base64 data. */
interface OCRCacheEntry {
  // Epoch millis when the entry was stored; compared against cacheTTL for expiry.
  timestamp: number;
  // Raw tesseract.js recognition result (consumers read result.data).
  result: any;
  // Hash of the source image's base64 string (see calculateImageHash).
  hash: string;
}
/** In-memory OCR result cache; keys have the form `ocr_<hash>`. */
const ocrCache = new Map<string, OCRCacheEntry>();
/**
 * Produce a short base-36 hash of a base64 string, used as an OCR cache key.
 * Not cryptographic — collisions are possible but acceptable for caching.
 */
function calculateImageHash(base64: string): string {
  let acc = 0;
  for (let idx = 0; idx < base64.length; idx++) {
    // 31 * acc + charCode, truncated to a signed 32-bit integer each step
    // (Math.imul is exactly (acc << 5) - acc modulo 2^32).
    acc = (Math.imul(31, acc) + base64.charCodeAt(idx)) | 0;
  }
  return acc.toString(36);
}
/**
 * Merge partial settings into the active OCR configuration.
 * Unspecified fields keep their current values.
 */
export function configureOCR(config: Partial<OCRConfig>): void {
  currentConfig = Object.assign({}, currentConfig, config);
  logger.info('OCR configuration updated', currentConfig);
}
/**
 * Return a shallow copy of the active OCR configuration,
 * so callers cannot mutate module state through the result.
 */
export function getOCRConfig(): OCRConfig {
  return Object.assign({}, currentConfig);
}
/**
 * Restore the active OCR configuration to its built-in defaults.
 */
export function resetOCRConfig(): void {
  currentConfig = Object.assign({}, DEFAULT_OCR_CONFIG);
  logger.info('OCR configuration reset to defaults');
}
/**
 * Evict stale cache entries, then trim the cache back to its size budget
 * by dropping the oldest entries first. No-op when caching is disabled.
 */
function cleanCache(): void {
  if (!currentConfig.cacheEnabled) {
    return;
  }
  // Anything stored before this instant has outlived its TTL.
  const cutoff = Date.now() - currentConfig.cacheTTL;
  for (const [key, entry] of ocrCache) {
    if (entry.timestamp < cutoff) {
      ocrCache.delete(key);
    }
  }
  // Still over budget? Evict the oldest surviving entries.
  const excess = ocrCache.size - currentConfig.maxCacheSize;
  if (excess > 0) {
    const oldestFirst = [...ocrCache.entries()].sort(
      (a, b) => a[1].timestamp - b[1].timestamp
    );
    for (const [key] of oldestFirst.slice(0, excess)) {
      ocrCache.delete(key);
    }
  }
}
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions, and substitutions needed to turn
 * `str1` into `str2`.
 *
 * Improvement over the previous revision: uses the classic two-row dynamic
 * programming formulation, allocating O(|str1|) memory per row instead of the
 * full (|str1|+1) x (|str2|+1) matrix, with properly typed number arrays.
 *
 * @param str1 First string (case-sensitive here; callers lowercase first).
 * @param str2 Second string.
 * @returns The edit distance (0 when the strings are equal).
 */
function levenshteinDistance(str1: string, str2: string): number {
  // prev[i] = distance between str1[0..i) and the prefix of str2 processed so far.
  let prev: number[] = Array.from({ length: str1.length + 1 }, (_, i) => i);
  for (let j = 1; j <= str2.length; j++) {
    const curr: number[] = [j]; // distance from empty prefix of str1
    for (let i = 1; i <= str1.length; i++) {
      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      curr[i] = Math.min(
        curr[i - 1] + 1,      // deletion
        prev[i] + 1,          // insertion
        prev[i - 1] + cost    // substitution (or match when cost is 0)
      );
    }
    prev = curr;
  }
  return prev[str1.length];
}
/**
 * Case-insensitive similarity between two strings, normalized to [0, 1]
 * (1.0 means an exact match; two empty strings count as identical).
 */
function calculateSimilarity(str1: string, str2: string): number {
  const longest = Math.max(str1.length, str2.length);
  if (longest === 0) {
    return 1.0;
  }
  const edits = levenshteinDistance(str1.toLowerCase(), str2.toLowerCase());
  return (longest - edits) / longest;
}
/**
 * Decide whether OCR'd text matches a search term, using (in order):
 * whole-text substring containment, per-word containment either way,
 * per-word fuzzy similarity, and finally whole-text fuzzy similarity.
 *
 * NOTE(review): `needle.includes(word)` means any short word contained in
 * the search term matches (e.g. word "a" vs search "cat") — confirm this
 * permissiveness is intended.
 *
 * @param threshold Optional similarity cutoff; defaults to the configured
 *                  fuzzyMatchThreshold.
 */
function isTextMatch(text: string, searchText: string, threshold?: number): boolean {
  const haystack = text.toLowerCase().trim();
  const needle = searchText.toLowerCase().trim();
  const limit = threshold ?? currentConfig.fuzzyMatchThreshold;
  // Direct substring containment is always a match.
  if (haystack.includes(needle)) {
    return true;
  }
  // Per-word check: containment in either direction, then fuzzy similarity.
  const wordMatches = (word: string): boolean =>
    word.includes(needle) ||
    needle.includes(word) ||
    calculateSimilarity(word, needle) >= limit;
  if (haystack.split(/\s+/).some(wordMatches)) {
    return true;
  }
  // Last resort: fuzzy-compare the entire normalized text.
  return calculateSimilarity(haystack, needle) >= limit;
}
// Legacy single worker for backward compatibility
// Lazily created by initializeLegacyWorker(); terminated by terminateOCR().
let legacyWorker: Worker | null = null;
// True when the pooled backend is in use; flipped off if pool init fails.
let useWorkerPool = true;
// Guards initializeOCR() against re-initialization; reset by terminateOCR().
let isInitialized = false;
/**
 * Initialize the OCR backend. By default the worker pool is used; pass
 * `useLegacy = true` to force the single tesseract.js worker. If the pool
 * fails to start, this silently falls back to the legacy worker.
 *
 * Idempotent: once initialized, later calls return immediately until
 * terminateOCR() resets the state.
 */
export async function initializeOCR(useLegacy = false): Promise<void> {
  if (isInitialized) {
    return;
  }
  useWorkerPool = !useLegacy;
  if (!useWorkerPool) {
    await initializeLegacyWorker();
  } else {
    try {
      await initializeOCRWorkerPool();
      logger.info('OCR initialized with worker pool');
    } catch (error) {
      // Pool startup failed — degrade to the single-worker backend.
      logger.warn('Failed to initialize worker pool, falling back to legacy worker', error as Error);
      useWorkerPool = false;
      await initializeLegacyWorker();
    }
  }
  isInitialized = true;
}
/**
 * Lazily create the legacy single tesseract.js worker (English model).
 * Does nothing if the worker already exists.
 */
async function initializeLegacyWorker(): Promise<void> {
  if (legacyWorker) {
    return;
  }
  legacyWorker = await createWorker('eng');
  logger.info('OCR initialized with legacy single worker');
}
/**
 * Release all OCR resources (worker pool or legacy worker) and mark the
 * module as uninitialized so a later initializeOCR() starts fresh.
 */
export async function terminateOCR(): Promise<void> {
  if (!useWorkerPool) {
    if (legacyWorker) {
      await legacyWorker.terminate();
      legacyWorker = null;
    }
  } else {
    await shutdownOCRWorkerPool();
  }
  isInitialized = false;
}
/**
 * Run OCR on a base64-encoded image, consulting the result cache first.
 *
 * Fixes over the previous revision:
 * - The legacy-path timeout timer is now cleared once recognition settles;
 *   previously the pending setTimeout leaked after every successful
 *   recognition (keeping the event loop alive and firing a late rejection).
 * - The image hash is computed once instead of twice (lookup + store).
 * - The legacy worker is checked explicitly instead of via a non-null
 *   assertion, giving a clear error if initialization was skipped.
 *
 * @param base64   Base64-encoded image data to recognize.
 * @param priority Scheduling priority when the worker pool is in use.
 * @returns The raw tesseract.js recognition result (result.data is validated).
 * @throws OCRError on timeout, invalid result structure, or recognition failure.
 */
async function performOCRWithCache(base64: string, priority = OCRTaskPriority.NORMAL): Promise<any> {
  // Evict expired/overflow entries before every recognition attempt.
  cleanCache();
  // Compute the cache key once; reused for both the lookup and the store below.
  const hash = currentConfig.cacheEnabled ? calculateImageHash(base64) : null;
  const cacheKey = hash !== null ? `ocr_${hash}` : null;
  if (cacheKey !== null) {
    const cachedEntry = ocrCache.get(cacheKey);
    if (cachedEntry && (Date.now() - cachedEntry.timestamp < currentConfig.cacheTTL)) {
      logger.debug('OCR cache hit', { hash });
      return cachedEntry.result;
    }
  }
  logger.debug('OCR cache miss, performing recognition');
  // Timer handle for the legacy timeout race; always cleared in `finally`.
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  try {
    let result: any;
    if (useWorkerPool) {
      // Use worker pool for concurrent OCR operations (pool enforces timeout).
      const workerPool = getOCRWorkerPool();
      result = await workerPool.recognize(base64, priority, currentConfig.timeoutMs);
    } else {
      // Legacy single-worker implementation with a manual timeout race.
      if (!legacyWorker) {
        throw new OCRError('OCR legacy worker is not initialized');
      }
      const timeoutPromise = new Promise<never>((_, reject) => {
        timeoutHandle = setTimeout(() => {
          reject(new TimeoutError('OCR recognition', currentConfig.timeoutMs));
        }, currentConfig.timeoutMs);
      });
      result = await Promise.race([
        legacyWorker.recognize(base64),
        timeoutPromise
      ]);
    }
    // Validate result structure before caching or returning.
    if (!result?.data) {
      throw new OCRError('Invalid OCR result structure');
    }
    // Cache the result if caching is enabled.
    if (cacheKey !== null && hash !== null) {
      ocrCache.set(cacheKey, {
        timestamp: Date.now(),
        result,
        hash
      });
    }
    return result;
  } catch (error) {
    if (error instanceof TimeoutError) {
      throw new OCRError(`OCR recognition timed out after ${currentConfig.timeoutMs}ms`);
    }
    if (error instanceof OCRError) {
      throw error;
    }
    throw new OCRError(`OCR recognition failed: ${error instanceof Error ? error.message : String(error)}`);
  } finally {
    // Cancel the pending timeout so it cannot fire after recognition settles.
    if (timeoutHandle !== undefined) {
      clearTimeout(timeoutHandle);
    }
  }
}
/**
 * Extract all text from an image via OCR, returning the trimmed result.
 *
 * NOTE(review): the `region` parameter is accepted but never used in this
 * body — OCR always runs on the full image. Confirm caller expectations.
 *
 * @throws OCRError / TimeoutError on conversion or recognition failure.
 */
export async function extractTextFromImage(image: Image, region?: Region, priority = OCRTaskPriority.NORMAL): Promise<string> {
  try {
    await initializeOCR();
    logger.debug('Converting image to base64 for OCR...');
    const base64 = await imageToBase64(image);
    if (!base64) {
      throw new OCRError('Failed to convert image to base64');
    }
    logger.debug('Performing OCR text extraction...');
    const ocrResult = await performOCRWithCache(base64, priority);
    const extracted: string = ocrResult.data.text;
    logger.debug('OCR text extraction completed', { textLength: extracted.length });
    return extracted.trim();
  } catch (error) {
    // Known OCR/timeout errors pass through; anything else is wrapped.
    if (error instanceof TimeoutError || error instanceof OCRError) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    throw new OCRError(`Text extraction failed: ${reason}`);
  }
}
/**
 * Check whether the given text appears in an image. Delegates to
 * getTextLocations so fuzzy-matching behavior stays consistent.
 * Returns false (never throws) if the search fails.
 */
export async function findTextInImage(image: Image, searchText: string, priority = OCRTaskPriority.NORMAL): Promise<boolean> {
  try {
    logger.debug('Searching for text in image', { searchText });
    // Use the enhanced getTextLocations for consistent results with fuzzy matching
    const matches = await getTextLocations(image, searchText, undefined, priority);
    const found = matches.length > 0;
    logger.debug('Text search completed', {
      searchText,
      found,
      matchCount: matches.length,
      bestMatch: found ? matches[0].text : null
    });
    return found;
  } catch (error) {
    // Best-effort API: report "not found" rather than propagating errors.
    logger.error('Error during text search', error as Error, { searchText });
    return false;
  }
}
/** A recognized word with its bounding box and OCR confidence. */
export interface TextLocation {
  /** The recognized (trimmed) word text. */
  text: string;
  /** Left edge of the word's bounding box (tesseract bbox.x0). */
  x: number;
  /** Top edge of the word's bounding box (tesseract bbox.y0). */
  y: number;
  /** Box width (bbox.x1 - bbox.x0). */
  width: number;
  /** Box height (bbox.y1 - bbox.y0). */
  height: number;
  /** Tesseract word confidence (0-100). */
  confidence: number;
}
/**
 * Run OCR on an image and return word-level text locations.
 *
 * Without `searchText`, all words above the confidence cutoff are returned
 * sorted by confidence. With `searchText`, words are fuzzy-filtered (falling
 * back to the relaxed threshold when nothing matches) and sorted by
 * similarity first, confidence second.
 *
 * NOTE(review): the `region` parameter is accepted but never used in this
 * body — OCR always runs on the full image. Confirm whether callers expect
 * region cropping.
 *
 * @throws OCRError / TimeoutError on conversion or recognition failure.
 */
export async function getTextLocations(image: Image, searchText?: string, region?: Region, priority = OCRTaskPriority.NORMAL): Promise<TextLocation[]> {
  try {
    await initializeOCR();
    logger.debug('Converting image for text location detection...', { hasSearchText: !!searchText });
    const base64 = await imageToBase64(image);
    if (!base64) {
      throw new OCRError('Failed to convert image to base64');
    }
    logger.debug('Detecting text locations...');
    const result = await performOCRWithCache(base64, priority);
    const data = result.data;
    const allLocations: TextLocation[] = [];
    // Extract all text locations with confidence filtering
    // (strictly greater than minConfidence — equal-confidence words are dropped).
    const minConfidence = currentConfig.minConfidence;
    if ('words' in data && Array.isArray(data.words)) {
      for (const word of data.words as any[]) {
        if (word.confidence > minConfidence && word.text && word.text.trim().length > 0) {
          allLocations.push({
            text: word.text.trim(),
            x: word.bbox.x0,
            y: word.bbox.y0,
            width: word.bbox.x1 - word.bbox.x0,
            height: word.bbox.y1 - word.bbox.y0,
            confidence: word.confidence
          });
        }
      }
    }
    let filteredLocations = allLocations;
    // Filter by search text if provided
    if (searchText && searchText.trim().length > 0) {
      const searchTerm = searchText.trim();
      logger.debug('Filtering locations by search text', { searchTerm, totalLocations: allLocations.length });
      filteredLocations = allLocations.filter(location => {
        const matches = isTextMatch(location.text, searchTerm, currentConfig.fuzzyMatchThreshold);
        if (matches) {
          logger.debug('Text match found', {
            locationText: location.text,
            searchTerm,
            confidence: location.confidence
          });
        }
        return matches;
      });
      // If no fuzzy matches found, try again with the relaxed threshold
      if (filteredLocations.length === 0) {
        logger.debug('No matches with standard threshold, trying relaxed matching');
        filteredLocations = allLocations.filter(location =>
          isTextMatch(location.text, searchTerm, currentConfig.relaxedFuzzyThreshold)
        );
      }
      // Sort by similarity (descending), breaking near-ties by confidence
      filteredLocations.sort((a, b) => {
        const similarityA = calculateSimilarity(a.text, searchTerm);
        const similarityB = calculateSimilarity(b.text, searchTerm);
        // Prioritize by similarity first, then confidence
        // (a similarity gap of <= 0.1 is treated as a tie).
        if (Math.abs(similarityA - similarityB) > 0.1) {
          return similarityB - similarityA;
        }
        return b.confidence - a.confidence;
      });
    } else {
      // No search text, return all locations sorted by confidence
      filteredLocations.sort((a, b) => b.confidence - a.confidence);
    }
    logger.debug('Text location detection completed', {
      totalLocations: allLocations.length,
      filteredLocations: filteredLocations.length,
      searchText: searchText || 'none'
    });
    return filteredLocations;
  } catch (error) {
    // Known OCR/timeout errors pass through; anything else is wrapped.
    if (error instanceof TimeoutError || error instanceof OCRError) {
      throw error;
    }
    throw new OCRError(`Failed to get text locations: ${error instanceof Error ? error.message : String(error)}`);
  }
}
/**
* Get OCR worker pool metrics (only available when using worker pool)
*/
export function getOCRMetrics() {
if (!useWorkerPool) {
return null;
}
const workerPool = getOCRWorkerPool();
return workerPool.getMetrics();
}
/**
* Get detailed worker states (only available when using worker pool)
*/
export function getOCRWorkerStates() {
if (!useWorkerPool) {
return null;
}
const workerPool = getOCRWorkerPool();
return workerPool.getWorkerStates();
}
/**
 * Check if OCR is using worker pool
 * @returns true when the pooled backend is active, false for the legacy single worker.
 */
export function isUsingWorkerPool(): boolean {
  return useWorkerPool;
}
/**
 * Extract text from multiple images. With the worker pool the images are
 * processed concurrently; with the legacy worker they run one at a time.
 * Result order matches input order in both cases.
 */
export async function extractTextFromImages(
  images: Image[],
  priority = OCRTaskPriority.NORMAL
): Promise<string[]> {
  if (useWorkerPool) {
    // Concurrent processing with worker pool.
    return Promise.all(images.map(img => extractTextFromImage(img, undefined, priority)));
  }
  // Sequential processing for the legacy single worker.
  const texts: string[] = [];
  for (const img of images) {
    texts.push(await extractTextFromImage(img, undefined, priority));
  }
  return texts;
}
/**
 * Get word-level text locations for multiple images. With the worker pool
 * the images are processed concurrently; with the legacy worker they run
 * sequentially. Result order matches input order in both cases.
 */
export async function getTextLocationsFromImages(
  images: Image[],
  priority = OCRTaskPriority.NORMAL
): Promise<TextLocation[][]> {
  if (useWorkerPool) {
    // Concurrent processing with worker pool.
    return Promise.all(images.map(img => getTextLocations(img, undefined, undefined, priority)));
  }
  // Sequential processing for the legacy single worker.
  const perImage: TextLocation[][] = [];
  for (const img of images) {
    perImage.push(await getTextLocations(img, undefined, undefined, priority));
  }
  return perImage;
}
/**
 * Enhanced text search with per-call overrides for the fuzzy-match threshold
 * and minimum confidence. Returns matching locations, or [] on any failure.
 *
 * NOTE(review): this temporarily mutates the module-level `currentConfig`
 * and restores a snapshot in `finally`. Concurrent calls — or a
 * configureOCR() issued while this awaits — can observe or lose each
 * other's settings. Confirm callers never run this concurrently, or thread
 * the overrides through getTextLocations instead of mutating shared state.
 */
export async function findTextWithOptions(
  image: Image,
  searchText: string,
  options: {
    region?: Region;
    priority?: OCRTaskPriority;
    fuzzyThreshold?: number;
    minConfidence?: number;
  } = {}
): Promise<TextLocation[]> {
  const {
    region,
    priority = OCRTaskPriority.NORMAL,
    // Effective confidence floor used for the post-filter below.
    minConfidence = currentConfig.minConfidence
  } = options;
  // Temporarily override config if options are provided
  const originalConfig = { ...currentConfig };
  if (options.fuzzyThreshold !== undefined) {
    currentConfig.fuzzyMatchThreshold = options.fuzzyThreshold;
  }
  if (options.minConfidence !== undefined) {
    currentConfig.minConfidence = options.minConfidence;
  }
  try {
    const locations = await getTextLocations(image, searchText, region, priority);
    // Additional filtering by confidence if specified
    return locations.filter(loc => loc.confidence >= minConfidence);
  } catch (error) {
    // Best-effort API: log and report "no matches" rather than throwing.
    logger.error('Error in enhanced text search', error as Error, { searchText, options });
    return [];
  } finally {
    // Restore original config (see concurrency NOTE above).
    currentConfig = originalConfig;
  }
}
/**
 * Snapshot of OCR cache health for monitoring.
 * Note: `cacheHitRate` is actually the fraction of entries still within TTL,
 * not a true hit/miss ratio — the key name is preserved for caller
 * compatibility.
 */
export function getOCRCacheStats() {
  const now = Date.now();
  let validEntries = 0;
  let expiredEntries = 0;
  for (const entry of ocrCache.values()) {
    if (now - entry.timestamp < currentConfig.cacheTTL) {
      validEntries += 1;
    } else {
      expiredEntries += 1;
    }
  }
  return {
    totalEntries: ocrCache.size,
    validEntries,
    expiredEntries,
    cacheHitRate: validEntries / Math.max(1, ocrCache.size),
    cacheEnabled: currentConfig.cacheEnabled,
    cacheTTL: currentConfig.cacheTTL,
    maxCacheSize: currentConfig.maxCacheSize
  };
}
/**
 * Clear OCR cache manually
 * Drops every cached recognition result immediately, regardless of TTL.
 */
export function clearOCRCache(): void {
  ocrCache.clear();
  logger.info('OCR cache cleared');
}
// Re-export worker pool types and enums for convenience
export { OCRTaskPriority, type OCRWorkerPoolConfig, type PoolMetrics, type WorkerState } from './core/ocr-worker-pool.js';