import { createWorker, type Worker } from 'tesseract.js';
import { Image, Region } from '@nut-tree-fork/nut-js';
import { imageToBase64 } from './image-utils.js';
import { logger } from './logger.js';
import { OCRError, TimeoutError } from './errors.js';
import { getOCRWorkerPool, initializeOCRWorkerPool, shutdownOCRWorkerPool, OCRTaskPriority } from './core/ocr-worker-pool.js';
// OCR configuration
/**
 * Tunable settings for OCR recognition, result caching, and fuzzy text matching.
 */
export interface OCRConfig {
  /** Word confidence (0-100) a recognized word must exceed to be kept. */
  minConfidence: number;
  /** Similarity threshold (0-1) for standard fuzzy text matching. */
  fuzzyMatchThreshold: number;
  /** Lower similarity threshold (0-1) used as a fallback when the standard threshold finds nothing. */
  relaxedFuzzyThreshold: number;
  /** Whether OCR results are cached by image hash. */
  cacheEnabled: boolean;
  /** Cache entry time-to-live, in milliseconds. */
  cacheTTL: number;
  /** Maximum number of cached OCR results before the oldest entries are evicted. */
  maxCacheSize: number;
  /** Timeout for a single OCR recognition, in milliseconds. */
  timeoutMs: number;
}
/** Baseline OCR configuration; restored by resetOCRConfig(). */
const DEFAULT_OCR_CONFIG: OCRConfig = {
  minConfidence: 50,
  fuzzyMatchThreshold: 0.7,
  relaxedFuzzyThreshold: 0.5,
  cacheEnabled: true,
  cacheTTL: 30000, // 30 seconds
  maxCacheSize: 100,
  timeoutMs: 30000 // 30 seconds
};
// Active configuration; mutated by configureOCR() and resetOCRConfig().
let currentConfig: OCRConfig = { ...DEFAULT_OCR_CONFIG };
// OCR result cache
/** A cached OCR recognition result, keyed by a hash of the image's base64 data. */
interface OCRCacheEntry {
  // Epoch millis when the entry was stored; compared against cacheTTL for expiry.
  timestamp: number;
  // Raw tesseract.js recognition result (consumers read result.data).
  result: any;
  // Hash of the source image's base64 string (see calculateImageHash).
  hash: string;
}
/** In-memory OCR result cache; keys have the form `ocr_<hash>`. */
const ocrCache = new Map<string, OCRCacheEntry>();
/**
 * Produce a short base-36 hash of a base64 string, used as an OCR cache key.
 * Not cryptographic — collisions are possible but acceptable for caching.
 */
function calculateImageHash(base64: string): string {
  let acc = 0;
  for (let idx = 0; idx < base64.length; idx++) {
    // 31 * acc + charCode, truncated to a signed 32-bit integer each step
    // (Math.imul is exactly (acc << 5) - acc modulo 2^32).
    acc = (Math.imul(31, acc) + base64.charCodeAt(idx)) | 0;
  }
  return acc.toString(36);
}
/**
 * Merge partial settings into the active OCR configuration.
 * Unspecified fields keep their current values.
 */
export function configureOCR(config: Partial<OCRConfig>): void {
  currentConfig = Object.assign({}, currentConfig, config);
  logger.info('OCR configuration updated', currentConfig);
}
/**
 * Return a shallow copy of the active OCR configuration,
 * so callers cannot mutate module state through the result.
 */
export function getOCRConfig(): OCRConfig {
  return Object.assign({}, currentConfig);
}
/**
 * Restore the active OCR configuration to its built-in defaults.
 */
export function resetOCRConfig(): void {
  currentConfig = Object.assign({}, DEFAULT_OCR_CONFIG);
  logger.info('OCR configuration reset to defaults');
}
/**
 * Evict stale cache entries, then trim the cache back to its size budget
 * by dropping the oldest entries first. No-op when caching is disabled.
 */
function cleanCache(): void {
  if (!currentConfig.cacheEnabled) {
    return;
  }
  // Anything stored before this instant has outlived its TTL.
  const cutoff = Date.now() - currentConfig.cacheTTL;
  for (const [key, entry] of ocrCache) {
    if (entry.timestamp < cutoff) {
      ocrCache.delete(key);
    }
  }
  // Still over budget? Evict the oldest surviving entries.
  const excess = ocrCache.size - currentConfig.maxCacheSize;
  if (excess > 0) {
    const oldestFirst = [...ocrCache.entries()].sort(
      (a, b) => a[1].timestamp - b[1].timestamp
    );
    for (const [key] of oldestFirst.slice(0, excess)) {
      ocrCache.delete(key);
    }
  }
}
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions, and substitutions needed to turn
 * `str1` into `str2`.
 *
 * Improvement over the previous revision: uses the classic two-row dynamic
 * programming formulation, allocating O(|str1|) memory per row instead of the
 * full (|str1|+1) x (|str2|+1) matrix, with properly typed number arrays.
 *
 * @param str1 First string (case-sensitive here; callers lowercase first).
 * @param str2 Second string.
 * @returns The edit distance (0 when the strings are equal).
 */
function levenshteinDistance(str1: string, str2: string): number {
  // prev[i] = distance between str1[0..i) and the prefix of str2 processed so far.
  let prev: number[] = Array.from({ length: str1.length + 1 }, (_, i) => i);
  for (let j = 1; j <= str2.length; j++) {
    const curr: number[] = [j]; // distance from empty prefix of str1
    for (let i = 1; i <= str1.length; i++) {
      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      curr[i] = Math.min(
        curr[i - 1] + 1,      // deletion
        prev[i] + 1,          // insertion
        prev[i - 1] + cost    // substitution (or match when cost is 0)
      );
    }
    prev = curr;
  }
  return prev[str1.length];
}
/**
 * Case-insensitive similarity between two strings, normalized to [0, 1]
 * (1.0 means an exact match; two empty strings count as identical).
 */
function calculateSimilarity(str1: string, str2: string): number {
  const longest = Math.max(str1.length, str2.length);
  if (longest === 0) {
    return 1.0;
  }
  const edits = levenshteinDistance(str1.toLowerCase(), str2.toLowerCase());
  return (longest - edits) / longest;
}
/**
 * Decide whether OCR'd text matches a search term, using (in order):
 * whole-text substring containment, per-word containment either way,
 * per-word fuzzy similarity, and finally whole-text fuzzy similarity.
 *
 * NOTE(review): `needle.includes(word)` means any short word contained in
 * the search term matches (e.g. word "a" vs search "cat") — confirm this
 * permissiveness is intended.
 *
 * @param threshold Optional similarity cutoff; defaults to the configured
 *                  fuzzyMatchThreshold.
 */
function isTextMatch(text: string, searchText: string, threshold?: number): boolean {
  const haystack = text.toLowerCase().trim();
  const needle = searchText.toLowerCase().trim();
  const limit = threshold ?? currentConfig.fuzzyMatchThreshold;
  // Direct substring containment is always a match.
  if (haystack.includes(needle)) {
    return true;
  }
  // Per-word check: containment in either direction, then fuzzy similarity.
  const wordMatches = (word: string): boolean =>
    word.includes(needle) ||
    needle.includes(word) ||
    calculateSimilarity(word, needle) >= limit;
  if (haystack.split(/\s+/).some(wordMatches)) {
    return true;
  }
  // Last resort: fuzzy-compare the entire normalized text.
  return calculateSimilarity(haystack, needle) >= limit;
}
// Legacy single worker for backward compatibility
// Lazily created by initializeLegacyWorker(); terminated by terminateOCR().
let legacyWorker: Worker | null = null;
// True when the pooled backend is in use; flipped off if pool init fails.
let useWorkerPool = true;
// Guards initializeOCR() against re-initialization; reset by terminateOCR().
let isInitialized = false;
/**
 * Initialize the OCR backend. By default the worker pool is used; pass
 * `useLegacy = true` to force the single tesseract.js worker. If the pool
 * fails to start, this silently falls back to the legacy worker.
 *
 * Idempotent: once initialized, later calls return immediately until
 * terminateOCR() resets the state.
 */
export async function initializeOCR(useLegacy = false): Promise<void> {
  if (isInitialized) {
    return;
  }
  useWorkerPool = !useLegacy;
  if (!useWorkerPool) {
    await initializeLegacyWorker();
  } else {
    try {
      await initializeOCRWorkerPool();
      logger.info('OCR initialized with worker pool');
    } catch (error) {
      // Pool startup failed — degrade to the single-worker backend.
      logger.warn('Failed to initialize worker pool, falling back to legacy worker', error as Error);
      useWorkerPool = false;
      await initializeLegacyWorker();
    }
  }
  isInitialized = true;
}
/**
 * Lazily create the legacy single tesseract.js worker (English model).
 * Does nothing if the worker already exists.
 */
async function initializeLegacyWorker(): Promise<void> {
  if (legacyWorker) {
    return;
  }
  legacyWorker = await createWorker('eng');
  logger.info('OCR initialized with legacy single worker');
}
/**
 * Release all OCR resources (worker pool or legacy worker) and mark the
 * module as uninitialized so a later initializeOCR() starts fresh.
 */
export async function terminateOCR(): Promise<void> {
  if (!useWorkerPool) {
    if (legacyWorker) {
      await legacyWorker.terminate();
      legacyWorker = null;
    }
  } else {
    await shutdownOCRWorkerPool();
  }
  isInitialized = false;
}
/**
 * Run OCR on a base64-encoded image, consulting the result cache first.
 *
 * Fixes over the previous revision:
 * - The legacy-path timeout timer is now cleared once recognition settles;
 *   previously the pending setTimeout leaked after every successful
 *   recognition (keeping the event loop alive and firing a late rejection).
 * - The image hash is computed once instead of twice (lookup + store).
 * - The legacy worker is checked explicitly instead of via a non-null
 *   assertion, giving a clear error if initialization was skipped.
 *
 * @param base64   Base64-encoded image data to recognize.
 * @param priority Scheduling priority when the worker pool is in use.
 * @returns The raw tesseract.js recognition result (result.data is validated).
 * @throws OCRError on timeout, invalid result structure, or recognition failure.
 */
async function performOCRWithCache(base64: string, priority = OCRTaskPriority.NORMAL): Promise<any> {
  // Evict expired/overflow entries before every recognition attempt.
  cleanCache();
  // Compute the cache key once; reused for both the lookup and the store below.
  const hash = currentConfig.cacheEnabled ? calculateImageHash(base64) : null;
  const cacheKey = hash !== null ? `ocr_${hash}` : null;
  if (cacheKey !== null) {
    const cachedEntry = ocrCache.get(cacheKey);
    if (cachedEntry && (Date.now() - cachedEntry.timestamp < currentConfig.cacheTTL)) {
      logger.debug('OCR cache hit', { hash });
      return cachedEntry.result;
    }
  }
  logger.debug('OCR cache miss, performing recognition');
  // Timer handle for the legacy timeout race; always cleared in `finally`.
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  try {
    let result: any;
    if (useWorkerPool) {
      // Use worker pool for concurrent OCR operations (pool enforces timeout).
      const workerPool = getOCRWorkerPool();
      result = await workerPool.recognize(base64, priority, currentConfig.timeoutMs);
    } else {
      // Legacy single-worker implementation with a manual timeout race.
      if (!legacyWorker) {
        throw new OCRError('OCR legacy worker is not initialized');
      }
      const timeoutPromise = new Promise<never>((_, reject) => {
        timeoutHandle = setTimeout(() => {
          reject(new TimeoutError('OCR recognition', currentConfig.timeoutMs));
        }, currentConfig.timeoutMs);
      });
      result = await Promise.race([
        legacyWorker.recognize(base64),
        timeoutPromise
      ]);
    }
    // Validate result structure before caching or returning.
    if (!result?.data) {
      throw new OCRError('Invalid OCR result structure');
    }
    // Cache the result if caching is enabled.
    if (cacheKey !== null && hash !== null) {
      ocrCache.set(cacheKey, {
        timestamp: Date.now(),
        result,
        hash
      });
    }
    return result;
  } catch (error) {
    if (error instanceof TimeoutError) {
      throw new OCRError(`OCR recognition timed out after ${currentConfig.timeoutMs}ms`);
    }
    if (error instanceof OCRError) {
      throw error;
    }
    throw new OCRError(`OCR recognition failed: ${error instanceof Error ? error.message : String(error)}`);
  } finally {
    // Cancel the pending timeout so it cannot fire after recognition settles.
    if (timeoutHandle !== undefined) {
      clearTimeout(timeoutHandle);
    }
  }
}
/**
 * Extract all text from an image via OCR, returning the trimmed result.
 *
 * NOTE(review): the `region` parameter is accepted but never used in this
 * body — OCR always runs on the full image. Confirm caller expectations.
 *
 * @throws OCRError / TimeoutError on conversion or recognition failure.
 */
export async function extractTextFromImage(image: Image, region?: Region, priority = OCRTaskPriority.NORMAL): Promise<string> {
  try {
    await initializeOCR();
    logger.debug('Converting image to base64 for OCR...');
    const base64 = await imageToBase64(image);
    if (!base64) {
      throw new OCRError('Failed to convert image to base64');
    }
    logger.debug('Performing OCR text extraction...');
    const ocrResult = await performOCRWithCache(base64, priority);
    const extracted: string = ocrResult.data.text;
    logger.debug('OCR text extraction completed', { textLength: extracted.length });
    return extracted.trim();
  } catch (error) {
    // Known OCR/timeout errors pass through; anything else is wrapped.
    if (error instanceof TimeoutError || error instanceof OCRError) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : String(error);
    throw new OCRError(`Text extraction failed: ${reason}`);
  }
}
/**
 * Check whether the given text appears in an image. Delegates to
 * getTextLocations so fuzzy-matching behavior stays consistent.
 * Returns false (never throws) if the search fails.
 */
export async function findTextInImage(image: Image, searchText: string, priority = OCRTaskPriority.NORMAL): Promise<boolean> {
  try {
    logger.debug('Searching for text in image', { searchText });
    // Use the enhanced getTextLocations for consistent results with fuzzy matching
    const matches = await getTextLocations(image, searchText, undefined, priority);
    const found = matches.length > 0;
    logger.debug('Text search completed', {
      searchText,
      found,
      matchCount: matches.length,
      bestMatch: found ? matches[0].text : null
    });
    return found;
  } catch (error) {
    // Best-effort API: report "not found" rather than propagating errors.
    logger.error('Error during text search', error as Error, { searchText });
    return false;
  }
}
/** A recognized word with its bounding box and OCR confidence. */
export interface TextLocation {
  /** The recognized (trimmed) word text. */
  text: string;
  /** Left edge of the word's bounding box (tesseract bbox.x0). */
  x: number;
  /** Top edge of the word's bounding box (tesseract bbox.y0). */
  y: number;
  /** Box width (bbox.x1 - bbox.x0). */
  width: number;
  /** Box height (bbox.y1 - bbox.y0). */
  height: number;
  /** Tesseract word confidence (0-100). */
  confidence: number;
}
/**
 * Run OCR on an image and return word-level text locations.
 *
 * Without `searchText`, all words above the confidence cutoff are returned
 * sorted by confidence. With `searchText`, words are fuzzy-filtered (falling
 * back to the relaxed threshold when nothing matches) and sorted by
 * similarity first, confidence second.
 *
 * NOTE(review): the `region` parameter is accepted but never used in this
 * body — OCR always runs on the full image. Confirm whether callers expect
 * region cropping.
 *
 * @throws OCRError / TimeoutError on conversion or recognition failure.
 */
export async function getTextLocations(image: Image, searchText?: string, region?: Region, priority = OCRTaskPriority.NORMAL): Promise<TextLocation[]> {
  try {
    await initializeOCR();
    logger.debug('Converting image for text location detection...', { hasSearchText: !!searchText });
    const base64 = await imageToBase64(image);
    if (!base64) {
      throw new OCRError('Failed to convert image to base64');
    }
    logger.debug('Detecting text locations...');
    const result = await performOCRWithCache(base64, priority);
    const data = result.data;
    const allLocations: TextLocation[] = [];
    // Extract all text locations with confidence filtering
    // (strictly greater than minConfidence — equal-confidence words are dropped).
    const minConfidence = currentConfig.minConfidence;
    if ('words' in data && Array.isArray(data.words)) {
      for (const word of data.words as any[]) {
        if (word.confidence > minConfidence && word.text && word.text.trim().length > 0) {
          allLocations.push({
            text: word.text.trim(),
            x: word.bbox.x0,
            y: word.bbox.y0,
            width: word.bbox.x1 - word.bbox.x0,
            height: word.bbox.y1 - word.bbox.y0,
            confidence: word.confidence
          });
        }
      }
    }
    let filteredLocations = allLocations;
    // Filter by search text if provided
    if (searchText && searchText.trim().length > 0) {
      const searchTerm = searchText.trim();
      logger.debug('Filtering locations by search text', { searchTerm, totalLocations: allLocations.length });
      filteredLocations = allLocations.filter(location => {
        const matches = isTextMatch(location.text, searchTerm, currentConfig.fuzzyMatchThreshold);
        if (matches) {
          logger.debug('Text match found', {
            locationText: location.text,
            searchTerm,
            confidence: location.confidence
          });
        }
        return matches;
      });
      // If no fuzzy matches found, try again with the relaxed threshold
      if (filteredLocations.length === 0) {
        logger.debug('No matches with standard threshold, trying relaxed matching');
        filteredLocations = allLocations.filter(location =>
          isTextMatch(location.text, searchTerm, currentConfig.relaxedFuzzyThreshold)
        );
      }
      // Sort by similarity (descending), breaking near-ties by confidence
      filteredLocations.sort((a, b) => {
        const similarityA = calculateSimilarity(a.text, searchTerm);
        const similarityB = calculateSimilarity(b.text, searchTerm);
        // Prioritize by similarity first, then confidence
        // (a similarity gap of <= 0.1 is treated as a tie).
        if (Math.abs(similarityA - similarityB) > 0.1) {
          return similarityB - similarityA;
        }
        return b.confidence - a.confidence;
      });
    } else {
      // No search text, return all locations sorted by confidence
      filteredLocations.sort((a, b) => b.confidence - a.confidence);
    }
    logger.debug('Text location detection completed', {
      totalLocations: allLocations.length,
      filteredLocations: filteredLocations.length,
      searchText: searchText || 'none'
    });
    return filteredLocations;
  } catch (error) {
    // Known OCR/timeout errors pass through; anything else is wrapped.
    if (error instanceof TimeoutError || error instanceof OCRError) {
      throw error;
    }
    throw new OCRError(`Failed to get text locations: ${error instanceof Error ? error.message : String(error)}`);
  }
}
/**
* Get OCR worker pool metrics (only available when using worker pool)
*/
export function getOCRMetrics() {
if (!useWorkerPool) {
return null;
}
const workerPool = getOCRWorkerPool();
return workerPool.getMetrics();
}
/**
* Get detailed worker states (only available when using worker pool)
*/
export function getOCRWorkerStates() {
if (!useWorkerPool) {
return null;
}
const workerPool = getOCRWorkerPool();
return workerPool.getWorkerStates();
}
/**
 * Check if OCR is using worker pool
 * @returns true when the pooled backend is active, false for the legacy single worker.
 */
export function isUsingWorkerPool(): boolean {
  return useWorkerPool;
}
/**
 * Extract text from multiple images. With the worker pool the images are
 * processed concurrently; with the legacy worker they run one at a time.
 * Result order matches input order in both cases.
 */
export async function extractTextFromImages(
  images: Image[],
  priority = OCRTaskPriority.NORMAL
): Promise<string[]> {
  if (useWorkerPool) {
    // Concurrent processing with worker pool.
    return Promise.all(images.map(img => extractTextFromImage(img, undefined, priority)));
  }
  // Sequential processing for the legacy single worker.
  const texts: string[] = [];
  for (const img of images) {
    texts.push(await extractTextFromImage(img, undefined, priority));
  }
  return texts;
}
/**
 * Get word-level text locations for multiple images. With the worker pool
 * the images are processed concurrently; with the legacy worker they run
 * sequentially. Result order matches input order in both cases.
 */
export async function getTextLocationsFromImages(
  images: Image[],
  priority = OCRTaskPriority.NORMAL
): Promise<TextLocation[][]> {
  if (useWorkerPool) {
    // Concurrent processing with worker pool.
    return Promise.all(images.map(img => getTextLocations(img, undefined, undefined, priority)));
  }
  // Sequential processing for the legacy single worker.
  const perImage: TextLocation[][] = [];
  for (const img of images) {
    perImage.push(await getTextLocations(img, undefined, undefined, priority));
  }
  return perImage;
}
/**
 * Enhanced text search with per-call overrides for the fuzzy-match threshold
 * and minimum confidence. Returns matching locations, or [] on any failure.
 *
 * NOTE(review): this temporarily mutates the module-level `currentConfig`
 * and restores a snapshot in `finally`. Concurrent calls — or a
 * configureOCR() issued while this awaits — can observe or lose each
 * other's settings. Confirm callers never run this concurrently, or thread
 * the overrides through getTextLocations instead of mutating shared state.
 */
export async function findTextWithOptions(
  image: Image,
  searchText: string,
  options: {
    region?: Region;
    priority?: OCRTaskPriority;
    fuzzyThreshold?: number;
    minConfidence?: number;
  } = {}
): Promise<TextLocation[]> {
  const {
    region,
    priority = OCRTaskPriority.NORMAL,
    // Effective confidence floor used for the post-filter below.
    minConfidence = currentConfig.minConfidence
  } = options;
  // Temporarily override config if options are provided
  const originalConfig = { ...currentConfig };
  if (options.fuzzyThreshold !== undefined) {
    currentConfig.fuzzyMatchThreshold = options.fuzzyThreshold;
  }
  if (options.minConfidence !== undefined) {
    currentConfig.minConfidence = options.minConfidence;
  }
  try {
    const locations = await getTextLocations(image, searchText, region, priority);
    // Additional filtering by confidence if specified
    return locations.filter(loc => loc.confidence >= minConfidence);
  } catch (error) {
    // Best-effort API: log and report "no matches" rather than throwing.
    logger.error('Error in enhanced text search', error as Error, { searchText, options });
    return [];
  } finally {
    // Restore original config (see concurrency NOTE above).
    currentConfig = originalConfig;
  }
}
/**
 * Snapshot of OCR cache health for monitoring.
 * Note: `cacheHitRate` is actually the fraction of entries still within TTL,
 * not a true hit/miss ratio — the key name is preserved for caller
 * compatibility.
 */
export function getOCRCacheStats() {
  const now = Date.now();
  let validEntries = 0;
  let expiredEntries = 0;
  for (const entry of ocrCache.values()) {
    if (now - entry.timestamp < currentConfig.cacheTTL) {
      validEntries += 1;
    } else {
      expiredEntries += 1;
    }
  }
  return {
    totalEntries: ocrCache.size,
    validEntries,
    expiredEntries,
    cacheHitRate: validEntries / Math.max(1, ocrCache.size),
    cacheEnabled: currentConfig.cacheEnabled,
    cacheTTL: currentConfig.cacheTTL,
    maxCacheSize: currentConfig.maxCacheSize
  };
}
/**
 * Clear OCR cache manually
 * Drops every cached recognition result immediately, regardless of TTL.
 */
export function clearOCRCache(): void {
  ocrCache.clear();
  logger.info('OCR cache cleared');
}
// Re-export worker pool types and enums for convenience
export { OCRTaskPriority, type OCRWorkerPoolConfig, type PoolMetrics, type WorkerState } from './core/ocr-worker-pool.js';