test-ocr-simple.js • 2.39 kB
#!/usr/bin/env node
// Simple OCR smoke test: checks for the Tesseract binary and looks for sample PDFs (no OCR is run)
const tesseract = require('node-tesseract-ocr');
const fs = require('fs');
const path = require('path');
// Status glyphs for console output, written as Unicode escapes so the source
// stays ASCII-only and cannot be mangled by an encoding round-trip (which had
// corrupted the glyphs and split several string literals across lines).
// ok/warn/fail are the check mark, warning sign and cross mark; the remaining
// four are stand-ins chosen by message meaning, since the original glyphs were
// not recoverable.
const STATUS_ICONS = Object.freeze({
  ok: '\u2705',
  warn: '\u26A0\uFE0F',
  fail: '\u274C',
  search: '\u{1F50D}',
  config: '\u{1F4CB}',
  library: '\u{1F4DA}',
  done: '\u{1F389}',
});

/**
 * Collects a few example PDF paths from the top level of `dir` (no recursion).
 *
 * @param {string} dir - Directory to scan.
 * @param {object} [options]
 * @param {number} [options.maxEntries=20] - Only the first N directory entries are examined.
 * @param {number} [options.limit=3] - Stop once this many PDFs have been found.
 * @returns {string[]} Full paths of the PDFs found (possibly empty).
 * @throws If `dir` itself cannot be read.
 */
function findSamplePdfs(dir, { maxEntries = 20, limit = 3 } = {}) {
  const found = [];
  for (const name of fs.readdirSync(dir).slice(0, maxEntries)) {
    // Cheap name check first so only PDF candidates are stat'ed.
    if (!name.toLowerCase().endsWith('.pdf')) continue;

    const fullPath = path.join(dir, name);
    let isFile;
    try {
      isFile = fs.statSync(fullPath).isFile();
    } catch (error) {
      // A broken link or permission error on one entry should not abort the scan.
      console.log(`${STATUS_ICONS.warn} Skipping unreadable entry: ${fullPath} (${error.message})`);
      continue;
    }

    if (isFile) {
      found.push(fullPath);
      if (found.length >= limit) break; // Only need a few examples
    }
  }
  return found;
}

/**
 * Smoke-tests the OCR environment: reports whether the Tesseract executable
 * exists at the expected location, prints the OCR configuration, and looks for
 * a few sample PDFs in the library directory. No OCR is actually performed.
 *
 * Failures are logged and recorded in `process.exitCode` rather than thrown, so
 * the returned promise always resolves. The function is kept `async` so callers
 * continue to receive a promise.
 *
 * @param {object} [options]
 * @param {string} [options.tesseractPath] - Expected location of the Tesseract executable.
 * @param {string} [options.libraryPath] - Directory that should contain sample PDFs.
 * @returns {Promise<void>}
 */
async function testOCR({
  tesseractPath = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe',
  libraryPath = 'D:\\e-library',
} = {}) {
  try {
    console.log(`${STATUS_ICONS.search} Testing Tesseract OCR integration...`);

    // Check if Tesseract is accessible
    if (fs.existsSync(tesseractPath)) {
      console.log(`${STATUS_ICONS.ok} Tesseract found at:`, tesseractPath);
      // NOTE(review): node-tesseract-ocr appears to locate the executable via its
      // `binary` config option rather than this environment variable - confirm
      // that something downstream actually reads TESSERACT_BINARY_PATH.
      process.env.TESSERACT_BINARY_PATH = tesseractPath;
    } else {
      console.log(`${STATUS_ICONS.warn} Using Tesseract from PATH`);
    }

    const ocrConfig = {
      lang: 'eng',
      oem: 1,
      psm: 3,
    };
    console.log(`${STATUS_ICONS.config} OCR configuration:`, ocrConfig);
    console.log(`${STATUS_ICONS.ok} OCR test setup completed!`);

    // Test if we can find some sample PDFs
    console.log(`${STATUS_ICONS.library} Checking library at: ${libraryPath}`);
    if (fs.existsSync(libraryPath)) {
      console.log(`${STATUS_ICONS.ok} Library directory exists`);

      const pdfFiles = findSamplePdfs(libraryPath);
      console.log(`Found ${pdfFiles.length} PDF files for testing`);
      if (pdfFiles.length > 0) {
        console.log('Example PDF:', pdfFiles[0]);
      }
    } else {
      console.log(`${STATUS_ICONS.fail} Library directory not found`);
    }

    console.log(`${STATUS_ICONS.done} OCR test completed!`);
  } catch (error) {
    console.error(`${STATUS_ICONS.fail} OCR test failed:`, error.message);
    console.error('Stack:', error.stack);
    // Signal failure to the shell/CI instead of exiting with status 0.
    process.exitCode = 1;
  }
}
// testOCR() returns a promise; attach a handler so it is not left floating and
// any unexpected rejection is reported and marks the run as failed.
testOCR().catch((error) => {
  console.error('Unexpected OCR test failure:', error);
  process.exitCode = 1;
});