Calibre RAG MCP Server

test-ocr-full.js•4.73 kB

#!/usr/bin/env node
/**
 * Manual smoke test for the OCR pipeline.
 *
 * Finds the first PDF under the library directory, renders its first page to
 * a PNG with pdf2pic, runs Tesseract on that image through
 * node-tesseract-ocr, and prints a sample of the recognised text.
 */
const tesseract = require('node-tesseract-ocr');
const pdf2pic = require('pdf2pic');
const fs = require('fs');
const path = require('path');
const os = require('os');

/** Default Windows install location of the Tesseract executable. */
const TESSERACT_WINDOWS_PATH = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe';

/** Root directory that is searched for a PDF to test with. */
const LIBRARY_PATH = 'D:\\e-library';

/** Only this many entries of each directory are inspected, to keep the scan quick. */
const MAX_ENTRIES_PER_DIR = 50;

/**
 * Depth-first search for the first `.pdf` file below `dir`.
 *
 * Unreadable directories and entries that cannot be stat'ed are skipped
 * individually, so one bad entry does not hide the rest of its directory.
 *
 * @param {string} dir - Directory to search.
 * @returns {string|null} Full path of the first PDF found, or null if none.
 */
function findFirstPDF(dir) {
  let entries;
  try {
    entries = fs.readdirSync(dir);
  } catch (error) {
    // Skip directories we can't access
    return null;
  }

  for (const entry of entries.slice(0, MAX_ENTRIES_PER_DIR)) {
    const fullPath = path.join(dir, entry);

    let stat;
    try {
      stat = fs.statSync(fullPath);
    } catch (error) {
      // Broken link or permission problem: ignore this entry only.
      continue;
    }

    if (stat.isFile() && entry.toLowerCase().endsWith('.pdf')) {
      return fullPath;
    }
    if (stat.isDirectory()) {
      const found = findFirstPDF(fullPath);
      if (found) return found;
    }
  }

  return null;
}

/**
 * Runs the end-to-end OCR check and logs each step to the console.
 *
 * All failures are caught and reported; the returned promise never rejects.
 * The temporary image directory is removed whether or not the run succeeds.
 *
 * @returns {Promise<void>}
 */
async function testOCROnLibrary() {
  console.log('🔍 Testing OCR on library files...');

  let tempDir = null;

  try {
    const ocrConfig = {
      lang: 'eng',
      oem: 1,
      psm: 3,
    };

    // Set up Tesseract path
    if (fs.existsSync(TESSERACT_WINDOWS_PATH)) {
      console.log('✅ Found Tesseract at:', TESSERACT_WINDOWS_PATH);
      process.env.TESSERACT_BINARY_PATH = TESSERACT_WINDOWS_PATH;
      // NOTE(review): node-tesseract-ocr is expected to take the executable
      // from the `binary` option and build a shell command from it, hence the
      // quotes around a path containing a space - confirm against the
      // installed version of the library.
      ocrConfig.binary = `"${TESSERACT_WINDOWS_PATH}"`;
    } else {
      console.log('⚠️ Using Tesseract from PATH');
    }

    console.log(`🔍 Searching for PDF files in: ${LIBRARY_PATH}`);

    const pdfFile = findFirstPDF(LIBRARY_PATH);
    if (!pdfFile) {
      console.log('❌ No PDF files found in library');
      return;
    }

    console.log('📄 Found PDF file:', pdfFile);
    console.log('📏 File size:', Math.round(fs.statSync(pdfFile).size / 1024), 'KB');

    // Test PDF to image conversion
    console.log('\n🔄 Testing PDF to image conversion...');

    // A unique directory per run, so cleanup can never delete a directory
    // this script did not create.
    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'ocr-test-'));

    const convert = pdf2pic.fromPath(pdfFile, {
      density: 150, // Lower density for faster testing
      saveFilename: 'test-page',
      savePath: tempDir,
      format: 'png',
      width: 1000,
      height: 1000,
    });

    let result;
    try {
      console.log('🖼️ Converting first page to image...');
      result = await convert(1);
    } catch (conversionError) {
      console.log('❌ PDF conversion error:', conversionError.message);
      return;
    }

    if (!(result && result.path && fs.existsSync(result.path))) {
      console.log('❌ PDF conversion failed - no image created');
      return;
    }

    console.log('✅ PDF conversion successful:', result.path);
    console.log('📏 Image size:', Math.round(fs.statSync(result.path).size / 1024), 'KB');

    // Test OCR on the image
    console.log('\n🔍 Testing OCR on converted image...');

    const startTime = Date.now();
    let text;
    try {
      text = await tesseract.recognize(result.path, ocrConfig);
    } catch (ocrError) {
      // Reported separately so an OCR failure is not mistaken for a
      // conversion failure.
      console.log('❌ OCR error:', ocrError.message);
      return;
    }
    const duration = Date.now() - startTime;

    console.log('✅ OCR completed in', duration, 'ms');
    console.log('📝 Text length:', text.length, 'characters');

    if (text.length > 0) {
      console.log('📄 Sample text (first 200 chars):');
      console.log(text.substring(0, 200));
      console.log('\n🎉 OCR is working correctly!');
    } else {
      console.log('⚠️ OCR returned empty text - the page might be blank or image quality poor');
    }
  } catch (error) {
    console.error('❌ OCR test failed:', error.message);
    console.error('Stack:', error.stack);
  } finally {
    // Clean up on every exit path, including early returns and failures.
    if (tempDir !== null) {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch (cleanupError) {
        // Best effort: a leftover temp directory must not fail the test run.
      }
    }
  }
}

testOCROnLibrary();

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ispyridis/calibre-rag-mcp-nodejs'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.