Skip to main content
Glama
josuekongolo

CompanyIQ MCP Server

by josuekongolo
test-ocr-parser.js (2.77 kB)
import { OCRPDFParser } from './build/scraper/ocr_pdf_parser.js';

/** Sample report used when no PDF path is supplied by the caller or on the command line. */
const DEFAULT_PDF_PATH = '/Users/josuekongolo/Downloads/aarsregnskap_999059198-2024.pdf';

/** Reference 2024 revenue in NOK; matches the "831.0M NOK" figure printed as the expected value. */
const EXPECTED_REVENUE_NOK = 831000000;

/** Divisor used to display raw NOK amounts in millions. */
const NOK_PER_MILLION = 1000000;

/** Horizontal rules used to frame the console report. */
const HEAVY_RULE = '═'.repeat(50);
const LIGHT_RULE = '─'.repeat(50);

/**
 * Formats an extracted amount as millions of NOK with one decimal.
 *
 * Any falsy amount (undefined, null, 0, NaN) is reported as 'Not found'.
 * NOTE(review): this presumes the parser never yields a legitimate 0 —
 * confirm against OCRPDFParser before switching to a strict null check.
 *
 * @param {number|null|undefined} amount - Raw amount as returned by the parser.
 * @returns {string} Display string such as '831.0M NOK', or 'Not found'.
 */
function formatMillionsNok(amount) {
  return amount ? `${(amount / NOK_PER_MILLION).toFixed(1)}M NOK` : 'Not found';
}

/**
 * Runs OCRPDFParser.parseFinancialPDF against one PDF and prints the extracted
 * revenue, profit, assets and equity, the elapsed time, and a comparison of the
 * revenue with the hard-coded 2024 reference figure.
 *
 * Failures are reported on stderr with troubleshooting hints and mark the
 * process as failed (exit code 1) instead of rejecting.
 *
 * @param {string} [pdfPath] - PDF to parse. Defaults to the first command-line
 *   argument, then to DEFAULT_PDF_PATH.
 * @returns {Promise<void>}
 */
async function testOCRParser(pdfPath = process.argv[2] || DEFAULT_PDF_PATH) {
  console.log('🔍 Testing OCR PDF Parser');
  console.log(HEAVY_RULE);
  console.log('This will use Tesseract OCR to extract text from image PDFs');
  console.log('First run may take time to download language models...\n');

  console.log(`📄 Testing with: ${pdfPath.split('/').pop()}`);
  console.log(LIGHT_RULE + '\n');

  try {
    // Constructed inside the try so a setup failure gets the same
    // troubleshooting output as a parsing failure.
    const parser = new OCRPDFParser();

    const startTime = Date.now();
    console.log('🤖 Starting OCR process...\n');

    const result = await parser.parseFinancialPDF(pdfPath);
    const duration = Math.round((Date.now() - startTime) / 1000);

    console.log('\n' + HEAVY_RULE);
    console.log('📊 EXTRACTED FINANCIAL DATA');
    console.log(HEAVY_RULE);

    console.log(`\n💰 Revenue: ${formatMillionsNok(result.revenue)}`);
    console.log(`📊 Profit: ${formatMillionsNok(result.profit)}`);
    console.log(`🏢 Assets: ${formatMillionsNok(result.assets)}`);
    console.log(`💎 Equity: ${formatMillionsNok(result.equity)}`);
    console.log(`\n⏱️ Time taken: ${duration} seconds`);

    const extractedAnything = [result.revenue, result.profit, result.assets, result.equity].some(Boolean);

    if (extractedAnything) {
      console.log('\n✅ SUCCESS! OCR extracted financial data from the image PDF!');

      // Compare with expected values if known
      console.log('\n📋 Expected values for 2024 (from API):');
      console.log(' Revenue: 831.0M NOK');
      console.log(' Profit: 81.3M NOK');

      if (result.revenue) {
        const errorPercent = (Math.abs(result.revenue - EXPECTED_REVENUE_NOK) / EXPECTED_REVENUE_NOK) * 100;
        // Clamp at 0 so a value that is off by more than 100% is not shown
        // as a negative accuracy.
        const accuracyPercent = Math.max(0, 100 - errorPercent);
        console.log(`\n📈 Revenue extraction accuracy: ${accuracyPercent.toFixed(1)}%`);
      }
    } else {
      console.log('\n⚠️ No financial data extracted.');
      console.log('Possible reasons:');
      console.log('- PDF might be encrypted or protected');
      console.log('- Text might be in unexpected format');
      console.log('- OCR quality might need improvement');
      console.log('\n💡 Try with different PDFs or adjust OCR settings');
    }
  } catch (error) {
    console.error('\n❌ Test failed:', error.message || error);
    console.error('\nMake sure:');
    console.error('1. The PDF file exists');
    console.error('2. Tesseract language data is downloaded');
    console.error('3. pdf2pic dependencies are installed');
    // Let shells and CI detect the failure without cutting off pending output.
    process.exitCode = 1;
  }

  console.log('\n' + HEAVY_RULE);
  console.log('Test complete!\n');
}

// Guard the top-level call so an unexpected rejection is reported rather than
// left as an unhandled promise rejection.
testOCRParser().catch((error) => {
  console.error('\n❌ Test failed:', error.message || error);
  process.exitCode = 1;
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/josuekongolo/companyiq-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.