import dotenv from 'dotenv';
dotenv.config();
import { OpenAIVisionParser } from './build/scraper/openai_vision_parser.js';
import { existsSync, readdirSync } from 'fs';
import { basename, resolve } from 'path';
/**
 * Returns the absolute path of the first `.pdf` file found in `folders`.
 * Folders are checked in the given order; folders that do not exist are skipped.
 *
 * @param {string[]} folders - Candidate directories (resolved against the working directory).
 * @returns {string|null} Absolute path of the first PDF found, or `null` when none exists.
 */
function findFirstPdf(folders) {
  for (const folder of folders) {
    if (!existsSync(folder)) {
      continue;
    }
    const pdfFiles = readdirSync(folder).filter((file) => file.endsWith('.pdf'));
    if (pdfFiles.length > 0) {
      return resolve(folder, pdfFiles[0]);
    }
  }
  return null;
}

/**
 * Formats an extracted figure for display: the value divided by 1,000,000 with
 * one decimal and an 'M NOK' suffix, or 'Not found' when the value is falsy.
 *
 * @param {number|null|undefined} amount - Value taken from the parser result.
 * @returns {string} Display string.
 */
function formatMillionsNok(amount) {
  // NOTE(review): the truthiness check is kept from the original, so a genuine
  // value of 0 is also shown as 'Not found' — confirm what the parser returns
  // for a missing figure before tightening this to a null check.
  return amount ? `${(amount / 1000000).toFixed(1)}M NOK` : 'Not found';
}

/**
 * Manual smoke test for the PDF parsing path of OpenAIVisionParser.
 *
 * Steps:
 *   1. Requires OPENAI_API_KEY in the environment (exits with code 1 otherwise).
 *   2. Picks the first PDF found in a fixed list of data folders (exits with
 *      code 1 when none is found).
 *   3. Calls `parser.parseFinancialPDF()` on it and prints the elapsed time and
 *      the revenue / profit / assets / equity fields of the result.
 *   4. Checks whether `data/pdfs/png_images/<orgNr>/<pdfName>` exists and how
 *      many `.png` files it contains (presumably where the parser writes its
 *      page images — verify against the parser implementation).
 *
 * Any error thrown along the way is logged and the process exit code is set
 * to 1 so that shells and CI can detect the failure.
 *
 * @returns {Promise<void>}
 */
async function testPngConversion() {
  const rule = 'ββββββββββββββββββββββββββββββββββββββ';

  console.log('π§ͺ Testing PNG Conversion Process');
  console.log(`${rule}\n`);

  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    console.error('β OPENAI_API_KEY not found!');
    process.exit(1);
  }

  try {
    console.log('π Initializing OpenAI Vision Parser...');
    const parser = new OpenAIVisionParser(apiKey);

    // Use whichever previously downloaded PDF turns up first.
    const pdfFolders = ['data/pdfs/984562861', 'data/pdfs/999059198', 'data/pdfs/temp'];
    const testPdf = findFirstPdf(pdfFolders);
    if (!testPdf) {
      console.error(`β No test PDFs found in any folder`);
      console.error(' Please run browser scraper first to download PDFs');
      process.exit(1);
    }

    console.log(`β
Found test PDF: ${testPdf}`);
    console.log(`π Working directory: ${process.cwd()}`);
    console.log('\nπ Starting PDF parsing (will convert to PNG)...\n');

    const startTime = Date.now();
    const result = await parser.parseFinancialPDF(testPdf);
    const duration = Math.round((Date.now() - startTime) / 1000);

    console.log(`\n${rule}`);
    console.log('π EXTRACTION RESULTS');
    console.log(`${rule}\n`);
    console.log(`β±οΈ Time taken: ${duration} seconds`);
    console.log(`π° Revenue: ${formatMillionsNok(result.revenue)}`);
    console.log(`π Profit: ${formatMillionsNok(result.profit)}`);
    console.log(`π’ Assets: ${formatMillionsNok(result.assets)}`);
    console.log(`π Equity: ${formatMillionsNok(result.equity)}`);

    // Check if PNG files were created.
    // basename() understands the platform's path separator (resolve() returns
    // backslash-separated paths on Windows) and strips only a trailing '.pdf'.
    const pdfName = basename(testPdf, '.pdf');
    // The organisation number is taken to be the first run of nine digits in the file name.
    const orgMatch = pdfName.match(/(\d{9})/);
    const orgNr = orgMatch ? orgMatch[1] : 'unknown';
    const pngFolder = resolve('data/pdfs/png_images', orgNr, pdfName);
    console.log(`\nπ PNG folder: ${pngFolder}`);

    if (existsSync(pngFolder)) {
      const pngFiles = readdirSync(pngFolder).filter((file) => file.endsWith('.png'));
      console.log(`β
Created ${pngFiles.length} PNG files`);
      if (pngFiles.length > 0) {
        console.log(` Sample files: ${pngFiles.slice(0, 3).join(', ')}...`);
      }
    } else {
      console.log('β οΈ PNG folder not found - conversion might have failed');
    }

    if (result.revenue || result.profit || result.assets || result.equity) {
      console.log('\nπ PNG conversion and Vision API extraction successful!');
    } else {
      console.log('\nβ οΈ No data extracted - check the logs above for issues');
    }
  } catch (error) {
    // Guard the property reads so a thrown non-object (e.g. `throw undefined`)
    // cannot make the error handler itself throw.
    console.error('\nβ Test failed:', (error && error.message) || error);
    console.error('\nStack trace:', error && error.stack);
    // Report the failure through the exit status; without this the script
    // exits with 0 even though the test failed.
    process.exitCode = 1;
  }
}
// Script entry point. The returned promise is not left floating: if a
// rejection escapes testPngConversion(), log it and exit with a failing status.
testPngConversion().catch((error) => {
  console.error('Unexpected error in testPngConversion:', error);
  process.exitCode = 1;
});