import { OpenAIVisionParser } from './build/scraper/openai_vision_parser.js';
import { readdirSync } from 'fs';
import { resolve } from 'path';
// The OpenAI API key is read exclusively from the OPENAI_API_KEY environment
// variable. Never hard-code a secret in source: anything committed here ends up
// in version-control history. A key that was previously embedded in this file
// must be treated as leaked and revoked at https://platform.openai.com/api-keys.
// When the variable is unset this is `undefined`, which the validity check in
// the test function reports to the user.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
/**
 * Tells whether a parsed figure is present.
 * Both `null` and `undefined` mean "missing"; 0 is a real value.
 *
 * @param {*} value - A figure taken from a parser result.
 * @returns {boolean} True when the figure is neither null nor undefined.
 */
function hasFigure(value) {
  return value != null;
}

/**
 * Tells whether at least one of revenue/profit/assets/equity is present.
 *
 * @param {object} record - Parser result or stored result entry.
 * @returns {boolean} True when any of the four figures is present.
 */
function hasAnyFigure(record) {
  return (
    hasFigure(record.revenue) ||
    hasFigure(record.profit) ||
    hasFigure(record.assets) ||
    hasFigure(record.equity)
  );
}

/**
 * Logs one line per present figure as millions with one decimal.
 *
 * @param {object} record - Object carrying revenue/profit/assets/equity.
 * @param {Array<[string, string]>} labelledFields - Pairs of
 *   [property name, line prefix], logged in the given order.
 */
function logFigures(record, labelledFields) {
  for (const [field, prefix] of labelledFields) {
    if (hasFigure(record[field])) {
      console.log(`${prefix}${(record[field] / 1000000).toFixed(1)}M NOK`);
    }
  }
}

/**
 * Runs the OpenAI parser over every `.pdf` file in `./data/pdfs`, logs the
 * figures extracted from each file, and prints a summary with revenue growth.
 *
 * Files are processed sequentially with a 2 second pause between API calls.
 * Processing stops early when the API reports HTTP status 401.
 *
 * @returns {Promise<void>} Resolves once the summary has been printed.
 * @throws Re-throws any error from reading the PDF directory other than
 *   "directory does not exist" (ENOENT), which is reported and handled.
 */
async function testOpenAIOnAllPDFs() {
  console.log('🚀 Testing OpenAI API on All Downloaded PDFs');
  console.log('═'.repeat(50));
  console.log('This will use OpenAI GPT-4 to extract financial data');
  console.log('from all downloaded årsregnskap PDFs (2012-2023)');
  console.log('═'.repeat(50) + '\n');

  // Refuse to run without a usable API key.
  if (!OPENAI_API_KEY || OPENAI_API_KEY === 'your-api-key-here') {
    console.error('❌ Please set a valid OpenAI API key');
    console.error(' Get your key from: https://platform.openai.com/api-keys');
    console.error(' Then set it in this file or as environment variable OPENAI_API_KEY');
    return;
  }

  const parser = new OpenAIVisionParser(OPENAI_API_KEY);
  const pdfDir = './data/pdfs';
  const delayBetweenCallsMs = 2000;

  // Collect the PDF files; a missing directory gets a readable message
  // instead of a raw ENOENT stack trace.
  let pdfFiles;
  try {
    pdfFiles = readdirSync(pdfDir)
      .filter((file) => file.endsWith('.pdf'))
      .sort();
  } catch (error) {
    if (!error || error.code !== 'ENOENT') {
      throw error;
    }
    console.error(`❌ PDF directory not found: ${pdfDir}`);
    return;
  }

  console.log(`📁 Found ${pdfFiles.length} PDF files in ${pdfDir}`);
  console.log('─'.repeat(50) + '\n');

  const results = [];
  for (const [index, pdfFile] of pdfFiles.entries()) {
    // Pause before every call except the first. Doing it here (rather than
    // after a successful parse) also throttles the call that follows a failure.
    if (index > 0) {
      console.log('⏳ Waiting 2 seconds before next PDF (API rate limiting)...');
      await new Promise((done) => setTimeout(done, delayBetweenCallsMs));
    }

    // Extract year from filename (format: aarsregnskap_999059198-YYYY.pdf)
    const yearMatch = pdfFile.match(/(\d{4})\.pdf$/);
    const year = yearMatch ? Number.parseInt(yearMatch[1], 10) : null;
    console.log(`\n📄 Processing: ${pdfFile} (Year: ${year})`);
    console.log('─'.repeat(40));

    const pdfPath = resolve(pdfDir, pdfFile);
    try {
      const startTime = Date.now();
      // A null/undefined parser result is treated as "nothing extracted" so
      // that the file is recorded exactly once.
      const result = (await parser.parseFinancialPDF(pdfPath)) || {};
      const duration = Math.round((Date.now() - startTime) / 1000);

      // NOTE(review): a `year` property on the parser result overrides the
      // year taken from the filename — confirm that is intended.
      results.push({
        year,
        ...result,
        file: pdfFile,
        processingTime: duration,
      });

      if (hasAnyFigure(result)) {
        console.log(`✅ Data extracted in ${duration}s:`);
        logFigures(result, [
          ['revenue', ' 💰 Revenue: '],
          ['profit', ' 📊 Profit: '],
          ['assets', ' 🏢 Assets: '],
          ['equity', ' 💎 Equity: '],
        ]);
      } else {
        console.log(`⚠️ No data extracted (${duration}s)`);
        console.log(' PDF might be pure image-based without text');
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(`❌ Error processing ${pdfFile}:`, message);
      if (error && error.status === 401) {
        console.error('\n🔑 API Key Authentication Error!');
        console.error(' The OpenAI API key is invalid or expired.');
        console.error(' Please provide a valid API key.');
        break; // Every further call would fail the same way.
      }
      results.push({
        year,
        revenue: null,
        profit: null,
        assets: null,
        equity: null,
        file: pdfFile,
        error: message,
      });
    }
  }

  // Summary
  console.log('\n' + '═'.repeat(50));
  console.log('📊 SUMMARY OF ALL YEARS');
  console.log('═'.repeat(50));

  // Entries without a recognisable year sort first (treated as year 0).
  const successfulYears = results
    .filter((entry) => hasAnyFigure(entry))
    .sort((a, b) => (a.year || 0) - (b.year || 0));

  console.log(`\nTotal PDFs processed: ${results.length}`);
  console.log(`Successful extractions: ${successfulYears.length}`);
  console.log(`Failed extractions: ${results.length - successfulYears.length}`);

  if (successfulYears.length > 0) {
    console.log('\n📈 Years with extracted data:');
    for (const entry of successfulYears) {
      console.log(`\n${entry.year}:`);
      logFigures(entry, [
        ['revenue', ' Revenue: '],
        ['profit', ' Profit: '],
        ['assets', ' Assets: '],
        ['equity', ' Equity: '],
      ]);
    }

    // Growth is measured between the earliest and latest years that actually
    // have a non-zero revenue, so a gap at either end does not suppress it
    // and the division below never divides by zero.
    const withRevenue = successfulYears.filter((entry) => entry.revenue);
    if (withRevenue.length > 1) {
      const firstYear = withRevenue[0];
      const lastYear = withRevenue[withRevenue.length - 1];
      const revenueGrowth = (
        ((lastYear.revenue - firstYear.revenue) / firstYear.revenue) * 100
      ).toFixed(1);
      console.log(`\n📊 Revenue growth (${firstYear.year}-${lastYear.year}): ${revenueGrowth}%`);
    }
  }

  console.log('\n' + '═'.repeat(50));
  console.log('Test complete!\n');

  if (successfulYears.length === 0) {
    console.log('💡 Tips:');
    console.log('1. Make sure you have a valid OpenAI API key');
    console.log('2. The PDFs from Brønnøysund are image-based (scanned)');
    console.log('3. Consider using GPT-4 Vision API for better results');
    console.log('4. Or implement PDF-to-image conversion first');
  }
}
// Run the test. An unhandled failure is logged and also reflected in the
// process exit status, so shells and CI can tell the run did not succeed.
testOpenAIOnAllPDFs().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});