Skip to main content
Glama
josuekongolo

CompanyIQ MCP Server

by josuekongolo
test-openai-all-pdfs.js (5.83 kB)
import { OpenAIVisionParser } from './build/scraper/openai_vision_parser.js';
import { readdirSync } from 'fs';
import { resolve } from 'path';

// SECURITY: the API key is read from the environment only. A real key must
// never be committed as a fallback value; any key that was previously
// hard-coded in this file should be treated as compromised and revoked.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

// Directory scanned for downloaded annual-report PDFs.
const PDF_DIR = './data/pdfs';

// Pause between consecutive API calls, in milliseconds.
const RATE_LIMIT_DELAY_MS = 2000;

// Fields read from each parser result, with the labels used when printing.
const FINANCIAL_FIELDS = [
  { key: 'revenue', label: 'Revenue', icon: '💰' },
  { key: 'profit', label: 'Profit', icon: '📊' },
  { key: 'assets', label: 'Assets', icon: '🏢' },
  { key: 'equity', label: 'Equity', icon: '💎' },
];

/** True when a value is present; `0` and negative numbers count as present. */
const hasValue = (value) => value != null;

/** True when at least one of the financial fields is present on the record. */
const hasFinancialData = (record) =>
  FINANCIAL_FIELDS.some(({ key }) => hasValue(record[key]));

/** Formats an amount as millions of NOK with one decimal. */
const formatMillionsNok = (amount) => `${(amount / 1000000).toFixed(1)}M NOK`;

/** Resolves after the given number of milliseconds. */
const sleep = (ms) => new Promise((done) => setTimeout(done, ms));

/**
 * Prints every financial field that is present on the record, one per line.
 *
 * @param {object} record - Object carrying revenue/profit/assets/equity.
 * @param {{ withIcons?: boolean }} [options] - Prefix each line with its emoji.
 */
function printFinancials(record, { withIcons = false } = {}) {
  for (const { key, label, icon } of FINANCIAL_FIELDS) {
    if (!hasValue(record[key])) continue;
    const prefix = withIcons ? `${icon} ` : '';
    console.log(` ${prefix}${label}: ${formatMillionsNok(record[key])}`);
  }
}

/**
 * Lists the `.pdf` file names in a directory, sorted by name.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[] | null} Sorted file names, or `null` if the directory
 *   does not exist (a message is printed in that case).
 */
function listPdfFiles(dir) {
  try {
    return readdirSync(dir)
      .filter((file) => file.endsWith('.pdf'))
      .sort();
  } catch (error) {
    if (error.code === 'ENOENT') {
      console.error(`❌ PDF directory not found: ${dir}`);
      return null;
    }
    throw error;
  }
}

/**
 * Runs the OpenAI-based parser over every PDF in `./data/pdfs`, prints the
 * figures extracted from each file, and then prints a per-year summary with
 * an overall revenue-growth figure.
 *
 * Files are processed one at a time with a fixed pause between calls.
 * Processing stops early if the API reports an authentication error (401).
 *
 * NOTE(review): the shape of the object returned by `parseFinancialPDF` is
 * not visible here; this script only assumes it may carry numeric `revenue`,
 * `profit`, `assets` and `equity` fields, with null/undefined meaning
 * "not found" — confirm against the parser.
 *
 * @returns {Promise<void>}
 */
async function testOpenAIOnAllPDFs() {
  console.log('🚀 Testing OpenAI API on All Downloaded PDFs');
  console.log('═'.repeat(50));
  console.log('This will use OpenAI GPT-4 to extract financial data');
  console.log('from all downloaded årsregnskap PDFs (2012-2023)');
  console.log('═'.repeat(50) + '\n');

  // Check if API key is valid
  if (!OPENAI_API_KEY || OPENAI_API_KEY === 'your-api-key-here') {
    console.error('❌ Please set a valid OpenAI API key');
    console.error(' Get your key from: https://platform.openai.com/api-keys');
    console.error(' Then set it as environment variable OPENAI_API_KEY');
    return;
  }

  const parser = new OpenAIVisionParser(OPENAI_API_KEY);

  // Get all PDF files
  const pdfFiles = listPdfFiles(PDF_DIR);
  if (pdfFiles === null) return;

  console.log(`📁 Found ${pdfFiles.length} PDF files in ${PDF_DIR}`);
  console.log('─'.repeat(50) + '\n');

  const results = [];

  for (const [index, pdfFile] of pdfFiles.entries()) {
    // Extract year from filename (format: aarsregnskap_999059198-YYYY.pdf)
    const yearMatch = pdfFile.match(/(\d{4})\.pdf$/);
    const year = yearMatch ? Number.parseInt(yearMatch[1], 10) : null;

    console.log(`\n📄 Processing: ${pdfFile} (Year: ${year})`);
    console.log('─'.repeat(40));

    const pdfPath = resolve(PDF_DIR, pdfFile);

    try {
      const startTime = Date.now();
      // Fall back to an empty object so a null/undefined parser result is
      // reported as "no data" instead of throwing after being recorded.
      const result = (await parser.parseFinancialPDF(pdfPath)) ?? {};
      const duration = Math.round((Date.now() - startTime) / 1000);

      // `result` is spread after `year`, so a `year` reported by the parser
      // (if any) takes precedence over the one parsed from the file name.
      results.push({ year, ...result, file: pdfFile, processingTime: duration });

      // Display results for this year
      if (hasFinancialData(result)) {
        console.log(`✅ Data extracted in ${duration}s:`);
        printFinancials(result, { withIcons: true });
      } else {
        console.log(`⚠️ No data extracted (${duration}s)`);
        console.log(' PDF might be pure image-based without text');
      }
    } catch (error) {
      console.error(`❌ Error processing ${pdfFile}:`, error.message);

      if (error.status === 401) {
        console.error('\n🔑 API Key Authentication Error!');
        console.error(' The OpenAI API key is invalid or expired.');
        console.error(' Please provide a valid API key.');
        break; // Stop processing if API key is invalid
      }

      results.push({
        year,
        revenue: null,
        profit: null,
        assets: null,
        equity: null,
        file: pdfFile,
        error: error.message,
      });
    }

    // Rate limiting for API calls. Done outside the try block so the pause
    // also applies after a failed request, not only after a successful one.
    if (index < pdfFiles.length - 1) {
      console.log('⏳ Waiting 2 seconds before next PDF (API rate limiting)...');
      await sleep(RATE_LIMIT_DELAY_MS);
    }
  }

  // Summary
  console.log('\n' + '═'.repeat(50));
  console.log('📊 SUMMARY OF ALL YEARS');
  console.log('═'.repeat(50));

  // Uses the same "has data" rule as the per-file output, so both agree.
  const successfulYears = results
    .filter(hasFinancialData)
    .sort((a, b) => (a.year ?? 0) - (b.year ?? 0));

  console.log(`\nTotal PDFs processed: ${results.length}`);
  console.log(`Successful extractions: ${successfulYears.length}`);
  console.log(`Failed extractions: ${results.length - successfulYears.length}`);

  if (successfulYears.length > 0) {
    console.log('\n📈 Years with extracted data:');

    for (const record of successfulYears) {
      console.log(`\n${record.year}:`);
      printFinancials(record);
    }

    // Calculate growth between the earliest and latest years that actually
    // have a revenue figure; the baseline must be non-zero to divide by it.
    const withRevenue = successfulYears.filter((r) => hasValue(r.revenue));
    if (withRevenue.length > 1) {
      const firstYear = withRevenue[0];
      const lastYear = withRevenue[withRevenue.length - 1];

      if (firstYear.revenue !== 0) {
        const revenueGrowth = (
          ((lastYear.revenue - firstYear.revenue) / firstYear.revenue) *
          100
        ).toFixed(1);
        console.log(
          `\n📊 Revenue growth (${firstYear.year}-${lastYear.year}): ${revenueGrowth}%`,
        );
      }
    }
  }

  console.log('\n' + '═'.repeat(50));
  console.log('Test complete!\n');

  if (results.length === 0 || successfulYears.length === 0) {
    console.log('💡 Tips:');
    console.log('1. Make sure you have a valid OpenAI API key');
    console.log('2. The PDFs from Brønnøysund are image-based (scanned)');
    console.log('3. Consider using GPT-4 Vision API for better results');
    console.log('4. Or implement PDF-to-image conversion first');
  }
}

testOpenAIOnAllPDFs().catch(console.error);

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/josuekongolo/companyiq-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.