Skip to main content
Glama
josuekongolo

CompanyIQ MCP Server

by josuekongolo
test-openai-vision-extraction.js • 7.29 kB
import { OpenAIVisionParser } from './build/scraper/openai_vision_parser.js';
import { existsSync, readdirSync } from 'fs';
import { resolve } from 'path';
import { writeFile, mkdir } from 'fs/promises';

// The OpenAI API key is read from the environment only. Never hard-code a real
// key in this file: anything committed to a repository is effectively public.
// SECURITY: an earlier revision embedded a key as a fallback value here; that
// key must be treated as leaked and revoked in the OpenAI dashboard.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

// Pause between consecutive PDFs so the Vision API is not hit back-to-back.
const RATE_LIMIT_DELAY_MS = 3000;

// Financial fields read from each parser result.
const METRIC_FIELDS = ['revenue', 'profit', 'assets', 'equity'];

// Console labels used while a single PDF is being processed.
const PER_FILE_LABELS = {
  revenue: ' 💰 Revenue',
  profit: ' 📊 Profit',
  assets: ' 🏒 Assets',
  equity: ' 💎 Equity',
};

// Console labels used in the final per-year summary.
const SUMMARY_LABELS = {
  revenue: ' Revenue',
  profit: ' Profit',
  assets: ' Assets',
  equity: ' Equity',
};

/**
 * Tells whether a metric holds a printable number.
 * Guards against null/undefined/NaN so the log never shows "NaNM NOK".
 *
 * @param {unknown} value - Candidate metric value.
 * @returns {boolean} True when value is a finite number.
 */
function isAmount(value) {
  return typeof value === 'number' && Number.isFinite(value);
}

/**
 * Formats an amount as millions with one decimal, e.g. 1500000 -> "1.5M NOK".
 *
 * @param {number} value - Amount to format.
 * @returns {string} Formatted amount.
 */
function formatMillions(value) {
  return `${(value / 1000000).toFixed(1)}M NOK`;
}

/**
 * Logs every metric of `result` that holds a finite number.
 *
 * @param {object} result - Object carrying the fields in METRIC_FIELDS.
 * @param {Record<string, string>} labels - Line prefix for each field.
 */
function logMetrics(result, labels) {
  for (const field of METRIC_FIELDS) {
    if (isAmount(result[field])) {
      console.log(`${labels[field]}: ${formatMillions(result[field])}`);
    }
  }
}

/** Orders records by ascending year; records without a year sort first. */
function compareByYear(a, b) {
  return (a.year || 0) - (b.year || 0);
}

/**
 * Runs OpenAIVisionParser over every PDF in ./data/pdfs, writes one JSON file
 * per successfully extracted PDF plus a combined file and a summary file into
 * ./data/extracted, and prints a report to the console.
 *
 * Sets process.exitCode to 1 when a precondition (API key, PDF directory) is
 * not met. Stops processing further PDFs on the first HTTP 401 from the API.
 *
 * @returns {Promise<void>}
 */
async function testVisionExtraction() {
  console.log('🚀 Testing OpenAI Vision API PDF Extraction');
  console.log('═'.repeat(50));
  console.log('This will use OpenAI GPT-4 Vision to extract financial data');
  console.log('from scanned årsregnskap PDFs by converting them to images');
  console.log('═'.repeat(50) + '\n');

  // Check if API key is valid
  if (!OPENAI_API_KEY || OPENAI_API_KEY === 'your-api-key-here') {
    console.error('❌ Please set a valid OpenAI API key');
    console.error(' Get your key from: https://platform.openai.com/api-keys');
    console.error(' Then export it as OPENAI_API_KEY before running this script.');
    process.exitCode = 1;
    return;
  }

  const parser = new OpenAIVisionParser(OPENAI_API_KEY);
  const pdfDir = './data/pdfs';
  const outputDir = './data/extracted';

  // Fail with a readable message instead of a raw ENOENT from readdirSync.
  if (!existsSync(pdfDir)) {
    console.error(`❌ PDF directory not found: ${pdfDir}`);
    process.exitCode = 1;
    return;
  }

  // Recursive mkdir is a no-op for an existing directory and resolves with
  // undefined in that case, so no separate existence check is needed.
  const createdPath = await mkdir(outputDir, { recursive: true });
  if (createdPath !== undefined) {
    console.log(`📁 Created output directory: ${outputDir}`);
  }

  // Get all PDF files
  const pdfFiles = readdirSync(pdfDir)
    .filter((file) => file.endsWith('.pdf'))
    .sort();

  console.log(`📁 Found ${pdfFiles.length} PDF files in ${pdfDir}`);
  console.log('─'.repeat(50) + '\n');

  const allResults = [];
  const successfulExtractions = [];

  for (const [index, pdfFile] of pdfFiles.entries()) {
    // Extract year from filename (format: aarsregnskap_999059198-YYYY.pdf)
    const yearMatch = pdfFile.match(/(\d{4})\.pdf$/);
    const year = yearMatch ? Number.parseInt(yearMatch[1], 10) : null;

    console.log(`\n📄 Processing: ${pdfFile} (Year: ${year})`);
    console.log('─'.repeat(40));

    const pdfPath = resolve(pdfDir, pdfFile);

    try {
      const startTime = Date.now();
      console.log('🔄 Converting PDF to images and extracting with Vision API...');
      const result = await parser.parseFinancialPDF(pdfPath);
      const duration = Math.round((Date.now() - startTime) / 1000);

      // Add year to result
      const fullResult = {
        year,
        ...result,
        file: pdfFile,
        processingTime: duration,
        extractedAt: new Date().toISOString(),
      };
      allResults.push(fullResult);

      // A PDF counts as extracted when at least one metric is truthy; zero is
      // deliberately treated like "missing", matching the original criterion.
      // NOTE(review): confirm how the parser reports a missing value.
      const hasData = METRIC_FIELDS.some((field) => Boolean(result[field]));

      if (hasData) {
        console.log(`✅ Data extracted in ${duration}s:`);
        logMetrics(result, PER_FILE_LABELS);
        successfulExtractions.push(fullResult);

        // Save individual year's data to JSON. Files without a parsable year
        // fall back to the PDF name so they do not overwrite one another.
        const fileLabel = year ?? pdfFile.replace(/\.pdf$/, '');
        const outputFile = resolve(outputDir, `financial_data_${fileLabel}.json`);
        await writeFile(outputFile, JSON.stringify(fullResult, null, 2));
        console.log(` 💾 Saved to: ${outputFile}`);
      } else {
        console.log(`⚠️ No data extracted (${duration}s)`);
        // NOTE(review): nothing is retried after this message is printed.
        console.log(' Trying Vision API with image conversion...');
      }
    } catch (error) {
      console.error(`❌ Error processing ${pdfFile}:`, error.message);

      if (error.status === 401) {
        console.error('\n🔑 API Key Authentication Error!');
        console.error(' The OpenAI API key is invalid or expired.');
        console.error(' Please provide a valid API key.');
        break; // Stop processing if API key is invalid
      }

      allResults.push({
        year,
        revenue: null,
        profit: null,
        assets: null,
        equity: null,
        file: pdfFile,
        error: error.message,
        extractedAt: new Date().toISOString(),
      });
    }

    // Rate limiting for API calls. Runs after failures as well, so an error
    // is not immediately followed by another request.
    if (index < pdfFiles.length - 1) {
      console.log('⏳ Waiting 3 seconds before next PDF (API rate limiting)...');
      await new Promise((done) => setTimeout(done, RATE_LIMIT_DELAY_MS));
    }
  }

  // Save combined results
  const combinedOutputFile = resolve(outputDir, 'all_financial_data.json');
  await writeFile(combinedOutputFile, JSON.stringify(allResults, null, 2));
  console.log(`\n💾 Saved all results to: ${combinedOutputFile}`);

  // Save summary
  const failedCount = allResults.length - successfulExtractions.length;
  const extractionsByYear = [...successfulExtractions].sort(compareByYear);
  const summary = {
    totalPdfs: allResults.length,
    successfulExtractions: successfulExtractions.length,
    failedExtractions: failedCount,
    extractionDate: new Date().toISOString(),
    yearsCovered: allResults
      .map((record) => record.year)
      .filter(Boolean)
      .sort((a, b) => a - b),
    financialSummary: extractionsByYear.map(
      ({ year, revenue, profit, assets, equity }) => ({
        year,
        revenue,
        profit,
        assets,
        equity,
      }),
    ),
  };

  const summaryFile = resolve(outputDir, 'extraction_summary.json');
  await writeFile(summaryFile, JSON.stringify(summary, null, 2));
  console.log(`💾 Saved summary to: ${summaryFile}`);

  // Summary
  console.log('\n' + '═'.repeat(50));
  console.log('📊 EXTRACTION SUMMARY');
  console.log('═'.repeat(50));
  console.log(`\nTotal PDFs processed: ${allResults.length}`);
  console.log(`Successful extractions: ${successfulExtractions.length}`);
  console.log(`Failed extractions: ${failedCount}`);

  if (extractionsByYear.length > 0) {
    console.log('\n📈 Years with extracted data:');

    for (const record of extractionsByYear) {
      console.log(`\n${record.year}:`);
      logMetrics(record, SUMMARY_LABELS);
    }

    // Calculate growth if we have multiple years
    if (extractionsByYear.length > 1) {
      const firstYear = extractionsByYear[0];
      const lastYear = extractionsByYear[extractionsByYear.length - 1];

      // The truthiness check also keeps a zero baseline out of the division.
      if (firstYear.revenue && lastYear.revenue) {
        const revenueGrowth = (
          ((lastYear.revenue - firstYear.revenue) / firstYear.revenue) * 100
        ).toFixed(1);
        console.log(`\n📊 Revenue growth (${firstYear.year}-${lastYear.year}): ${revenueGrowth}%`);
      }
    }
  }

  console.log('\n' + '═'.repeat(50));
  console.log('🎉 Vision API extraction complete!');
  console.log(`📁 All extracted data saved to: ${outputDir}`);
  console.log('═'.repeat(50) + '\n');
}

testVisionExtraction().catch((error) => {
  console.error(error);
  // Signal failure to the calling shell or CI job.
  process.exitCode = 1;
});

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/josuekongolo/companyiq-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server