Skip to main content
Glama
josuekongolo

CompanyIQ MCP Server

by josuekongolo
test-pdf-parser.js (3.02 kB)
import pdfParse from 'pdf-parse';
import { readFile } from 'fs/promises';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Fallback PDF used when no path is supplied on the command line.
const DEFAULT_PDF_PATH = '/Users/josuekongolo/Downloads/aarsregnskap_999059198-2024.pdf';

// Number of characters of surrounding text shown before / after a matched term.
const CONTEXT_BEFORE = 50;
const CONTEXT_AFTER = 100;

// Only amounts above this value are reported as "large numbers".
const LARGE_NUMBER_THRESHOLD = 10000;

// Common Norwegian financial statement terms to look for in the extracted text.
const FINANCIAL_TERMS = [
  'Driftsinntekter',
  'Salgsinntekt',
  'Sum inntekter',
  'Årsresultat',
  'Resultat',
  'Sum eiendeler',
  'Egenkapital',
  'Omsetning',
  'Driftsresultat',
];

// Number formats to probe for. The lookarounds stop a match from starting or
// ending in the middle of a longer digit run (e.g. picking "12.202" out of the
// date "31.12.2024"), and `[^\S\r\n]` limits thousand separators to whitespace
// on the same line so values on adjacent lines are not glued together.
const NUMBER_PATTERNS = [
  /(?<![\d.,])\d{1,3}(?:[^\S\r\n]\d{3})*(?:,\d+)?(?![.,]?\d)/g, // Norwegian format: 1 234 567,89
  /(?<![\d.,])\d{1,3}(?:\.\d{3})*(?:,\d+)?(?![.,]?\d)/g, // Alternative: 1.234.567,89
  /(?<![\d.,])\d+(?:[^\S\r\n]\d{3})+(?!\d)/g, // Simple thousands: 123 456
];

/**
 * Returns a single-line snippet of text surrounding the first occurrence of a term.
 *
 * @param {string} text - Full text to search.
 * @param {string} term - Term to locate (case-sensitive).
 * @returns {string|null} The snippet with newlines replaced by spaces, or null if the term is absent.
 */
function extractTermContext(text, term) {
  const index = text.indexOf(term);
  if (index === -1) {
    return null;
  }
  const start = Math.max(0, index - CONTEXT_BEFORE);
  const end = Math.min(text.length, index + term.length + CONTEXT_AFTER);
  return text.substring(start, end).replace(/\n/g, ' ');
}

/**
 * Converts a matched number string (whitespace or dot thousand separators,
 * comma decimal separator) into a JavaScript number.
 *
 * @param {string} raw - Matched number text, e.g. "1 234 567,89".
 * @returns {number} Parsed value, or NaN if the text is not numeric.
 */
function parseFormattedNumber(raw) {
  const normalized = raw.replace(/\s/g, '').replace(/\./g, '').replace(',', '.');
  return Number.parseFloat(normalized);
}

/**
 * Finds all matches of a number pattern whose value exceeds the reporting threshold.
 *
 * @param {string} text - Full text to scan.
 * @param {RegExp} pattern - Global regular expression describing a number format.
 * @returns {string[]} Matched substrings whose parsed value is above LARGE_NUMBER_THRESHOLD.
 */
function findLargeNumbers(text, pattern) {
  const matches = text.match(pattern) ?? [];
  return matches.filter((match) => parseFormattedNumber(match) > LARGE_NUMBER_THRESHOLD);
}

/**
 * Reads a PDF, extracts its text with pdf-parse, and prints diagnostics:
 * page count, a text preview, where known financial terms occur, and which
 * number formats yield large amounts.
 *
 * Failures are reported on stderr and reflected in the process exit code
 * rather than thrown.
 *
 * @param {string} [pdfPath] - Path to the PDF. Defaults to the first CLI
 *   argument, or to DEFAULT_PDF_PATH when no argument is given.
 * @returns {Promise<void>}
 */
async function testPdfParser(pdfPath = process.argv[2] ?? DEFAULT_PDF_PATH) {
  console.log('🔍 Testing PDF Parser with Real Norwegian Financial PDF\n');

  // Resolve relative paths against the current working directory so the
  // "file not found" hint below shows the exact location that was tried.
  const absolutePath = resolve(pdfPath);

  try {
    console.log(`📄 Reading PDF: ${absolutePath}\n`);

    const dataBuffer = await readFile(absolutePath);
    const data = await pdfParse(dataBuffer);
    const { text } = data;

    console.log('📊 PDF Info:');
    console.log(`Pages: ${data.numpages}`);
    console.log(`Text length: ${text.length} characters\n`);

    console.log('📝 First 2000 characters of extracted text:');
    console.log('━'.repeat(50));
    console.log(text.substring(0, 2000));
    console.log('━'.repeat(50));

    console.log('\n🔍 Looking for key financial terms...\n');

    for (const term of FINANCIAL_TERMS) {
      const context = extractTermContext(text, term);
      if (context === null) {
        console.log(`❌ Not found: "${term}"`);
        continue;
      }
      console.log(`✅ Found "${term}":`);
      console.log(` ${context}`);
      console.log();
    }

    console.log('\n🔍 Looking for number patterns...\n');

    NUMBER_PATTERNS.forEach((pattern, i) => {
      const largeNumbers = findLargeNumbers(text, pattern);
      if (largeNumbers.length > 0) {
        console.log(`Pattern ${i + 1} found ${largeNumbers.length} large numbers:`);
        console.log(` Examples: ${largeNumbers.slice(0, 5).join(', ')}`);
      }
    });
  } catch (error) {
    console.error('❌ Error parsing PDF:', error.message);
    // Only suggest checking the path when the file is actually missing;
    // parse failures on an existing file need a different fix.
    if (error.code === 'ENOENT') {
      console.error('\nMake sure the PDF exists at:', absolutePath);
    }
    // Signal failure to callers (shell scripts, CI) without aborting abruptly.
    process.exitCode = 1;
  }
}

testPdfParser();

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/josuekongolo/companyiq-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.