import puppeteer from 'puppeteer';
/**
 * Resolve after `ms` milliseconds.
 *
 * Used instead of `page.waitForTimeout`, which has been removed from recent
 * Puppeteer releases.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Scroll the page one viewport at a time down to the bottom, then back to
 * the top, so that content loaded on scroll gets a chance to render.
 *
 * @param {object} page - Puppeteer page to scroll.
 * @returns {Promise<void>}
 */
async function scrollThroughPage(page) {
  await page.evaluate(async () => {
    // Hard cap so a page that keeps growing cannot keep this loop alive forever.
    const maxSteps = 100;
    // Guard against a zero-height viewport, which would never advance.
    const stepSize = Math.max(window.innerHeight, 1);
    let position = 0;
    // scrollHeight is re-read on every pass: it grows when lazy content loads.
    for (let step = 0; step < maxSteps && position < document.body.scrollHeight; step++) {
      window.scrollTo(0, position);
      position += stepSize;
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
    window.scrollTo(0, 0);
  });
}

/**
 * Collect a summary of the buttons, download elements, "Årsregnskap" section
 * and year-like numbers currently present in the page.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @returns {Promise<object>} Serializable summary built inside the page.
 */
function collectPageInfo(page) {
  return page.evaluate(() => {
    const allButtons = Array.from(document.querySelectorAll('button'));
    const buttonTexts = allButtons.map((button) => button.textContent?.trim() || '');
    const visFlereButtons = allButtons.filter((button) => {
      const text = button.textContent?.toLowerCase() || '';
      return text.includes('vis flere');
    });

    const downloadElements = Array.from(document.querySelectorAll('[data-testid*="download"]'));
    const downloadTestIds = downloadElements.map((el) => el.getAttribute('data-testid'));

    // Prefer the dedicated test id; fall back to a heading mentioning the section.
    const aarsregnskapSection =
      document.querySelector('[data-testid="aarsregnskap"]') ||
      Array.from(document.querySelectorAll('h1, h2, h3, h4')).find((el) =>
        el.textContent?.includes('Årsregnskap')
      );

    // Every distinct 20xx number in the visible text, newest first.
    const yearPattern = /\b20\d{2}\b/g;
    const bodyText = document.body.innerText;
    const years = [...new Set(bodyText.match(yearPattern) || [])].sort().reverse();

    return {
      totalButtons: allButtons.length,
      buttonTexts: buttonTexts.slice(0, 20), // First 20 button texts
      visFlereCount: visFlereButtons.length,
      downloadElementsCount: downloadElements.length,
      downloadTestIds: downloadTestIds.slice(0, 10),
      hasAarsregnskapSection: !!aarsregnskapSection,
      yearsFound: years,
      pageTitle: document.title
    };
  });
}

/**
 * Click every enabled button whose text matches one of the "show more"
 * patterns and report which ones were clicked.
 *
 * @param {object} page - Puppeteer page to act on.
 * @returns {Promise<string[]>} Trimmed labels of the buttons that were clicked
 *   (empty when nothing matched).
 */
function clickShowMoreButtons(page) {
  return page.evaluate(() => {
    const patterns = ['vis flere', 'vis mer', 'last inn', 'se alle'];
    const clickedLabels = [];
    for (const button of document.querySelectorAll('button')) {
      const text = button.textContent?.toLowerCase() || '';
      if (button.disabled || !patterns.some((pattern) => text.includes(pattern))) {
        continue;
      }
      try {
        button.click();
        clickedLabels.push(button.textContent?.trim() || '');
      } catch {
        // Deliberately best-effort: a button that fails to click is skipped.
      }
    }
    return clickedLabels;
  });
}

/**
 * Count the annual-report download elements and extract the years embedded
 * in their `data-testid` attributes.
 *
 * @param {object} page - Puppeteer page to inspect.
 * @returns {Promise<{downloadCount: number, yearsAvailable: string[]}>}
 */
function collectDownloadYears(page) {
  return page.evaluate(() => {
    const downloadElements = Array.from(
      document.querySelectorAll('[data-testid*="download-aarsregnskap"]')
    );
    const years = downloadElements
      .map((el) => {
        const testId = el.getAttribute('data-testid') || '';
        const match = testId.match(/(\d{4})/);
        return match ? match[1] : null;
      })
      .filter(Boolean);
    return {
      downloadCount: downloadElements.length,
      yearsAvailable: [...new Set(years)].sort().reverse()
    };
  });
}

/**
 * Debug helper: open the brreg.no company page for an organisation number in
 * a visible Chrome window, print what is found on the page (buttons, download
 * elements, years), click the "show more" style buttons, and print the
 * download years found afterwards.
 *
 * Errors raised while scraping are logged, not rethrown; the browser is
 * closed in all cases once it has been launched.
 *
 * @param {string} [orgNr='999059198'] - Organisation number to look up.
 * @returns {Promise<void>}
 */
async function debugBrowserScraper(orgNr = '999059198') {
  console.log('🔍 DEBUG: Browser Scraper Test');
  console.log('═'.repeat(50));
  let browser;
  try {
    console.log('🌐 Launching browser...');
    browser = await puppeteer.launch({
      // NOTE(review): macOS-specific Chrome location; adjust on other platforms.
      executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
      headless: false, // Visible browser
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox'
      ],
      defaultViewport: null
    });
    const page = await browser.newPage();

    // Navigate to the company page
    const url = `https://virksomhet.brreg.no/nb/oppslag/enheter/${encodeURIComponent(orgNr)}`;
    console.log(`📄 Navigating to: ${url}`);
    await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    console.log('⏳ Waiting for page to load...');
    await sleep(5000);

    // Scroll to load content
    console.log('📜 Scrolling to load all content...');
    await scrollThroughPage(page);
    await sleep(2000);

    // Debug: Check what's on the page
    console.log('\n🔍 DEBUG: Checking page content...');
    const pageInfo = await collectPageInfo(page);
    console.log('📊 Page Analysis:');
    console.log(` - Page Title: ${pageInfo.pageTitle}`);
    console.log(` - Total buttons: ${pageInfo.totalButtons}`);
    console.log(` - "Vis flere" buttons found: ${pageInfo.visFlereCount}`);
    console.log(` - Download elements: ${pageInfo.downloadElementsCount}`);
    console.log(` - Has Årsregnskap section: ${pageInfo.hasAarsregnskapSection}`);
    console.log(` - Years mentioned on page: ${pageInfo.yearsFound.join(', ')}`);
    if (pageInfo.buttonTexts.length > 0) {
      console.log('\n Button samples:');
      pageInfo.buttonTexts.forEach((text, i) => {
        if (text) console.log(` ${i + 1}. "${text}"`);
      });
    }
    if (pageInfo.downloadTestIds.length > 0) {
      console.log('\n Download test IDs found:');
      pageInfo.downloadTestIds.forEach((id) => {
        console.log(` - ${id}`);
      });
    }

    // Try to find and click "Vis flere" buttons
    console.log('\n🔘 Attempting to click "Vis flere" buttons...');
    let totalClicks = 0;
    for (let attempt = 1; attempt <= 10; attempt++) {
      const clickedLabels = await clickShowMoreButtons(page);
      if (clickedLabels.length === 0) {
        console.log(` - No more buttons to click (after ${totalClicks} total clicks)`);
        break;
      }
      totalClicks++;
      console.log(` ✓ Clicked "Vis flere" (attempt ${attempt})`);
      // Logged here rather than inside the page so it shows up in the terminal.
      clickedLabels.forEach((label) => {
        console.log(` Clicked button: "${label}"`);
      });
      await sleep(3000); // Wait for content to load
    }

    // Re-check after clicking
    console.log('\n🔍 Re-checking after clicks...');
    const finalInfo = await collectDownloadYears(page);
    console.log(`\n📊 Final Results:`);
    console.log(` - Download buttons found: ${finalInfo.downloadCount}`);
    console.log(` - Years available: ${finalInfo.yearsAvailable.join(', ')}`);

    // Keep browser open for manual inspection
    console.log('\n⏸️ Browser will stay open for 10 seconds for manual inspection...');
    await sleep(10000);
  } catch (error) {
    console.error('❌ Error:', error);
  } finally {
    if (browser) {
      await browser.close();
      console.log('🔒 Browser closed');
    }
  }
}
// Script entry point: start the debug run. The returned promise is
// intentionally not awaited.
void debugBrowserScraper();