import { z } from 'zod';
import { CompanyDatabase } from '../database/db.js';
import { BrregClient } from '../apis/brreg.js';
import { BrowserScraper } from '../scraper/browser_scraper.js';
import OpenAI from 'openai';
const AutoScrapeFinancialsSchema = z.object({
org_nr: z.string().describe("Organisasjonsnummer"),
auto_import: z.boolean().default(true).describe("Automatisk lagre til database"),
use_api_first: z.boolean().default(true).describe("Prøv API først før scraping"),
include_analysis: z.boolean().default(true).describe("Inkluder AI-basert finansiell analyse"),
force_rescrape: z.boolean().default(false).describe("Tving ny scraping selv om data finnes i database")
});
/**
* 🤖 FULL AUTOMATION: Scrape ALL available years from Brønnøysund website
* Uses headless browser to download and parse PDFs automatically
* NO MANUAL WORK REQUIRED!
*/
/**
* Generate AI-powered financial analysis using OpenAI
*/
async function generateFinancialAnalysis(company: any, financialData: any[]): Promise<string> {
if (!process.env.OPENAI_API_KEY) {
return '';
}
try {
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const financialSummary = financialData.map(d => ({
year: d.year,
revenue: d.revenue,
profit: d.profit,
assets: d.assets,
equity: d.equity
}));
const response = await openai.chat.completions.create({
model: "gpt-4o-mini",
messages: [
{
role: "system",
content: "Du er en ekspert finansanalytiker som analyserer norske selskaper. Gi en kort, konsis analyse på norsk."
},
{
role: "user",
content: `Analyser dette norske selskapet:
Selskap: ${company.name}
Bransje: ${company.nace_description || company.nace_code || 'Ukjent'}
Finansielle data (siste ${financialData.length} år):
${JSON.stringify(financialSummary, null, 2)}
Gi en kort analyse (3-5 punkter) av:
1. Omsetningsutvikling og veksttrender
2. Lønnsomhet og marginer
3. Finansiell styrke (egenkapital/gjeld)
4. Nøkkeltall som skiller seg ut
5. Kort risiko-vurdering
Bruk norsk format (NOK, norske termer). Vær konsis og fokuser på det viktigste.`
}
],
max_tokens: 800,
temperature: 0.3
});
return response.choices[0]?.message?.content || '';
} catch (error) {
console.error('AI analysis failed:', error);
return '';
}
}
export async function autoScrapeFinancials(args: unknown, db: CompanyDatabase, brreg: BrregClient) {
const params = AutoScrapeFinancialsSchema.parse(args);
try {
console.error(`🌐 ALWAYS checking website first with Chromium browser...`);
console.error(`📊 This ensures we get ALL available years for ANY company`);
// Verify company exists
let company = await db.getCompany(params.org_nr);
if (!company) {
const brregCompany = await brreg.getCompany(params.org_nr);
if (!brregCompany) {
return {
content: [{
type: "text" as const,
text: `Fant ikke selskap med org.nr ${params.org_nr}`
}]
};
}
await db.insertOrUpdateCompany({
org_nr: brregCompany.organisasjonsnummer,
name: brregCompany.navn,
organization_form: brregCompany.organisasjonsform?.beskrivelse,
nace_code: brregCompany.naeringskode1?.kode,
employees_count: brregCompany.antallAnsatte,
last_updated: new Date().toISOString()
} as any);
company = await db.getCompany(params.org_nr);
}
const results: any[] = [];
// FIRST: Check database for existing data (unless force_rescrape is true)
if (!params.force_rescrape) {
console.error(`🔍 Checking database for existing financial data...`);
const existingData = await db.getFinancialHistory(params.org_nr, 10); // Get last 10 years
if (existingData && existingData.length > 0) {
console.error(`✅ Found ${existingData.length} years in database!`);
// Check if we have recent data (within last year)
const currentYear = new Date().getFullYear();
const latestYear = Math.max(...existingData.map((d: any) => d.year));
if (latestYear >= currentYear - 1 && existingData.length >= 3) {
console.error(`📊 Database has recent data (up to ${latestYear}) - no scraping needed!`);
// Add AI analysis if requested
let aiAnalysis = '';
if (params.include_analysis && existingData.length > 0) {
console.error(`🤖 Generating AI-powered financial analysis...`);
aiAnalysis = await generateFinancialAnalysis(company, existingData);
}
// Calculate growth if we have multiple years
let growthAnalysis = '';
if (existingData.length >= 2) {
const latest: any = existingData[0];
const oldest: any = existingData[existingData.length - 1];
if (latest.revenue && oldest.revenue && oldest.revenue > 0) {
const totalGrowth = ((latest.revenue - oldest.revenue) / oldest.revenue) * 100;
const years = latest.year - oldest.year;
const cagr = (Math.pow(latest.revenue / oldest.revenue, 1 / years) - 1) * 100;
growthAnalysis = `
📈 ${years}-ÅRS VEKSTANALYSE:
- Omsetningsvekst: ${totalGrowth > 0 ? '+' : ''}${totalGrowth.toFixed(1)}%
- CAGR: ${cagr.toFixed(1)}% per år
${totalGrowth > 30 ? '🚀 HØYVEKST!' : totalGrowth > 15 ? '📈 God vekst' : '➡️ Stabil'}`;
}
}
// Return cached data immediately
return {
content: [{
type: "text" as const,
text: `
🎯 DATA HENTET FRA DATABASE (INSTANT!)
📊 ${company?.name} - FINANSIELLE DATA:
${existingData.map((d: any) =>
`${d.year}: ${d.revenue ? (d.revenue / 1000000).toFixed(0) + 'M NOK' : 'N/A'} omsetning, ${d.profit ? (d.profit / 1000000).toFixed(0) + 'M NOK' : 'N/A'} resultat`
).join('\n')}
${growthAnalysis}
${aiAnalysis ? `\n🤖 AI-DREVET FINANSIELL ANALYSE:\n${aiAnalysis}` : ''}
✅ Data hentet fra database på < 1 sekund!
📅 Siste oppdatering: År ${latestYear}
💾 Totalt ${existingData.length} år lagret
💡 Tips: For å tvinge ny scraping, bruk parameter force_rescrape: true
`
}]
};
} else {
console.error(`⚠️ Database data needs update (latest: ${latestYear}, count: ${existingData.length})`);
// Add existing data to results to avoid re-scraping what we have
results.push(...existingData);
}
} else {
console.error(`📭 No existing data in database for ${params.org_nr}`);
}
} else {
console.error(`🔄 Force rescrape enabled - bypassing database cache`);
}
// Try API first for latest year (faster)
if (params.use_api_first) {
try {
const RegnskapClient = (await import('../apis/regnskap.js')).RegnskapClient;
const regnskapClient = new RegnskapClient();
const apiData = await regnskapClient.getExtractedFinancials(params.org_nr);
if (apiData) {
console.error(`✅ Got ${apiData.year} from API (fast path)`);
results.push(apiData);
if (params.auto_import) {
await db.insertFinancialSnapshot({
org_nr: apiData.org_nr,
year: apiData.year,
revenue: apiData.revenue,
profit: apiData.profit,
assets: apiData.assets,
equity: apiData.equity,
employees: null,
source: 'regnskapsregisteret_api'
});
}
}
} catch (error) {
console.error('API fetch failed, will use scraping:', error);
}
}
// Only scrape if we need more data or force_rescrape is enabled
let scrapedData: any[] = [];
const needsScraping = params.force_rescrape ||
results.length === 0 ||
(results.length > 0 && Math.max(...results.map(r => r.year)) < new Date().getFullYear() - 1);
if (needsScraping) {
console.error(`🤖 Starting intelligent scraper for ${params.org_nr}...`);
console.error('⏳ Analyzing page structure to find all available years...');
try {
// Use BrowserScraper for comprehensive PDF downloading with OpenAI Vision extraction
const openaiApiKey = process.env.OPENAI_API_KEY;
if (!openaiApiKey) {
console.error('❌ OPENAI_API_KEY not found in environment variables!');
console.error(' Please ensure OPENAI_API_KEY is set in the .env file');
throw new Error('OpenAI API key is required. Please set OPENAI_API_KEY in .env file');
}
console.error(`✅ Using OpenAI API key: ${openaiApiKey.substring(0, 20)}...`);
const scraper = new BrowserScraper(openaiApiKey, params.org_nr);
scrapedData = await Promise.race([
scraper.getAllFinancialYears(params.org_nr),
new Promise<any[]>((_, reject) =>
setTimeout(() => reject(new Error('Scraping timeout after 5 minutes')), 300000)
)
]);
console.error(`✅ Scraped and parsed ${scrapedData.length} years`);
// Debug: Log what we got
scrapedData.forEach(d => {
console.error(` Year ${d.year}: Revenue=${d.revenue ? (d.revenue/1000000).toFixed(1)+'M' : 'null'}, Source=${d.source}`);
});
} catch (scrapingError) {
console.error(`❌ Scraping failed:`, scrapingError);
// Continue with what we have from database/API
}
} else {
console.error(`✅ Sufficient data already in database - skipping browser scraping`);
}
// Variables for discovered years (defined at outer scope)
let yearsWithData: any[] = [];
let yearsDiscovered: any[] = [];
// Process scraped data if we have any
if (scrapedData.length > 0) {
// Import to database - ONLY years with actual data (not nulls)
yearsWithData = scrapedData.filter(d =>
(d.revenue !== null || d.profit !== null || d.assets !== null) &&
d.source !== 'needs_manual_import' &&
d.source !== 'discovered_needs_data'
);
yearsDiscovered = scrapedData.filter(d =>
d.source === 'needs_manual_import' || d.source === 'discovered_needs_data'
);
console.error(`📊 Data summary: ${yearsWithData.length} years with data, ${yearsDiscovered.length} discovered`);
if (params.auto_import) {
console.error(`💾 Saving ${yearsWithData.length} years to database...`);
for (const data of yearsWithData) {
// Skip if we already got this year
if (!results.find(r => r.year === data.year)) {
try {
await db.insertFinancialSnapshot({
org_nr: params.org_nr,
year: data.year,
revenue: data.revenue,
profit: data.profit,
assets: data.assets,
equity: data.equity,
employees: null,
source: data.source
});
results.push(data);
console.error(` ✅ Saved ${data.year}: Revenue=${data.revenue?(data.revenue/1000000).toFixed(1)+'M':'N/A'}, Source=${data.source}`);
} catch (error) {
console.error(` ❌ Failed to save year ${data.year}:`, error);
}
} else {
console.error(` ⏭️ Skipped ${data.year} - already in results from API`);
}
}
console.error(`✅ Database import complete!`);
}
// Add the scraped data to results (if any)
if (scrapedData.length > 0) {
results.push(...scrapedData.filter(d => !results.find(r => r.year === d.year)));
}
}
// Sort by year (newest first)
results.sort((a, b) => b.year - a.year);
// Generate AI analysis if requested
let aiAnalysis = '';
if (params.include_analysis && company && results.length > 0) {
console.error(`🤖 Generating AI-powered financial analysis...`);
aiAnalysis = await generateFinancialAnalysis(company, results);
}
// Calculate growth if we have multiple years
let growthAnalysis = '';
if (results.length >= 2) {
const latest = results[0];
const oldest = results[results.length - 1];
if (latest.revenue && oldest.revenue && oldest.revenue > 0) {
const totalGrowth = ((latest.revenue - oldest.revenue) / oldest.revenue) * 100;
const years = latest.year - oldest.year;
const cagr = (Math.pow(latest.revenue / oldest.revenue, 1 / years) - 1) * 100;
growthAnalysis = `
📈 ${years}-ÅRS VEKSTANALYSE:
- Omsetningsvekst: ${oldest.year} → ${latest.year}: ${totalGrowth > 0 ? '+' : ''}${totalGrowth.toFixed(1)}%
- CAGR: ${cagr.toFixed(1)}% per år
${totalGrowth > 30 ? '🚀 HØYVEKST!' : totalGrowth > 15 ? '📈 God vekst' : '➡️ Stabil'}
`;
}
}
// Generate CSV template for missing years
let csvTemplate = '';
if (yearsDiscovered.length > 0) {
csvTemplate = `\n📝 CSV TEMPLATE FOR MANGLENDE ÅR:\n\norg_nr,year,revenue,profit,assets,equity,source`;
// Add the year we have data for first
if (yearsWithData.length > 0) {
const latest = yearsWithData[0];
csvTemplate += `\n${params.org_nr},${latest.year},${latest.revenue || ''},${latest.profit || ''},${latest.assets || ''},${latest.equity || ''},auto`;
}
// Add discovered years without data
yearsDiscovered.forEach((y: any) => {
csvTemplate += `\n${params.org_nr},${y.year},[fyll inn],[fyll inn],[fyll inn],[fyll inn],manual`;
});
csvTemplate += `\n\n💡 Last ned PDFs fra: https://virksomhet.brreg.no/nb/oppslag/enheter/${params.org_nr}`;
csvTemplate += `\n💡 Deretter: import_financials_from_file /path/to/file.csv format csv`;
}
const report = `
🤖 FULLSTENDIG AUTOMATISK HENTING: ${company?.name}
🎉 HENTET ${results.length} ÅR MED REGNSKAPSDATA!
📊 OVERSIKT:
${results.map(r =>
`${r.year}: ${r.revenue ? (r.revenue / 1000000).toFixed(0) + 'M NOK' : 'N/A'} omsetning, ${r.profit ? (r.profit / 1000000).toFixed(0) + 'M NOK' : 'N/A'} resultat`
).join('\n')}
${growthAnalysis}
${aiAnalysis ? `
🤖 AI-DREVET FINANSIELL ANALYSE:
${aiAnalysis}
` : ''}
📥 NEDLASTEDE FILER FOR ${params.org_nr}:
📁 PDF-mappe: ${process.cwd()}/data/pdfs/${params.org_nr}/
📁 PNG-bilder: ${process.cwd()}/data/png_images/
📁 JSON-data: ${process.cwd()}/data/extracted/${params.org_nr}/
Du kan åpne mappene og se:
- Originale årsregnskap-PDFer (per selskap)
- Konverterte PNG-bilder (brukt av AI)
- Ekstraherte JSON-data med FULL regnskapsinfo
🌐 Last ned fra Brønnøysund:
🔗 https://virksomhet.brreg.no/nb/oppslag/enheter/${params.org_nr}
Slik laster du ned manuelt:
1. Åpne lenken over
2. Skroll ned til "Årsregnskap" seksjonen
3. Klikk "Vis alle" for å se alle år
4. Klikk "Innsendt årsregnskap" for året du vil laste ned
Ekstraherte år:
${results.map(r => `${r.year} - ${r.source === 'openai_vision_extraction' ? 'AI-ekstrahert fra PDF' : r.source === 'regnskapsregisteret_api' ? 'API' : r.source}`).join('\n')}
💡 Alle PDFer er automatisk lastet ned, analysert med AI og lagret!
📋 DETALJER SISTE ÅR (${results[0].year}):
${results[0].revenue ? `💰 Omsetning: ${(results[0].revenue / 1000000).toFixed(1)}M NOK` : ''}
${results[0].profit ? `📈 Resultat: ${(results[0].profit / 1000000).toFixed(1)}M NOK` : ''}
${results[0].revenue && results[0].profit ? `📊 Margin: ${((results[0].profit / results[0].revenue) * 100).toFixed(1)}%` : ''}
${results[0].assets ? `🏢 Eiendeler: ${(results[0].assets / 1000000).toFixed(1)}M NOK` : ''}
${results[0].equity ? `💎 Egenkapital: ${(results[0].equity / 1000000).toFixed(1)}M NOK` : ''}
${params.auto_import ? `
✅ ALLE ${results.length} ÅR LAGRET I DATABASE
💡 VIDERE ANALYSE:
- analyze_financials → Fullstendig risikoanalyse
- analyze_growth → Automatisk vekstberegning med ${results.length} års data
- Sammenligning med SSB bransjesnitt
` : ''}
🚀 100% AUTOMATISK!
- Headless browser: ✅
- PDF nedlasting: ✅ (${results.length} filer)
- Data-ekstraksjon: ✅
- Database lagring: ✅
${results.some(r => !r.revenue) ? `
⚠️ MERK: Noen tall kunne ikke ekstraheres automatisk fra PDF.
Dette kan skyldes:
- Ulike PDF-formater
- Håndskrevet/scannet tekst
- Spesielle regnskapsformater
Bruk 'import_financials' for å manuelt korrigere manglende tall.
` : ''}
⏱️ Totaltid: ~${results.length * 10} sekunder
💰 Kostnad: GRATIS
🎯 Resultat: ${results.length} år med automatisk hentet data!
`;
return {
content: [{
type: "text" as const,
text: report
}]
};
} catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
return {
content: [{
type: "text" as const,
text: `
❌ AUTOMATISK SCRAPING FEILET
Feil: ${errorMessage}
Dette kan skyldes:
- Nettverksproblemer
- Nettstedet er endret
- Ingen årsregnskap tilgjengelig
- Browser automation blokkert
💡 ALTERNATIV:
Bruk 'build_financial_history' for guidet manuell prosess.
`
}],
isError: true
};
}
}