Skip to main content
Glama
josuekongolo

CompanyIQ MCP Server

by josuekongolo
auto_scrape_financials.ts (17 kB)
import { z } from 'zod';
import { CompanyDatabase } from '../database/db.js';
import { BrregClient } from '../apis/brreg.js';
import { BrowserScraper } from '../scraper/browser_scraper.js';
import OpenAI from 'openai';

/** Input schema for the tool. The `describe` texts are user-facing and therefore kept in Norwegian. */
const AutoScrapeFinancialsSchema = z.object({
  org_nr: z.string().describe("Organisasjonsnummer"),
  auto_import: z.boolean().default(true).describe("Automatisk lagre til database"),
  use_api_first: z.boolean().default(true).describe("Prøv API først før scraping"),
  include_analysis: z.boolean().default(true).describe("Inkluder AI-basert finansiell analyse"),
  force_rescrape: z.boolean().default(false).describe("Tving ny scraping selv om data finnes i database")
});

/** Upper bound for a single browser-scraping run (5 minutes). */
const SCRAPE_TIMEOUT_MS = 300000;

/** Formats an amount as millions with the given number of decimals (no unit suffix). */
function toMillions(amount: number, decimals: number): string {
  return (amount / 1000000).toFixed(decimals);
}

/**
 * Builds one overview line ("<year>: <revenue> omsetning, <profit> resultat").
 * Falsy values (including 0) are rendered as 'N/A', as in the original report.
 */
function formatYearLine(row: any): string {
  const revenue = row.revenue ? toMillions(row.revenue, 0) + 'M NOK' : 'N/A';
  const profit = row.profit ? toMillions(row.profit, 0) + 'M NOK' : 'N/A';
  return `${row.year}: ${revenue} omsetning, ${profit} resultat`;
}

/**
 * Builds the revenue growth section comparing the newest and oldest rows.
 *
 * @param latest - Row for the newest year.
 * @param oldest - Row for the oldest year.
 * @param showYearRange - When true, prefixes the growth figure with "<oldest> → <latest>: ".
 * @returns The formatted section, or '' when growth cannot be computed
 *          (missing revenue, non-positive base revenue, or no positive year span).
 */
function buildGrowthAnalysis(latest: any, oldest: any, showYearRange: boolean): string {
  if (!latest.revenue || !oldest.revenue || oldest.revenue <= 0) {
    return '';
  }

  const years = latest.year - oldest.year;
  // A zero (or negative) span would make the CAGR exponent 1/years infinite.
  if (!(years > 0)) {
    return '';
  }

  const totalGrowth = ((latest.revenue - oldest.revenue) / oldest.revenue) * 100;
  const cagr = (Math.pow(latest.revenue / oldest.revenue, 1 / years) - 1) * 100;
  const sign = totalGrowth > 0 ? '+' : '';
  const range = showYearRange ? `${oldest.year} → ${latest.year}: ` : '';
  const verdict = totalGrowth > 30 ? '🚀 HØYVEKST!' : totalGrowth > 15 ? '📈 God vekst' : '➡️ Stabil';

  return [
    '',
    `📈 ${years}-ÅRS VEKSTANALYSE:`,
    `- Omsetningsvekst: ${range}${sign}${totalGrowth.toFixed(1)}%`,
    // A negative revenue ratio yields NaN; leave the line out rather than print it.
    ...(Number.isFinite(cagr) ? [`- CAGR: ${cagr.toFixed(1)}% per år`] : []),
    '',
    verdict
  ].join('\n');
}

/**
 * Races `task` against a timer and always clears the timer afterwards, so a
 * finished task does not leave a pending timeout behind.
 *
 * NOTE: this only stops waiting; it does not cancel the underlying task.
 *
 * @throws Error with `message` when `ms` elapses before `task` settles.
 */
async function withTimeout<T>(task: Promise<T>, ms: number, message: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), ms);
  });

  try {
    return await Promise.race([task, timeout]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Generate AI-powered financial analysis using OpenAI.
 *
 * Sends year/revenue/profit/assets/equity for each row to the `gpt-4o-mini`
 * chat model together with the company name and industry.
 *
 * @param company - Company record; `name`, `nace_description` and `nace_code` are read.
 * @param financialData - Rows to summarise.
 * @returns The model's answer, or '' when OPENAI_API_KEY is unset, the call
 *          fails (the error is logged), or the response has no content.
 */
async function generateFinancialAnalysis(company: any, financialData: any[]): Promise<string> {
  if (!process.env.OPENAI_API_KEY) {
    return '';
  }

  try {
    const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

    const financialSummary = financialData.map(d => ({
      year: d.year,
      revenue: d.revenue,
      profit: d.profit,
      assets: d.assets,
      equity: d.equity
    }));

    const prompt = [
      'Analyser dette norske selskapet:',
      '',
      `Selskap: ${company.name}`,
      `Bransje: ${company.nace_description || company.nace_code || 'Ukjent'}`,
      '',
      `Finansielle data (siste ${financialData.length} år):`,
      JSON.stringify(financialSummary, null, 2),
      '',
      'Gi en kort analyse (3-5 punkter) av:',
      '1. Omsetningsutvikling og veksttrender',
      '2. Lønnsomhet og marginer',
      '3. Finansiell styrke (egenkapital/gjeld)',
      '4. Nøkkeltall som skiller seg ut',
      '5. Kort risiko-vurdering',
      '',
      'Bruk norsk format (NOK, norske termer). Vær konsis og fokuser på det viktigste.'
    ].join('\n');

    const response = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [
        {
          role: "system",
          content: "Du er en ekspert finansanalytiker som analyserer norske selskaper. Gi en kort, konsis analyse på norsk."
        },
        { role: "user", content: prompt }
      ],
      max_tokens: 800,
      temperature: 0.3
    });

    return response.choices[0]?.message?.content || '';
  } catch (error) {
    console.error('AI analysis failed:', error);
    return '';
  }
}

/**
 * Collects financial history for a company and returns a text report.
 *
 * Flow:
 * 1. Validate `args` against the schema (validation errors propagate to the caller).
 * 2. Ensure the company exists in the database, importing it from Brreg if needed.
 * 3. Unless `force_rescrape` is set, read stored history; when it reaches at least
 *    last year and holds three or more rows, answer directly from the database.
 * 4. If `use_api_first` is set, fetch figures from the accounts API and, when
 *    `auto_import` is set, store them.
 * 5. Scrape with `BrowserScraper` (requires OPENAI_API_KEY) when no data is
 *    available, the newest year is older than last year, or `force_rescrape` is set.
 * 6. Store scraped years that carry data (when `auto_import` is set), merge
 *    everything by year, and build the report.
 *
 * @param args - Raw tool arguments; parsed with `AutoScrapeFinancialsSchema`.
 * @param db - Database used for company and financial snapshot reads/writes.
 * @param brreg - Brreg client used to look up a company missing from the database.
 * @returns A text content response; `isError: true` is set when the run fails
 *          or no financial data could be obtained.
 */
export async function autoScrapeFinancials(args: unknown, db: CompanyDatabase, brreg: BrregClient) {
  const params = AutoScrapeFinancialsSchema.parse(args);

  try {
    console.error(`🌐 ALWAYS checking website first with Chromium browser...`);
    console.error(`📊 This ensures we get ALL available years for ANY company`);

    // Verify company exists
    let company = await db.getCompany(params.org_nr);
    if (!company) {
      const brregCompany = await brreg.getCompany(params.org_nr);
      if (!brregCompany) {
        return {
          content: [{
            type: "text" as const,
            text: `Fant ikke selskap med org.nr ${params.org_nr}`
          }]
        };
      }

      await db.insertOrUpdateCompany({
        org_nr: brregCompany.organisasjonsnummer,
        name: brregCompany.navn,
        organization_form: brregCompany.organisasjonsform?.beskrivelse,
        nace_code: brregCompany.naeringskode1?.kode,
        employees_count: brregCompany.antallAnsatte,
        last_updated: new Date().toISOString()
      } as any);

      company = await db.getCompany(params.org_nr);
    }

    // Rows are kept loosely typed: they come from three sources (database, API,
    // scraper) whose declared shapes are not visible in this module.
    const results: any[] = [];

    // FIRST: Check database for existing data (unless force_rescrape is true)
    if (!params.force_rescrape) {
      console.error(`🔍 Checking database for existing financial data...`);
      // Second argument is 10; the original comment describes it as "last 10 years".
      const existingData = await db.getFinancialHistory(params.org_nr, 10);

      if (existingData && existingData.length > 0) {
        console.error(`✅ Found ${existingData.length} years in database!`);

        // Check if we have recent data (within last year)
        const currentYear = new Date().getFullYear();
        const latestYear = Math.max(...existingData.map((d: any) => d.year));

        if (latestYear >= currentYear - 1 && existingData.length >= 3) {
          console.error(`📊 Database has recent data (up to ${latestYear}) - no scraping needed!`);

          // Add AI analysis if requested
          let aiAnalysis = '';
          if (params.include_analysis && company) {
            console.error(`🤖 Generating AI-powered financial analysis...`);
            aiAnalysis = await generateFinancialAnalysis(company, existingData);
          }

          // Sort a copy newest-first so growth does not depend on the row order
          // returned by the database.
          const history: any[] = [...existingData];
          history.sort((a, b) => b.year - a.year);

          const growthAnalysis = history.length >= 2
            ? buildGrowthAnalysis(history[0], history[history.length - 1], false)
            : '';

          // Return cached data immediately
          return {
            content: [{
              type: "text" as const,
              text: [
                '',
                '🎯 DATA HENTET FRA DATABASE (INSTANT!)',
                '',
                `📊 ${company?.name} - FINANSIELLE DATA:`,
                '',
                history.map(formatYearLine).join('\n'),
                growthAnalysis,
                aiAnalysis ? `\n🤖 AI-DREVET FINANSIELL ANALYSE:\n${aiAnalysis}` : '',
                '',
                '✅ Data hentet fra database på < 1 sekund!',
                `📅 Siste oppdatering: År ${latestYear}`,
                `💾 Totalt ${existingData.length} år lagret`,
                '',
                '💡 Tips: For å tvinge ny scraping, bruk parameter force_rescrape: true',
                ''
              ].join('\n')
            }]
          };
        } else {
          console.error(`⚠️ Database data needs update (latest: ${latestYear}, count: ${existingData.length})`);
          // Add existing data to results to avoid re-scraping what we have
          results.push(...existingData);
        }
      } else {
        console.error(`📭 No existing data in database for ${params.org_nr}`);
      }
    } else {
      console.error(`🔄 Force rescrape enabled - bypassing database cache`);
    }

    // Try API first for latest year (faster)
    if (params.use_api_first) {
      try {
        const { RegnskapClient } = await import('../apis/regnskap.js');
        const regnskapClient = new RegnskapClient();
        const apiData = await regnskapClient.getExtractedFinancials(params.org_nr);

        if (apiData) {
          console.error(`✅ Got ${apiData.year} from API (fast path)`);

          // Replace a row for the same year loaded from the database instead of
          // listing that year twice in the report.
          const sameYearIndex = results.findIndex(r => r.year === apiData.year);
          if (sameYearIndex >= 0) {
            results[sameYearIndex] = apiData;
          } else {
            results.push(apiData);
          }

          if (params.auto_import) {
            await db.insertFinancialSnapshot({
              org_nr: apiData.org_nr,
              year: apiData.year,
              revenue: apiData.revenue,
              profit: apiData.profit,
              assets: apiData.assets,
              equity: apiData.equity,
              employees: null,
              source: 'regnskapsregisteret_api'
            });
          }
        }
      } catch (error) {
        console.error('API fetch failed, will use scraping:', error);
      }
    }

    // Only scrape if we need more data or force_rescrape is enabled.
    // NOTE(review): the cache check above also requires >= 3 stored years, but
    // this check does not, so a company with fewer than 3 recent years is never
    // scraped for older years unless force_rescrape is set — confirm intent.
    let scrapedData: any[] = [];
    const newestYear = results.length > 0 ? Math.max(...results.map(r => r.year)) : -Infinity;
    const needsScraping = params.force_rescrape ||
      results.length === 0 ||
      newestYear < new Date().getFullYear() - 1;

    if (needsScraping) {
      console.error(`🤖 Starting intelligent scraper for ${params.org_nr}...`);
      console.error('⏳ Analyzing page structure to find all available years...');

      try {
        // Use BrowserScraper for comprehensive PDF downloading with OpenAI Vision extraction
        const openaiApiKey = process.env.OPENAI_API_KEY;
        if (!openaiApiKey) {
          console.error('❌ OPENAI_API_KEY not found in environment variables!');
          console.error(' Please ensure OPENAI_API_KEY is set in the .env file');
          throw new Error('OpenAI API key is required. Please set OPENAI_API_KEY in .env file');
        }

        // Never write any part of the key to the log.
        console.error('✅ OpenAI API key found in environment');
        const scraper = new BrowserScraper(openaiApiKey, params.org_nr);

        scrapedData = await withTimeout<any[]>(
          scraper.getAllFinancialYears(params.org_nr),
          SCRAPE_TIMEOUT_MS,
          'Scraping timeout after 5 minutes'
        );

        console.error(`✅ Scraped and parsed ${scrapedData.length} years`);

        // Debug: Log what we got
        scrapedData.forEach(d => {
          console.error(` Year ${d.year}: Revenue=${d.revenue ? toMillions(d.revenue, 1) + 'M' : 'null'}, Source=${d.source}`);
        });
      } catch (scrapingError) {
        console.error(`❌ Scraping failed:`, scrapingError);
        // Continue with what we have from database/API
      }
    } else {
      console.error(`✅ Sufficient data already in database - skipping browser scraping`);
    }

    // Variables for discovered years (defined at outer scope)
    let yearsWithData: any[] = [];
    let yearsDiscovered: any[] = [];

    // Process scraped data if we have any
    if (scrapedData.length > 0) {
      // Rows with these sources mark a year that was found but has no figures yet.
      const isPlaceholder = (d: any): boolean =>
        d.source === 'needs_manual_import' || d.source === 'discovered_needs_data';

      // Import to database - ONLY years with actual data (not nulls)
      yearsWithData = scrapedData.filter(d =>
        (d.revenue !== null || d.profit !== null || d.assets !== null) && !isPlaceholder(d)
      );
      yearsDiscovered = scrapedData.filter(isPlaceholder);

      console.error(`📊 Data summary: ${yearsWithData.length} years with data, ${yearsDiscovered.length} discovered`);

      if (params.auto_import) {
        console.error(`💾 Saving ${yearsWithData.length} years to database...`);

        for (const data of yearsWithData) {
          // Skip if we already got this year
          if (results.some(r => r.year === data.year)) {
            console.error(` ⏭️ Skipped ${data.year} - already in results from API`);
            continue;
          }

          try {
            await db.insertFinancialSnapshot({
              org_nr: params.org_nr,
              year: data.year,
              revenue: data.revenue,
              profit: data.profit,
              assets: data.assets,
              equity: data.equity,
              employees: null,
              source: data.source
            });
            results.push(data);
            console.error(` ✅ Saved ${data.year}: Revenue=${data.revenue ? toMillions(data.revenue, 1) + 'M' : 'N/A'}, Source=${data.source}`);
          } catch (error) {
            console.error(` ❌ Failed to save year ${data.year}:`, error);
          }
        }

        console.error(`✅ Database import complete!`);
      }

      // Add remaining scraped rows (placeholders, failed saves, or everything
      // when auto_import is off), one row per year.
      for (const data of scrapedData) {
        if (!results.some(r => r.year === data.year)) {
          results.push(data);
        }
      }
    }

    // Sort by year (newest first)
    results.sort((a, b) => b.year - a.year);

    // Without any rows there is no "latest year" to report on; fail with a
    // clear message instead of a TypeError on results[0].
    if (results.length === 0) {
      throw new Error(`Ingen regnskapsdata funnet for org.nr ${params.org_nr}`);
    }

    // Generate AI analysis if requested
    let aiAnalysis = '';
    if (params.include_analysis && company) {
      console.error(`🤖 Generating AI-powered financial analysis...`);
      aiAnalysis = await generateFinancialAnalysis(company, results);
    }

    // Calculate growth if we have multiple years
    const growthAnalysis = results.length >= 2
      ? buildGrowthAnalysis(results[0], results[results.length - 1], true)
      : '';

    // Generate CSV template for missing years
    let csvTemplate = '';
    if (yearsDiscovered.length > 0) {
      csvTemplate = `\n📝 CSV TEMPLATE FOR MANGLENDE ÅR:\n\norg_nr,year,revenue,profit,assets,equity,source`;

      // Add the year we have data for first
      if (yearsWithData.length > 0) {
        const firstWithData = yearsWithData[0];
        csvTemplate += `\n${params.org_nr},${firstWithData.year},${firstWithData.revenue || ''},${firstWithData.profit || ''},${firstWithData.assets || ''},${firstWithData.equity || ''},auto`;
      }

      // Add discovered years without data
      yearsDiscovered.forEach((y: any) => {
        csvTemplate += `\n${params.org_nr},${y.year},[fyll inn],[fyll inn],[fyll inn],[fyll inn],manual`;
      });

      csvTemplate += `\n\n💡 Last ned PDFs fra: https://virksomhet.brreg.no/nb/oppslag/enheter/${params.org_nr}`;
      csvTemplate += `\n💡 Deretter: import_financials_from_file /path/to/file.csv format csv`;
    }

    const latest = results[0];
    const sourceLabel = (r: any): string =>
      r.source === 'openai_vision_extraction'
        ? 'AI-ekstrahert fra PDF'
        : r.source === 'regnskapsregisteret_api'
          ? 'API'
          : r.source;

    const report = [
      '',
      `🤖 FULLSTENDIG AUTOMATISK HENTING: ${company?.name}`,
      '',
      `🎉 HENTET ${results.length} ÅR MED REGNSKAPSDATA!`,
      '',
      '📊 OVERSIKT:',
      '',
      results.map(formatYearLine).join('\n'),
      growthAnalysis,
      ...(aiAnalysis ? ['', '🤖 AI-DREVET FINANSIELL ANALYSE:', aiAnalysis, ''] : []),
      '',
      `📥 NEDLASTEDE FILER FOR ${params.org_nr}:`,
      '',
      `📁 PDF-mappe: ${process.cwd()}/data/pdfs/${params.org_nr}/`,
      `📁 PNG-bilder: ${process.cwd()}/data/png_images/`,
      `📁 JSON-data: ${process.cwd()}/data/extracted/${params.org_nr}/`,
      '',
      'Du kan åpne mappene og se:',
      '- Originale årsregnskap-PDFer (per selskap)',
      '- Konverterte PNG-bilder (brukt av AI)',
      '- Ekstraherte JSON-data med FULL regnskapsinfo',
      '',
      '🌐 Last ned fra Brønnøysund:',
      `🔗 https://virksomhet.brreg.no/nb/oppslag/enheter/${params.org_nr}`,
      '',
      'Slik laster du ned manuelt:',
      '1. Åpne lenken over',
      '2. Skroll ned til "Årsregnskap" seksjonen',
      '3. Klikk "Vis alle" for å se alle år',
      '4. Klikk "Innsendt årsregnskap" for året du vil laste ned',
      '',
      'Ekstraherte år:',
      results.map(r => `${r.year} - ${sourceLabel(r)}`).join('\n'),
      '',
      '💡 Alle PDFer er automatisk lastet ned, analysert med AI og lagret!',
      '',
      `📋 DETALJER SISTE ÅR (${latest.year}):`,
      ...(latest.revenue ? [`💰 Omsetning: ${toMillions(latest.revenue, 1)}M NOK`] : []),
      ...(latest.profit ? [`📈 Resultat: ${toMillions(latest.profit, 1)}M NOK`] : []),
      ...(latest.revenue && latest.profit
        ? [`📊 Margin: ${((latest.profit / latest.revenue) * 100).toFixed(1)}%`]
        : []),
      ...(latest.assets ? [`🏢 Eiendeler: ${toMillions(latest.assets, 1)}M NOK`] : []),
      ...(latest.equity ? [`💎 Egenkapital: ${toMillions(latest.equity, 1)}M NOK`] : []),
      ...(params.auto_import
        ? [
            '',
            `✅ ALLE ${results.length} ÅR LAGRET I DATABASE`,
            '',
            '💡 VIDERE ANALYSE:',
            '- analyze_financials → Fullstendig risikoanalyse',
            `- analyze_growth → Automatisk vekstberegning med ${results.length} års data`,
            '- Sammenligning med SSB bransjesnitt'
          ]
        : []),
      '',
      '🚀 100% AUTOMATISK!',
      '- Headless browser: ✅',
      `- PDF nedlasting: ✅ (${results.length} filer)`,
      '- Data-ekstraksjon: ✅',
      '- Database lagring: ✅',
      ...(results.some(r => !r.revenue)
        ? [
            '',
            '⚠️ MERK: Noen tall kunne ikke ekstraheres automatisk fra PDF.',
            'Dette kan skyldes:',
            '- Ulike PDF-formater',
            '- Håndskrevet/scannet tekst',
            '- Spesielle regnskapsformater',
            '',
            "Bruk 'import_financials' for å manuelt korrigere manglende tall."
          ]
        : []),
      // The CSV template was previously built but never shown to the user.
      ...(csvTemplate ? [csvTemplate] : []),
      '',
      `⏱️ Totaltid: ~${results.length * 10} sekunder`,
      '💰 Kostnad: GRATIS',
      `🎯 Resultat: ${results.length} år med automatisk hentet data!`,
      ''
    ].join('\n');

    return {
      content: [{
        type: "text" as const,
        text: report
      }]
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);

    return {
      content: [{
        type: "text" as const,
        text: [
          '',
          '❌ AUTOMATISK SCRAPING FEILET',
          '',
          `Feil: ${errorMessage}`,
          '',
          'Dette kan skyldes:',
          '- Nettverksproblemer',
          '- Nettstedet er endret',
          '- Ingen årsregnskap tilgjengelig',
          '- Browser automation blokkert',
          '',
          "💡 ALTERNATIV: Bruk 'build_financial_history' for guidet manuell prosess.",
          ''
        ].join('\n')
      }],
      isError: true
    };
  }
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/josuekongolo/companyiq-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.