// src/tools/run-audit.ts
// Unified audit tool - orchestrates the full audit workflow
import type { AuditStorage, StoredPageAnalysis } from '../utils/storage.js';
import type { AuditPlan, RoutePattern } from './plan-audit.js';
import { planAudit } from './plan-audit.js';
import { initAuditStorage, saveAuditReport, getAuditStats } from '../utils/storage.js';
import { capturePage, capturePages } from '../utils/page-capture.js';
import { closeBrowser } from '../utils/browser.js';
// ============================================================================
// Types
// ============================================================================
/**
 * Options accepted by `runAudit`.
 *
 * The defaults quoted on each field are the ones `runAudit` applies when it
 * destructures its input.
 */
export interface RunAuditInput {
// Site to audit; forwarded to storage initialisation and to `planAudit`.
baseUrl: string;
reportsDir?: string; // Where to save reports (default: ./reports)
maxSitemaps?: number; // Max sitemaps to process (default: 15)
maxUrlsPerSitemap?: number; // Max URLs per sitemap (default: 2000)
// When omitted, the sample count for each route pattern comes from the plan's
// sampling strategy, falling back to 5 if the plan has no entry for it.
samplesPerRouteType?: number; // Override samples per route (default: auto)
concurrency?: number; // Concurrent page captures (default: 2)
// NOTE(review): `runAudit` in this file never reads this flag — confirm where
// (or whether) it is honoured.
skipLighthouse?: boolean; // Skip Lighthouse audits (default: false)
// Invoked once per stage with stage names 'init', 'plan', 'capture',
// 'analyze' and 'report'; `progress` runs from 0 to 4 and `total` is always 4.
onProgress?: (stage: string, progress: number, total: number, detail?: string) => void;
}
/**
 * Complete result of one `runAudit` run: timing, the plan that was followed,
 * capture statistics, aggregated findings and prioritized recommendations.
 */
export interface AuditResult {
// Identifier taken from the audit storage created for this run.
auditId: string;
baseUrl: string;
// ISO-8601 timestamps produced with `Date.prototype.toISOString()`.
startedAt: string;
completedAt: string;
// Elapsed wall-clock time in milliseconds, measured up to the point just
// before the report is saved.
durationMs: number;
// Storage location (the storage root path of this audit)
reportPath: string;
// Plan results
plan: AuditPlan;
// Capture results
captureStats: {
// Number of URLs selected for capture.
totalUrls: number;
// Results that have an analysis and were not served from cache.
captured: number;
// Results flagged as cached.
cached: number;
// Results without an analysis.
failed: number;
};
// Analysis summary
summary: {
// Number of capture results that produced an analysis.
pagesAnalyzed: number;
// Totals computed by `countIssues` / `countWarnings` over `findings`.
issuesFound: number;
warningsFound: number;
// Sitemap issues whose text contains "critical" (case-insensitive).
criticalIssues: string[];
};
// Findings by category
findings: {
sitemaps: SitemapFindings;
seo: SeoFindings;
structuredData: StructuredDataFindings;
technical: TechnicalFindings;
socialGraph: SocialGraphFindings;
};
// Prioritized recommendations (sorted from 'critical' down to 'low')
recommendations: Recommendation[];
}
/** Sitemap-level figures derived from the audit plan. */
interface SitemapFindings {
// Number of sitemaps listed in the plan.
sitemapsFound: number;
// Sum of `stats.urlCount` over all sitemaps in the plan.
totalUrls: number;
// Sum of `stats.hasLastmod` over all sitemaps in the plan.
// NOTE(review): presumably a per-sitemap count of URLs carrying <lastmod> — confirm in plan-audit.
urlsWithLastmod: number;
// urlsWithLastmod / totalUrls as a rounded percentage; 0 when totalUrls is 0.
lastmodCoverage: number;
// Copy of the issues reported by the plan.
issues: string[];
}
/** On-page SEO figures aggregated over the analyzed pages. */
interface SeoFindings {
// Counts of pages whose corresponding SEO field is present (truthy / non-empty).
pagesWithTitle: number;
pagesWithDescription: number;
pagesWithH1: number;
pagesWithCanonical: number;
// Number of distinct titles that occur on more than one page
// (not the number of pages sharing a title).
duplicateTitles: number;
// Snapshot URLs of the pages lacking a title / meta description.
missingTitles: string[];
missingDescriptions: string[];
}
/** JobPosting structured-data figures aggregated over the analyzed pages. */
interface StructuredDataFindings {
// Pages whose analysis reports a JobPosting.
pagesWithJobPosting: number;
// Total number of JobPosting error / warning messages across all pages.
jobPostingErrors: number;
jobPostingWarnings: number;
// The ten most frequent messages, most frequent first.
commonErrors: Array<{ message: string; count: number }>;
commonWarnings: Array<{ message: string; count: number }>;
}
/** Rendering and performance figures aggregated over the analyzed pages. */
interface TechnicalFindings {
// Pages whose analysis flags JavaScript rendering as required.
pagesRequiringJs: number;
// Rounded mean of the pages' load times; 0 when no pages were analyzed.
averageLoadTimeMs: number;
// Snapshot URLs of pages whose load time exceeds 5000 ms.
slowPages: string[];
// Snapshot URLs of pages flagged as having mixed content.
mixedContentPages: string[];
// Page count per detected framework; pages with no framework are counted
// under the key 'unknown'.
frameworks: Record<string, number>;
}
/** Open Graph / Twitter Card figures aggregated over the analyzed pages. */
interface SocialGraphFindings {
// Pages that have an Open Graph title (other og: tags are not checked).
pagesWithOgTags: number;
// Pages that declare a Twitter card value.
pagesWithTwitterCards: number;
// Snapshot URLs of pages without an Open Graph image.
missingOgImage: string[];
}
/** A single actionable recommendation included in the audit result. */
interface Recommendation {
// Sort key for the final list: critical first, low last.
priority: 'critical' | 'high' | 'medium' | 'low';
// Grouping label, e.g. 'SEO', 'Sitemaps', 'Technical', 'General'.
category: string;
// Also used (case-insensitively) to de-duplicate recommendations.
title: string;
description: string;
// Number of pages (or occurrences) the recommendation applies to, when known.
affectedPages?: number;
effort: 'low' | 'medium' | 'high';
}
// ============================================================================
// Main Audit Function
// ============================================================================
/**
 * Run the full audit workflow for a site: initialise storage, plan the audit,
 * capture a sample of pages, aggregate the findings and save the report.
 *
 * Progress is written to stderr with `console.error` and, when supplied,
 * reported through `input.onProgress`.
 *
 * The browser is closed in a `finally` block right after the capture stage,
 * so it is released even when capturing rejects.
 *
 * @param input - Audit options; see `RunAuditInput` for the defaults.
 * @returns The assembled result, after it has been handed to `saveAuditReport`.
 * @throws Whatever the storage, planning, capture or save helpers reject with.
 */
export async function runAudit(input: RunAuditInput): Promise<AuditResult> {
  const {
    baseUrl,
    reportsDir = './reports',
    maxSitemaps = 15,
    maxUrlsPerSitemap = 2000,
    samplesPerRouteType,
    concurrency = 2,
    onProgress,
  } = input;
  const startedAt = new Date().toISOString();
  const startTime = Date.now();

  // Initialise the storage for this run (reported as progress step 0 of 4).
  onProgress?.('init', 0, 4, 'Initializing audit storage...');
  const storage = await initAuditStorage(baseUrl, reportsDir);
  console.error(`\n=== Starting Audit: ${baseUrl} ===`);
  console.error(`Audit ID: ${storage.auditId}`);

  // Stage 1/4: plan the audit (sitemap discovery + route analysis).
  onProgress?.('plan', 1, 4, 'Analyzing sitemaps and planning audit...');
  console.error('\n[Stage 1/4] Planning audit...');
  const plan = await planAudit({
    baseUrl,
    maxSitemapsToProcess: maxSitemaps,
    maxUrlsPerSitemap,
  });

  // Stage 2/4: select and capture sample pages.
  onProgress?.('capture', 2, 4, 'Capturing sample pages...');
  console.error('\n[Stage 2/4] Capturing sample pages...');
  const urlsToCapture = selectUrlsToCapture(plan, samplesPerRouteType);
  console.error(` Capturing ${urlsToCapture.length} pages...`);
  let captureResults: Awaited<ReturnType<typeof capturePages>>;
  try {
    captureResults = await capturePages(storage, urlsToCapture, {
      concurrency,
      timeout: 30000,
    });
  } finally {
    // Always release the browser, including when the capture rejects;
    // nothing after this point needs it.
    await closeBrowser();
  }
  const captureStats = {
    totalUrls: urlsToCapture.length,
    captured: captureResults.filter(r => r.analysis && !r.cached).length,
    cached: captureResults.filter(r => r.cached).length,
    failed: captureResults.filter(r => !r.analysis).length,
  };

  // Stage 3/4: aggregate the per-page analyses.
  onProgress?.('analyze', 3, 4, 'Analyzing captured pages...');
  console.error('\n[Stage 3/4] Analyzing results...');
  // flatMap drops results without an analysis and narrows the element type,
  // which a bare `.filter(r => r.analysis)` does not do.
  const analyses = captureResults.flatMap(r => (r.analysis ? [r.analysis] : []));
  const findings = analyzeFindings(plan, analyses);
  const recommendations = generateRecommendations(plan, findings, analyses);

  // Stage 4/4: assemble and save the report.
  onProgress?.('report', 4, 4, 'Generating report...');
  console.error('\n[Stage 4/4] Generating report...');
  const completedAt = new Date().toISOString();
  const durationMs = Date.now() - startTime;
  const result: AuditResult = {
    auditId: storage.auditId,
    baseUrl,
    startedAt,
    completedAt,
    durationMs,
    reportPath: storage.paths.root,
    plan,
    captureStats,
    summary: {
      pagesAnalyzed: analyses.length,
      issuesFound: countIssues(findings),
      warningsFound: countWarnings(findings),
      // Only sitemap issues that mention "critical" are surfaced here.
      criticalIssues: findings.sitemaps.issues.filter(i => i.toLowerCase().includes('critical')),
    },
    findings,
    recommendations,
  };
  await saveAuditReport(storage, result, 'both');

  console.error(`\n=== Audit Complete ===`);
  console.error(`Duration: ${Math.round(durationMs / 1000)}s`);
  console.error(`Report: ${storage.paths.root}/report/`);
  return result;
}
// ============================================================================
// Helper Functions
// ============================================================================
/**
 * Select the URLs to capture: the homepage plus a sample of example URLs
 * from every route pattern in the plan.
 *
 * The sample size for a pattern is `samplesOverride` when given, otherwise
 * the plan's `samplingStrategy.samplesPerRouteType` entry keyed by
 * `"<type>:<pattern>"`, otherwise 5. It is clamped to the range
 * [0, number of example URLs].
 *
 * @param plan - Audit plan providing `baseUrl`, `routePatterns` and the sampling strategy.
 * @param samplesOverride - Optional sample count applied to every route pattern.
 * @returns De-duplicated URLs, homepage first, then samples in pattern order.
 */
function selectUrlsToCapture(
  plan: AuditPlan,
  samplesOverride?: number
): string[] {
  // The homepage is always captured.
  const urls: string[] = [plan.baseUrl];

  for (const pattern of plan.routePatterns) {
    const key = `${pattern.type}:${pattern.pattern}`;
    const targetSamples =
      samplesOverride ?? plan.samplingStrategy.samplesPerRouteType[key] ?? 5;
    // Clamp at 0: a negative count would otherwise reach slice() as a
    // negative end index and select all but the last N examples.
    const samplesToTake = Math.max(
      0,
      Math.min(targetSamples, pattern.exampleUrls.length)
    );
    urls.push(...pattern.exampleUrls.slice(0, samplesToTake));
  }

  // A Set keeps first-seen order while removing repeats.
  return [...new Set(urls)];
}
/**
 * Aggregate the audit plan and the per-page analyses into the five findings
 * categories (sitemaps, SEO, structured data, technical, social graph).
 *
 * @param plan - Audit plan; supplies the sitemap statistics and plan issues.
 * @param analyses - Stored analyses of the captured pages.
 * @returns Findings grouped by category.
 */
function analyzeFindings(
  plan: AuditPlan,
  analyses: StoredPageAnalysis[]
): AuditResult['findings'] {
  // ---- Sitemaps: totals come straight from the plan ----
  let sitemapUrlTotal = 0;
  let sitemapLastmodTotal = 0;
  for (const sitemap of plan.sitemaps) {
    sitemapUrlTotal += sitemap.stats.urlCount;
    sitemapLastmodTotal += sitemap.stats.hasLastmod;
  }
  const sitemaps: SitemapFindings = {
    sitemapsFound: plan.sitemaps.length,
    totalUrls: sitemapUrlTotal,
    urlsWithLastmod: sitemapLastmodTotal,
    // Percentage of URLs with lastmod; stays 0 when there are no URLs.
    lastmodCoverage:
      sitemapUrlTotal > 0
        ? Math.round((sitemapLastmodTotal / sitemapUrlTotal) * 100)
        : 0,
    issues: Array.from(plan.issues),
  };

  // ---- Per-page accumulators, filled in one pass over the analyses ----
  let withTitle = 0;
  let withDescription = 0;
  let withH1 = 0;
  let withCanonical = 0;
  const titles: string[] = [];
  const missingTitles: string[] = [];
  const missingDescriptions: string[] = [];

  let withJobPosting = 0;
  const errorMessages: string[] = [];
  const warningMessages: string[] = [];

  let requiringJs = 0;
  let loadTimeSum = 0;
  const slowPages: string[] = [];
  const mixedContentPages: string[] = [];
  const frameworks: Record<string, number> = {};

  let withOgTags = 0;
  let withTwitterCards = 0;
  const missingOgImage: string[] = [];

  for (const page of analyses) {
    // SEO
    const title = page.seo.title;
    if (title) {
      withTitle += 1;
      titles.push(title);
    } else {
      missingTitles.push(page.snapshot.url);
    }
    if (page.seo.metaDescription) {
      withDescription += 1;
    } else {
      missingDescriptions.push(page.snapshot.url);
    }
    if (page.seo.headings.h1.length > 0) {
      withH1 += 1;
    }
    if (page.seo.canonical) {
      withCanonical += 1;
    }

    // Structured data
    if (page.structuredData.hasJobPosting) {
      withJobPosting += 1;
    }
    for (const problem of page.structuredData.jobPostingErrors) {
      errorMessages.push(problem.message);
    }
    for (const notice of page.structuredData.jobPostingWarnings) {
      warningMessages.push(notice.message);
    }

    // Technical
    if (page.technical.jsRenderingRequired) {
      requiringJs += 1;
    }
    loadTimeSum += page.technical.loadTimeMs;
    if (page.technical.loadTimeMs > 5000) {
      slowPages.push(page.snapshot.url);
    }
    if (page.technical.hasMixedContent) {
      mixedContentPages.push(page.snapshot.url);
    }
    const frameworkName = page.technical.framework || 'unknown';
    frameworks[frameworkName] = (frameworks[frameworkName] || 0) + 1;

    // Social graph
    if (page.openGraph.title) {
      withOgTags += 1;
    }
    if (page.twitter.card) {
      withTwitterCards += 1;
    }
    if (!page.openGraph.image) {
      missingOgImage.push(page.snapshot.url);
    }
  }

  const seo: SeoFindings = {
    pagesWithTitle: withTitle,
    pagesWithDescription: withDescription,
    pagesWithH1: withH1,
    pagesWithCanonical: withCanonical,
    duplicateTitles: countDuplicates(titles),
    missingTitles,
    missingDescriptions,
  };

  const structuredData: StructuredDataFindings = {
    pagesWithJobPosting: withJobPosting,
    jobPostingErrors: errorMessages.length,
    jobPostingWarnings: warningMessages.length,
    // Keep only the ten most frequent messages of each kind.
    commonErrors: countOccurrences(errorMessages).slice(0, 10),
    commonWarnings: countOccurrences(warningMessages).slice(0, 10),
  };

  const technical: TechnicalFindings = {
    pagesRequiringJs: requiringJs,
    // Rounded mean load time; 0 when nothing was analyzed.
    averageLoadTimeMs:
      analyses.length > 0 ? Math.round(loadTimeSum / analyses.length) : 0,
    slowPages,
    mixedContentPages,
    frameworks,
  };

  const socialGraph: SocialGraphFindings = {
    pagesWithOgTags: withOgTags,
    pagesWithTwitterCards: withTwitterCards,
    missingOgImage,
  };

  return { sitemaps, seo, structuredData, technical, socialGraph };
}
/**
 * Build the prioritized recommendation list from the aggregated findings and
 * the plan's own recommendations.
 *
 * Recommendations are de-duplicated by title (case-insensitive, with runs of
 * whitespace collapsed) and sorted from 'critical' down to 'low'.
 *
 * @param plan - Audit plan; its `recommendations` strings are appended as
 *   medium-priority 'General' items.
 * @param findings - Aggregated findings used to trigger each recommendation.
 * @param analyses - Analyzed pages; only the count is used, for the
 *   JavaScript-rendering threshold.
 * @returns The de-duplicated, priority-sorted recommendations.
 */
function generateRecommendations(
  plan: AuditPlan,
  findings: AuditResult['findings'],
  analyses: StoredPageAnalysis[]
): Recommendation[] {
  const recommendations: Recommendation[] = [];
  const seenTitles = new Set<string>(); // Normalized titles already added

  // Add a recommendation unless one with an equivalent title already exists.
  const addRecommendation = (rec: Recommendation) => {
    // Normalize for comparison: lowercase, collapse whitespace runs, trim.
    const normalizedTitle = rec.title.toLowerCase().replace(/\s+/g, ' ').trim();
    if (!seenTitles.has(normalizedTitle)) {
      seenTitles.add(normalizedTitle);
      recommendations.push(rec);
    }
  };

  // Sitemaps: only flag a total absence of lastmod dates, and only when
  // sitemap URLs were actually found. With zero URLs the coverage is also 0,
  // but "0% of URLs have lastmod dates" would be misleading.
  if (findings.sitemaps.totalUrls > 0 && findings.sitemaps.lastmodCoverage === 0) {
    addRecommendation({
      priority: 'high',
      category: 'Sitemaps',
      title: 'Add lastmod dates to sitemap URLs',
      description: `0% of URLs have lastmod dates. This is critical for search engines to prioritize crawling of updated content.`,
      effort: 'low',
    });
  }

  // SEO
  if (findings.seo.missingTitles.length > 0) {
    addRecommendation({
      priority: 'critical',
      category: 'SEO',
      title: 'Add missing title tags',
      description: `${findings.seo.missingTitles.length} pages are missing title tags, which is critical for SEO.`,
      affectedPages: findings.seo.missingTitles.length,
      effort: 'low',
    });
  }
  if (findings.seo.missingDescriptions.length > 0) {
    addRecommendation({
      priority: 'high',
      category: 'SEO',
      title: 'Add missing meta descriptions',
      description: `${findings.seo.missingDescriptions.length} pages are missing meta descriptions.`,
      affectedPages: findings.seo.missingDescriptions.length,
      effort: 'low',
    });
  }

  // Structured data: surface the most frequent JobPosting warning.
  if (findings.structuredData.commonWarnings.length > 0) {
    const topWarning = findings.structuredData.commonWarnings[0];
    addRecommendation({
      priority: 'medium',
      category: 'Structured Data',
      title: `Fix JobPosting schema: ${topWarning.message}`,
      description: `This warning appears on ${topWarning.count} job pages. Fixing it improves Google for Jobs visibility.`,
      affectedPages: topWarning.count,
      effort: 'medium',
    });
  }

  // Technical: recommend SSR when more than half of the pages need JavaScript.
  if (findings.technical.pagesRequiringJs > analyses.length * 0.5) {
    addRecommendation({
      priority: 'high',
      category: 'Technical',
      title: 'Implement server-side rendering',
      description: `${findings.technical.pagesRequiringJs} pages require JavaScript to render content. This can impact SEO as search engines may not fully index JS-rendered content.`,
      affectedPages: findings.technical.pagesRequiringJs,
      effort: 'high',
    });
  }
  if (findings.technical.slowPages.length > 0) {
    addRecommendation({
      priority: 'medium',
      category: 'Performance',
      title: 'Improve page load speed',
      description: `${findings.technical.slowPages.length} pages take over 5 seconds to load. Target under 3 seconds for better UX and SEO.`,
      affectedPages: findings.technical.slowPages.length,
      effort: 'medium',
    });
  }

  // Plan recommendations go last, so a finding-based recommendation with the
  // same title takes precedence.
  for (const rec of plan.recommendations) {
    addRecommendation({
      priority: 'medium',
      category: 'General',
      title: rec,
      description: rec,
      effort: 'medium',
    });
  }

  // Sort by priority; ties keep their insertion order.
  const priorityOrder: Record<Recommendation['priority'], number> = {
    critical: 0,
    high: 1,
    medium: 2,
    low: 3,
  };
  recommendations.sort((a, b) => priorityOrder[a.priority] - priorityOrder[b.priority]);
  return recommendations;
}
/**
 * Count how many distinct values occur more than once in `items`.
 *
 * @param items - Values to inspect.
 * @returns The number of distinct repeated values (not the number of repeats).
 */
function countDuplicates(items: string[]): number {
  const seen = new Set<string>();
  const repeated = new Set<string>();
  for (const value of items) {
    if (seen.has(value)) {
      repeated.add(value);
    } else {
      seen.add(value);
    }
  }
  return repeated.size;
}
/**
 * Tally how often each message occurs.
 *
 * @param items - Messages to tally.
 * @returns One `{ message, count }` entry per distinct message, most frequent
 *   first; messages with equal counts keep their first-seen order.
 */
function countOccurrences(items: string[]): Array<{ message: string; count: number }> {
  const tally = new Map<string, { message: string; count: number }>();
  for (const message of items) {
    const entry = tally.get(message);
    if (entry === undefined) {
      tally.set(message, { message, count: 1 });
    } else {
      entry.count += 1;
    }
  }
  const ranked = [...tally.values()];
  ranked.sort((left, right) => right.count - left.count);
  return ranked;
}
/**
 * Total number of issues: sitemap issues, pages missing a title,
 * JobPosting errors and pages with mixed content.
 *
 * @param findings - Aggregated findings.
 * @returns The sum of the four issue counts.
 */
function countIssues(findings: AuditResult['findings']): number {
  const { sitemaps, seo, structuredData, technical } = findings;
  let total = sitemaps.issues.length;
  total += seo.missingTitles.length;
  total += structuredData.jobPostingErrors;
  total += technical.mixedContentPages.length;
  return total;
}
/**
 * Total number of warnings: pages missing a meta description, JobPosting
 * warnings, slow pages and pages without an Open Graph image.
 *
 * @param findings - Aggregated findings.
 * @returns The sum of the four warning counts.
 */
function countWarnings(findings: AuditResult['findings']): number {
  const { seo, structuredData, technical, socialGraph } = findings;
  let total = seo.missingDescriptions.length;
  total += structuredData.jobPostingWarnings;
  total += technical.slowPages.length;
  total += socialGraph.missingOgImage.length;
  return total;
}
// `runAudit` is also the module's default export, alongside its named export.
export default runAudit;