// src/utils/storage.ts
// Storage utilities for audit reports, page snapshots, and HAR files
import { createHash } from 'crypto';
import * as fs from 'fs/promises';
import * as path from 'path';
// ============================================================================
// Types
// ============================================================================
/**
 * Directories that make up one audit folder.
 * `initAuditStorage` creates every one of them beneath `root`.
 */
interface AuditStoragePaths {
  /** The audit folder itself (`<baseDir>/<auditId>`); also holds `audit-meta.json`. */
  root: string;
  /** Rendered HTML files written by `savePageSnapshot`. */
  pages: string;
  /** `.har` files written by `saveHarFile`. */
  har: string;
  /** Snapshot JSON files written by `savePageSnapshot`. */
  snapshots: string;
  /** Analysis JSON files written by `savePageAnalysis`. */
  results: string;
  /** `report.json` / `report.md` written by `saveAuditReport`. */
  report: string;
}

/**
 * Handle describing where a single audit keeps its files.
 * Returned by `initAuditStorage` and accepted by every other storage helper.
 */
export interface AuditStorage {
  /** Parent directory under which audit folders are created. */
  baseDir: string;
  /** Label derived from the site hostname (leading `www.` removed, dots turned into dashes). */
  siteName: string;
  /** `siteName` followed by a timestamp; doubles as the audit folder name. */
  auditId: string;
  /** Locations of the audit folder and its sub-folders. */
  paths: AuditStoragePaths;
}
/**
 * Everything captured for one fetched page.
 * `savePageSnapshot` serializes the whole object to JSON and additionally
 * writes `renderedHtml` to its own `.html` file.
 */
export interface PageSnapshot {
  /** Page URL; the storage helpers derive the on-disk file name from it. */
  url: string;
  /** When the page was fetched (presumably an ISO 8601 string; confirm with the producer). */
  fetchedAt: string;
  /** HTTP status code recorded for the page. */
  httpStatus: number;
  /** Header name/value pairs recorded for the page (presumably response headers). */
  headers: { [name: string]: string };
  /** Markup before JS execution. */
  initialHtml: string;
  /** Markup after JS execution. */
  renderedHtml: string;
  /** Path to the HAR file, if one was captured. */
  harFile?: string;
  /** Path to the screenshot, if one was captured. */
  screenshotFile?: string;
}
/** Text value that may be absent; `null` presumably means "not found on the page" (confirm with the analyzer). */
type NullableText = string | null;

/**
 * Per-page analysis record as persisted by `savePageAnalysis`.
 * The values are produced outside this file; this module only serializes them.
 */
export interface StoredPageAnalysis {
  /** The snapshot the analysis is associated with. */
  snapshot: PageSnapshot;

  /** Basic SEO metadata fields. */
  seo: {
    title: NullableText;
    metaDescription: NullableText;
    canonical: NullableText;
    robots: NullableText;
    /** Heading data; shape is defined by the producer. */
    headings: any;
    viewport: NullableText;
    language: NullableText;
  };

  /** Structured data, with job-posting specifics. */
  structuredData: {
    jsonLd: any[];
    hasJobPosting: boolean;
    jobPostings: any[];
    jobPostingErrors: any[];
    jobPostingWarnings: any[];
  };

  /** Open Graph fields. */
  openGraph: {
    title: NullableText;
    description: NullableText;
    image: NullableText;
    type: NullableText;
    url: NullableText;
  };

  /** Twitter card fields. */
  twitter: {
    card: NullableText;
    title: NullableText;
    description: NullableText;
    image: NullableText;
  };

  /** Technical characteristics of the page. */
  technical: {
    isHttps: boolean;
    hasMixedContent: boolean;
    jsRenderingRequired: boolean;
    framework: NullableText;
    /** Load time in milliseconds. */
    loadTimeMs: number;
  };

  /** Links grouped as internal or external, plus a total count. */
  links: {
    internal: any[];
    external: any[];
    totalCount: number;
  };

  /** Image counts, split by presence of alt text, plus the image entries. */
  images: {
    total: number;
    withAlt: number;
    withoutAlt: number;
    images: any[];
  };
}
// ============================================================================
// Storage Management
// ============================================================================
/** Directory that receives audit folders when the caller does not pass one. */
const DEFAULT_REPORTS_DIR = './reports';

/**
 * Create the folder layout for a new audit and write its `audit-meta.json`.
 *
 * The audit folder is `<reportsDir>/<auditId>`. `auditId` is the site name
 * (hostname without a leading `www.`, dots replaced by dashes) followed by
 * the start time at second resolution.
 *
 * @param siteUrl - Absolute URL of the audited site; only its hostname is used for naming.
 * @param reportsDir - Parent directory for audit folders.
 * @returns The audit identity together with every directory path that was created.
 * @throws TypeError when `siteUrl` is not a valid absolute URL. File-system
 *   errors from creating folders or writing the metadata file propagate.
 */
export async function initAuditStorage(
  siteUrl: string,
  reportsDir: string = DEFAULT_REPORTS_DIR
): Promise<AuditStorage> {
  const siteName = new URL(siteUrl).hostname.replace(/^www\./, '').replace(/\./g, '-');

  // Read the clock once: the ID and the recorded start time are derived from
  // the same instant, so they cannot disagree across a second boundary.
  const startedAt = new Date().toISOString();
  const auditId = `${siteName}-${startedAt.replace(/[:.]/g, '-').slice(0, 19)}`;

  const root = path.join(reportsDir, auditId);
  const storage: AuditStorage = {
    baseDir: reportsDir,
    siteName,
    auditId,
    paths: {
      root,
      pages: path.join(root, 'pages'),
      har: path.join(root, 'har'),
      snapshots: path.join(root, 'snapshots'),
      results: path.join(root, 'results'),
      report: path.join(root, 'report'),
    },
  };

  // Create the shared parent first, then the independent sub-folders together.
  await fs.mkdir(root, { recursive: true });
  const { pages, har, snapshots, results, report } = storage.paths;
  await Promise.all(
    [pages, har, snapshots, results, report].map((dir) => fs.mkdir(dir, { recursive: true }))
  );

  await fs.writeFile(
    path.join(root, 'audit-meta.json'),
    JSON.stringify(
      {
        siteUrl,
        siteName,
        auditId,
        startedAt,
        version: '0.2.0',
      },
      null,
      2
    )
  );

  // Logged on stderr, as in the rest of this module.
  console.error(`Audit storage initialized: ${root}`);
  return storage;
}
/**
 * Derive a file-system-safe base name (no extension) from a URL.
 *
 * Only the path and query string take part; host and fragment are ignored.
 * `/` becomes `__`, each of `?`, `&`, `=` becomes `_`, and every remaining
 * character outside `[a-zA-Z0-9_-]` is dropped. An empty result is named
 * `index`.
 *
 * When sanitizing had to drop characters or the name exceeds 200 characters,
 * a 10-hex-digit digest of the original path and query is appended. Without
 * it, distinct URLs such as `/a.b` and `/ab`, or two long URLs sharing their
 * first 200 characters, would resolve to the same file and overwrite each
 * other. URLs that sanitize cleanly keep exactly the plain name.
 *
 * @param url - Absolute URL.
 * @returns A name of at most 200 characters matching `[a-zA-Z0-9_-]+`.
 * @throws TypeError when `url` is not a valid absolute URL.
 */
export function urlToFilename(url: string): string {
  const MAX_LENGTH = 200;
  const DIGEST_LENGTH = 10;

  const parsed = new URL(url);
  const raw = (parsed.pathname + parsed.search).replace(/^\//, '');

  // Separator mapping can lengthen the text, but never drops anything.
  const mapped = raw.replace(/\//g, '__').replace(/[?&=]/g, '_');
  const sanitized = mapped.replace(/[^a-zA-Z0-9_-]/g, '');

  const droppedCharacters = sanitized.length !== mapped.length;
  if (!droppedCharacters && sanitized.length <= MAX_LENGTH) {
    return sanitized || 'index';
  }

  // Lossy name: disambiguate with a digest of the untouched path and query.
  const digest = createHash('sha256').update(raw).digest('hex').slice(0, DIGEST_LENGTH);
  const stem = sanitized.slice(0, MAX_LENGTH - DIGEST_LENGTH - 1) || 'index';
  return `${stem}-${digest}`;
}
/**
 * Persist a page snapshot.
 *
 * Writes the full snapshot as pretty-printed JSON into the snapshots folder,
 * then writes `renderedHtml` on its own into the pages folder so the markup
 * can be opened directly. Both files share the base name derived from
 * `snapshot.url`.
 *
 * @returns Path of the JSON snapshot file.
 */
export async function savePageSnapshot(
  storage: AuditStorage,
  snapshot: PageSnapshot
): Promise<string> {
  const baseName = urlToFilename(snapshot.url);

  const jsonTarget = path.join(storage.paths.snapshots, baseName + '.json');
  const serialized = JSON.stringify(snapshot, null, 2);
  await fs.writeFile(jsonTarget, serialized);

  const htmlTarget = path.join(storage.paths.pages, baseName + '.html');
  await fs.writeFile(htmlTarget, snapshot.renderedHtml);

  return jsonTarget;
}
/**
 * Read back the snapshot stored for `url`.
 *
 * @returns The parsed snapshot, or `null` when the file cannot be read or
 *   its content is not valid JSON.
 */
export async function loadPageSnapshot(
  storage: AuditStorage,
  url: string
): Promise<PageSnapshot | null> {
  const target = path.join(storage.paths.snapshots, urlToFilename(url) + '.json');

  // `.catch` (not a second `.then` argument) so parse failures also yield null.
  return fs
    .readFile(target, 'utf-8')
    .then((text) => JSON.parse(text))
    .catch(() => null);
}
/**
 * Tell whether a snapshot file for `url` is accessible in this audit.
 *
 * @returns `true` when the snapshot file can be accessed, `false` otherwise.
 */
export async function hasPageSnapshot(
  storage: AuditStorage,
  url: string
): Promise<boolean> {
  const target = path.join(storage.paths.snapshots, urlToFilename(url) + '.json');
  return fs.access(target).then(
    () => true,
    () => false
  );
}
/**
 * Persist the analysis for `url` as pretty-printed JSON in the results folder.
 *
 * @returns Path of the file that was written.
 */
export async function savePageAnalysis(
  storage: AuditStorage,
  url: string,
  analysis: StoredPageAnalysis
): Promise<string> {
  const target = path.join(storage.paths.results, urlToFilename(url) + '.json');
  const serialized = JSON.stringify(analysis, null, 2);
  await fs.writeFile(target, serialized);
  return target;
}
/**
 * Read back the analysis stored for `url`.
 *
 * @returns The parsed analysis, or `null` when the file cannot be read or
 *   its content is not valid JSON.
 */
export async function loadPageAnalysis(
  storage: AuditStorage,
  url: string
): Promise<StoredPageAnalysis | null> {
  const target = path.join(storage.paths.results, urlToFilename(url) + '.json');

  let raw: string;
  try {
    raw = await fs.readFile(target, 'utf-8');
  } catch {
    return null;
  }

  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
/**
 * Persist the HAR data captured while loading `url`.
 * The content is serialized as pretty-printed JSON into the HAR folder.
 *
 * @returns Path of the `.har` file that was written.
 */
export async function saveHarFile(
  storage: AuditStorage,
  url: string,
  harContent: any
): Promise<string> {
  const target = path.join(storage.paths.har, urlToFilename(url) + '.har');
  const serialized = JSON.stringify(harContent, null, 2);
  await fs.writeFile(target, serialized);
  return target;
}
/**
 * Write the final audit report and record completion in `audit-meta.json`.
 *
 * Depending on `format`, writes `report.json` (pretty-printed JSON),
 * `report.md` (rendered by `generateMarkdownReport`), or both into the report
 * folder. Afterwards the audit metadata file is updated with `completedAt`
 * and `reportPaths`.
 *
 * The metadata update is best effort and never makes this function throw. A
 * missing metadata file is ignored silently; any other failure (unreadable
 * file, corrupt JSON, failed write) is logged to stderr so it does not go
 * unnoticed.
 *
 * @param storage - Audit storage handle.
 * @param report - Report data; serialized as-is for JSON output.
 * @param format - Which report files to produce.
 * @returns Paths of the report files that were written.
 * @throws File-system errors from writing the report files propagate.
 */
export async function saveAuditReport(
  storage: AuditStorage,
  report: any,
  format: 'json' | 'markdown' | 'both' = 'both'
): Promise<{ json?: string; markdown?: string }> {
  const result: { json?: string; markdown?: string } = {};

  if (format === 'json' || format === 'both') {
    const jsonPath = path.join(storage.paths.report, 'report.json');
    await fs.writeFile(jsonPath, JSON.stringify(report, null, 2));
    result.json = jsonPath;
  }

  if (format === 'markdown' || format === 'both') {
    const mdPath = path.join(storage.paths.report, 'report.md');
    const markdown = generateMarkdownReport(report, storage);
    await fs.writeFile(mdPath, markdown);
    result.markdown = mdPath;
  }

  // Update audit metadata with completion.
  const metaPath = path.join(storage.paths.root, 'audit-meta.json');
  try {
    const meta = JSON.parse(await fs.readFile(metaPath, 'utf-8'));
    meta.completedAt = new Date().toISOString();
    meta.reportPaths = result;
    await fs.writeFile(metaPath, JSON.stringify(meta, null, 2));
  } catch (err) {
    // Only "file does not exist" is an expected condition here.
    const code =
      err !== null && typeof err === 'object' ? (err as { code?: unknown }).code : undefined;
    if (code !== 'ENOENT') {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Could not update audit metadata at ${metaPath}: ${reason}`);
    }
  }

  return result;
}
/**
 * List the snapshot files of an audit.
 *
 * @returns File names (not full paths) in the snapshots folder that end in
 *   `.json`; an empty list when the folder cannot be read.
 */
export async function listSnapshots(storage: AuditStorage): Promise<string[]> {
  let entries: string[];
  try {
    entries = await fs.readdir(storage.paths.snapshots);
  } catch {
    return [];
  }

  const jsonFiles: string[] = [];
  for (const entry of entries) {
    if (entry.endsWith('.json')) {
      jsonFiles.push(entry);
    }
  }
  return jsonFiles;
}
/**
 * Count the artifacts stored for an audit.
 *
 * Each count is the number of directory entries with the expected extension
 * (`.json` for snapshots and results, `.har` for HAR files). A folder that
 * cannot be read counts as zero. The three folders are read concurrently.
 */
export async function getAuditStats(storage: AuditStorage): Promise<{
  pagesSnapshotted: number;
  pagesAnalyzed: number;
  harFilesCapture: number;
}> {
  const countWithSuffix = async (dir: string, suffix: string): Promise<number> => {
    let entries: string[];
    try {
      entries = await fs.readdir(dir);
    } catch {
      return 0;
    }
    let total = 0;
    for (const entry of entries) {
      if (entry.endsWith(suffix)) {
        total += 1;
      }
    }
    return total;
  };

  const [pagesSnapshotted, pagesAnalyzed, harFilesCapture] = await Promise.all([
    countWithSuffix(storage.paths.snapshots, '.json'),
    countWithSuffix(storage.paths.results, '.json'),
    countWithSuffix(storage.paths.har, '.har'),
  ]);

  return { pagesSnapshotted, pagesAnalyzed, harFilesCapture };
}
// ============================================================================
// Report Generation
// ============================================================================
/**
 * Turn a loosely-typed report field into a list.
 * Arrays pass through, falsy values become an empty list, and any other
 * single value is wrapped so it is rendered as one entry.
 */
function toReportList(value: unknown): unknown[] {
  if (Array.isArray(value)) {
    return value;
  }
  return value ? [value] : [];
}

/**
 * Render audit data as a Markdown document.
 *
 * Sections, in order: title block, executive summary, critical issues,
 * sitemap analysis, recommendations, footer. Apart from the title block,
 * executive summary heading and footer, a section is only emitted when the
 * report carries data for it.
 *
 * `report` is untyped, so every field is treated as optional and malformed
 * values are tolerated instead of throwing:
 * - a missing report renders only the fixed sections;
 * - `affectedUrls` may be one string or an array, and at most five are listed;
 * - a recommendation may be a string or an object; anything else is skipped
 *   without leaving a gap in the numbering;
 * - `priority` may be any value and is upper-cased after conversion to text.
 *
 * Detail bullets under a recommendation are indented to the width of its
 * list marker so Markdown renderers nest them under the numbered item.
 *
 * @param report - Audit data (`summary`, `criticalIssues`, `sitemaps`, `recommendations`).
 * @param storage - Supplies the site name and audit ID for the title block.
 * @returns The Markdown text, lines joined with `\n`.
 */
function generateMarkdownReport(report: any, storage: AuditStorage): string {
  const MAX_LISTED_URLS = 5;
  const data = report === null || report === undefined ? {} : report;
  const lines: string[] = [];

  lines.push(`# SEO Audit Report: ${storage.siteName}`);
  lines.push('');
  lines.push(`**Audit ID:** ${storage.auditId}`);
  lines.push(`**Generated:** ${new Date().toISOString()}`);
  lines.push('');

  // Executive Summary
  lines.push('## Executive Summary');
  lines.push('');
  if (data.summary) {
    lines.push(`- **Pages Analyzed:** ${data.summary.pagesAnalyzed || 0}`);
    lines.push(`- **Issues Found:** ${data.summary.issuesFound || 0}`);
    lines.push(`- **Warnings:** ${data.summary.warningsFound || 0}`);
  }
  lines.push('');

  // Critical Issues
  const issues: any[] = Array.isArray(data.criticalIssues) ? data.criticalIssues : [];
  if (issues.length > 0) {
    lines.push('## Critical Issues');
    lines.push('');
    for (const issue of issues) {
      if (!issue || typeof issue !== 'object') {
        continue;
      }
      lines.push(`### ${issue.title || 'Untitled issue'}`);
      lines.push('');
      if (issue.description) {
        lines.push(String(issue.description));
        lines.push('');
      }

      // A bare string must count as one URL, not be iterated character by character.
      const urls = toReportList(issue.affectedUrls);
      if (urls.length > 0) {
        lines.push('**Affected URLs:**');
        for (const url of urls.slice(0, MAX_LISTED_URLS)) {
          lines.push(`- ${url}`);
        }
        if (urls.length > MAX_LISTED_URLS) {
          lines.push(`- ... and ${urls.length - MAX_LISTED_URLS} more`);
        }
        lines.push('');
      }
    }
  }

  // Sitemap Analysis
  if (data.sitemaps) {
    lines.push('## Sitemap Analysis');
    lines.push('');
    lines.push(`- **Sitemaps Found:** ${data.sitemaps.count || 0}`);
    lines.push(`- **Total URLs:** ${data.sitemaps.totalUrls || 0}`);
    lines.push(`- **URLs with lastmod:** ${data.sitemaps.urlsWithLastmod || 0}`);
    lines.push('');
  }

  // Recommendations
  const recommendations: any[] = Array.isArray(data.recommendations) ? data.recommendations : [];
  if (recommendations.length > 0) {
    lines.push('## Recommendations');
    lines.push('');
    let position = 0;
    for (const rec of recommendations) {
      if (typeof rec === 'string') {
        position += 1;
        lines.push(`${position}. ${rec}`);
        continue;
      }
      if (!rec || typeof rec !== 'object') {
        continue;
      }

      position += 1;
      const marker = `${position}.`;
      // Nested content must start at the column where the item text starts.
      const detailIndent = ' '.repeat(marker.length + 1);
      const title = rec.title || 'Untitled recommendation';

      const heading = [marker];
      if (rec.priority) {
        heading.push(`[${String(rec.priority).toUpperCase()}]`);
      }
      heading.push(`**${title}**`);
      lines.push(heading.join(' '));

      if (rec.description && rec.description !== title) {
        lines.push(`${detailIndent}- ${rec.description}`);
      }
      if (rec.affectedPages) {
        const pages = Array.isArray(rec.affectedPages)
          ? rec.affectedPages.join(', ')
          : String(rec.affectedPages);
        lines.push(`${detailIndent}- Affected pages: ${pages}`);
      }
      if (rec.effort) {
        lines.push(`${detailIndent}- Effort: ${rec.effort}`);
      }
    }
    lines.push('');
  }

  // Footer
  lines.push('---');
  lines.push('');
  lines.push('*Generated by SEO Audit MCP Server v0.2.0*');
  return lines.join('\n');
}
/**
 * Default export: the public storage helpers bundled into one object, for
 * callers that prefer `storage.savePageSnapshot(...)` over named imports.
 */
const storageApi = {
  initAuditStorage,
  savePageSnapshot,
  loadPageSnapshot,
  hasPageSnapshot,
  savePageAnalysis,
  loadPageAnalysis,
  saveHarFile,
  saveAuditReport,
  listSnapshots,
  getAuditStats,
  urlToFilename,
};

export default storageApi;