/**
* Site Auditor
*
* Comprehensive SEO audit system for WordPress sites that analyzes technical SEO,
* content quality, site architecture, performance metrics, and accessibility.
* Provides actionable recommendations with priority scoring to guide optimization efforts.
*
* Features:
* - Technical SEO analysis (meta tags, structured data, crawlability)
* - Content quality assessment (readability, keyword optimization, freshness)
* - Site architecture evaluation (internal linking, navigation, URL structure)
* - Performance monitoring (page speed, Core Web Vitals, mobile optimization)
* - Accessibility compliance checking (WCAG guidelines)
* - Competitive analysis and benchmarking
* - Automated issue detection with severity scoring
* - Detailed reporting with actionable recommendations
*
* @since 2.7.0
*/
import { WordPressClient } from "@/client/api.js";
import { LoggerFactory } from "@/utils/logger.js";
import type { SiteAuditResult, SEOToolParams, AuditIssue, AuditSection } from "@/types/seo.js";
import type { WordPressPost, WordPressPage } from "@/types/wordpress.js";
/**
* Configuration for site audit behavior
*/
interface SiteAuditConfig {
/** Include technical SEO analysis */
includeTechnical: boolean;
/** Include content quality analysis */
includeContent: boolean;
/** Include site architecture analysis */
includeArchitecture: boolean;
/** Include performance analysis */
includePerformance: boolean;
/** Include accessibility analysis */
includeAccessibility: boolean;
/** Maximum pages to analyze for detailed content audit */
maxPagesForContentAudit: number;
/** Minimum severity level to include in results */
minSeverityLevel: "low" | "medium" | "high" | "critical";
/** Include competitor analysis */
includeCompetitorAnalysis: boolean;
/** Competitor URLs for analysis */
competitorUrls: string[];
/** Generate detailed recommendations */
includeRecommendations: boolean;
}
/**
* Site audit issue severity levels
*/
const IssueSeverity = {
LOW: "low" as const,
MEDIUM: "medium" as const,
HIGH: "high" as const,
CRITICAL: "critical" as const,
} as const;
/**
* Site audit categories
*/
const AuditCategory = {
TECHNICAL: "technical" as const,
CONTENT: "content" as const,
ARCHITECTURE: "architecture" as const,
PERFORMANCE: "performance" as const,
ACCESSIBILITY: "accessibility" as const,
} as const;
/**
* Comprehensive WordPress site SEO auditor
*/
export class SiteAuditor {
private logger = LoggerFactory.tool("site_auditor");
private config: SiteAuditConfig;
constructor(
private client: WordPressClient,
config: Partial<SiteAuditConfig> = {},
) {
this.config = {
includeTechnical: true,
includeContent: true,
includeArchitecture: true,
includePerformance: true,
includeAccessibility: false, // Requires external tools
maxPagesForContentAudit: 50,
minSeverityLevel: "medium",
includeCompetitorAnalysis: false,
competitorUrls: [],
includeRecommendations: true,
...config,
};
}
/**
* Get current audit configuration
*/
getConfig(): SiteAuditConfig {
return { ...this.config };
}
/**
* Update audit configuration
*/
updateConfig(updates: Partial<SiteAuditConfig>): void {
this.config = { ...this.config, ...updates };
this.logger.debug("Site audit configuration updated", updates);
}
/**
* Perform comprehensive site audit
*/
async performSiteAudit(params: SEOToolParams): Promise<SiteAuditResult> {
this.logger.info("Starting comprehensive site audit", {
site: params.site,
auditScope: {
technical: this.config.includeTechnical,
content: this.config.includeContent,
architecture: this.config.includeArchitecture,
performance: this.config.includePerformance,
accessibility: this.config.includeAccessibility,
},
});
const startTime = Date.now();
const issues: AuditIssue[] = [];
const sections: AuditSection[] = [];
const recommendations: string[] = [];
try {
// Get site data
const siteData = await this.collectSiteData(params);
// Technical SEO audit
if (this.config.includeTechnical) {
const technicalResults = await this.auditTechnicalSEO(siteData, params);
sections.push(technicalResults.section);
issues.push(...technicalResults.issues);
if (this.config.includeRecommendations) {
recommendations.push(...technicalResults.recommendations);
}
}
// Content quality audit
if (this.config.includeContent) {
const contentResults = await this.auditContentQuality(siteData, params);
sections.push(contentResults.section);
issues.push(...contentResults.issues);
if (this.config.includeRecommendations) {
recommendations.push(...contentResults.recommendations);
}
}
// Site architecture audit
if (this.config.includeArchitecture) {
const architectureResults = await this.auditSiteArchitecture(siteData, params);
sections.push(architectureResults.section);
issues.push(...architectureResults.issues);
if (this.config.includeRecommendations) {
recommendations.push(...architectureResults.recommendations);
}
}
// Performance audit
if (this.config.includePerformance) {
const performanceResults = await this.auditPerformance(siteData, params);
sections.push(performanceResults.section);
issues.push(...performanceResults.issues);
if (this.config.includeRecommendations) {
recommendations.push(...performanceResults.recommendations);
}
}
// Accessibility audit
if (this.config.includeAccessibility) {
const accessibilityResults = await this.auditAccessibility(siteData, params);
sections.push(accessibilityResults.section);
issues.push(...accessibilityResults.issues);
if (this.config.includeRecommendations) {
recommendations.push(...accessibilityResults.recommendations);
}
}
// Filter issues by severity
const filteredIssues = this.filterIssuesBySeverity(issues);
// Calculate overall score
const overallScore = this.calculateOverallScore(sections, filteredIssues);
const auditResult: SiteAuditResult = {
timestamp: new Date().toISOString(),
siteUrl: siteData.siteUrl,
overallScore,
sections,
issues: filteredIssues,
recommendations: this.config.includeRecommendations ? recommendations : [],
summary: this.generateAuditSummary(sections, filteredIssues),
processingTime: Date.now() - startTime,
};
this.logger.info("Site audit completed", {
overallScore,
totalIssues: filteredIssues.length,
criticalIssues: filteredIssues.filter((i) => i.severity === IssueSeverity.CRITICAL).length,
processingTime: auditResult.processingTime,
});
return auditResult;
} catch (_error) {
this.logger.error("Site audit failed", {
_error: _error instanceof Error ? _error.message : String(_error),
});
throw _error;
}
}
/**
* Collect comprehensive site data for analysis
*/
private async collectSiteData(params: SEOToolParams) {
this.logger.debug("Collecting site data for audit");
// Get all posts and pages
const posts = await this.client.getPosts({ per_page: this.config.maxPagesForContentAudit, status: ["publish"] });
const pages = await this.client.getPages({ per_page: this.config.maxPagesForContentAudit, status: ["publish"] });
// Get site info (mock for now)
const siteUrl = "https://example.com"; // Would come from WordPress REST API
return {
siteUrl,
posts: posts as WordPressPost[],
pages: pages as WordPressPage[],
totalContent: (posts?.length || 0) + (pages?.length || 0),
};
}
/**
* Audit technical SEO aspects
*/
private async auditTechnicalSEO(
siteData: { siteUrl: string; posts: WordPressPost[]; pages: WordPressPage[]; totalContent: number },
params: SEOToolParams,
) {
this.logger.debug("Auditing technical SEO");
const issues: AuditIssue[] = [];
const recommendations: string[] = [];
let score = 100;
// Check for missing meta descriptions
const itemsWithoutMeta = [...siteData.posts, ...siteData.pages].filter(
(item) => !item.excerpt?.rendered || item.excerpt.rendered.length < 120,
);
if (itemsWithoutMeta.length > 0) {
const severity = itemsWithoutMeta.length > 10 ? IssueSeverity.HIGH : IssueSeverity.MEDIUM;
score -= itemsWithoutMeta.length > 10 ? 15 : 8;
issues.push({
id: "missing-meta-descriptions",
title: "Missing or Short Meta Descriptions",
description: `${itemsWithoutMeta.length} pages lack proper meta descriptions (minimum 120 characters)`,
severity,
category: AuditCategory.TECHNICAL,
affectedItems: itemsWithoutMeta.map((item) => item.title?.rendered || "Untitled"),
impact: "Meta descriptions affect click-through rates from search results",
});
recommendations.push(
`Add compelling meta descriptions (150-160 characters) to ${itemsWithoutMeta.length} pages to improve search result click-through rates`,
);
}
// Check for duplicate titles
const titleMap = new Map<string, number>();
[...siteData.posts, ...siteData.pages].forEach((item) => {
const title = item.title?.rendered?.toLowerCase() || "";
titleMap.set(title, (titleMap.get(title) || 0) + 1);
});
const duplicateTitles = Array.from(titleMap.entries()).filter(([_, count]) => count > 1);
if (duplicateTitles.length > 0) {
score -= duplicateTitles.length * 5;
issues.push({
id: "duplicate-titles",
title: "Duplicate Page Titles",
description: `${duplicateTitles.length} titles are used on multiple pages`,
severity: IssueSeverity.HIGH,
category: AuditCategory.TECHNICAL,
affectedItems: duplicateTitles.map(([title]) => title),
impact: "Duplicate titles confuse search engines and reduce page rankings",
});
recommendations.push(
`Create unique, descriptive titles for all pages to improve search engine understanding and rankings`,
);
}
// Check for very short content
const shortContent = [...siteData.posts, ...siteData.pages].filter((item) => {
const content = this.extractTextContent(item.content?.rendered || "");
return this.countWords(content) < 300;
});
if (shortContent.length > 0) {
score -= Math.min(shortContent.length * 3, 20);
issues.push({
id: "thin-content",
title: "Thin Content Pages",
description: `${shortContent.length} pages have less than 300 words of content`,
severity: IssueSeverity.MEDIUM,
category: AuditCategory.TECHNICAL,
affectedItems: shortContent.map((item) => item.title?.rendered || "Untitled"),
impact: "Thin content may not rank well in search results",
});
recommendations.push(
`Expand content on ${shortContent.length} pages to at least 300 words for better search rankings`,
);
}
return {
section: {
name: "Technical SEO",
score: Math.max(0, score),
issues: issues.length,
passed: issues.length === 0,
},
issues,
recommendations,
};
}
/**
* Audit content quality
*/
private async auditContentQuality(
siteData: { siteUrl: string; posts: WordPressPost[]; pages: WordPressPage[]; totalContent: number },
params: SEOToolParams,
) {
this.logger.debug("Auditing content quality");
const issues: AuditIssue[] = [];
const recommendations: string[] = [];
let score = 100;
// Check content freshness
const oldContent = [...siteData.posts, ...siteData.pages].filter((item) => {
const lastModified = new Date(item.modified || item.date || 0);
const daysSinceModified = (Date.now() - lastModified.getTime()) / (1000 * 60 * 60 * 24);
return daysSinceModified > 365; // More than 1 year old
});
if (oldContent.length > 0) {
const ratio = oldContent.length / siteData.totalContent;
score -= Math.min(ratio * 30, 25);
issues.push({
id: "outdated-content",
title: "Outdated Content",
description: `${oldContent.length} pages (${(ratio * 100).toFixed(1)}%) haven't been updated in over a year`,
severity: ratio > 0.5 ? IssueSeverity.HIGH : IssueSeverity.MEDIUM,
category: AuditCategory.CONTENT,
affectedItems: oldContent.slice(0, 10).map((item) => item.title?.rendered || "Untitled"),
impact: "Outdated content may lose search rankings and user engagement",
});
recommendations.push(
`Review and update ${oldContent.length} outdated pages to maintain content freshness and search rankings`,
);
}
// Check for missing featured images
const noFeaturedImage = [...siteData.posts, ...siteData.pages].filter(
(item) => !item.featured_media || item.featured_media === 0,
);
if (noFeaturedImage.length > 0) {
score -= Math.min(noFeaturedImage.length * 2, 15);
issues.push({
id: "missing-featured-images",
title: "Missing Featured Images",
description: `${noFeaturedImage.length} posts/pages lack featured images`,
severity: IssueSeverity.LOW,
category: AuditCategory.CONTENT,
affectedItems: noFeaturedImage.slice(0, 10).map((item) => item.title?.rendered || "Untitled"),
impact: "Featured images improve social sharing and visual appeal",
});
recommendations.push(
`Add featured images to ${noFeaturedImage.length} posts/pages to improve social sharing and engagement`,
);
}
// Analyze readability (simplified scoring)
const hardToReadContent = [...siteData.posts, ...siteData.pages].filter((item) => {
const content = this.extractTextContent(item.content?.rendered || "");
const readabilityScore = this.calculateReadabilityScore(content);
return readabilityScore < 60; // Below average readability
});
if (hardToReadContent.length > 0) {
score -= Math.min(hardToReadContent.length * 3, 20);
issues.push({
id: "poor-readability",
title: "Poor Content Readability",
description: `${hardToReadContent.length} pages have below-average readability scores`,
severity: IssueSeverity.MEDIUM,
category: AuditCategory.CONTENT,
affectedItems: hardToReadContent.slice(0, 10).map((item) => item.title?.rendered || "Untitled"),
impact: "Poor readability reduces user engagement and time on page",
});
recommendations.push(
`Improve readability on ${hardToReadContent.length} pages using shorter sentences, simpler words, and better formatting`,
);
}
return {
section: {
name: "Content Quality",
score: Math.max(0, score),
issues: issues.length,
passed: issues.length === 0,
},
issues,
recommendations,
};
}
/**
* Audit site architecture
*/
private async auditSiteArchitecture(
siteData: { siteUrl: string; posts: WordPressPost[]; pages: WordPressPage[]; totalContent: number },
params: SEOToolParams,
) {
this.logger.debug("Auditing site architecture");
const issues: AuditIssue[] = [];
const recommendations: string[] = [];
let score = 100;
// Check for orphaned content (simplified - no actual link analysis)
const totalPages = siteData.totalContent;
const orphanedEstimate = Math.max(0, Math.floor(totalPages * 0.1)); // Estimate 10% orphaned
if (orphanedEstimate > 0) {
score -= Math.min(orphanedEstimate * 2, 15);
issues.push({
id: "orphaned-content",
title: "Potentially Orphaned Content",
description: `Estimated ${orphanedEstimate} pages may lack sufficient internal links`,
severity: IssueSeverity.MEDIUM,
category: AuditCategory.ARCHITECTURE,
affectedItems: ["Analysis requires full crawl to identify specific pages"],
impact: "Orphaned content may not be discoverable by users or search engines",
});
recommendations.push(
`Perform internal linking audit to identify and connect orphaned content to main site architecture`,
);
}
// Check URL structure (simplified analysis)
const longUrls = [...siteData.posts, ...siteData.pages].filter(
(item) => (item.link || item.slug || "").length > 100,
);
if (longUrls.length > 0) {
score -= Math.min(longUrls.length, 10);
issues.push({
id: "long-urls",
title: "Overly Long URLs",
description: `${longUrls.length} pages have URLs longer than 100 characters`,
severity: IssueSeverity.LOW,
category: AuditCategory.ARCHITECTURE,
affectedItems: longUrls.slice(0, 5).map((item) => item.title?.rendered || "Untitled"),
impact: "Long URLs are harder to share and may be truncated in search results",
});
recommendations.push(`Optimize ${longUrls.length} URLs to be shorter and more descriptive`);
}
// Check for proper heading structure (basic analysis)
const poorHeadingStructure = [...siteData.posts, ...siteData.pages].filter((item) => {
const content = item.content?.rendered || "";
const hasH1 = /<h1/i.test(content);
const hasMultipleH1 = (content.match(/<h1/gi) || []).length > 1;
return !hasH1 || hasMultipleH1;
});
if (poorHeadingStructure.length > 0) {
score -= Math.min(poorHeadingStructure.length * 2, 15);
issues.push({
id: "heading-structure",
title: "Poor Heading Structure",
description: `${poorHeadingStructure.length} pages have missing or multiple H1 tags`,
severity: IssueSeverity.MEDIUM,
category: AuditCategory.ARCHITECTURE,
affectedItems: poorHeadingStructure.slice(0, 10).map((item) => item.title?.rendered || "Untitled"),
impact: "Poor heading structure affects content hierarchy and SEO",
});
recommendations.push(
`Fix heading structure on ${poorHeadingStructure.length} pages: use exactly one H1 and logical heading hierarchy`,
);
}
return {
section: {
name: "Site Architecture",
score: Math.max(0, score),
issues: issues.length,
passed: issues.length === 0,
},
issues,
recommendations,
};
}
/**
* Audit performance aspects
*/
private async auditPerformance(
siteData: { siteUrl: string; posts: WordPressPost[]; pages: WordPressPage[]; totalContent: number },
params: SEOToolParams,
) {
this.logger.debug("Auditing performance");
const issues: AuditIssue[] = [];
const recommendations: string[] = [];
let score = 100;
// Check for large images (estimated based on content analysis)
const imagesInContent = [...siteData.posts, ...siteData.pages].reduce((count, item) => {
const content = item.content?.rendered || "";
const imageMatches = content.match(/<img[^>]*>/gi) || [];
return count + imageMatches.length;
}, 0);
if (imagesInContent > 20) {
score -= 10;
issues.push({
id: "image-optimization",
title: "Image Optimization Needed",
description: `${imagesInContent} images found - optimization may be needed`,
severity: IssueSeverity.MEDIUM,
category: AuditCategory.PERFORMANCE,
affectedItems: ["Requires detailed analysis of individual images"],
impact: "Unoptimized images slow page loading and affect user experience",
});
recommendations.push(
`Audit and optimize ${imagesInContent} images for web performance: compress, resize, and use modern formats`,
);
}
// Check for potential caching issues (heuristic)
const dynamicContentPages = siteData.posts.length;
if (dynamicContentPages > 50) {
score -= 15;
issues.push({
id: "caching-strategy",
title: "Caching Strategy Review Needed",
description: `${dynamicContentPages} dynamic pages may benefit from enhanced caching`,
severity: IssueSeverity.HIGH,
category: AuditCategory.PERFORMANCE,
affectedItems: ["All dynamic content pages"],
impact: "Inadequate caching increases server load and page response times",
});
recommendations.push(
`Implement comprehensive caching strategy for ${dynamicContentPages} pages to improve performance`,
);
}
// Check for external dependencies (basic analysis)
const externalDependencies = [...siteData.posts, ...siteData.pages].reduce((count, item) => {
const content = item.content?.rendered || "";
const externalLinks = content.match(/https?:\/\/(?!example\.com)[^"'\s>]*/gi) || [];
return count + externalLinks.length;
}, 0);
if (externalDependencies > 30) {
score -= 5;
issues.push({
id: "external-dependencies",
title: "Many External Dependencies",
description: `${externalDependencies} external resources may affect loading speed`,
severity: IssueSeverity.LOW,
category: AuditCategory.PERFORMANCE,
affectedItems: ["Various pages with external resources"],
impact: "External dependencies can slow page loading if third-party services are slow",
});
recommendations.push(
`Review ${externalDependencies} external dependencies and consider local alternatives where possible`,
);
}
return {
section: {
name: "Performance",
score: Math.max(0, score),
issues: issues.length,
passed: issues.length === 0,
},
issues,
recommendations,
};
}
/**
* Audit accessibility (basic implementation)
*/
private async auditAccessibility(
siteData: { siteUrl: string; posts: WordPressPost[]; pages: WordPressPage[]; totalContent: number },
params: SEOToolParams,
) {
this.logger.debug("Auditing accessibility");
const issues: AuditIssue[] = [];
const recommendations: string[] = [];
let score = 100;
// Check for missing alt text on images
const imagesWithoutAlt = [...siteData.posts, ...siteData.pages].filter((item) => {
const content = item.content?.rendered || "";
const imgTags = content.match(/<img[^>]*>/gi) || [];
return imgTags.some((img: string) => !img.includes("alt=") || img.includes('alt=""'));
});
if (imagesWithoutAlt.length > 0) {
score -= Math.min(imagesWithoutAlt.length * 5, 25);
issues.push({
id: "missing-alt-text",
title: "Missing Image Alt Text",
description: `${imagesWithoutAlt.length} pages contain images without proper alt text`,
severity: IssueSeverity.HIGH,
category: AuditCategory.ACCESSIBILITY,
affectedItems: imagesWithoutAlt.slice(0, 10).map((item) => item.title?.rendered || "Untitled"),
impact: "Missing alt text prevents screen readers from describing images to visually impaired users",
});
recommendations.push(
`Add descriptive alt text to images on ${imagesWithoutAlt.length} pages for accessibility compliance`,
);
}
return {
section: {
name: "Accessibility",
score: Math.max(0, score),
issues: issues.length,
passed: issues.length === 0,
},
issues,
recommendations,
};
}
/**
* Filter issues by configured severity level
*/
private filterIssuesBySeverity(issues: AuditIssue[]): AuditIssue[] {
const severityOrder = {
low: 0,
medium: 1,
high: 2,
critical: 3,
};
const minLevel = severityOrder[this.config.minSeverityLevel];
return issues.filter((issue) => severityOrder[issue.severity] >= minLevel);
}
/**
* Calculate overall site score
*/
private calculateOverallScore(sections: AuditSection[], issues: AuditIssue[]): number {
if (sections.length === 0) return 0;
// Calculate weighted average of section scores
const totalScore = sections.reduce((sum, section) => sum + section.score, 0);
const averageScore = totalScore / sections.length;
// Apply penalty for critical issues
const criticalIssues = issues.filter((issue) => issue.severity === IssueSeverity.CRITICAL).length;
const penalty = criticalIssues * 5;
return Math.max(0, Math.round(averageScore - penalty));
}
/**
* Generate audit summary
*/
private generateAuditSummary(sections: AuditSection[], issues: AuditIssue[]): string {
const totalIssues = issues.length;
const criticalIssues = issues.filter((i) => i.severity === IssueSeverity.CRITICAL).length;
const highIssues = issues.filter((i) => i.severity === IssueSeverity.HIGH).length;
const passedSections = sections.filter((s) => s.passed).length;
const totalSections = sections.length;
return `Site audit completed: ${passedSections}/${totalSections} sections passed. Found ${totalIssues} issues (${criticalIssues} critical, ${highIssues} high priority). Focus on addressing critical and high-priority issues first for maximum SEO impact.`;
}
/**
* Extract text content from HTML
*/
private extractTextContent(html: string): string {
// Apply repeatedly to handle nested/malformed tags
let result = html;
let previous = "";
while (result !== previous) {
previous = result;
result = result
.replace(/<script[^>]*>/gi, "")
.replace(/<\/script[^>]*>/gi, "")
.replace(/<style[^>]*>/gi, "")
.replace(/<\/style[^>]*>/gi, "");
}
return result
.replace(/<[^>]*>/g, " ")
.replace(/&[^;]+;/g, " ")
.replace(/\s+/g, " ")
.trim();
}
/**
* Count words in text content
*/
private countWords(text: string): number {
return text.split(/\s+/).filter((word) => word.length > 0).length;
}
/**
* Calculate readability score (simplified Flesch Reading Ease)
*/
private calculateReadabilityScore(text: string): number {
const words = this.countWords(text);
if (words === 0) return 0;
const sentences = text.split(/[.!?]+/).filter((s) => s.trim().length > 0).length;
const syllables = this.estimateSyllables(text);
if (sentences === 0) return 0;
const avgWordsPerSentence = words / sentences;
const avgSyllablesPerWord = syllables / words;
// Simplified Flesch Reading Ease formula
return Math.max(0, Math.min(100, 206.835 - 1.015 * avgWordsPerSentence - 84.6 * avgSyllablesPerWord));
}
/**
* Estimate syllable count for readability calculation
*/
private estimateSyllables(text: string): number {
const words = text.toLowerCase().match(/\b\w+\b/g) || [];
return words.reduce((total, word) => {
// Simple syllable estimation
let syllables = word.match(/[aeiouy]+/g)?.length || 1;
if (word.endsWith("e")) syllables--;
return total + Math.max(1, syllables);
}, 0);
}
}