// src/utils/browser.ts
// Playwright browser utilities
import { chromium, Browser, BrowserContext, Page } from 'playwright';
// Module-level cache for the shared browser: assigned by getBrowser() after a
// launch and reset to null by closeBrowser().
let browserInstance: Browser | null = null;
/**
 * Options accepted by getBrowser().
 */
export interface BrowserOptions {
/** Whether to launch without a visible window; getBrowser() falls back to `true` when omitted. */
headless?: boolean;
/** Timeout value. NOTE(review): presumably the browser launch timeout in milliseconds — confirm how getBrowser() consumes it. */
timeout?: number;
}
/**
 * Options accepted by createPage().
 */
export interface PageOptions {
/** Explicit user agent; when omitted createPage() picks the mobile or desktop default for `device`. */
userAgent?: string;
/** Explicit viewport; when omitted createPage() picks the mobile or desktop default for `device`. */
viewport?: { width: number; height: number };
/** Device profile to emulate; createPage() treats a missing value as 'desktop'. */
device?: 'desktop' | 'mobile';
/** Value passed to page.setDefaultTimeout(); createPage() uses 30000 when omitted. */
timeout?: number;
}
// Default viewport used by createPage() when device is 'mobile'.
const MOBILE_VIEWPORT = { width: 412, height: 823 };
// Default viewport used by createPage() for every other device value.
const DESKTOP_VIEWPORT = { width: 1920, height: 1080 };
// Default user agent used by createPage() when device is 'mobile' and no userAgent is supplied.
const MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36';
// Default user agent used by createPage() for desktop pages when no userAgent is supplied.
const DESKTOP_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
// Self-identifying crawler user agent. NOTE(review): not exported and not referenced in the
// visible part of this file — confirm whether it is still needed.
const BOT_USER_AGENT = 'Mozilla/5.0 (compatible; SEOAuditBot/1.0; +https://github.com/seo-audit-mcp)';
/**
 * Returns the shared Chromium browser, launching it on first use.
 *
 * The browser is cached in the module-level `browserInstance` and reused for as
 * long as it reports being connected; a missing or disconnected instance
 * triggers a fresh launch. Launch options therefore only take effect on the
 * call that actually launches the browser.
 *
 * @param options - `headless` defaults to `true`. `timeout`, when provided, is
 *   forwarded to `chromium.launch` as its launch timeout.
 * @returns The cached (or newly launched) browser.
 * @throws Whatever `chromium.launch` rejects with.
 */
export async function getBrowser(options: BrowserOptions = {}): Promise<Browser> {
  // Reuse the cached browser while it is still connected.
  if (browserInstance?.isConnected()) {
    return browserInstance;
  }

  browserInstance = await chromium.launch({
    headless: options.headless ?? true,
    // Forward the caller's timeout only when one was given; omitting the key
    // leaves Playwright's own default in place.
    ...(options.timeout !== undefined ? { timeout: options.timeout } : {}),
    // Sandbox/GPU/shared-memory switches — presumably chosen so Chromium starts
    // in restricted environments such as containers (confirm before changing).
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-accelerated-2d-canvas',
      '--disable-gpu',
    ],
  });
  return browserInstance;
}
/**
 * Closes the shared browser, if one is cached, and clears the cache.
 *
 * The cache is cleared before `close()` is awaited, so a failing close can
 * never leave a dead browser behind in `browserInstance`, and overlapping
 * calls do not try to close the same browser twice.
 *
 * @throws Whatever `browser.close()` rejects with.
 */
export async function closeBrowser(): Promise<void> {
  const browser = browserInstance;
  if (!browser) {
    return;
  }

  // Drop the reference first: even if close() rejects, the next getBrowser()
  // call must launch a fresh browser instead of reusing this one.
  browserInstance = null;
  await browser.close();
}
/**
 * Opens a new page in its own browser context on the shared browser.
 *
 * The `device` option (default `'desktop'`) selects the default viewport and
 * user agent and, for `'mobile'`, enables mobile emulation (scale factor 2,
 * `isMobile`, `hasTouch`). Explicit `userAgent` / `viewport` options override
 * the device defaults.
 *
 * @param options - See {@link PageOptions}; `timeout` defaults to 30000 and is
 *   applied through `page.setDefaultTimeout`.
 * @returns The new context and its page. The caller is responsible for closing
 *   the context when done.
 * @throws Whatever browser launch, `newContext` or `newPage` rejects with. If
 *   the page cannot be created, the freshly created context is closed first.
 */
export async function createPage(options: PageOptions = {}): Promise<{ context: BrowserContext; page: Page }> {
  const browser = await getBrowser();
  const isMobile = (options.device ?? 'desktop') === 'mobile';

  const context = await browser.newContext({
    userAgent: options.userAgent ?? (isMobile ? MOBILE_USER_AGENT : DESKTOP_USER_AGENT),
    viewport: options.viewport ?? (isMobile ? MOBILE_VIEWPORT : DESKTOP_VIEWPORT),
    deviceScaleFactor: isMobile ? 2 : 1,
    isMobile,
    hasTouch: isMobile,
  });

  try {
    const page = await context.newPage();
    page.setDefaultTimeout(options.timeout ?? 30000);
    return { context, page };
  } catch (error) {
    // The caller never receives this context, so nobody else could close it.
    // Best-effort cleanup; the original failure is what gets reported.
    await context.close().catch(() => undefined);
    throw error;
  }
}
/**
 * Navigates `page` to `url`, waits for it to settle and collects navigation metadata.
 *
 * @param page - Page to navigate.
 * @param url - URL to load.
 * @param options - `waitUntil` (default `'networkidle'`) and `timeout`
 *   (default 30000) are forwarded to `page.goto`. `waitForSelector`, when set,
 *   is awaited after navigation with a fixed 10000 ms timeout; a miss is ignored.
 * @returns
 *   - `response`: the value `page.goto` resolved with (may be `null`).
 *   - `loadTimeMs`: elapsed time from just before `goto` until all waits finished.
 *   - `initialHtml`: body of the HTML document response as served, i.e. before
 *     page scripts modified the DOM; `''` when no HTML body could be read.
 *   - `redirectChain`: redirect hops that led to the final response, oldest first.
 * @throws Whatever `page.goto` rejects with.
 */
export async function navigateToUrl(
  page: Page,
  url: string,
  options: {
    waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';
    waitForSelector?: string;
    timeout?: number;
  } = {}
): Promise<{
  response: any;
  loadTimeMs: number;
  initialHtml: string;
  redirectChain: Array<{ url: string; statusCode: number }>;
}> {
  // Minimal structural view of the response members used for HTML capture.
  interface DocumentResponseLike {
    url(): string;
    headers(): { [key: string]: string };
    text(): Promise<string>;
  }

  const { waitUntil = 'networkidle', waitForSelector, timeout = 30000 } = options;
  let initialHtml = '';
  const redirectChain: Array<{ url: string; statusCode: number }> = [];

  // Compare URLs in parsed form so that "https://a.com" matches the
  // "https://a.com/" reported for the response; the fragment is dropped
  // because it is never part of a response URL.
  const canonicalize = (value: string): string => {
    try {
      const parsed = new URL(value);
      parsed.hash = '';
      return parsed.href;
    } catch {
      return value;
    }
  };
  const requestedUrl = canonicalize(url);

  const isHtml = (res: DocumentResponseLike): boolean =>
    res.headers()['content-type']?.includes('text/html') ?? false;

  const readBody = async (res: DocumentResponseLike): Promise<void> => {
    try {
      initialHtml = await res.text();
    } catch {
      // Body not available (e.g. a redirect response); keep the current value.
    }
  };

  // Capture the served HTML of the requested document as soon as it arrives.
  const captureInitialHtml = async (res: DocumentResponseLike): Promise<void> => {
    if (canonicalize(res.url()) === requestedUrl && isHtml(res)) {
      await readBody(res);
    }
  };

  page.on('response', captureInitialHtml);
  try {
    const startTime = Date.now();
    const response = await page.goto(url, {
      waitUntil,
      timeout,
    });

    // Walk backwards from the final request; unshift keeps the chain oldest-first.
    if (response) {
      let req = response.request().redirectedFrom();
      while (req) {
        const resp = await req.response();
        if (resp) {
          redirectChain.unshift({
            url: req.url(),
            statusCode: resp.status(),
          });
        }
        req = req.redirectedFrom();
      }
    }

    // Optional, best-effort wait for a caller-specified element.
    if (waitForSelector) {
      await page.waitForSelector(waitForSelector, { timeout: 10000 }).catch(() => {});
    }

    // Additional best-effort wait for late-loading content.
    await page.waitForLoadState('networkidle').catch(() => {});
    const loadTimeMs = Date.now() - startTime;

    // After a redirect the document URL differs from the requested one, so the
    // listener never matched; fall back to the final navigation response.
    if (response && !initialHtml && isHtml(response)) {
      await readBody(response);
    }

    return {
      response,
      loadTimeMs,
      initialHtml,
      redirectChain,
    };
  } finally {
    // Always detach, otherwise every call on a reused page adds another listener.
    page.off('response', captureInitialHtml);
  }
}
/**
 * Reads the common SEO-related head tags from the page.
 *
 * Each `<meta>` lookup matches either `name` or `property` and yields the
 * element's `content` attribute; anything missing is reported as `null`.
 * `title` and `charset` come from `document.title` and
 * `document.characterSet`, with empty values mapped to `null`.
 *
 * @param page - Page whose document is inspected.
 * @returns The extracted tag values.
 */
export async function extractMetaTags(page: Page): Promise<{
  title: string | null;
  description: string | null;
  canonical: string | null;
  robots: string | null;
  viewport: string | null;
  charset: string | null;
  ogTitle: string | null;
  ogDescription: string | null;
  ogImage: string | null;
}> {
  const tags = await page.evaluate(() => {
    // Everything in here runs inside the page, so it must be self-contained.
    function contentOf(name: string): string | null {
      const node = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`);
      return node?.getAttribute('content') ?? null;
    }

    const canonicalLink = document.querySelector('link[rel="canonical"]');

    return {
      title: document.title || null,
      description: contentOf('description'),
      canonical: canonicalLink?.getAttribute('href') ?? null,
      robots: contentOf('robots'),
      viewport: contentOf('viewport'),
      charset: document.characterSet || null,
      ogTitle: contentOf('og:title'),
      ogDescription: contentOf('og:description'),
      ogImage: contentOf('og:image'),
    };
  });
  return tags;
}
/**
 * Collects the text of every `h1`–`h6` element and flags basic H1 problems.
 *
 * Heading text is trimmed and headings whose text is empty are dropped.
 * Reported issues: no H1, more than one H1, and a first H1 longer than
 * 70 characters.
 *
 * @param page - Page whose document is inspected.
 * @returns Heading texts per level plus the list of issue messages.
 */
export async function extractHeadings(page: Page): Promise<{
  h1: string[];
  h2: string[];
  h3: string[];
  h4: string[];
  h5: string[];
  h6: string[];
  issues: string[];
}> {
  const found = await page.evaluate(() => {
    // Runs inside the page: gather the non-empty, trimmed texts for one tag.
    const textsOf = (selector: string): string[] => {
      const texts: string[] = [];
      for (const node of Array.from(document.querySelectorAll(selector))) {
        const text = node.textContent?.trim();
        if (text) {
          texts.push(text);
        }
      }
      return texts;
    };

    return {
      h1: textsOf('h1'),
      h2: textsOf('h2'),
      h3: textsOf('h3'),
      h4: textsOf('h4'),
      h5: textsOf('h5'),
      h6: textsOf('h6'),
    };
  });

  const issues: string[] = [];
  const h1Count = found.h1.length;

  if (h1Count === 0) {
    issues.push('No H1 tag found');
  } else {
    if (h1Count > 1) {
      issues.push(`Multiple H1 tags found (${h1Count})`);
    }
    // Only the first H1 is measured.
    if (found.h1[0].length > 70) {
      issues.push('H1 is longer than recommended 70 characters');
    }
  }

  return { ...found, issues };
}
/**
 * Extracts every JSON-LD node declared in `<script type="application/ld+json">` tags.
 *
 * The result is a flat list of nodes:
 * - a top-level array contributes each of its entries,
 * - an object with an `@graph` array contributes the graph's members
 *   (a single-object `@graph` contributes that object),
 * - any other object is returned as-is.
 *
 * Scripts that are not valid JSON, and JSON values that are not objects
 * (`null`, numbers, strings, booleans), are skipped.
 *
 * @param page - Page whose document is inspected.
 * @returns Parsed JSON-LD nodes in document order.
 */
export async function extractJsonLd(page: Page): Promise<any[]> {
  return page.evaluate(() => {
    const results: any[] = [];

    // Adds one parsed JSON value to `results`, unwrapping arrays and @graph.
    const collect = (value: unknown): void => {
      if (Array.isArray(value)) {
        value.forEach(collect);
        return;
      }
      if (value === null || typeof value !== 'object') {
        return;
      }

      const graph = (value as Record<string, unknown>)['@graph'];
      if (Array.isArray(graph)) {
        results.push(...graph);
      } else if (graph !== null && typeof graph === 'object') {
        // A single-node graph written without the array wrapper.
        results.push(graph);
      } else {
        results.push(value);
      }
    };

    const scripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
    for (const script of scripts) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(script.textContent || '');
      } catch {
        // Invalid JSON: skip this script but keep processing the others.
        continue;
      }
      collect(parsed);
    }

    return results;
  });
}
/**
 * Extracts the navigational links (`<a href>`) of the page.
 *
 * Links are resolved against the page's current location. A link is
 * `internal` when its host equals the host of `baseUrl`, otherwise `external`.
 * Links whose `rel` contains `nofollow` (case-insensitive) are additionally
 * listed in `nofollow`.
 *
 * Skipped: empty hrefs, fragment-only hrefs (`#…`), `javascript:`, `mailto:`
 * and `tel:` links (matched on the parsed scheme, so case and surrounding
 * whitespace do not matter), and hrefs that cannot be parsed as URLs.
 *
 * @param page - Page whose document is inspected.
 * @param baseUrl - Absolute URL whose host defines what counts as internal.
 * @returns Link lists with absolute `href`s and trimmed link text.
 * @throws TypeError if `baseUrl` is not a valid absolute URL.
 */
export async function extractLinks(page: Page, baseUrl: string): Promise<{
  internal: Array<{ href: string; text: string; rel?: string }>;
  external: Array<{ href: string; text: string; rel?: string }>;
  nofollow: Array<{ href: string; text: string }>;
}> {
  const baseHost = new URL(baseUrl).host;

  return page.evaluate((host) => {
    // Schemes that never lead to a crawlable document.
    const skippedProtocols = ['javascript:', 'mailto:', 'tel:'];

    const internal: Array<{ href: string; text: string; rel?: string }> = [];
    const external: Array<{ href: string; text: string; rel?: string }> = [];
    const nofollow: Array<{ href: string; text: string }> = [];

    for (const link of Array.from(document.querySelectorAll('a[href]'))) {
      const rawHref = link.getAttribute('href')?.trim();
      if (!rawHref || rawHref.startsWith('#')) {
        continue;
      }

      let url: URL;
      try {
        url = new URL(rawHref, window.location.href);
      } catch {
        // Invalid URL, skip
        continue;
      }

      // The URL parser lower-cases the scheme, so "JavaScript:" and "MAILTO:"
      // are caught here as well.
      if (skippedProtocols.includes(url.protocol)) {
        continue;
      }

      const text = link.textContent?.trim() || '';
      const rel = link.getAttribute('rel') || undefined;
      const linkData = { href: url.href, text, rel };

      // rel keywords are case-insensitive in HTML.
      if (rel?.toLowerCase().includes('nofollow')) {
        nofollow.push({ href: url.href, text });
      }

      if (url.host === host) {
        internal.push(linkData);
      } else {
        external.push(linkData);
      }
    }

    return { internal, external, nofollow };
  }, baseHost);
}
/**
 * Summarises the `<img>` elements of the page.
 *
 * For each image the resolved `src`, the `alt` text (`null` when empty or
 * absent), the `loading` value and the natural dimensions are recorded; empty
 * or zero values for the last three are reported as `undefined`.
 *
 * @param page - Page whose document is inspected.
 * @returns Per-image details plus counts of images with/without alt text and
 *   of images whose `loading` is `'lazy'`.
 */
export async function extractImages(page: Page): Promise<{
  total: number;
  withAlt: number;
  withoutAlt: number;
  lazyLoaded: number;
  images: Array<{ src: string; alt: string | null; loading?: string; width?: number; height?: number }>;
}> {
  const images = await page.evaluate(() =>
    Array.from(document.querySelectorAll('img'), (element) => ({
      src: element.src,
      alt: element.alt || null,
      loading: element.loading || undefined,
      width: element.naturalWidth || undefined,
      height: element.naturalHeight || undefined,
    }))
  );

  // Single pass over the list instead of one filter per counter.
  let withAlt = 0;
  let lazyLoaded = 0;
  for (const image of images) {
    if (image.alt) {
      withAlt += 1;
    }
    if (image.loading === 'lazy') {
      lazyLoaded += 1;
    }
  }

  return {
    total: images.length,
    withAlt,
    withoutAlt: images.length - withAlt,
    lazyLoaded,
    images,
  };
}
/**
 * Reports whether an HTTPS page references subresources over plain HTTP.
 *
 * Only elements that make the browser fetch something are considered:
 * anything with a `src` attribute, `<object data>`, and `<link href>` whose
 * `rel` names a fetched resource (stylesheet, icon, preload, …). Ordinary
 * hyperlinks (`<a href>`) and reference-only links such as
 * `<link rel="canonical">` are not mixed content and are ignored.
 * URLs starting with `http://localhost` are exempt.
 *
 * @param page - Page to inspect.
 * @returns `true` when at least one insecure subresource reference is found;
 *   always `false` for pages that are not served over HTTPS.
 */
export async function checkMixedContent(page: Page): Promise<boolean> {
  const pageUrl = page.url();
  if (!pageUrl.startsWith('https://')) {
    return false;
  }

  return page.evaluate(() => {
    // <link rel> keywords that cause the href to be fetched as a subresource.
    const fetchedRels = [
      'stylesheet',
      'icon',
      'apple-touch-icon',
      'manifest',
      'preload',
      'prefetch',
      'modulepreload',
    ];

    const isInsecure = (value: string | null): boolean => {
      // URL schemes are case-insensitive and attributes may carry whitespace.
      const normalized = (value ?? '').trim().toLowerCase();
      return normalized.startsWith('http://') && !normalized.startsWith('http://localhost');
    };

    const candidates = Array.from(document.querySelectorAll('[src], link[href], object[data]'));
    return candidates.some((el) => {
      const tag = el.tagName.toLowerCase();
      if (tag === 'link') {
        const rels = (el.getAttribute('rel') ?? '').toLowerCase().split(/\s+/);
        return rels.some((rel) => fetchedRels.includes(rel)) && isInsecure(el.getAttribute('href'));
      }
      if (tag === 'object') {
        return isInsecure(el.getAttribute('data'));
      }
      return isInsecure(el.getAttribute('src'));
    });
  });
}
/**
 * Guesses which JavaScript framework rendered the page.
 *
 * Checks run in a fixed order and the first match wins: Next.js, Nuxt, React,
 * Vue, Angular. Meta-frameworks are tested first because their pages also
 * carry the markers of the underlying library.
 *
 * Markers used:
 * - Next.js: `#__next` element or `window.__NEXT_DATA__`
 * - Nuxt: `#__nuxt` element or `window.__NUXT__`
 * - React: `window.__REACT_DEVTOOLS_GLOBAL_HOOK__`, a `[data-reactroot]`
 *   element, or React's bookkeeping properties on `#root`. Those property
 *   names carry a random suffix (e.g. `__reactContainer$abc`), so they are
 *   matched by prefix.
 * - Vue: `window.__VUE__` or any element attribute whose name starts with
 *   `data-v-`. CSS cannot match attribute names by prefix, so the attributes
 *   are scanned in script.
 * - Angular: `window.ng` or an `[ng-version]` element
 *
 * @param page - Page to inspect.
 * @returns The detected framework, or `'unknown'` when no marker is present.
 */
export async function detectFramework(page: Page): Promise<'react' | 'vue' | 'angular' | 'next' | 'nuxt' | 'unknown'> {
  return page.evaluate(() => {
    const globals = window as any;

    // Check for Next.js
    if (document.querySelector('#__next') || globals.__NEXT_DATA__) {
      return 'next';
    }

    // Check for Nuxt
    if (document.querySelector('#__nuxt') || globals.__NUXT__) {
      return 'nuxt';
    }

    // Check for React
    const reactKeyPrefixes = ['__reactContainer', '__reactFiber', '__reactInternalInstance'];
    const hasReactInternals = (el: Element | null): boolean => {
      if (!el) {
        return false;
      }
      if ('_reactRootContainer' in el) {
        return true;
      }
      return Object.keys(el).some((key) => reactKeyPrefixes.some((prefix) => key.startsWith(prefix)));
    };
    if (
      globals.__REACT_DEVTOOLS_GLOBAL_HOOK__ ||
      document.querySelector('[data-reactroot]') ||
      hasReactInternals(document.querySelector('#root'))
    ) {
      return 'react';
    }

    // Check for Vue
    const hasVueAttribute = (): boolean =>
      Array.from(document.querySelectorAll('*')).some((el) =>
        el.getAttributeNames().some((name) => name.startsWith('data-v-'))
      );
    if (globals.__VUE__ || hasVueAttribute()) {
      return 'vue';
    }

    // Check for Angular
    if (globals.ng || document.querySelector('[ng-version]')) {
      return 'angular';
    }

    return 'unknown';
  });
}
/**
 * Saves two PNG screenshots of the page: the current viewport and the full page.
 *
 * Files are written to `<outputDir>/<urlSlug>-current.png` and
 * `<outputDir>/<urlSlug>-fullpage.png`.
 *
 * @param page - Page to capture.
 * @param outputDir - Directory prefix for the image paths.
 * @param urlSlug - File name prefix identifying the page.
 * @returns The written paths. The viewport capture is reported under
 *   `desktop` regardless of the page's emulated device; `mobile` is never set.
 */
export async function takeScreenshots(
  page: Page,
  outputDir: string,
  urlSlug: string
): Promise<{ desktop?: string; mobile?: string; fullPage?: string }> {
  const viewportShot = `${outputDir}/${urlSlug}-current.png`;
  const fullPageShot = `${outputDir}/${urlSlug}-fullpage.png`;

  // Viewport first, then the full scrollable page.
  await page.screenshot({ path: viewportShot });
  await page.screenshot({ path: fullPageShot, fullPage: true });

  return { desktop: viewportShot, fullPage: fullPageShot };
}
/**
 * Returns the page's current HTML as reported by `page.content()`.
 *
 * @param page - Page to serialise.
 * @returns The HTML string produced by `page.content()`.
 */
export async function getRenderedHtml(page: Page): Promise<string> {
  const html = await page.content();
  return html;
}
/**
 * Tells whether the HTML contains at least one of the given patterns.
 *
 * Matching is a case-insensitive substring search. One matching pattern is
 * enough; an empty pattern list yields `false`.
 *
 * @param initialHtml - HTML text to search.
 * @param patterns - Plain-text fragments to look for.
 * @returns `true` if any pattern occurs in `initialHtml`.
 */
export function checkCriticalContent(
  initialHtml: string,
  patterns: string[]
): boolean {
  const haystack = initialHtml.toLowerCase();
  for (const pattern of patterns) {
    if (haystack.indexOf(pattern.toLowerCase()) !== -1) {
      return true;
    }
  }
  return false;
}