Skip to main content
Glama
RichardDillman

SEO Audit MCP Server

crawl-page.ts (8.07 kB)
// src/tools/crawl-page.ts
// Single page SEO analysis tool

import type {
  PageAnalysis,
  StructuredDataResult,
  JobPostingSchema,
  SchemaError,
  AnalyzePageInput
} from '../types/index.js';
import {
  createPage,
  navigateToUrl,
  extractMetaTags,
  extractHeadings,
  extractJsonLd,
  extractLinks,
  extractImages,
  checkMixedContent,
  detectFramework,
  getRenderedHtml,
  checkCriticalContent,
} from '../utils/browser.js';

/**
 * Analyze a single page for SEO factors.
 *
 * Opens a browser page, navigates to `input.url`, runs the individual
 * extractors in parallel and assembles their output into one `PageAnalysis`.
 * The browser context is always closed, even when navigation or extraction
 * throws.
 *
 * @param input - Page URL plus optional `waitForSelector`, `timeout`
 *   (default 30000) and `device` (default `'desktop'`). `checkLinks` is
 *   accepted by the input type but is not acted on here: `links.broken` is
 *   always returned empty.
 * @returns The assembled analysis for the page.
 */
export async function analyzePage(input: AnalyzePageInput): Promise<PageAnalysis> {
  const {
    url,
    waitForSelector,
    timeout = 30000,
    device = 'desktop',
  } = input;

  const { context, page } = await createPage({ device, timeout });

  try {
    // Navigate and capture timing
    const { response, loadTimeMs, initialHtml, redirectChain } = await navigateToUrl(
      page,
      url,
      { waitForSelector, timeout }
    );

    // A missing response is reported as status 0.
    const httpStatus = response?.status() ?? 0;

    // Get rendered HTML
    const renderedHtml = await getRenderedHtml(page);

    // Extract all SEO elements in parallel
    const [
      metaTags,
      headings,
      jsonLd,
      links,
      images,
      hasMixedContent,
      framework,
    ] = await Promise.all([
      extractMetaTags(page),
      extractHeadings(page),
      extractJsonLd(page),
      extractLinks(page, url),
      extractImages(page),
      checkMixedContent(page),
      detectFramework(page),
    ]);

    // Analyze structured data
    const structuredData = analyzeStructuredData(jsonLd);

    // Analyze rendering: the page is treated as JS-dependent when the rendered
    // DOM is more than 1.5x the size of the HTML that was initially delivered.
    const jsRenderingRequired = renderedHtml.length > initialHtml.length * 1.5;
    const criticalPatterns = ['job', 'position', 'career', 'apply', 'salary'];
    const criticalContentInInitialHtml = checkCriticalContent(initialHtml, criticalPatterns);

    // Get page language (null when the lookup fails)
    const language = await page.$eval('html', el => el.getAttribute('lang')).catch(() => null);

    const analysis: PageAnalysis = {
      url,
      timestamp: new Date().toISOString(),
      httpStatus,
      redirectChain,
      responseTime: loadTimeMs,

      title: metaTags.title,
      metaDescription: metaTags.description,
      canonicalUrl: metaTags.canonical,
      robotsMeta: metaTags.robots,

      headings,
      structuredData,

      // Match the full scheme, case-insensitively, rather than any string that
      // merely begins with the letters "https".
      isHttps: /^https:\/\//i.test(url),
      hasMixedContent,
      viewport: metaTags.viewport,
      charset: metaTags.charset,
      language,

      rendering: {
        initialHtmlLength: initialHtml.length,
        renderedHtmlLength: renderedHtml.length,
        jsRenderingRequired,
        // Guard against division by zero when no initial HTML was captured.
        jsRenderingRatio: initialHtml.length > 0
          ? renderedHtml.length / initialHtml.length
          : 0,
        criticalContentInInitialHtml,
        framework,
      },

      links: {
        internal: links.internal,
        external: links.external,
        broken: [], // Link checking is not performed in this function
        nofollow: links.nofollow,
        totalCount: links.internal.length + links.external.length,
      },

      images: {
        total: images.total,
        withAlt: images.withAlt,
        withoutAlt: images.withoutAlt,
        lazyLoaded: images.lazyLoaded,
        oversized: [], // Would need size checking
        images: images.images.slice(0, 50), // Limit for response size
      },

      loadTimeMs,
    };

    return analysis;
  } finally {
    await context.close();
  }
}

/**
 * Flatten parsed JSON-LD into a list of object nodes.
 *
 * Descends into arrays and into `@graph` containers; anything that is not an
 * object (null, strings, numbers) is dropped.
 */
function collectJsonLdNodes(
  input: unknown,
  out: Record<string, unknown>[] = []
): Record<string, unknown>[] {
  if (Array.isArray(input)) {
    for (const entry of input) {
      collectJsonLdNodes(entry, out);
    }
  } else if (input !== null && typeof input === 'object') {
    const node = input as Record<string, unknown>;
    out.push(node);
    if ('@graph' in node) {
      collectJsonLdNodes(node['@graph'], out);
    }
  }
  return out;
}

/**
 * Return the `@type` values of a JSON-LD node as a list of strings.
 * Accepts both the single-string and the array form of `@type`.
 */
function jsonLdTypes(node: unknown): string[] {
  if (node === null || typeof node !== 'object') {
    return [];
  }
  const declared = (node as Record<string, unknown>)['@type'];
  const candidates: unknown[] = Array.isArray(declared) ? declared : [declared];
  return candidates.filter((t): t is string => typeof t === 'string');
}

/**
 * Analyze structured data, with special focus on JobPosting.
 *
 * Every JobPosting node is passed to `validateJobPosting` and its errors and
 * warnings are accumulated. Organization/Corporation, BreadcrumbList and
 * WebSite (with SearchAction) nodes only set presence flags.
 *
 * @param jsonLd - Parsed JSON-LD values found on the page. Returned unchanged
 *   in `result.jsonLd`.
 * @returns Presence flags plus the validated JobPosting entries. `microdata`
 *   and `rdfa` are always empty because they are not extracted here.
 */
function analyzeStructuredData(jsonLd: any[]): StructuredDataResult {
  const result: StructuredDataResult = {
    jsonLd,
    microdata: [], // Would need separate extraction
    rdfa: [],
    hasJobPosting: false,
    jobPostings: [],
    jobPostingErrors: [],
    jobPostingWarnings: [],
    hasOrganization: false,
    hasBreadcrumb: false,
    hasWebSite: false,
    hasSearchAction: false,
  };

  for (const item of collectJsonLdNodes(jsonLd)) {
    const types = jsonLdTypes(item);

    // Independent checks: a node that declares several types counts for each.
    if (types.includes('JobPosting')) {
      result.hasJobPosting = true;
      const { schema, errors, warnings } = validateJobPosting(item);
      result.jobPostings.push(schema);
      result.jobPostingErrors.push(...errors);
      result.jobPostingWarnings.push(...warnings);
    }

    if (types.includes('Organization') || types.includes('Corporation')) {
      result.hasOrganization = true;
    }

    if (types.includes('BreadcrumbList')) {
      result.hasBreadcrumb = true;
    }

    if (types.includes('WebSite')) {
      result.hasWebSite = true;
      // potentialAction may be a single action or a list of actions.
      const declaredActions = item['potentialAction'];
      const actions: unknown[] = Array.isArray(declaredActions)
        ? declaredActions
        : [declaredActions];
      if (actions.some(action => jsonLdTypes(action).includes('SearchAction'))) {
        result.hasSearchAction = true;
      }
    }
  }

  return result;
}

/**
 * Validate JobPosting schema against Google requirements
 */
function validateJobPosting(schema: any): {
  schema: JobPostingSchema;
  errors: SchemaError[];
  warnings: SchemaError[];
} {
  // Checks a single JSON-LD JobPosting node and returns:
  //   - schema:   a trimmed-down copy of the posting (description capped at
  //               500 characters) with the untouched input kept under `raw`
  //   - errors:   missing required fields and invalid values
  //   - warnings: missing recommended fields and quality hints
  const errors: SchemaError[] = [];
  const warnings: SchemaError[] = [];

  // Tolerate a null / non-object node so one malformed entry cannot throw.
  const posting: Record<string, any> =
    schema !== null && typeof schema === 'object' ? schema : {};

  // A field counts as missing when it is absent, falsy, a blank string or an
  // empty array. `directApply` is a boolean property, so an explicit `false`
  // is a real answer and must not be reported as missing.
  const isMissing = (field: string): boolean => {
    const value: unknown = posting[field];
    if (typeof value === 'string') {
      return value.trim() === '';
    }
    if (Array.isArray(value)) {
      return value.length === 0;
    }
    if (field === 'directApply' && typeof value === 'boolean') {
      return false;
    }
    return !value;
  };

  // A fully remote posting may describe its location through
  // applicantLocationRequirements instead of jobLocation.
  const isRemote = posting.jobLocationType === 'TELECOMMUTE';
  const hasApplicantLocation = !isMissing('applicantLocationRequirements');

  // Required fields per Google
  const requiredFields = [
    'title',
    'description',
    'datePosted',
    'hiringOrganization',
    'jobLocation',
  ];

  for (const field of requiredFields) {
    if (!isMissing(field)) {
      continue;
    }
    if (field === 'jobLocation' && isRemote && hasApplicantLocation) {
      continue;
    }
    errors.push({
      field,
      message: `Missing required field: ${field}`,
      severity: 'error',
    });
  }

  // Recommended fields
  const recommendedFields = [
    'validThrough',
    'baseSalary',
    'employmentType',
    'identifier',
    'directApply',
  ];

  for (const field of recommendedFields) {
    if (isMissing(field)) {
      warnings.push({
        field,
        message: `Missing recommended field: ${field}`,
        severity: 'warning',
      });
    }
  }

  // Specific validations
  if (posting.validThrough) {
    const expiry = new Date(posting.validThrough);
    if (isNaN(expiry.getTime())) {
      errors.push({
        field: 'validThrough',
        message: 'Invalid date format for validThrough',
        severity: 'error',
      });
    } else if (expiry < new Date()) {
      warnings.push({
        field: 'validThrough',
        message: 'Job posting has expired (validThrough is in the past)',
        severity: 'warning',
      });
    }
  }

  if (typeof posting.description === 'string' && posting.description) {
    if (posting.description.length < 100) {
      warnings.push({
        field: 'description',
        message: 'Description is shorter than recommended (should be comprehensive)',
        severity: 'warning',
      });
    }
  }

  // Only checked when an organization is present; a missing one has already
  // been reported by the required-field loop.
  if (!isMissing('hiringOrganization') && !posting.hiringOrganization.name) {
    errors.push({
      field: 'hiringOrganization.name',
      message: 'hiringOrganization must include name',
      severity: 'error',
    });
  }

  // Remote job validation
  if (isRemote && !hasApplicantLocation) {
    warnings.push({
      field: 'applicantLocationRequirements',
      message: 'Remote jobs should specify applicantLocationRequirements',
      severity: 'warning',
    });
  }

  // Salary validation
  if (posting.baseSalary) {
    if (!posting.baseSalary.currency) {
      warnings.push({
        field: 'baseSalary.currency',
        message: 'Salary should include currency',
        severity: 'warning',
      });
    }
  }

  // Truncate the description for response size; the ellipsis is added only
  // when text was actually cut off.
  const previewLength = 500;
  let descriptionPreview: string | undefined;
  if (typeof posting.description === 'string') {
    descriptionPreview = posting.description.length > previewLength
      ? posting.description.substring(0, previewLength) + '...'
      : posting.description;
  }

  const parsedSchema: JobPostingSchema = {
    title: posting.title,
    description: descriptionPreview,
    datePosted: posting.datePosted,
    validThrough: posting.validThrough,
    employmentType: posting.employmentType,
    hiringOrganization: posting.hiringOrganization,
    jobLocation: posting.jobLocation,
    jobLocationType: posting.jobLocationType,
    applicantLocationRequirements: posting.applicantLocationRequirements,
    baseSalary: posting.baseSalary,
    directApply: posting.directApply,
    raw: schema,
  };

  return { schema: parsedSchema, errors, warnings };
}

export default analyzePage;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/RichardDillman/seo-audit-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.