
SEO Audit MCP Server

index.ts (10.8 kB)
// src/tools/index.ts
// Tool registry - exports all available SEO audit tools

export { analyzePage } from './crawl-page.js';
export { crawlSite } from './crawl-site.js';
export { runLighthouse, checkLighthouseInstalled } from './lighthouse.js';
export {
  analyzeSiteAccess,
  analyzeSitemap,
  analyzeRobots,
  fetchRobots,
  fetchSitemap,
} from './sitemap.js';
export { planAudit } from './plan-audit.js';
export { samplePages } from './sample-pages.js';
export { runAudit } from './run-audit.js';

// Re-export types
export type {
  PageAnalysis,
  SiteCrawlResult,
  LighthouseResult,
  SitemapAnalysis,
  RobotsAnalysis,
  AnalyzePageInput,
  CrawlSiteInput,
  RunLighthouseInput,
  AnalyzeSitemapInput,
} from '../types/index.js';

/**
 * Tool definitions for MCP server registration
 */
export const TOOL_DEFINITIONS = [
  {
    name: 'analyze_page',
    description: `Analyze a single web page for SEO factors including:
- Meta tags (title, description, canonical, robots)
- Heading structure (H1-H6)
- Structured data (JSON-LD, with special focus on JobPosting schema)
- JavaScript rendering analysis (CSR vs SSR detection)
- Link analysis (internal, external, nofollow)
- Image analysis (alt tags, lazy loading)
- Mixed content detection
- Basic load time measurement

Use this for detailed analysis of specific pages like job detail pages, landing pages, or homepage.`,
    inputSchema: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: 'The URL to analyze',
        },
        waitForSelector: {
          type: 'string',
          description: 'Optional CSS selector to wait for before analysis (useful for JS-heavy pages)',
        },
        timeout: {
          type: 'number',
          description: 'Timeout in milliseconds (default: 30000)',
        },
        device: {
          type: 'string',
          enum: ['desktop', 'mobile'],
          description: 'Device type to emulate (default: desktop)',
        },
      },
      required: ['url'],
    },
  },
  {
    name: 'crawl_site',
    description: `Crawl multiple pages of a website starting from a URL. Discovers internal links and analyzes each page.

Returns:
- Aggregated statistics (pages with titles, meta descriptions, schema, etc.)
- Page type classification (job detail, category landing, location pages, etc.)
- Duplicate detection (titles, descriptions)
- Critical issues and warnings
- All individual page analyses

Use this for comprehensive site audits. Respects crawl limits and delays.`,
    inputSchema: {
      type: 'object',
      properties: {
        startUrl: {
          type: 'string',
          description: 'The URL to start crawling from',
        },
        maxPages: {
          type: 'number',
          description: 'Maximum pages to crawl (default: 50)',
        },
        maxDepth: {
          type: 'number',
          description: 'Maximum link depth to follow (default: 5)',
        },
        includePatterns: {
          type: 'array',
          items: { type: 'string' },
          description: 'Regex patterns - only crawl URLs matching these patterns',
        },
        excludePatterns: {
          type: 'array',
          items: { type: 'string' },
          description: 'Regex patterns - skip URLs matching these patterns',
        },
      },
      required: ['startUrl'],
    },
  },
  {
    name: 'run_lighthouse',
    description: `Run a Lighthouse performance audit on a URL.

Returns:
- Performance, Accessibility, Best Practices, and SEO scores
- Core Web Vitals (LCP, CLS, TBT/INP proxy, FCP, TTFB)
- Optimization opportunities with estimated savings
- Diagnostics (long tasks, layout shifts, etc.)
- SEO audit results (crawlability, meta tags, etc.)

Use this for performance analysis. Run separately for mobile and desktop if both matter.

Note: Requires Lighthouse CLI to be installed (npm install -g lighthouse).`,
    inputSchema: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: 'The URL to audit',
        },
        device: {
          type: 'string',
          enum: ['mobile', 'desktop'],
          description: 'Device type to emulate (default: mobile)',
        },
        categories: {
          type: 'array',
          items: {
            type: 'string',
            enum: ['performance', 'accessibility', 'best-practices', 'seo'],
          },
          description: 'Categories to audit (default: all)',
        },
        saveReport: {
          type: 'boolean',
          description: 'Save HTML report to disk (default: false)',
        },
      },
      required: ['url'],
    },
  },
  {
    name: 'analyze_sitemap',
    description: `Analyze a site's robots.txt and XML sitemaps.

Returns:
- robots.txt rules and any blocking issues
- All discovered sitemaps (from robots.txt and common locations)
- URL counts and job-specific URL detection
- Sitemap freshness analysis
- Recommendations for job boards (Indexing API, job sitemaps)

Use this as a first step to understand site structure before crawling.`,
    inputSchema: {
      type: 'object',
      properties: {
        baseUrl: {
          type: 'string',
          description: 'The base URL of the site (e.g., https://example.com)',
        },
        includeSitemapUrls: {
          type: 'boolean',
          description: 'Include full URL list from sitemaps (default: true)',
        },
        maxUrls: {
          type: 'number',
          description: 'Maximum URLs to process per sitemap (default: 1000)',
        },
      },
      required: ['baseUrl'],
    },
  },
  {
    name: 'check_urls',
    description: `Check HTTP status codes for a list of URLs. Returns status code, redirect destination (if redirected), and response time for each URL.

Use this to:
- Verify expired job pages are handled correctly
- Check for broken links
- Analyze redirect chains`,
    inputSchema: {
      type: 'object',
      properties: {
        urls: {
          type: 'array',
          items: { type: 'string' },
          description: 'List of URLs to check',
        },
        timeout: {
          type: 'number',
          description: 'Timeout per URL in milliseconds (default: 10000)',
        },
      },
      required: ['urls'],
    },
  },
  {
    name: 'plan_audit',
    description: `**RECOMMENDED FIRST STEP** - Analyze sitemaps and create an intelligent sampling strategy for large sites.

This tool is essential for job boards and large sites with 100k+ pages. Instead of crawling everything, it:
1. Discovers and validates all sitemaps (robots.txt + common locations)
2. Identifies distinct route patterns (job pages, category pages, location pages, etc.)
3. Estimates total pages per route type
4. Generates a smart sampling strategy
5. Recommends which pages to analyze with Lighthouse

Returns:
- Sitemap validation (URL limits, lastmod coverage, compression)
- Route pattern classification with estimated counts
- Sampling strategy (how many pages to sample per type)
- Issues, warnings, and recommendations

Use this BEFORE crawl_site or sample_pages to understand site structure.`,
    inputSchema: {
      type: 'object',
      properties: {
        baseUrl: {
          type: 'string',
          description: 'The base URL of the site (e.g., https://talent.com)',
        },
        maxSitemapsToProcess: {
          type: 'number',
          description: 'Maximum sitemaps to analyze (default: 20)',
        },
        maxUrlsPerSitemap: {
          type: 'number',
          description: 'Maximum URLs to process per sitemap for pattern analysis (default: 5000)',
        },
      },
      required: ['baseUrl'],
    },
  },
  {
    name: 'sample_pages',
    description: `Intelligently sample and analyze pages based on an audit plan. Use this AFTER plan_audit to analyze representative pages from each route type.

For a site with 500k job pages, instead of crawling all of them, this will:
- Sample 30-50 job detail pages (random + oldest + newest)
- Sample 10-20 category landing pages
- Sample 10-20 location pages
- Sample company pages, static pages, etc.

Returns:
- Detailed analysis of each sampled page
- Aggregated issues per route type
- Cross-cutting findings (% missing titles, schema errors, etc.)
- Common issues ranked by frequency

This approach finds template-level issues that affect all pages of that type.`,
    inputSchema: {
      type: 'object',
      properties: {
        plan: {
          type: 'object',
          description: 'The audit plan from plan_audit tool',
        },
        routeTypes: {
          type: 'array',
          items: { type: 'string' },
          description: 'Specific route types to sample (default: all). E.g., ["job_detail", "category"]',
        },
        samplesOverride: {
          type: 'object',
          description: 'Override sample counts per route type. E.g., {"job_detail": 10, "category": 5}',
        },
        concurrency: {
          type: 'number',
          description: 'Concurrent page analyses (default: 2)',
        },
      },
      required: ['plan'],
    },
  },
  {
    name: 'run_audit',
    description: `**FULL AUDIT** - Run a complete SEO audit with automatic sampling, caching, and report generation.

This is the main audit tool that orchestrates the entire workflow:
1. Discovers and analyzes sitemaps
2. Identifies route patterns and creates sampling strategy
3. Captures sample pages (cached - only fetches once)
4. Analyzes SEO, structured data, technical issues, social graph
5. Generates prioritized recommendations
6. Saves everything to reports/[sitename]/ folder

The audit captures pages ONCE and stores:
- HTML snapshots for inspection
- Full analysis data as JSON
- Final report as JSON + Markdown

Returns comprehensive findings and prioritized fix recommendations.`,
    inputSchema: {
      type: 'object',
      properties: {
        baseUrl: {
          type: 'string',
          description: 'The base URL of the site to audit (e.g., https://talent.com)',
        },
        reportsDir: {
          type: 'string',
          description: 'Directory to save reports (default: ./reports)',
        },
        maxSitemaps: {
          type: 'number',
          description: 'Maximum sitemaps to process (default: 15)',
        },
        maxUrlsPerSitemap: {
          type: 'number',
          description: 'Maximum URLs per sitemap for pattern analysis (default: 2000)',
        },
        samplesPerRouteType: {
          type: 'number',
          description: 'Override samples per route type (default: auto based on route importance)',
        },
        concurrency: {
          type: 'number',
          description: 'Concurrent page captures (default: 2)',
        },
      },
      required: ['baseUrl'],
    },
  },
];
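TOOL_DEFINITIONS is the array an MCP server advertises to clients. The repository's actual server entry point is not shown on this page, but as a rough sketch - assuming the standard @modelcontextprotocol/sdk request handlers, a hypothetical server name and version, and that each exported function accepts its tool's input object - registration could look like this:

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { TOOL_DEFINITIONS, analyzePage, runAudit } from './tools/index.js';

const server = new Server(
  { name: 'seo-audit-mcp', version: '0.1.0' }, // hypothetical name/version
  { capabilities: { tools: {} } }
);

// Advertise every tool definition to connected MCP clients.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: TOOL_DEFINITIONS,
}));

// Dispatch tool calls to the exported implementations (only two shown here;
// the real dispatch table and argument types are assumptions).
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  let result: unknown;
  if (name === 'analyze_page') {
    result = await analyzePage(args as any);
  } else if (name === 'run_audit') {
    result = await runAudit(args as any);
  } else {
    throw new Error(`Unknown tool: ${name}`);
  }
  return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
});

// Serve over stdio, the usual transport for locally installed MCP servers.
await server.connect(new StdioServerTransport());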


MCP directory API

We provide all the information about MCP servers via our MCP directory API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/RichardDillman/seo-audit-mcp'
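
The same lookup can be made from code. A minimal sketch, assuming Node 18+ (global fetch), a JSON response body, and no authentication:

// Fetch this server's directory entry from the Glama MCP API.
const res = await fetch('https://glama.ai/api/mcp/v1/servers/RichardDillman/seo-audit-mcp');
if (!res.ok) {
  throw new Error(`Directory API request failed: ${res.status}`);
}
const entry = await res.json();
console.log(entry);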

If you have feedback or need assistance with the MCP directory API, please join our Discord server.