// src/tools/index.ts
// Tool registry - exports all available SEO audit tools
export { analyzePage } from './crawl-page.js';
export { crawlSite } from './crawl-site.js';
export { runLighthouse, checkLighthouseInstalled } from './lighthouse.js';
export {
analyzeSiteAccess,
analyzeSitemap,
analyzeRobots,
fetchRobots,
fetchSitemap,
} from './sitemap.js';
export { planAudit } from './plan-audit.js';
export { samplePages } from './sample-pages.js';
export { runAudit } from './run-audit.js';
// Re-export types
export type {
PageAnalysis,
SiteCrawlResult,
LighthouseResult,
SitemapAnalysis,
RobotsAnalysis,
AnalyzePageInput,
CrawlSiteInput,
RunLighthouseInput,
AnalyzeSitemapInput,
} from '../types/index.js';
/**
* Tool definitions for MCP server registration
*/
export const TOOL_DEFINITIONS = [
{
name: 'analyze_page',
description: `Analyze a single web page for SEO factors including:
- Meta tags (title, description, canonical, robots)
- Heading structure (H1-H6)
- Structured data (JSON-LD, with special focus on JobPosting schema)
- JavaScript rendering analysis (CSR vs SSR detection)
- Link analysis (internal, external, nofollow)
- Image analysis (alt tags, lazy loading)
- Mixed content detection
- Basic load time measurement
Use this for detailed analysis of specific pages such as job detail pages, landing pages, or the homepage.`,
inputSchema: {
type: 'object',
properties: {
url: {
type: 'string',
description: 'The URL to analyze',
},
waitForSelector: {
type: 'string',
description: 'Optional CSS selector to wait for before analysis (useful for JS-heavy pages)',
},
timeout: {
type: 'number',
description: 'Timeout in milliseconds (default: 30000)',
},
device: {
type: 'string',
enum: ['desktop', 'mobile'],
description: 'Device type to emulate (default: desktop)',
},
},
required: ['url'],
},
},
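  // Example arguments for 'analyze_page' (hypothetical URL and selector, shapes per the schema above):
  //   { url: 'https://example.com/jobs/12345', waitForSelector: '.job-description', device: 'mobile' }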
{
name: 'crawl_site',
description: `Crawl multiple pages of a website starting from a URL. Discovers internal links and analyzes each page.
Returns:
- Aggregated statistics (pages with titles, meta descriptions, schema, etc.)
- Page type classification (job detail, category landing, location pages, etc.)
- Duplicate detection (titles, descriptions)
- Critical issues and warnings
- All individual page analyses
Use this for comprehensive site audits. Respects crawl limits and delays.`,
inputSchema: {
type: 'object',
properties: {
startUrl: {
type: 'string',
description: 'The URL to start crawling from',
},
maxPages: {
type: 'number',
description: 'Maximum pages to crawl (default: 50)',
},
maxDepth: {
type: 'number',
description: 'Maximum link depth to follow (default: 5)',
},
includePatterns: {
type: 'array',
items: { type: 'string' },
description: 'Regex patterns - only crawl URLs matching these patterns',
},
excludePatterns: {
type: 'array',
items: { type: 'string' },
description: 'Regex patterns - skip URLs matching these patterns',
},
},
required: ['startUrl'],
},
},
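  // Example arguments for 'crawl_site' (hypothetical site and regex patterns):
  //   { startUrl: 'https://example.com', maxPages: 100, includePatterns: ['^/jobs/'], excludePatterns: ['\\?page='] }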
{
name: 'run_lighthouse',
description: `Run a Lighthouse performance audit on a URL.
Returns:
- Performance, Accessibility, Best Practices, and SEO scores
- Core Web Vitals and related lab metrics (LCP, CLS, FCP, TTFB, and TBT as a lab proxy for INP)
- Optimization opportunities with estimated savings
- Diagnostics (long tasks, layout shifts, etc.)
- SEO audit results (crawlability, meta tags, etc.)
Use this for performance analysis. Run separately for mobile and desktop if both matter.
Note: Requires Lighthouse CLI to be installed (npm install -g lighthouse).`,
inputSchema: {
type: 'object',
properties: {
url: {
type: 'string',
description: 'The URL to audit',
},
device: {
type: 'string',
enum: ['mobile', 'desktop'],
description: 'Device type to emulate (default: mobile)',
},
categories: {
type: 'array',
items: {
type: 'string',
enum: ['performance', 'accessibility', 'best-practices', 'seo'],
},
description: 'Categories to audit (default: all)',
},
saveReport: {
type: 'boolean',
description: 'Save HTML report to disk (default: false)',
},
},
required: ['url'],
},
},
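  // Example arguments for 'run_lighthouse' (hypothetical URL; requires the Lighthouse CLI, per the note above):
  //   { url: 'https://example.com', device: 'mobile', categories: ['performance', 'seo'], saveReport: true }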
{
name: 'analyze_sitemap',
description: `Analyze a site's robots.txt and XML sitemaps.
Returns:
- robots.txt rules and any blocking issues
- All discovered sitemaps (from robots.txt and common locations)
- URL counts and job-specific URL detection
- Sitemap freshness analysis
- Recommendations for job boards (Indexing API, job sitemaps)
Use this as a first step to understand site structure before crawling.`,
inputSchema: {
type: 'object',
properties: {
baseUrl: {
type: 'string',
description: 'The base URL of the site (e.g., https://example.com)',
},
includeSitemapUrls: {
type: 'boolean',
description: 'Include full URL list from sitemaps (default: true)',
},
maxUrls: {
type: 'number',
description: 'Maximum URLs to process per sitemap (default: 1000)',
},
},
required: ['baseUrl'],
},
},
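  // Example arguments for 'analyze_sitemap' (hypothetical base URL):
  //   { baseUrl: 'https://example.com', includeSitemapUrls: false, maxUrls: 500 }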
{
name: 'check_urls',
description: `Check HTTP status codes for a list of URLs.
Returns status code, redirect destination (if redirected), and response time for each URL.
Use this to:
- Verify expired job pages are handled correctly
- Check for broken links
- Analyze redirect chains`,
inputSchema: {
type: 'object',
properties: {
urls: {
type: 'array',
items: { type: 'string' },
description: 'List of URLs to check',
},
timeout: {
type: 'number',
description: 'Timeout per URL in milliseconds (default: 10000)',
},
},
required: ['urls'],
},
},
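  // Example arguments for 'check_urls' (hypothetical URLs, e.g. recently expired job postings):
  //   { urls: ['https://example.com/jobs/closed-1', 'https://example.com/jobs/closed-2'], timeout: 5000 }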
{
name: 'plan_audit',
description: `**RECOMMENDED FIRST STEP** - Analyze sitemaps and create an intelligent sampling strategy for large sites.
This tool is essential for job boards and large sites with 100k+ pages. Instead of crawling everything, it:
1. Discovers and validates all sitemaps (robots.txt + common locations)
2. Identifies distinct route patterns (job pages, category pages, location pages, etc.)
3. Estimates total pages per route type
4. Generates a smart sampling strategy
5. Recommends which pages to analyze with Lighthouse
Returns:
- Sitemap validation (URL limits, lastmod coverage, compression)
- Route pattern classification with estimated counts
- Sampling strategy (how many pages to sample per type)
- Issues, warnings, and recommendations
Use this BEFORE crawl_site or sample_pages to understand site structure.`,
inputSchema: {
type: 'object',
properties: {
baseUrl: {
type: 'string',
description: 'The base URL of the site (e.g., https://talent.com)',
},
maxSitemapsToProcess: {
type: 'number',
description: 'Maximum sitemaps to analyze (default: 20)',
},
maxUrlsPerSitemap: {
type: 'number',
description: 'Maximum URLs to process per sitemap for pattern analysis (default: 5000)',
},
},
required: ['baseUrl'],
},
},
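  // Example arguments for 'plan_audit' (hypothetical base URL):
  //   { baseUrl: 'https://example.com', maxSitemapsToProcess: 10, maxUrlsPerSitemap: 2000 }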
{
name: 'sample_pages',
description: `Intelligently sample and analyze pages based on an audit plan.
Use this AFTER plan_audit to analyze representative pages from each route type.
For a site with 500k job pages, instead of crawling all of them, this will:
- Sample 30-50 job detail pages (random + oldest + newest)
- Sample 10-20 category landing pages
- Sample 10-20 location pages
- Sample company pages, static pages, etc.
Returns:
- Detailed analysis of each sampled page
- Aggregated issues per route type
- Cross-cutting findings (% missing titles, schema errors, etc.)
- Common issues ranked by frequency
This approach finds template-level issues that affect all pages of that type.`,
inputSchema: {
type: 'object',
properties: {
plan: {
type: 'object',
description: 'The audit plan from plan_audit tool',
},
routeTypes: {
type: 'array',
items: { type: 'string' },
description: 'Specific route types to sample (default: all). E.g., ["job_detail", "category"]',
},
samplesOverride: {
type: 'object',
description: 'Override sample counts per route type. E.g., {"job_detail": 10, "category": 5}',
},
concurrency: {
type: 'number',
description: 'Concurrent page analyses (default: 2)',
},
},
required: ['plan'],
},
},
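  // Example arguments for 'sample_pages' (hypothetical overrides; `auditPlan` is the object returned by plan_audit):
  //   { plan: auditPlan, routeTypes: ['job_detail', 'category'], samplesOverride: { job_detail: 10 }, concurrency: 2 }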
{
name: 'run_audit',
description: `**FULL AUDIT** - Run a complete SEO audit with automatic sampling, caching, and report generation.
This is the main audit tool that orchestrates the entire workflow:
1. Discovers and analyzes sitemaps
2. Identifies route patterns and creates sampling strategy
3. Captures sample pages (cached - only fetches once)
4. Analyzes SEO, structured data, technical issues, social graph
5. Generates prioritized recommendations
6. Saves everything to the reports/[sitename]/ folder
The audit captures pages ONCE and stores:
- HTML snapshots for inspection
- Full analysis data as JSON
- Final report as JSON + Markdown
Returns comprehensive findings and prioritized fix recommendations.`,
inputSchema: {
type: 'object',
properties: {
baseUrl: {
type: 'string',
description: 'The base URL of the site to audit (e.g., https://talent.com)',
},
reportsDir: {
type: 'string',
description: 'Directory to save reports (default: ./reports)',
},
maxSitemaps: {
type: 'number',
description: 'Maximum sitemaps to process (default: 15)',
},
maxUrlsPerSitemap: {
type: 'number',
description: 'Maximum URLs per sitemap for pattern analysis (default: 2000)',
},
samplesPerRouteType: {
type: 'number',
description: 'Override samples per route type (default: auto based on route importance)',
},
concurrency: {
type: 'number',
description: 'Concurrent page captures (default: 2)',
},
},
required: ['baseUrl'],
},
},
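  // Example arguments for 'run_audit' (hypothetical base URL and limits):
  //   { baseUrl: 'https://example.com', reportsDir: './reports', maxSitemaps: 10, concurrency: 2 }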
];
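
/**
 * Loose local shape of the entries in TOOL_DEFINITIONS above. A minimal
 * sketch for type-checking convenience; the MCP SDK's own Tool type, if
 * used at the registration site, may be stricter than this.
 */
export interface ToolDefinition {
  name: string;
  description: string;
  inputSchema: {
    type: string;
    properties: Record<string, unknown>;
    required: string[];
  };
}

/**
 * Convenience lookup for dispatching a tool call by name. Assumes tool
 * names in TOOL_DEFINITIONS are unique, which holds for the array above.
 * Usage: getToolDefinition('run_audit')?.inputSchema.required // ['baseUrl']
 */
export function getToolDefinition(name: string): ToolDefinition | undefined {
  return TOOL_DEFINITIONS.find((tool) => tool.name === name);
}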