Skip to main content
Glama
RichardDillman

SEO Audit MCP Server

sitemap.ts (4.94 kB)
// src/tools/sitemap.ts
// Sitemap and robots.txt analysis tool

import type {
  RobotsAnalysis,
  SitemapAnalysis,
  AnalyzeSitemapInput,
} from '../types/index.js';
import {
  fetchRobots,
  fetchSitemap,
  discoverSitemaps,
  isUrlBlocked,
} from '../utils/http.js';

/** Milliseconds in one day; converts a lastmod age into whole days. */
const MS_PER_DAY = 1000 * 60 * 60 * 24;

/** A sitemap whose newest lastmod is more than this many days old gets a warning. */
const STALE_SITEMAP_DAYS = 7;

/** Maximum number of URLs returned in each `sample*Urls` list of the result. */
const MAX_SAMPLE_URLS = 10;

/** Category-pattern matching stops once this many distinct URLs were collected. */
const MAX_CATEGORY_MATCHES = 20;

/**
 * URL shapes treated as category/landing pages.
 * None of the patterns use the `g`/`y` flags, so sharing them across calls
 * keeps `RegExp.prototype.test` stateless.
 */
const CATEGORY_URL_PATTERNS: readonly RegExp[] = [
  /\/jobs\/[a-z-]+\/?$/i,
  /\/category\/[a-z-]+/i,
  /\/location\/[a-z-]+/i,
  /\/[a-z]+-jobs\/?$/i,
  /\/jobs-in-[a-z-]+/i,
];

/**
 * Combined result of the robots.txt and sitemap analysis for one site.
 */
export interface SiteAccessAnalysis {
  /** Base URL the analysis was run against (copied from the input). */
  baseUrl: string;
  /** ISO-8601 timestamp taken when the result object was built. */
  timestamp: string;
  /** Result of `fetchRobots` for the base URL. */
  robots: RobotsAnalysis;
  /** Every sitemap returned by `discoverSitemaps`. */
  sitemaps: SitemapAnalysis[];
  summary: {
    robotsFound: boolean;
    /** Number of discovered sitemaps whose `found` flag is set. */
    sitemapsFound: number;
    /** Sum of `urlCount` over all discovered sitemaps. */
    totalUrlsInSitemaps: number;
    /** Total number of job URLs collected from all discovered sitemaps. */
    jobUrlsFound: number;
    issues: string[];
    warnings: string[];
    recommendations: string[];
  };
  // Sample URLs for further analysis
  sampleJobUrls: string[];
  sampleCategoryUrls: string[];
}

/**
 * Append every item to `target` one element at a time.
 *
 * `target.push(...items)` passes each element as a separate call argument and
 * can throw a RangeError for very large arrays; a plain loop has no such limit.
 */
function appendAll(target: string[], items: Iterable<string>): void {
  for (const item of items) {
    target.push(item);
  }
}

/**
 * Comprehensive sitemap and robots.txt analysis.
 *
 * Fetches robots.txt, discovers the site's sitemaps, merges their issues and
 * warnings, and derives recommendations plus sample job/category URLs.
 *
 * @param input - Analysis options; only `baseUrl` is consumed by this function.
 * @returns The aggregated analysis for the site.
 */
export async function analyzeSiteAccess(input: AnalyzeSitemapInput): Promise<SiteAccessAnalysis> {
  // NOTE(review): `includeSitemapUrls` and `maxUrls` used to be destructured
  // here (defaults `true` / `1000`) but were never read, so they had no effect.
  // Wire them into the fetch helpers if they are meant to limit the analysis.
  const { baseUrl } = input;

  console.error(`Analyzing site access for: ${baseUrl}`);

  // Fetch robots.txt
  const robots = await fetchRobots(baseUrl);

  // Discover and fetch all sitemaps
  const sitemapDiscovery = await discoverSitemaps(baseUrl);
  const sitemaps = sitemapDiscovery.allSitemaps;

  // Aggregate stats, starting from what robots.txt already reported
  const issues: string[] = [...robots.issues];
  const warnings: string[] = [...robots.warnings];
  const recommendations: string[] = [];
  const jobUrls: string[] = [];
  let totalUrls = 0;

  for (const sitemap of sitemaps) {
    appendAll(issues, sitemap.issues);
    appendAll(warnings, sitemap.warnings);
    appendAll(jobUrls, sitemap.jobUrls);
    totalUrls += sitemap.urlCount;
  }

  // Check for common issues
  if (sitemaps.length === 0) {
    issues.push('No XML sitemap found');
    recommendations.push('Create and submit an XML sitemap to help search engines discover your pages');
  }

  if (robots.found && robots.sitemaps.length === 0) {
    warnings.push('robots.txt exists but does not reference any sitemap');
    recommendations.push('Add Sitemap: directive to robots.txt');
  }

  // Check if job URLs might be blocked. Only the first collected job URL is
  // probed; the explicit undefined guard replaces the former length check.
  const sampleJobUrl = jobUrls[0];
  if (robots.found && sampleJobUrl !== undefined && isUrlBlocked(sampleJobUrl, robots.rules)) {
    issues.push('Job URLs may be blocked by robots.txt');
    recommendations.push('Review robots.txt rules to ensure job pages are crawlable');
  }

  // Analyze URL patterns
  const sampleCategoryUrls = extractCategoryPatterns(
    sitemaps.flatMap(s => s.urls.map(u => u.loc))
  );

  // Check for job sitemap
  const hasJobSitemap = sitemaps.some(
    s => s.url.includes('job') || s.jobUrlCount > 0
  );

  if (!hasJobSitemap && jobUrls.length === 0) {
    recommendations.push('Consider creating a dedicated jobs sitemap for better indexing');
  }

  // Check sitemap freshness
  for (const sitemap of sitemaps) {
    if (!sitemap.newestLastmod) continue;

    const daysSinceUpdate = Math.floor(
      (Date.now() - new Date(sitemap.newestLastmod).getTime()) / MS_PER_DAY
    );

    // An unparseable lastmod produces NaN, which fails this comparison and is
    // therefore skipped without a warning.
    if (daysSinceUpdate > STALE_SITEMAP_DAYS) {
      warnings.push(`Sitemap ${sitemap.url} hasn't been updated in ${daysSinceUpdate} days`);
    }
  }

  // Indexing API recommendation for job boards (always added)
  recommendations.push('Consider implementing Google Indexing API for faster job posting indexation');

  return {
    baseUrl,
    timestamp: new Date().toISOString(),
    robots,
    sitemaps,
    summary: {
      robotsFound: robots.found,
      sitemapsFound: sitemaps.filter(s => s.found).length,
      totalUrlsInSitemaps: totalUrls,
      jobUrlsFound: jobUrls.length,
      issues,
      warnings,
      recommendations,
    },
    sampleJobUrls: jobUrls.slice(0, MAX_SAMPLE_URLS),
    sampleCategoryUrls: sampleCategoryUrls.slice(0, MAX_SAMPLE_URLS),
  };
}

/**
 * Extract likely category/landing page URLs from sitemap.
 *
 * @param urls - Candidate URLs, scanned in order.
 * @returns Distinct URLs matching any category pattern, in first-seen order;
 *   scanning stops as soon as 20 matches have been collected.
 */
function extractCategoryPatterns(urls: readonly string[]): string[] {
  const categoryUrls = new Set<string>();

  for (const url of urls) {
    if (CATEGORY_URL_PATTERNS.some(pattern => pattern.test(url))) {
      categoryUrls.add(url);
    }

    // Stop once we have enough samples
    if (categoryUrls.size >= MAX_CATEGORY_MATCHES) break;
  }

  return Array.from(categoryUrls);
}

/**
 * Analyze a specific sitemap.
 *
 * Thin wrapper that delegates to `fetchSitemap`.
 *
 * @param url - URL of the sitemap to analyze.
 */
export async function analyzeSitemap(url: string): Promise<SitemapAnalysis> {
  return fetchSitemap(url);
}

/**
 * Analyze robots.txt.
 *
 * Thin wrapper that delegates to `fetchRobots`.
 *
 * @param baseUrl - Base URL of the site whose robots.txt is analyzed.
 */
export async function analyzeRobots(baseUrl: string): Promise<RobotsAnalysis> {
  return fetchRobots(baseUrl);
}

export { fetchRobots, fetchSitemap, discoverSitemaps };

export default analyzeSiteAccess;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/RichardDillman/seo-audit-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.