generate_robots
Generate robots.txt files to control web crawler access by blocking specific bots, including AI crawlers, and adding sitemap URLs with custom rules.
Instructions
Generate a robots.txt file with specified blocked bots, sitemap URLs, and custom rules.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| blocked_bots | No | Array of bot user-agent strings to block (e.g. ['GPTBot', 'ClaudeBot', 'CCBot']). Use list_ai_bots to see available user-agents. | |
| block_all_ai | No | If true, blocks all known AI crawlers (but not search engines like Googlebot) | |
| sitemap_urls | No | Array of sitemap URLs to include (e.g. ['https://example.com/sitemap.xml']) | |
| custom_rules | No | Additional custom robots.txt rules to append (raw robots.txt syntax) | |
Implementation Reference
- mcp-server/src/index.ts:585-615 (handler) — The handler for the generate_robots tool, which processes input parameters, builds the blocked state, and calls the generateRobotsTxt utility.
async ({ blocked_bots, block_all_ai, sitemap_urls, custom_rules }) => { const blockedState: BotToggleState = {}; if (block_all_ai) { for (const bot of AI_BOTS) { blockedState[bot.userAgent] = true; } } if (blocked_bots) { for (const ua of blocked_bots) { blockedState[ua] = true; } } const generated = generateRobotsTxt( blockedState, sitemap_urls || [], custom_rules || "" ); const blockedCount = Object.values(blockedState).filter(Boolean).length; return { content: [ { type: "text" as const, text: `# Generated robots.txt\n\nBlocking **${blockedCount}** bot(s).\n\n\`\`\`\n${generated}\`\`\``, }, ], }; - mcp-server/src/index.ts:556-584 (registration)Registration of the generate_robots tool with its input schema definition using Zod.
server.tool( "generate_robots", "Generate a robots.txt file with specified blocked bots, sitemap URLs, and custom rules.", { blocked_bots: z .array(z.string()) .optional() .describe( "Array of bot user-agent strings to block (e.g. ['GPTBot', 'ClaudeBot', 'CCBot']). Use list_ai_bots to see available user-agents." ), block_all_ai: z .boolean() .optional() .describe( "If true, blocks all known AI crawlers (but not search engines like Googlebot)" ), sitemap_urls: z .array(z.string()) .optional() .describe( "Array of sitemap URLs to include (e.g. ['https://example.com/sitemap.xml'])" ), custom_rules: z .string() .optional() .describe( "Additional custom robots.txt rules to append (raw robots.txt syntax)" ), }, - src/lib/parser.ts:88-130 (helper)Core helper function that implements the logic for generating the robots.txt file content.
export function generateRobotsTxt( blockedBots: BotToggleState, sitemapUrls: string[], customRules: string ): string { const lines: string[] = []; lines.push("# robots.txt generated by robotstxt.ai"); lines.push(`# Generated: ${new Date().toISOString().split("T")[0]}`); lines.push(""); // Wildcard rule — allow all by default lines.push("# Allow all crawlers by default"); lines.push("User-agent: *"); lines.push("Allow: /"); lines.push(""); // Group blocked bots const blocked = Object.entries(blockedBots).filter(([, isBlocked]) => isBlocked); if (blocked.length > 0) { lines.push("# AI Crawlers - Blocked"); for (const [userAgent] of blocked) { lines.push(`User-agent: ${userAgent}`); lines.push("Disallow: /"); lines.push(""); } } // Custom rules if (customRules.trim()) { lines.push("# Custom Rules"); lines.push(customRules.trim()); lines.push(""); } // Sitemaps if (sitemapUrls.length > 0) { lines.push("# Sitemaps"); for (const url of sitemapUrls) { if (url.trim()) { lines.push(`Sitemap: ${url.trim()}`); }