firecrawl_generate_llmstxt
Generate a standardized llms.txt file that defines how large language models interact with your website. Specify allowed URLs and permissions for AI agents.
Instructions
Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.
Best for: Creating machine-readable permission guidelines for AI models.
Not recommended for: General content extraction or research.
Arguments:
url (string, required): The base URL of the website to analyze.
maxUrls (number, optional): Max number of URLs to include (default: 10).
showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
Prompt Example: "Generate an LLMs.txt file for example.com."
Usage Example:
{
"name": "firecrawl_generate_llmstxt",
"arguments": {
"url": "https://example.com",
"maxUrls": 20,
"showFullText": true
}
}
Returns: LLMs.txt file contents (and optionally llms-full.txt).
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| url | Yes | The URL to generate LLMs.txt from | |
| maxUrls | No | Maximum number of URLs to process (1-100, default: 10) | |
| showFullText | No | Whether to show the full LLMs-full.txt in the response | |
Implementation Reference
- src/index.ts:633-965 (registration) Registration of the firecrawl_generate_llmstxt tool in the tools list, defining its name and description. Also included in the server's tool list at line 964.
const GENERATE_LLMSTXT_TOOL: Tool = { name: 'firecrawl_generate_llmstxt', description: ` Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site. **Best for:** Creating machine-readable permission guidelines for AI models. **Not recommended for:** General content extraction or research. **Arguments:** - url (string, required): The base URL of the website to analyze. - maxUrls (number, optional): Max number of URLs to include (default: 10). - showFullText (boolean, optional): Whether to include llms-full.txt contents in the response. **Prompt Example:** "Generate an LLMs.txt file for example.com." **Usage Example:** \`\`\`json { "name": "firecrawl_generate_llmstxt", "arguments": { "url": "https://example.com", "maxUrls": 20, "showFullText": true } } \`\`\` **Returns:** LLMs.txt file contents (and optionally llms-full.txt). `, inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'The URL to generate LLMs.txt from', }, maxUrls: { type: 'number', description: 'Maximum number of URLs to process (1-100, default: 10)', }, showFullText: { type: 'boolean', description: 'Whether to show the full LLMs-full.txt in the response', }, }, required: ['url'], }, }; /** * Parameters for LLMs.txt generation operations. */ interface GenerateLLMsTextParams { /** * Maximum number of URLs to process (1-100) * @default 10 */ maxUrls?: number; /** * Whether to show the full LLMs-full.txt in the response * @default false */ showFullText?: boolean; /** * Experimental flag for streaming */ __experimental_stream?: boolean; } /** * Response interface for LLMs.txt generation operations. */ // interface GenerateLLMsTextResponse { // success: boolean; // id: string; // } /** * Status response interface for LLMs.txt generation operations. 
*/ // interface GenerateLLMsTextStatusResponse { // success: boolean; // data: { // llmstxt: string; // llmsfulltxt?: string; // }; // status: 'processing' | 'completed' | 'failed'; // error?: string; // expiresAt: string; // } interface StatusCheckOptions { id: string; } interface SearchOptions { query: string; limit?: number; lang?: string; country?: string; tbs?: string; filter?: string; location?: { country?: string; languages?: string[]; }; scrapeOptions?: { formats?: string[]; onlyMainContent?: boolean; waitFor?: number; includeTags?: string[]; excludeTags?: string[]; timeout?: number; }; } // Add after other interfaces interface ExtractParams<T = any> { prompt?: string; systemPrompt?: string; schema?: T | object; allowExternalLinks?: boolean; enableWebSearch?: boolean; includeSubdomains?: boolean; origin?: string; } interface ExtractArgs { urls: string[]; prompt?: string; systemPrompt?: string; schema?: object; allowExternalLinks?: boolean; enableWebSearch?: boolean; includeSubdomains?: boolean; origin?: string; } interface ExtractResponse<T = any> { success: boolean; data: T; error?: string; warning?: string; creditsUsed?: number; } // Type guards function isScrapeOptions( args: unknown ): args is ScrapeParams & { url: string } { return ( typeof args === 'object' && args !== null && 'url' in args && typeof (args as { url: unknown }).url === 'string' ); } function isMapOptions(args: unknown): args is MapParams & { url: string } { return ( typeof args === 'object' && args !== null && 'url' in args && typeof (args as { url: unknown }).url === 'string' ); } function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } { return ( typeof args === 'object' && args !== null && 'url' in args && typeof (args as { url: unknown }).url === 'string' ); } function isStatusCheckOptions(args: unknown): args is StatusCheckOptions { return ( typeof args === 'object' && args !== null && 'id' in args && typeof (args as { id: unknown }).id === 'string' ); } 
function isSearchOptions(args: unknown): args is SearchOptions { return ( typeof args === 'object' && args !== null && 'query' in args && typeof (args as { query: unknown }).query === 'string' ); } function isExtractOptions(args: unknown): args is ExtractArgs { if (typeof args !== 'object' || args === null) return false; const { urls } = args as { urls?: unknown }; return ( Array.isArray(urls) && urls.every((url): url is string => typeof url === 'string') ); } function isGenerateLLMsTextOptions( args: unknown ): args is { url: string } & Partial<GenerateLLMsTextParams> { return ( typeof args === 'object' && args !== null && 'url' in args && typeof (args as { url: unknown }).url === 'string' ); } // Server implementation const server = new Server( { name: 'firecrawl-mcp', version: '1.7.0', }, { capabilities: { tools: {}, logging: {}, }, } ); // Get optional API URL const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL; const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY; // Check if API key is required (only for cloud service) if ( process.env.CLOUD_SERVICE !== 'true' && !FIRECRAWL_API_URL && !FIRECRAWL_API_KEY ) { console.error( 'Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service' ); process.exit(1); } // Initialize Firecrawl client with optional API URL // Configuration for retries and monitoring const CONFIG = { retry: { maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3, initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000, maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000, backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2, }, credit: { warningThreshold: Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000, criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100, }, }; // Add utility function for delay function delay(ms: number): Promise<void> { return new Promise((resolve) => setTimeout(resolve, ms)); } let 
isStdioTransport = false; function safeLog( level: | 'error' | 'debug' | 'info' | 'notice' | 'warning' | 'critical' | 'alert' | 'emergency', data: any ): void { if (isStdioTransport) { // For stdio transport, log to stderr to avoid protocol interference console.error( `[${level}] ${typeof data === 'object' ? JSON.stringify(data) : data}` ); } else { // For other transport types, use the normal logging mechanism server.sendLoggingMessage({ level, data }); } } // Add retry logic with exponential backoff async function withRetry<T>( operation: () => Promise<T>, context: string, attempt = 1 ): Promise<T> { try { return await operation(); } catch (error) { const isRateLimit = error instanceof Error && (error.message.includes('rate limit') || error.message.includes('429')); if (isRateLimit && attempt < CONFIG.retry.maxAttempts) { const delayMs = Math.min( CONFIG.retry.initialDelay * Math.pow(CONFIG.retry.backoffFactor, attempt - 1), CONFIG.retry.maxDelay ); safeLog( 'warning', `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms` ); await delay(delayMs); return withRetry(operation, context, attempt + 1); } throw error; } } // Tool handlers server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ SCRAPE_TOOL, MAP_TOOL, CRAWL_TOOL, CHECK_CRAWL_STATUS_TOOL, SEARCH_TOOL, EXTRACT_TOOL, DEEP_RESEARCH_TOOL, GENERATE_LLMSTXT_TOOL, ], - src/index.ts:681-696 (schema)TypeScript interface GenerateLLMsTextParams defining the optional input parameters (maxUrls, showFullText, __experimental_stream) for the generate LLMs text operation.
interface GenerateLLMsTextParams { /** * Maximum number of URLs to process (1-100) * @default 10 */ maxUrls?: number; /** * Whether to show the full LLMs-full.txt in the response * @default false */ showFullText?: boolean; /** * Experimental flag for streaming */ __experimental_stream?: boolean; } - src/index.ts:652-675 (schema)JSON schema for the firecrawl_generate_llmstxt tool's input arguments, including required url and optional maxUrls and showFullText.
"showFullText": true } } \`\`\` **Returns:** LLMs.txt file contents (and optionally llms-full.txt). `, inputSchema: { type: 'object', properties: { url: { type: 'string', description: 'The URL to generate LLMs.txt from', }, maxUrls: { type: 'number', description: 'Maximum number of URLs to process (1-100, default: 10)', }, showFullText: { type: 'boolean', description: 'Whether to show the full LLMs-full.txt in the response', }, }, required: ['url'], }, - src/index.ts:1359-1411 (handler)Main handler implementation for firecrawl_generate_llmstxt - validates args, calls client.generateLLMsText(), formats response with llmstxt and optionally llmsfulltxt content.
case 'firecrawl_generate_llmstxt': { if (!isGenerateLLMsTextOptions(args)) { throw new Error('Invalid arguments for firecrawl_generate_llmstxt'); } try { const { url, ...params } = args; const generateStartTime = Date.now(); safeLog('info', `Starting LLMs.txt generation for URL: ${url}`); // Start the generation process const response = await withRetry( async () => // @ts-expect-error Extended API options including origin client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation' ); if (!response.success) { throw new Error(response.error || 'LLMs.txt generation failed'); } // Log performance metrics safeLog( 'info', `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms` ); // Format the response let resultText = ''; if ('data' in response) { resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`; if (args.showFullText && response.data.llmsfulltxt) { resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`; } } return { content: [{ type: 'text', text: trimResponseText(resultText) }], isError: false, }; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); return { content: [{ type: 'text', text: trimResponseText(errorMessage) }], isError: true, }; } } - src/index.ts:832-841 (helper)Type guard function isGenerateLLMsTextOptions that validates the arguments have the required 'url' string property before the handler executes.
/**
 * Type guard for the arguments of the `firecrawl_generate_llmstxt` tool.
 *
 * Accepts any non-null object that exposes a string `url` property. Only
 * `url` is inspected; the optional generation parameters are not validated
 * by this check.
 *
 * @param args - Raw tool arguments of unknown shape.
 * @returns `true` when `args` is an object whose `url` is a string.
 */
function isGenerateLLMsTextOptions(
  args: unknown
): args is { url: string } & Partial<GenerateLLMsTextParams> {
  // Reject primitives, undefined and null up front.
  if (typeof args !== 'object' || args === null) {
    return false;
  }

  // The property must be present (own or inherited) before its type is read.
  if (!('url' in args)) {
    return false;
  }

  const candidateUrl = (args as { url: unknown }).url;
  return typeof candidateUrl === 'string';
}