
mcp-server-firecrawl

by mendableai
MIT License
#!/usr/bin/env node
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import {
  Tool,
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import FirecrawlApp, {
  type ScrapeParams,
  type MapParams,
  type CrawlParams,
  type FirecrawlDocument,
} from '@mendable/firecrawl-js';
import express, { Request, Response } from 'express';
import dotenv from 'dotenv';

dotenv.config();

// Tool definitions
const SCRAPE_TOOL: Tool = {
  name: 'firecrawl_scrape',
  description: `
Scrape content from a single URL with advanced options.

**Best for:** Single page content extraction, when you know exactly which page contains the information.
**Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).
**Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead).
**Prompt Example:** "Get the content of the page at https://example.com."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_scrape",
  "arguments": {
    "url": "https://example.com",
    "formats": ["markdown"]
  }
}
\`\`\`
**Returns:** Markdown, HTML, or other formats as specified.
`,
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL to scrape',
      },
      formats: {
        type: 'array',
        items: {
          type: 'string',
          enum: [
            'markdown',
            'html',
            'rawHtml',
            'screenshot',
            'links',
            'screenshot@fullPage',
            'extract',
          ],
        },
        default: ['markdown'],
        description: "Content formats to extract (default: ['markdown'])",
      },
      onlyMainContent: {
        type: 'boolean',
        description:
          'Extract only the main content, filtering out navigation, footers, etc.',
      },
      includeTags: {
        type: 'array',
        items: { type: 'string' },
        description: 'HTML tags to specifically include in extraction',
      },
      excludeTags: {
        type: 'array',
        items: { type: 'string' },
        description: 'HTML tags to exclude from extraction',
      },
      waitFor: {
        type: 'number',
        description: 'Time in milliseconds to wait for dynamic content to load',
      },
      timeout: {
        type: 'number',
        description: 'Maximum time in milliseconds to wait for the page to load',
      },
      actions: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            type: {
              type: 'string',
              enum: [
                'wait',
                'click',
                'screenshot',
                'write',
                'press',
                'scroll',
                'scrape',
                'executeJavascript',
              ],
              description: 'Type of action to perform',
            },
            selector: {
              type: 'string',
              description: 'CSS selector for the target element',
            },
            milliseconds: {
              type: 'number',
              description: 'Time to wait in milliseconds (for wait action)',
            },
            text: {
              type: 'string',
              description: 'Text to write (for write action)',
            },
            key: {
              type: 'string',
              description: 'Key to press (for press action)',
            },
            direction: {
              type: 'string',
              enum: ['up', 'down'],
              description: 'Scroll direction',
            },
            script: {
              type: 'string',
              description: 'JavaScript code to execute',
            },
            fullPage: {
              type: 'boolean',
              description: 'Take full page screenshot',
            },
          },
          required: ['type'],
        },
        description: 'List of actions to perform before scraping',
      },
      extract: {
        type: 'object',
        properties: {
          schema: {
            type: 'object',
            description: 'Schema for structured data extraction',
          },
          systemPrompt: {
            type: 'string',
            description: 'System prompt for LLM extraction',
          },
          prompt: {
            type: 'string',
            description: 'User prompt for LLM extraction',
          },
        },
        description: 'Configuration for structured data extraction',
      },
      mobile: {
        type: 'boolean',
        description: 'Use mobile viewport',
      },
      skipTlsVerification: {
        type: 'boolean',
        description: 'Skip TLS certificate verification',
      },
      removeBase64Images: {
        type: 'boolean',
        description: 'Remove base64 encoded images from output',
      },
      location: {
        type: 'object',
        properties: {
          country: {
            type: 'string',
            description: 'Country code for geolocation',
          },
          languages: {
            type: 'array',
            items: { type: 'string' },
            description: 'Language codes for content',
          },
        },
        description: 'Location settings for scraping',
      },
    },
    required: ['url'],
  },
};

const MAP_TOOL: Tool = {
  name: 'firecrawl_map',
  description: `
Map a website to discover all indexed URLs on the site.

**Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
**Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
**Common mistakes:** Using crawl to discover URLs instead of map.
**Prompt Example:** "List all URLs on example.com."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_map",
  "arguments": {
    "url": "https://example.com"
  }
}
\`\`\`
**Returns:** Array of URLs found on the site.
`,
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'Starting URL for URL discovery',
      },
      search: {
        type: 'string',
        description: 'Optional search term to filter URLs',
      },
      ignoreSitemap: {
        type: 'boolean',
        description: 'Skip sitemap.xml discovery and only use HTML links',
      },
      sitemapOnly: {
        type: 'boolean',
        description: 'Only use sitemap.xml for discovery, ignore HTML links',
      },
      includeSubdomains: {
        type: 'boolean',
        description: 'Include URLs from subdomains in results',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of URLs to return',
      },
    },
    required: ['url'],
  },
};

const CRAWL_TOOL: Tool = {
  name: 'firecrawl_crawl',
  description: `
Starts an asynchronous crawl job on a website and extracts content from all pages.

**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
**Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).
**Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_crawl",
  "arguments": {
    "url": "https://example.com/blog/*",
    "maxDepth": 2,
    "limit": 100,
    "allowExternalLinks": false,
    "deduplicateSimilarURLs": true
  }
}
\`\`\`
**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
`,
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'Starting URL for the crawl',
      },
      excludePaths: {
        type: 'array',
        items: { type: 'string' },
        description: 'URL paths to exclude from crawling',
      },
      includePaths: {
        type: 'array',
        items: { type: 'string' },
        description: 'Only crawl these URL paths',
      },
      maxDepth: {
        type: 'number',
        description: 'Maximum link depth to crawl',
      },
      ignoreSitemap: {
        type: 'boolean',
        description: 'Skip sitemap.xml discovery',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of pages to crawl',
      },
      allowBackwardLinks: {
        type: 'boolean',
        description: 'Allow crawling links that point to parent directories',
      },
      allowExternalLinks: {
        type: 'boolean',
        description: 'Allow crawling links to external domains',
      },
      webhook: {
        oneOf: [
          {
            type: 'string',
            description: 'Webhook URL to notify when crawl is complete',
          },
          {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'Webhook URL',
              },
              headers: {
                type: 'object',
                description: 'Custom headers for webhook requests',
              },
            },
            required: ['url'],
          },
        ],
      },
      deduplicateSimilarURLs: {
        type: 'boolean',
        description: 'Remove similar URLs during crawl',
      },
      ignoreQueryParameters: {
        type: 'boolean',
        description: 'Ignore query parameters when comparing URLs',
      },
      scrapeOptions: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: [
                'markdown',
                'html',
                'rawHtml',
                'screenshot',
                'links',
                'screenshot@fullPage',
                'extract',
              ],
            },
          },
          onlyMainContent: { type: 'boolean' },
          includeTags: { type: 'array', items: { type: 'string' } },
          excludeTags: { type: 'array', items: { type: 'string' } },
          waitFor: { type: 'number' },
        },
        description: 'Options for scraping each page',
      },
    },
    required: ['url'],
  },
};

const CHECK_CRAWL_STATUS_TOOL: Tool = {
  name: 'firecrawl_check_crawl_status',
  description: `
Check the status of a crawl job.

**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_check_crawl_status",
  "arguments": {
    "id": "550e8400-e29b-41d4-a716-446655440000"
  }
}
\`\`\`
**Returns:** Status and progress of the crawl job, including results if available.
`,
  inputSchema: {
    type: 'object',
    properties: {
      id: {
        type: 'string',
        description: 'Crawl job ID to check',
      },
    },
    required: ['id'],
  },
};

const SEARCH_TOOL: Tool = {
  name: 'firecrawl_search',
  description: `
Search the web and optionally extract content from search results.

**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).
**Common mistakes:** Using crawl or map for open-ended questions (use search instead).
**Prompt Example:** "Find the latest research papers on AI published in 2023."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_search",
  "arguments": {
    "query": "latest AI research papers 2023",
    "limit": 5,
    "lang": "en",
    "country": "us",
    "scrapeOptions": {
      "formats": ["markdown"],
      "onlyMainContent": true
    }
  }
}
\`\`\`
**Returns:** Array of search results (with optional scraped content).
`,
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query string',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of results to return (default: 5)',
      },
      lang: {
        type: 'string',
        description: 'Language code for search results (default: en)',
      },
      country: {
        type: 'string',
        description: 'Country code for search results (default: us)',
      },
      tbs: {
        type: 'string',
        description: 'Time-based search filter',
      },
      filter: {
        type: 'string',
        description: 'Search filter',
      },
      location: {
        type: 'object',
        properties: {
          country: {
            type: 'string',
            description: 'Country code for geolocation',
          },
          languages: {
            type: 'array',
            items: { type: 'string' },
            description: 'Language codes for content',
          },
        },
        description: 'Location settings for search',
      },
      scrapeOptions: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: ['markdown', 'html', 'rawHtml'],
            },
            description: 'Content formats to extract from search results',
          },
          onlyMainContent: {
            type: 'boolean',
            description: 'Extract only the main content from results',
          },
          waitFor: {
            type: 'number',
            description: 'Time in milliseconds to wait for dynamic content',
          },
        },
        description: 'Options for scraping search results',
      },
    },
    required: ['query'],
  },
};

const EXTRACT_TOOL: Tool = {
  name: 'firecrawl_extract',
  description: `
Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.

**Best for:** Extracting specific structured data like prices, names, details.
**Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.
**Arguments:**
- urls: Array of URLs to extract information from
- prompt: Custom prompt for the LLM extraction
- systemPrompt: System prompt to guide the LLM
- schema: JSON schema for structured data extraction
- allowExternalLinks: Allow extraction from external links
- enableWebSearch: Enable web search for additional context
- includeSubdomains: Include subdomains in extraction
**Prompt Example:** "Extract the product name, price, and description from these product pages."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_extract",
  "arguments": {
    "urls": ["https://example.com/page1", "https://example.com/page2"],
    "prompt": "Extract product information including name, price, and description",
    "systemPrompt": "You are a helpful assistant that extracts product information",
    "schema": {
      "type": "object",
      "properties": {
        "name": { "type": "string" },
        "price": { "type": "number" },
        "description": { "type": "string" }
      },
      "required": ["name", "price"]
    },
    "allowExternalLinks": false,
    "enableWebSearch": false,
    "includeSubdomains": false
  }
}
\`\`\`
**Returns:** Extracted structured data as defined by your schema.
`,
  inputSchema: {
    type: 'object',
    properties: {
      urls: {
        type: 'array',
        items: { type: 'string' },
        description: 'List of URLs to extract information from',
      },
      prompt: {
        type: 'string',
        description: 'Prompt for the LLM extraction',
      },
      systemPrompt: {
        type: 'string',
        description: 'System prompt for LLM extraction',
      },
      schema: {
        type: 'object',
        description: 'JSON schema for structured data extraction',
      },
      allowExternalLinks: {
        type: 'boolean',
        description: 'Allow extraction from external links',
      },
      enableWebSearch: {
        type: 'boolean',
        description: 'Enable web search for additional context',
      },
      includeSubdomains: {
        type: 'boolean',
        description: 'Include subdomains in extraction',
      },
    },
    required: ['urls'],
  },
};

const DEEP_RESEARCH_TOOL: Tool = {
  name: 'firecrawl_deep_research',
  description: `
Conduct deep web research on a query using intelligent crawling, search, and LLM analysis.

**Best for:** Complex research questions requiring multiple sources, in-depth analysis.
**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).
**Arguments:**
- query (string, required): The research question or topic to explore.
- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).
- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).
- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).
**Prompt Example:** "Research the environmental impact of electric vehicles versus gasoline vehicles."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_deep_research",
  "arguments": {
    "query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?",
    "maxDepth": 3,
    "timeLimit": 120,
    "maxUrls": 50
  }
}
\`\`\`
**Returns:** Final analysis generated by an LLM based on the research (data.finalAnalysis); may also include structured activities and sources used in the research process.
`,
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'The query to research',
      },
      maxDepth: {
        type: 'number',
        description: 'Maximum depth of research iterations (1-10)',
      },
      timeLimit: {
        type: 'number',
        description: 'Time limit in seconds (30-300)',
      },
      maxUrls: {
        type: 'number',
        description: 'Maximum number of URLs to analyze (1-1000)',
      },
    },
    required: ['query'],
  },
};

const GENERATE_LLMSTXT_TOOL: Tool = {
  name: 'firecrawl_generate_llmstxt',
  description: `
Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.

**Best for:** Creating machine-readable permission guidelines for AI models.
**Not recommended for:** General content extraction or research.
**Arguments:**
- url (string, required): The base URL of the website to analyze.
- maxUrls (number, optional): Max number of URLs to include (default: 10).
- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
**Prompt Example:** "Generate an LLMs.txt file for example.com."
**Usage Example:**
\`\`\`json
{
  "name": "firecrawl_generate_llmstxt",
  "arguments": {
    "url": "https://example.com",
    "maxUrls": 20,
    "showFullText": true
  }
}
\`\`\`
**Returns:** LLMs.txt file contents (and optionally llms-full.txt).
`,
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL to generate LLMs.txt from',
      },
      maxUrls: {
        type: 'number',
        description: 'Maximum number of URLs to process (1-100, default: 10)',
      },
      showFullText: {
        type: 'boolean',
        description: 'Whether to show the full LLMs-full.txt in the response',
      },
    },
    required: ['url'],
  },
};

/**
 * Parameters for LLMs.txt generation operations.
 */
interface GenerateLLMsTextParams {
  /**
   * Maximum number of URLs to process (1-100)
   * @default 10
   */
  maxUrls?: number;
  /**
   * Whether to show the full LLMs-full.txt in the response
   * @default false
   */
  showFullText?: boolean;
  /**
   * Experimental flag for streaming
   */
  __experimental_stream?: boolean;
}

/**
 * Response interface for LLMs.txt generation operations.
 */
// interface GenerateLLMsTextResponse {
//   success: boolean;
//   id: string;
// }

/**
 * Status response interface for LLMs.txt generation operations.
 */
// interface GenerateLLMsTextStatusResponse {
//   success: boolean;
//   data: {
//     llmstxt: string;
//     llmsfulltxt?: string;
//   };
//   status: 'processing' | 'completed' | 'failed';
//   error?: string;
//   expiresAt: string;
// }

interface StatusCheckOptions {
  id: string;
}

interface SearchOptions {
  query: string;
  limit?: number;
  lang?: string;
  country?: string;
  tbs?: string;
  filter?: string;
  location?: {
    country?: string;
    languages?: string[];
  };
  scrapeOptions?: {
    formats?: string[];
    onlyMainContent?: boolean;
    waitFor?: number;
    includeTags?: string[];
    excludeTags?: string[];
    timeout?: number;
  };
}

interface ExtractParams<T = any> {
  prompt?: string;
  systemPrompt?: string;
  schema?: T | object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
  origin?: string;
}

interface ExtractArgs {
  urls: string[];
  prompt?: string;
  systemPrompt?: string;
  schema?: object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
  origin?: string;
}

interface ExtractResponse<T = any> {
  success: boolean;
  data: T;
  error?: string;
  warning?: string;
  creditsUsed?: number;
}

// Type guards
function isScrapeOptions(
  args: unknown
): args is ScrapeParams & { url: string } {
  return (
    typeof args === 'object' &&
    args !== null &&
    'url' in args &&
    typeof (args as { url: unknown }).url === 'string'
  );
}

function isMapOptions(args: unknown): args is MapParams & { url: string } {
  return (
    typeof args === 'object' &&
    args !== null &&
    'url' in args &&
    typeof (args as { url: unknown }).url === 'string'
  );
}

function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } {
  return (
    typeof args === 'object' &&
    args !== null &&
    'url' in args &&
    typeof (args as { url: unknown }).url === 'string'
  );
}

function isStatusCheckOptions(args: unknown): args is StatusCheckOptions {
  return (
    typeof args === 'object' &&
    args !== null &&
    'id' in args &&
    typeof (args as { id: unknown }).id === 'string'
  );
}

function isSearchOptions(args: unknown): args is SearchOptions {
  return (
    typeof args === 'object' &&
    args !== null &&
    'query' in args &&
    typeof (args as { query: unknown }).query === 'string'
  );
}

function isExtractOptions(args: unknown): args is ExtractArgs {
  if (typeof args !== 'object' || args === null) return false;
  const { urls } = args as { urls?: unknown };
  return (
    Array.isArray(urls) &&
    urls.every((url): url is string => typeof url === 'string')
  );
}

function isGenerateLLMsTextOptions(
  args: unknown
): args is { url: string } & Partial<GenerateLLMsTextParams> {
  return (
    typeof args === 'object' &&
    args !== null &&
    'url' in args &&
    typeof (args as { url: unknown }).url === 'string'
  );
}

// Server implementation
const server = new Server(
  {
    name: 'firecrawl-mcp',
    version: '1.7.0',
  },
  {
    capabilities: {
      tools: {},
      logging: {},
    },
  }
);

// Optional API URL and key from the environment
const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;

// An API key is required when talking to the hosted Firecrawl API
// (no FIRECRAWL_API_URL set), except in cloud-service mode, where
// keys arrive per-request instead.
if (
  process.env.CLOUD_SERVICE !== 'true' &&
  !FIRECRAWL_API_URL &&
  !FIRECRAWL_API_KEY
) {
  console.error(
    'Error: FIRECRAWL_API_KEY environment variable is required when no FIRECRAWL_API_URL is set'
  );
  process.exit(1);
}

// Configuration for retries and monitoring
const CONFIG = {
  retry: {
    maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
    initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
    maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
    backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
  },
  credit: {
    warningThreshold:
      Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
    criticalThreshold:
      Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
  },
};

// Utility function for delay
function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

let isStdioTransport = false;

function safeLog(
  level:
    | 'error'
    | 'debug'
    | 'info'
    | 'notice'
    | 'warning'
    | 'critical'
    | 'alert'
    | 'emergency',
  data: any
): void {
  if (isStdioTransport) {
    // For stdio transport, log to stderr to avoid protocol interference
    console.error(
      `[${level}] ${typeof data === 'object' ? JSON.stringify(data) : data}`
    );
  } else {
    // For other transport types, use the normal logging mechanism
    server.sendLoggingMessage({ level, data });
  }
}

// Retry logic with exponential backoff
async function withRetry<T>(
  operation: () => Promise<T>,
  context: string,
  attempt = 1
): Promise<T> {
  try {
    return await operation();
  } catch (error) {
    const isRateLimit =
      error instanceof Error &&
      (error.message.includes('rate limit') || error.message.includes('429'));

    if (isRateLimit && attempt < CONFIG.retry.maxAttempts) {
      const delayMs = Math.min(
        CONFIG.retry.initialDelay *
          Math.pow(CONFIG.retry.backoffFactor, attempt - 1),
        CONFIG.retry.maxDelay
      );

      safeLog(
        'warning',
        `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`
      );

      await delay(delayMs);
      return withRetry(operation, context, attempt + 1);
    }

    throw error;
  }
}

// Tool handlers
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [
    SCRAPE_TOOL,
    MAP_TOOL,
    CRAWL_TOOL,
    CHECK_CRAWL_STATUS_TOOL,
    SEARCH_TOOL,
    EXTRACT_TOOL,
    DEEP_RESEARCH_TOOL,
    GENERATE_LLMSTXT_TOOL,
  ],
}));

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const startTime = Date.now();
  try {
    const { name, arguments: args } = request.params;

    // In cloud-service mode the API key comes from request metadata;
    // otherwise the key from the environment is used.
    const apiKey =
      process.env.CLOUD_SERVICE === 'true'
        ? (request.params._meta?.apiKey as string)
        : FIRECRAWL_API_KEY;

    if (process.env.CLOUD_SERVICE === 'true' && !apiKey) {
      throw new Error('No API key provided');
    }

    const client = new FirecrawlApp({
      apiKey,
      ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
    });

    // Log incoming request with timestamp
    safeLog(
      'info',
      `[${new Date().toISOString()}] Received request for tool: ${name}`
    );

    if (!args) {
      throw new Error('No arguments provided');
    }

    switch (name) {
      case 'firecrawl_scrape': {
        if (!isScrapeOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_scrape');
        }
        const { url, ...options } = args;
        try {
          const scrapeStartTime = Date.now();
          safeLog(
            'info',
            `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`
          );

          const response = await client.scrapeUrl(url, {
            ...options,
            // @ts-expect-error Extended API options including origin
            origin: 'mcp-server',
          });

          // Log performance metrics
          safeLog(
            'info',
            `Scrape completed in ${Date.now() - scrapeStartTime}ms`
          );

          if ('success' in response && !response.success) {
            throw new Error(response.error || 'Scraping failed');
          }

          // Default to markdown before assembling content, so an
          // unspecified formats array still yields markdown output
          if (!options.formats || options.formats.length === 0) {
            options.formats = ['markdown'];
          }

          // Format content based on requested formats
          const contentParts = [];

          if (options.formats?.includes('markdown') && response.markdown) {
            contentParts.push(response.markdown);
          }
          if (options.formats?.includes('html') && response.html) {
            contentParts.push(response.html);
          }
          if (options.formats?.includes('rawHtml') && response.rawHtml) {
            contentParts.push(response.rawHtml);
          }
          if (options.formats?.includes('links') && response.links) {
            contentParts.push(response.links.join('\n'));
          }
          if (options.formats?.includes('screenshot') && response.screenshot) {
            contentParts.push(response.screenshot);
          }
          if (options.formats?.includes('extract') && response.extract) {
            contentParts.push(JSON.stringify(response.extract, null, 2));
          }

          // Surface any warning returned by the API
          if (response.warning) {
            safeLog('warning', response.warning);
          }

          return {
            content: [
              {
                type: 'text',
                text: trimResponseText(
                  contentParts.join('\n\n') || 'No content available'
                ),
              },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      case 'firecrawl_map': {
        if (!isMapOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_map');
        }
        const { url, ...options } = args;
        const response = await client.mapUrl(url, {
          ...options,
          // @ts-expect-error Extended API options including origin
          origin: 'mcp-server',
        });
        if ('error' in response) {
          throw new Error(response.error);
        }
        if (!response.links) {
          throw new Error('No links received from Firecrawl API');
        }
        return {
          content: [
            { type: 'text', text: trimResponseText(response.links.join('\n')) },
          ],
          isError: false,
        };
      }

      case 'firecrawl_crawl': {
        if (!isCrawlOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_crawl');
        }
        const { url, ...options } = args;
        const response = await withRetry(
          async () =>
            // @ts-expect-error Extended API options including origin
            client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }),
          'crawl operation'
        );
        if (!response.success) {
          throw new Error(response.error);
        }
        return {
          content: [
            {
              type: 'text',
              text: trimResponseText(
                `Started crawl for ${url} with job ID: ${response.id}. Use firecrawl_check_crawl_status to check progress.`
              ),
            },
          ],
          isError: false,
        };
      }
      case 'firecrawl_check_crawl_status': {
        if (!isStatusCheckOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_check_crawl_status');
        }
        const response = await client.checkCrawlStatus(args.id);
        if (!response.success) {
          throw new Error(response.error);
        }
        const status = `Crawl Status:
Status: ${response.status}
Progress: ${response.completed}/${response.total}
Credits Used: ${response.creditsUsed}
Expires At: ${response.expiresAt}
${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}`;
        return {
          content: [{ type: 'text', text: trimResponseText(status) }],
          isError: false,
        };
      }

      case 'firecrawl_search': {
        if (!isSearchOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_search');
        }
        try {
          const response = await withRetry(
            async () =>
              client.search(args.query, { ...args, origin: 'mcp-server' }),
            'search operation'
          );

          if (!response.success) {
            throw new Error(
              `Search failed: ${response.error || 'Unknown error'}`
            );
          }

          // Format the results
          const results = response.data
            .map(
              (result) =>
                `URL: ${result.url}
Title: ${result.title || 'No title'}
Description: ${result.description || 'No description'}
${result.markdown ? `\nContent:\n${result.markdown}` : ''}`
            )
            .join('\n\n');

          return {
            content: [{ type: 'text', text: trimResponseText(results) }],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error
              ? error.message
              : `Search failed: ${JSON.stringify(error)}`;
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      case 'firecrawl_extract': {
        if (!isExtractOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_extract');
        }

        try {
          const extractStartTime = Date.now();

          safeLog(
            'info',
            `Starting extraction for URLs: ${args.urls.join(', ')}`
          );

          // Log if using self-hosted instance
          if (FIRECRAWL_API_URL) {
            safeLog('info', 'Using self-hosted instance for extraction');
          }

          const extractResponse = await withRetry(
            async () =>
              client.extract(args.urls, {
                prompt: args.prompt,
                systemPrompt: args.systemPrompt,
                schema: args.schema,
                allowExternalLinks: args.allowExternalLinks,
                enableWebSearch: args.enableWebSearch,
                includeSubdomains: args.includeSubdomains,
                origin: 'mcp-server',
              } as ExtractParams),
            'extract operation'
          );

          // Type guard for successful response
          if (!('success' in extractResponse) || !extractResponse.success) {
            throw new Error(extractResponse.error || 'Extraction failed');
          }

          const response = extractResponse as ExtractResponse;

          // Log performance metrics
          safeLog(
            'info',
            `Extraction completed in ${Date.now() - extractStartTime}ms`
          );

          const result = {
            content: [
              {
                type: 'text',
                text: trimResponseText(JSON.stringify(response.data, null, 2)),
              },
            ],
            isError: false,
          };

          // Surface any warning returned by the API
          if (response.warning) {
            safeLog('warning', response.warning);
          }

          return result;
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);

          // Special handling for self-hosted instance errors
          if (
            FIRECRAWL_API_URL &&
            errorMessage.toLowerCase().includes('not supported')
          ) {
            safeLog(
              'error',
              'Extraction is not supported by this self-hosted instance'
            );
            return {
              content: [
                {
                  type: 'text',
                  text: trimResponseText(
                    'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.'
                  ),
                },
              ],
              isError: true,
            };
          }
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      case 'firecrawl_deep_research': {
        if (!args || typeof args !== 'object' || !('query' in args)) {
          throw new Error('Invalid arguments for firecrawl_deep_research');
        }

        try {
          const researchStartTime = Date.now();
          safeLog('info', `Starting deep research for query: ${args.query}`);

          const response = await client.deepResearch(
            args.query as string,
            {
              maxDepth: args.maxDepth as number,
              timeLimit: args.timeLimit as number,
              maxUrls: args.maxUrls as number,
              // @ts-expect-error Extended API options including origin
              origin: 'mcp-server',
            },
            // Activity callback
            (activity) => {
              safeLog(
                'info',
                `Research activity: ${activity.message} (Depth: ${activity.depth})`
              );
            },
            // Source callback
            (source) => {
              safeLog(
                'info',
                `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`
              );
            }
          );

          // Log performance metrics
          safeLog(
            'info',
            `Deep research completed in ${Date.now() - researchStartTime}ms`
          );

          if (!response.success) {
            throw new Error(response.error || 'Deep research failed');
          }

          // Format the results
          const formattedResponse = {
            finalAnalysis: response.data.finalAnalysis,
            activities: response.data.activities,
            sources: response.data.sources,
          };

          return {
            content: [
              {
                type: 'text',
                text: trimResponseText(formattedResponse.finalAnalysis),
              },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      case 'firecrawl_generate_llmstxt': {
        if (!isGenerateLLMsTextOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
        }

        try {
          const { url, ...params } = args;
          const generateStartTime = Date.now();

          safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);

          // Start the generation process
          const response = await withRetry(
            async () =>
              // @ts-expect-error Extended API options including origin
              client.generateLLMsText(url, { ...params, origin: 'mcp-server' }),
            'LLMs.txt generation'
          );

          if (!response.success) {
            throw new Error(response.error || 'LLMs.txt generation failed');
          }

          // Log performance metrics
          safeLog(
            'info',
            `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms`
          );

          // Format the response
          let resultText = '';
          if ('data' in response) {
            resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`;
            if (args.showFullText && response.data.llmsfulltxt) {
              resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`;
            }
          }

          return {
            content: [{ type: 'text', text: trimResponseText(resultText) }],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      default:
        return {
          content: [
            { type: 'text', text: trimResponseText(`Unknown tool: ${name}`) },
          ],
          isError: true,
        };
    }
  } catch (error) {
    // Log detailed error information
    safeLog('error', {
      message: `Request failed: ${
        error instanceof Error ? error.message : String(error)
      }`,
      tool: request.params.name,
      arguments: request.params.arguments,
      timestamp: new Date().toISOString(),
      duration: Date.now() - startTime,
    });
    return {
      content: [
        {
          type: 'text',
          text: trimResponseText(
            `Error: ${error instanceof Error ? error.message : String(error)}`
          ),
        },
      ],
      isError: true,
    };
  } finally {
    // Log request completion with performance metrics
    safeLog('info', `Request completed in ${Date.now() - startTime}ms`);
  }
});

// Helper function to format results
function formatResults(data: FirecrawlDocument[]): string {
  return data
    .map((doc) => {
      const content = doc.markdown || doc.html || doc.rawHtml || 'No content';
      return `URL: ${doc.url || 'Unknown URL'}
Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
    })
    .join('\n\n');
}

// Utility function to trim trailing whitespace from text responses
// This prevents Claude API errors with "final assistant content cannot end with trailing whitespace"
function trimResponseText(text: string): string {
  return text.trim();
}

// Server startup
async function runLocalServer() {
  try {
    console.error('Initializing Firecrawl MCP Server...');

    const transport = new StdioServerTransport();

    // Detect if we're using stdio transport
    isStdioTransport = transport instanceof StdioServerTransport;
    if (isStdioTransport) {
      console.error(
        'Running in stdio mode, logging will be directed to stderr'
      );
    }

    await server.connect(transport);

    // Now that we're connected, we can send logging messages
    safeLog('info', 'Firecrawl MCP Server initialized successfully');
    safeLog(
      'info',
      `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`
    );

    console.error('Firecrawl MCP Server running on stdio');
  } catch (error) {
    console.error('Fatal error running server:', error);
    process.exit(1);
  }
}

async function runSSELocalServer() {
  let transport: SSEServerTransport | null = null;
  const app = express();

  app.get('/sse', async (req, res) => {
    transport = new SSEServerTransport(`/messages`, res);
    res.on('close', () => {
      transport = null;
    });
    await server.connect(transport);
  });

  // Endpoint for the client to POST messages
  // No express.json() middleware here - the transport parses the body itself
  app.post('/messages', (req, res) => {
    if (transport) {
      transport.handlePostMessage(req, res);
    } else {
      // Reject messages that arrive before an SSE connection is established
      res.status(400).send('No active SSE connection');
    }
  });

  const PORT = process.env.PORT || 3000;
  console.log('Starting server on port', PORT);
  try {
    app.listen(PORT, () => {
      console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
      console.log(`SSE endpoint: http://localhost:${PORT}/sse`);
      console.log(`Message endpoint: http://localhost:${PORT}/messages`);
    });
  } catch (error) {
    console.error('Error starting server:', error);
  }
}

async function runSSECloudServer() {
  const transports: { [sessionId: string]: SSEServerTransport } = {};
  const app = express();

  app.get('/health', (req, res) => {
    res.status(200).send('OK');
  });

  app.get('/:apiKey/sse', async (req, res) => {
    const apiKey = req.params.apiKey;
    const transport = new SSEServerTransport(`/${apiKey}/messages`, res);
    // TODO: validate the API key and close the connection if invalid
    const compositeKey = `${apiKey}-${transport.sessionId}`;
    transports[compositeKey] = transport;
    res.on('close', () => {
      delete transports[compositeKey];
    });
    await server.connect(transport);
  });

  // Endpoint for the client to POST messages. Here express.json() is needed
  // so the API key from the path can be injected into params._meta, where
  // the tool handler picks it up.
  app.post(
    '/:apiKey/messages',
    express.json(),
    async (req: Request, res: Response) => {
      const apiKey = req.params.apiKey;
      const body = req.body;
      const enrichedBody = {
        ...body,
      };
      if (enrichedBody && enrichedBody.params && !enrichedBody.params._meta) {
        enrichedBody.params._meta = { apiKey };
      } else if (
        enrichedBody &&
        enrichedBody.params &&
        enrichedBody.params._meta
      ) {
        enrichedBody.params._meta.apiKey = apiKey;
      }
      console.log('enrichedBody', enrichedBody);
      const sessionId = req.query.sessionId as string;
      const compositeKey = `${apiKey}-${sessionId}`;
      const transport = transports[compositeKey];
      if (transport) {
        await transport.handlePostMessage(req, res, enrichedBody);
      } else {
        res.status(400).send('No transport found for sessionId');
      }
    }
  );

  const PORT = 3000;
  app.listen(PORT, () => {
    console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
    console.log(`SSE endpoint: http://localhost:${PORT}/:apiKey/sse`);
    console.log(`Message endpoint: http://localhost:${PORT}/:apiKey/messages`);
  });
}

if (process.env.CLOUD_SERVICE === 'true') {
  runSSECloudServer().catch((error: any) => {
    console.error('Fatal error running server:', error);
    process.exit(1);
  });
} else if (process.env.SSE_LOCAL === 'true') {
  runSSELocalServer().catch((error: any) => {
    console.error('Fatal error running server:', error);
    process.exit(1);
  });
} else {
  runLocalServer().catch((error: any) => {
    console.error('Fatal error running server:', error);
    process.exit(1);
  });
}
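
For reference, here is one way a client can exercise this server end to end over stdio. This is a minimal sketch using the official MCP TypeScript SDK client; the dist/index.js entry-point path, the client name, and the placeholder API key are assumptions for illustration, not part of the server above.

import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

// Spawn the built server as a child process (entry-point path is an assumption)
const transport = new StdioClientTransport({
  command: 'node',
  args: ['dist/index.js'],
  env: { FIRECRAWL_API_KEY: 'fc-YOUR-API-KEY' },
});

const client = new Client(
  { name: 'example-client', version: '1.0.0' }, // hypothetical client identity
  { capabilities: {} }
);

await client.connect(transport);

// Invoke the scrape tool with the same arguments shown in SCRAPE_TOOL's usage example
const result = await client.callTool({
  name: 'firecrawl_scrape',
  arguments: { url: 'https://example.com', formats: ['markdown'] },
});
console.log(result.content);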

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mendableai/firecrawl-mcp-server'
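
The same endpoint can be called from code. A minimal TypeScript equivalent of the curl command above, assuming Node 18+ for the built-in fetch:

// Fetch the directory entry for this server and print the parsed JSON
const res = await fetch(
  'https://glama.ai/api/mcp/v1/servers/mendableai/firecrawl-mcp-server'
);
if (!res.ok) throw new Error(`Request failed: ${res.status}`);
console.log(await res.json());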

If you have feedback or need assistance with the MCP directory API, please join our Discord server.