#!/usr/bin/env node
// mcp-server-firecrawl
// by mendableai
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
Tool,
CallToolRequestSchema,
ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import FirecrawlApp, {
type ScrapeParams,
type MapParams,
type CrawlParams,
type FirecrawlDocument,
} from '@mendable/firecrawl-js';
import PQueue from 'p-queue';
import dotenv from 'dotenv';
dotenv.config();
// Tool definitions
const SCRAPE_TOOL: Tool = {
name: 'firecrawl_scrape',
description:
'Scrape a single webpage with advanced options for content extraction. ' +
'Supports various formats including markdown, HTML, and screenshots. ' +
'Can execute custom actions like clicking or scrolling before scraping.',
inputSchema: {
type: 'object',
properties: {
url: {
type: 'string',
description: 'The URL to scrape',
},
formats: {
type: 'array',
items: {
type: 'string',
enum: [
'markdown',
'html',
'rawHtml',
'screenshot',
'links',
'screenshot@fullPage',
'extract',
],
},
description: "Content formats to extract (default: ['markdown'])",
},
onlyMainContent: {
type: 'boolean',
description:
'Extract only the main content, filtering out navigation, footers, etc.',
},
includeTags: {
type: 'array',
items: { type: 'string' },
description: 'HTML tags to specifically include in extraction',
},
excludeTags: {
type: 'array',
items: { type: 'string' },
description: 'HTML tags to exclude from extraction',
},
waitFor: {
type: 'number',
description: 'Time in milliseconds to wait for dynamic content to load',
},
timeout: {
type: 'number',
description:
'Maximum time in milliseconds to wait for the page to load',
},
actions: {
type: 'array',
items: {
type: 'object',
properties: {
type: {
type: 'string',
enum: [
'wait',
'click',
'screenshot',
'write',
'press',
'scroll',
'scrape',
'executeJavascript',
],
description: 'Type of action to perform',
},
selector: {
type: 'string',
description: 'CSS selector for the target element',
},
milliseconds: {
type: 'number',
description: 'Time to wait in milliseconds (for wait action)',
},
text: {
type: 'string',
description: 'Text to write (for write action)',
},
key: {
type: 'string',
description: 'Key to press (for press action)',
},
direction: {
type: 'string',
enum: ['up', 'down'],
description: 'Scroll direction',
},
script: {
type: 'string',
description: 'JavaScript code to execute',
},
fullPage: {
type: 'boolean',
description: 'Take full page screenshot',
},
},
required: ['type'],
},
description: 'List of actions to perform before scraping',
},
extract: {
type: 'object',
properties: {
schema: {
type: 'object',
description: 'Schema for structured data extraction',
},
systemPrompt: {
type: 'string',
description: 'System prompt for LLM extraction',
},
prompt: {
type: 'string',
description: 'User prompt for LLM extraction',
},
},
description: 'Configuration for structured data extraction',
},
mobile: {
type: 'boolean',
description: 'Use mobile viewport',
},
skipTlsVerification: {
type: 'boolean',
description: 'Skip TLS certificate verification',
},
removeBase64Images: {
type: 'boolean',
description: 'Remove base64 encoded images from output',
},
location: {
type: 'object',
properties: {
country: {
type: 'string',
description: 'Country code for geolocation',
},
languages: {
type: 'array',
items: { type: 'string' },
description: 'Language codes for content',
},
},
description: 'Location settings for scraping',
},
},
required: ['url'],
},
};
// JSON Schema definition for the firecrawl_map tool: URL discovery via
// sitemap.xml and/or on-page link extraction.
const MAP_TOOL: Tool = {
  name: 'firecrawl_map',
  description:
    'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'Starting URL for URL discovery',
      },
      search: {
        type: 'string',
        description: 'Optional search term to filter URLs',
      },
      ignoreSitemap: {
        type: 'boolean',
        description: 'Skip sitemap.xml discovery and only use HTML links',
      },
      sitemapOnly: {
        type: 'boolean',
        description: 'Only use sitemap.xml for discovery, ignore HTML links',
      },
      includeSubdomains: {
        type: 'boolean',
        description: 'Include URLs from subdomains in results',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of URLs to return',
      },
    },
    required: ['url'],
  },
};
// JSON Schema definition for the firecrawl_crawl tool: start an async crawl
// job; results are fetched later via firecrawl_check_crawl_status.
const CRAWL_TOOL: Tool = {
  name: 'firecrawl_crawl',
  description:
    'Start an asynchronous crawl of multiple pages from a starting URL. ' +
    'Supports depth control, path filtering, and webhook notifications.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'Starting URL for the crawl',
      },
      excludePaths: {
        type: 'array',
        items: { type: 'string' },
        description: 'URL paths to exclude from crawling',
      },
      includePaths: {
        type: 'array',
        items: { type: 'string' },
        description: 'Only crawl these URL paths',
      },
      maxDepth: {
        type: 'number',
        description: 'Maximum link depth to crawl',
      },
      ignoreSitemap: {
        type: 'boolean',
        description: 'Skip sitemap.xml discovery',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of pages to crawl',
      },
      allowBackwardLinks: {
        type: 'boolean',
        description: 'Allow crawling links that point to parent directories',
      },
      allowExternalLinks: {
        type: 'boolean',
        description: 'Allow crawling links to external domains',
      },
      // Webhook may be a bare URL string or an object with custom headers.
      webhook: {
        oneOf: [
          {
            type: 'string',
            description: 'Webhook URL to notify when crawl is complete',
          },
          {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'Webhook URL',
              },
              headers: {
                type: 'object',
                description: 'Custom headers for webhook requests',
              },
            },
            required: ['url'],
          },
        ],
      },
      deduplicateSimilarURLs: {
        type: 'boolean',
        description: 'Remove similar URLs during crawl',
      },
      ignoreQueryParameters: {
        type: 'boolean',
        description: 'Ignore query parameters when comparing URLs',
      },
      // Per-page scrape settings applied to every crawled page.
      scrapeOptions: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: [
                'markdown',
                'html',
                'rawHtml',
                'screenshot',
                'links',
                'screenshot@fullPage',
                'extract',
              ],
            },
          },
          onlyMainContent: {
            type: 'boolean',
          },
          includeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          excludeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          waitFor: {
            type: 'number',
          },
        },
        description: 'Options for scraping each page',
      },
    },
    required: ['url'],
  },
};
// JSON Schema definition for the firecrawl_batch_scrape tool: queue many URLs
// for scraping; poll with firecrawl_check_batch_status using the returned ID.
const BATCH_SCRAPE_TOOL: Tool = {
  name: 'firecrawl_batch_scrape',
  description:
    'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
  inputSchema: {
    type: 'object',
    properties: {
      urls: {
        type: 'array',
        items: { type: 'string' },
        description: 'List of URLs to scrape',
      },
      // Scrape settings applied uniformly to every URL in the batch.
      options: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: [
                'markdown',
                'html',
                'rawHtml',
                'screenshot',
                'links',
                'screenshot@fullPage',
                'extract',
              ],
            },
          },
          onlyMainContent: {
            type: 'boolean',
          },
          includeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          excludeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          waitFor: {
            type: 'number',
          },
        },
      },
    },
    required: ['urls'],
  },
};
// JSON Schema definition for firecrawl_check_batch_status: look up a batch
// job previously queued by firecrawl_batch_scrape.
const CHECK_BATCH_STATUS_TOOL: Tool = {
  name: 'firecrawl_check_batch_status',
  description: 'Check the status of a batch scraping job.',
  inputSchema: {
    type: 'object',
    properties: {
      id: {
        type: 'string',
        description: 'Batch job ID to check',
      },
    },
    required: ['id'],
  },
};
// JSON Schema definition for firecrawl_check_crawl_status: look up a crawl
// job started by firecrawl_crawl.
const CHECK_CRAWL_STATUS_TOOL: Tool = {
  name: 'firecrawl_check_crawl_status',
  description: 'Check the status of a crawl job.',
  inputSchema: {
    type: 'object',
    properties: {
      id: {
        type: 'string',
        description: 'Crawl job ID to check',
      },
    },
    required: ['id'],
  },
};
// JSON Schema definition for the firecrawl_search tool: web search, with
// optional scraping of each result when scrapeOptions is provided.
const SEARCH_TOOL: Tool = {
  name: 'firecrawl_search',
  description:
    'Search and retrieve content from web pages with optional scraping. ' +
    'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.',
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query string',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of results to return (default: 5)',
      },
      lang: {
        type: 'string',
        description: 'Language code for search results (default: en)',
      },
      country: {
        type: 'string',
        description: 'Country code for search results (default: us)',
      },
      tbs: {
        type: 'string',
        description: 'Time-based search filter',
      },
      filter: {
        type: 'string',
        description: 'Search filter',
      },
      location: {
        type: 'object',
        properties: {
          country: {
            type: 'string',
            description: 'Country code for geolocation',
          },
          languages: {
            type: 'array',
            items: { type: 'string' },
            description: 'Language codes for content',
          },
        },
        description: 'Location settings for search',
      },
      // When present, each search result is scraped with these settings.
      scrapeOptions: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: ['markdown', 'html', 'rawHtml'],
            },
            description: 'Content formats to extract from search results',
          },
          onlyMainContent: {
            type: 'boolean',
            description: 'Extract only the main content from results',
          },
          waitFor: {
            type: 'number',
            description: 'Time in milliseconds to wait for dynamic content',
          },
        },
        description: 'Options for scraping search results',
      },
    },
    required: ['query'],
  },
};
// JSON Schema definition for the firecrawl_extract tool: LLM-driven
// structured extraction across one or more URLs.
const EXTRACT_TOOL: Tool = {
  name: 'firecrawl_extract',
  description:
    'Extract structured information from web pages using LLM. ' +
    'Supports both cloud AI and self-hosted LLM extraction.',
  inputSchema: {
    type: 'object',
    properties: {
      urls: {
        type: 'array',
        items: { type: 'string' },
        description: 'List of URLs to extract information from',
      },
      prompt: {
        type: 'string',
        description: 'Prompt for the LLM extraction',
      },
      systemPrompt: {
        type: 'string',
        description: 'System prompt for LLM extraction',
      },
      schema: {
        type: 'object',
        description: 'JSON schema for structured data extraction',
      },
      allowExternalLinks: {
        type: 'boolean',
        description: 'Allow extraction from external links',
      },
      enableWebSearch: {
        type: 'boolean',
        description: 'Enable web search for additional context',
      },
      includeSubdomains: {
        type: 'boolean',
        description: 'Include subdomains in extraction',
      },
    },
    required: ['urls'],
  },
};
// JSON Schema definition for the firecrawl_deep_research tool: iterative
// crawl + search + analysis over a research query.
const DEEP_RESEARCH_TOOL: Tool = {
  name: 'firecrawl_deep_research',
  description:
    'Conduct deep research on a query using web crawling, search, and AI analysis.',
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'The query to research',
      },
      maxDepth: {
        type: 'number',
        description: 'Maximum depth of research iterations (1-10)',
      },
      timeLimit: {
        type: 'number',
        description: 'Time limit in seconds (30-300)',
      },
      maxUrls: {
        type: 'number',
        description: 'Maximum number of URLs to analyze (1-1000)',
      },
    },
    required: ['query'],
  },
};
// JSON Schema definition for the firecrawl_generate_llmstxt tool: produce an
// LLMs.txt (and optionally LLMs-full.txt) for a site.
const GENERATE_LLMSTXT_TOOL: Tool = {
  name: 'firecrawl_generate_llmstxt',
  description:
    'Generate standardized LLMs.txt file for a given URL, which provides context about how LLMs should interact with the website.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL to generate LLMs.txt from',
      },
      maxUrls: {
        type: 'number',
        description: 'Maximum number of URLs to process (1-100, default: 10)',
      },
      showFullText: {
        type: 'boolean',
        description: 'Whether to show the full LLMs-full.txt in the response',
      },
    },
    required: ['url'],
  },
};
// Type definitions
// Validated arguments for firecrawl_batch_scrape: the URL list plus
// per-URL scrape options (everything in ScrapeParams except `url`).
interface BatchScrapeOptions {
  urls: string[];
  options?: Omit<ScrapeParams, 'url'>;
}
/**
 * Parameters for LLMs.txt generation operations, forwarded to the
 * Firecrawl client's generateLLMsText call.
 */
interface GenerateLLMsTextParams {
  /**
   * Maximum number of URLs to process (1-100)
   * @default 10
   */
  maxUrls?: number;
  /**
   * Whether to show the full LLMs-full.txt in the response
   * @default false
   */
  showFullText?: boolean;
  /**
   * Experimental flag for streaming
   */
  __experimental_stream?: boolean;
}
/**
* Response interface for LLMs.txt generation operations.
*/
// interface GenerateLLMsTextResponse {
// success: boolean;
// id: string;
// }
/**
* Status response interface for LLMs.txt generation operations.
*/
// interface GenerateLLMsTextStatusResponse {
// success: boolean;
// data: {
// llmstxt: string;
// llmsfulltxt?: string;
// };
// status: 'processing' | 'completed' | 'failed';
// error?: string;
// expiresAt: string;
// }
// Arguments for the job-status tools (firecrawl_check_batch_status and
// firecrawl_check_crawl_status): just the job ID.
interface StatusCheckOptions {
  id: string;
}
// Validated arguments for firecrawl_search; mirrors the SEARCH_TOOL schema.
interface SearchOptions {
  query: string;
  limit?: number;
  lang?: string;
  country?: string;
  tbs?: string;
  filter?: string;
  location?: {
    country?: string;
    languages?: string[];
  };
  // When set, each search result is also scraped with these options.
  scrapeOptions?: {
    formats?: string[];
    onlyMainContent?: boolean;
    waitFor?: number;
  };
}
// Extraction parameters passed to client.extract. This is a local shape
// (the SDK's own ExtractParams type is not imported here); `origin` tags
// requests as coming from this MCP server.
interface ExtractParams<T = any> {
  prompt?: string;
  systemPrompt?: string;
  schema?: T | object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
  origin?: string;
}
// Validated arguments for firecrawl_extract: ExtractParams plus the
// required list of URLs to extract from.
interface ExtractArgs {
  urls: string[];
  prompt?: string;
  systemPrompt?: string;
  schema?: object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
  origin?: string;
}
// Shape of a successful/failed extract response from the Firecrawl API.
// `creditsUsed` is only present on cloud responses (see hasCredits).
interface ExtractResponse<T = any> {
  success: boolean;
  data: T;
  error?: string;
  warning?: string;
  creditsUsed?: number;
}
// Type guards
/**
 * Type guard for firecrawl_scrape arguments.
 * Accepts any non-null object carrying a string `url` property; all other
 * scrape options are left to the API to validate.
 */
function isScrapeOptions(
  args: unknown
): args is ScrapeParams & { url: string } {
  if (typeof args !== 'object' || args === null) return false;
  const candidate = args as { url?: unknown };
  return typeof candidate.url === 'string';
}
/**
 * Type guard for firecrawl_map arguments.
 * Requires a non-null object with a string `url`; other map options are
 * passed through unchecked.
 */
function isMapOptions(args: unknown): args is MapParams & { url: string } {
  if (typeof args !== 'object' || args === null) return false;
  return typeof (args as { url?: unknown }).url === 'string';
}
/**
 * Type guard for firecrawl_crawl arguments.
 * Requires a non-null object with a string `url`; remaining crawl options
 * are validated server-side.
 */
function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } {
  if (typeof args !== 'object' || args === null) return false;
  return typeof (args as { url?: unknown }).url === 'string';
}
/**
 * Type guard for firecrawl_batch_scrape arguments.
 * Requires `urls` to be an array whose every element is a string; an empty
 * array is accepted.
 */
function isBatchScrapeOptions(args: unknown): args is BatchScrapeOptions {
  if (typeof args !== 'object' || args === null) return false;
  const { urls } = args as { urls?: unknown };
  if (!Array.isArray(urls)) return false;
  return urls.every((entry) => typeof entry === 'string');
}
/**
 * Type guard for the status-check tools' arguments.
 * Requires a non-null object with a string `id`.
 */
function isStatusCheckOptions(args: unknown): args is StatusCheckOptions {
  if (typeof args !== 'object' || args === null) return false;
  return typeof (args as { id?: unknown }).id === 'string';
}
/**
 * Type guard for firecrawl_search arguments.
 * Requires a non-null object with a string `query`; the optional filters
 * and scrapeOptions are not validated here.
 */
function isSearchOptions(args: unknown): args is SearchOptions {
  if (typeof args !== 'object' || args === null) return false;
  return typeof (args as { query?: unknown }).query === 'string';
}
/**
 * Type guard for firecrawl_extract arguments.
 * Requires `urls` to be an array of strings; an empty array is accepted.
 */
function isExtractOptions(args: unknown): args is ExtractArgs {
  return (
    typeof args === 'object' &&
    args !== null &&
    Array.isArray((args as { urls?: unknown }).urls) &&
    (args as { urls: unknown[] }).urls.every(
      (entry) => typeof entry === 'string'
    )
  );
}
/**
 * Type guard for firecrawl_generate_llmstxt arguments.
 * Requires a non-null object with a string `url`; maxUrls/showFullText
 * remain optional and unchecked.
 */
function isGenerateLLMsTextOptions(
  args: unknown
): args is { url: string } & Partial<GenerateLLMsTextParams> {
  if (typeof args !== 'object' || args === null) return false;
  return typeof (args as { url?: unknown }).url === 'string';
}
// Server implementation
// MCP server identity and advertised capabilities (tool calls + logging).
const server = new Server(
  {
    name: 'firecrawl-mcp',
    version: '1.7.0',
  },
  {
    capabilities: {
      tools: {},
      logging: {},
    },
  }
);
// Get optional API URL
const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
// Check if API key is required (only for cloud service)
// A self-hosted instance (FIRECRAWL_API_URL set) may run without a key;
// the cloud service always requires one.
if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) {
  console.error(
    'Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service'
  );
  process.exit(1);
}
// Initialize FireCrawl client with optional API URL
const client = new FirecrawlApp({
  apiKey: FIRECRAWL_API_KEY || '',
  ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
});
// Configuration for retries and monitoring, all overridable via env vars.
// Retry settings apply only to rate-limit errors (see withRetry).
const CONFIG = {
  retry: {
    maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
    initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
    maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
    backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
  },
  credit: {
    // NOTE(review): the default warning threshold (1000) is higher than the
    // default critical threshold (100) even though both are compared against
    // the same running usage total — confirm which ordering is intended.
    warningThreshold:
      Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
    criticalThreshold:
      Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
  },
};
// Add credit tracking
// Running total of Firecrawl credits consumed by this process; only
// updated when using the cloud API (see updateCreditUsage callers).
interface CreditUsage {
  total: number;
  lastCheck: number;
}
const creditUsage: CreditUsage = {
  total: 0,
  lastCheck: Date.now(),
};
/**
 * Resolve after `ms` milliseconds; used for retry backoff pauses.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
// Set to true once the stdio transport is chosen (see runServer).
let isStdioTransport = false;

/**
 * Emit a log message without corrupting the MCP wire protocol.
 * On stdio, stdout carries protocol frames, so log lines go to stderr;
 * otherwise the server's protocol-level logging channel is used.
 */
function safeLog(
  level:
    | 'error'
    | 'debug'
    | 'info'
    | 'notice'
    | 'warning'
    | 'critical'
    | 'alert'
    | 'emergency',
  data: any
): void {
  if (!isStdioTransport) {
    // Non-stdio transports can use the normal logging mechanism.
    server.sendLoggingMessage({ level, data });
    return;
  }
  // stdio transport: log to stderr to avoid protocol interference.
  const rendered = typeof data === 'object' ? JSON.stringify(data) : data;
  console.error(`[${level}] ${rendered}`);
}
// Add retry logic with exponential backoff
async function withRetry<T>(
operation: () => Promise<T>,
context: string,
attempt = 1
): Promise<T> {
try {
return await operation();
} catch (error) {
const isRateLimit =
error instanceof Error &&
(error.message.includes('rate limit') || error.message.includes('429'));
if (isRateLimit && attempt < CONFIG.retry.maxAttempts) {
const delayMs = Math.min(
CONFIG.retry.initialDelay *
Math.pow(CONFIG.retry.backoffFactor, attempt - 1),
CONFIG.retry.maxDelay
);
safeLog(
'warning',
`Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`
);
await delay(delayMs);
return withRetry(operation, context, attempt + 1);
}
throw error;
}
}
// Add credit monitoring
/**
 * Accumulate credits consumed by cloud API calls and log threshold alerts.
 *
 * Fix: the original compared the running total against the critical
 * threshold first, but the default critical threshold (100) is LOWER than
 * the default warning threshold (1000) — so the warning branch was
 * unreachable and any usage >= 100 was logged as CRITICAL. Normalize so the
 * lower of the two thresholds warns and the higher escalates to critical,
 * regardless of how the env vars are configured.
 *
 * @param creditsUsed credits consumed by the operation just completed
 */
async function updateCreditUsage(creditsUsed: number): Promise<void> {
  creditUsage.total += creditsUsed;

  // Log credit usage
  safeLog('info', `Credit usage: ${creditUsage.total} credits used total`);

  // Treat the smaller configured value as the warning level and the larger
  // as the critical level, so escalation order is always sensible.
  const warnAt = Math.min(
    CONFIG.credit.warningThreshold,
    CONFIG.credit.criticalThreshold
  );
  const criticalAt = Math.max(
    CONFIG.credit.warningThreshold,
    CONFIG.credit.criticalThreshold
  );

  if (creditUsage.total >= criticalAt) {
    safeLog('error', `CRITICAL: Credit usage has reached ${creditUsage.total}`);
  } else if (creditUsage.total >= warnAt) {
    safeLog(
      'warning',
      `WARNING: Credit usage has reached ${creditUsage.total}`
    );
  }
}
// Add before server implementation
interface QueuedBatchOperation {
id: string;
urls: string[];
options?: any;
status: 'pending' | 'processing' | 'completed' | 'failed';
progress: {
completed: number;
total: number;
};
result?: any;
error?: string;
}
// Initialize queue system
// concurrency: 1 — batch jobs are submitted to the API one at a time.
const batchQueue = new PQueue({ concurrency: 1 });
const batchOperations = new Map<string, QueuedBatchOperation>();
// Monotonic counter used to mint local batch operation IDs.
let operationCounter = 0;
/**
 * Submit a queued batch scrape to the Firecrawl batch API.
 * Mutates `operation` in place (status, result, error); never throws.
 * NOTE(review): progress.completed is never advanced anywhere in this file,
 * so status reports always show 0/<total> — confirm whether per-URL
 * progress tracking was intended.
 */
async function processBatchOperation(
  operation: QueuedBatchOperation
): Promise<void> {
  try {
    operation.status = 'processing';
    let totalCreditsUsed = 0;

    // Use library's built-in batch processing
    const response = await withRetry(
      async () =>
        client.asyncBatchScrapeUrls(operation.urls, operation.options),
      `batch ${operation.id} processing`
    );

    if (!response.success) {
      throw new Error(response.error || 'Batch operation failed');
    }

    // Track credits if using cloud API
    if (!FIRECRAWL_API_URL && hasCredits(response)) {
      totalCreditsUsed += response.creditsUsed;
      await updateCreditUsage(response.creditsUsed);
    }

    operation.status = 'completed';
    operation.result = response;

    // Log final credit usage for the batch
    if (!FIRECRAWL_API_URL) {
      safeLog(
        'info',
        `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`
      );
    }
  } catch (error) {
    operation.status = 'failed';
    operation.error = error instanceof Error ? error.message : String(error);

    safeLog('error', `Batch ${operation.id} failed: ${operation.error}`);
  }
}
// Tool handlers
// Advertise every tool definition to connecting MCP clients.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: [
    SCRAPE_TOOL,
    MAP_TOOL,
    CRAWL_TOOL,
    BATCH_SCRAPE_TOOL,
    CHECK_BATCH_STATUS_TOOL,
    CHECK_CRAWL_STATUS_TOOL,
    SEARCH_TOOL,
    EXTRACT_TOOL,
    DEEP_RESEARCH_TOOL,
    GENERATE_LLMSTXT_TOOL,
  ],
}));
/**
 * Dispatch a tool invocation to the matching Firecrawl client call.
 * Each case validates its arguments with a type guard, runs the operation
 * (with retry and credit tracking where applicable), and returns MCP text
 * content; per-tool failures are reported via `isError: true` rather than
 * thrown, so one bad call cannot crash the server.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const startTime = Date.now();
  try {
    const { name, arguments: args } = request.params;

    // Log incoming request with timestamp
    safeLog(
      'info',
      `[${new Date().toISOString()}] Received request for tool: ${name}`
    );

    if (!args) {
      throw new Error('No arguments provided');
    }

    switch (name) {
      // Single-page scrape: returns the requested formats joined as text.
      case 'firecrawl_scrape': {
        if (!isScrapeOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_scrape');
        }
        const { url, ...options } = args;
        try {
          const scrapeStartTime = Date.now();
          safeLog(
            'info',
            `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`
          );

          //@ts-ignore
          const response = await client.scrapeUrl(url, { ...options, origin: 'mcp-server' });

          // Log performance metrics
          safeLog(
            'info',
            `Scrape completed in ${Date.now() - scrapeStartTime}ms`
          );

          if ('success' in response && !response.success) {
            throw new Error(response.error || 'Scraping failed');
          }

          // Format content based on requested formats
          const contentParts = [];

          if (options.formats?.includes('markdown') && response.markdown) {
            contentParts.push(response.markdown);
          }
          if (options.formats?.includes('html') && response.html) {
            contentParts.push(response.html);
          }
          if (options.formats?.includes('rawHtml') && response.rawHtml) {
            contentParts.push(response.rawHtml);
          }
          if (options.formats?.includes('links') && response.links) {
            contentParts.push(response.links.join('\n'));
          }
          if (options.formats?.includes('screenshot') && response.screenshot) {
            contentParts.push(response.screenshot);
          }
          if (options.formats?.includes('extract') && response.extract) {
            contentParts.push(JSON.stringify(response.extract, null, 2));
          }

          // If options.formats is empty, default to markdown
          // NOTE(review): this default is applied AFTER contentParts was
          // assembled above, so it never affects the returned content —
          // a request with no formats yields 'No content available'.
          // Confirm whether this assignment should run before the format
          // checks instead.
          if (!options.formats || options.formats.length === 0) {
            options.formats = ['markdown'];
          }

          // Add warning to response if present
          if (response.warning) {
            safeLog('warning', response.warning);
          }

          return {
            content: [
              {
                type: 'text',
                text: trimResponseText(
                  contentParts.join('\n\n') || 'No content available'
                ),
              },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      // URL discovery: returns one discovered link per line.
      case 'firecrawl_map': {
        if (!isMapOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_map');
        }
        const { url, ...options } = args;
        //@ts-ignore
        const response = await client.mapUrl(url, { ...options, origin: 'mcp-server' });
        if ('error' in response) {
          throw new Error(response.error);
        }
        if (!response.links) {
          throw new Error('No links received from FireCrawl API');
        }
        return {
          content: [
            { type: 'text', text: trimResponseText(response.links.join('\n')) },
          ],
          isError: false,
        };
      }

      // Queue a batch scrape locally; actual submission happens via
      // batchQueue -> processBatchOperation.
      case 'firecrawl_batch_scrape': {
        if (!isBatchScrapeOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_batch_scrape');
        }

        try {
          const operationId = `batch_${++operationCounter}`;
          const operation: QueuedBatchOperation = {
            id: operationId,
            urls: args.urls,
            options: args.options,
            status: 'pending',
            progress: {
              completed: 0,
              total: args.urls.length,
            },
          };

          batchOperations.set(operationId, operation);

          // Queue the operation
          batchQueue.add(() => processBatchOperation(operation));

          safeLog(
            'info',
            `Queued batch operation ${operationId} with ${args.urls.length} URLs`
          );

          return {
            content: [
              {
                type: 'text',
                text: trimResponseText(
                  `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`
                ),
              },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error
              ? error.message
              : `Batch operation failed: ${JSON.stringify(error)}`;
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      // Report the locally tracked state of a queued batch job.
      case 'firecrawl_check_batch_status': {
        if (!isStatusCheckOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_check_batch_status');
        }

        const operation = batchOperations.get(args.id);
        if (!operation) {
          return {
            content: [
              {
                type: 'text',
                text: trimResponseText(
                  `No batch operation found with ID: ${args.id}`
                ),
              },
            ],
            isError: true,
          };
        }

        const status = `Batch Status:
Status: ${operation.status}
Progress: ${operation.progress.completed}/${operation.progress.total}
${operation.error ? `Error: ${operation.error}` : ''}
${
  operation.result
    ? `Results: ${JSON.stringify(operation.result, null, 2)}`
    : ''
}`;

        return {
          content: [{ type: 'text', text: trimResponseText(status) }],
          isError: false,
        };
      }

      // Start an async crawl; returns the server-side job ID.
      case 'firecrawl_crawl': {
        if (!isCrawlOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_crawl');
        }
        const { url, ...options } = args;
        const response = await withRetry(
          //@ts-ignore
          async () => client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }),
          'crawl operation'
        );

        if (!response.success) {
          throw new Error(response.error);
        }

        // Monitor credits for cloud API
        if (!FIRECRAWL_API_URL && hasCredits(response)) {
          await updateCreditUsage(response.creditsUsed);
        }

        return {
          content: [
            {
              type: 'text',
              text: trimResponseText(
                `Started crawl for ${url} with job ID: ${response.id}`
              ),
            },
          ],
          isError: false,
        };
      }

      // Poll the Firecrawl API for crawl job state and partial results.
      case 'firecrawl_check_crawl_status': {
        if (!isStatusCheckOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_check_crawl_status');
        }
        const response = await client.checkCrawlStatus(args.id);
        if (!response.success) {
          throw new Error(response.error);
        }
        const status = `Crawl Status:
Status: ${response.status}
Progress: ${response.completed}/${response.total}
Credits Used: ${response.creditsUsed}
Expires At: ${response.expiresAt}
${
  response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''
}`;
        return {
          content: [{ type: 'text', text: trimResponseText(status) }],
          isError: false,
        };
      }

      // Web search; each result is rendered as URL/title/description plus
      // scraped markdown when scrapeOptions were supplied.
      case 'firecrawl_search': {
        if (!isSearchOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_search');
        }
        try {
          const response = await withRetry(
            async () => client.search(args.query, { ...args, origin: 'mcp-server' }),
            'search operation'
          );

          if (!response.success) {
            throw new Error(
              `Search failed: ${response.error || 'Unknown error'}`
            );
          }

          // Monitor credits for cloud API
          if (!FIRECRAWL_API_URL && hasCredits(response)) {
            await updateCreditUsage(response.creditsUsed);
          }

          // Format the results
          const results = response.data
            .map(
              (result) =>
                `URL: ${result.url}
Title: ${result.title || 'No title'}
Description: ${result.description || 'No description'}
${result.markdown ? `\nContent:\n${result.markdown}` : ''}`
            )
            .join('\n\n');

          return {
            content: [{ type: 'text', text: trimResponseText(results) }],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error
              ? error.message
              : `Search failed: ${JSON.stringify(error)}`;
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      // LLM-based structured extraction across multiple URLs.
      case 'firecrawl_extract': {
        if (!isExtractOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_extract');
        }

        try {
          const extractStartTime = Date.now();

          safeLog(
            'info',
            `Starting extraction for URLs: ${args.urls.join(', ')}`
          );

          // Log if using self-hosted instance
          if (FIRECRAWL_API_URL) {
            safeLog('info', 'Using self-hosted instance for extraction');
          }

          const extractResponse = await withRetry(
            async () =>
              client.extract(args.urls, {
                prompt: args.prompt,
                systemPrompt: args.systemPrompt,
                schema: args.schema,
                allowExternalLinks: args.allowExternalLinks,
                enableWebSearch: args.enableWebSearch,
                includeSubdomains: args.includeSubdomains,
                origin: 'mcp-server',
              } as ExtractParams),
            'extract operation'
          );

          // Type guard for successful response
          if (!('success' in extractResponse) || !extractResponse.success) {
            throw new Error(extractResponse.error || 'Extraction failed');
          }

          const response = extractResponse as ExtractResponse;

          // Monitor credits for cloud API
          if (!FIRECRAWL_API_URL && hasCredits(response)) {
            await updateCreditUsage(response.creditsUsed || 0);
          }

          // Log performance metrics
          safeLog(
            'info',
            `Extraction completed in ${Date.now() - extractStartTime}ms`
          );

          // Add warning to response if present
          const result = {
            content: [
              {
                type: 'text',
                text: trimResponseText(JSON.stringify(response.data, null, 2)),
              },
            ],
            isError: false,
          };

          if (response.warning) {
            safeLog('warning', response.warning);
          }

          return result;
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);

          // Special handling for self-hosted instance errors
          if (
            FIRECRAWL_API_URL &&
            errorMessage.toLowerCase().includes('not supported')
          ) {
            safeLog(
              'error',
              'Extraction is not supported by this self-hosted instance'
            );
            return {
              content: [
                {
                  type: 'text',
                  text: trimResponseText(
                    'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.'
                  ),
                },
              ],
              isError: true,
            };
          }

          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      // Iterative research; activity/source callbacks stream progress logs.
      case 'firecrawl_deep_research': {
        if (!args || typeof args !== 'object' || !('query' in args)) {
          throw new Error('Invalid arguments for firecrawl_deep_research');
        }

        try {
          const researchStartTime = Date.now();
          safeLog('info', `Starting deep research for query: ${args.query}`);

          const response = await client.deepResearch(
            args.query as string,
            {
              maxDepth: args.maxDepth as number,
              timeLimit: args.timeLimit as number,
              maxUrls: args.maxUrls as number,
              //@ts-ignore
              origin: 'mcp-server',
            },
            // Activity callback
            (activity) => {
              safeLog(
                'info',
                `Research activity: ${activity.message} (Depth: ${activity.depth})`
              );
            },
            // Source callback
            (source) => {
              safeLog(
                'info',
                `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`
              );
            }
          );

          // Log performance metrics
          safeLog(
            'info',
            `Deep research completed in ${Date.now() - researchStartTime}ms`
          );

          if (!response.success) {
            throw new Error(response.error || 'Deep research failed');
          }

          // Format the results (only finalAnalysis is returned to the client;
          // activities and sources are collected here but not emitted).
          const formattedResponse = {
            finalAnalysis: response.data.finalAnalysis,
            activities: response.data.activities,
            sources: response.data.sources,
          };

          return {
            content: [
              {
                type: 'text',
                text: trimResponseText(formattedResponse.finalAnalysis),
              },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      // Generate LLMs.txt (and optionally LLMs-full.txt) for a site.
      case 'firecrawl_generate_llmstxt': {
        if (!isGenerateLLMsTextOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
        }

        try {
          const { url, ...params } = args;
          const generateStartTime = Date.now();

          safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);

          // Start the generation process
          const response = await withRetry(
            //@ts-ignore
            async () => client.generateLLMsText(url, { ...params, origin: 'mcp-server' }),
            'LLMs.txt generation'
          );

          if (!response.success) {
            throw new Error(response.error || 'LLMs.txt generation failed');
          }

          // Log performance metrics
          safeLog(
            'info',
            `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms`
          );

          // Format the response
          let resultText = '';

          if ('data' in response) {
            resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`;

            if (args.showFullText && response.data.llmsfulltxt) {
              resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`;
            }
          }

          return {
            content: [{ type: 'text', text: trimResponseText(resultText) }],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
            isError: true,
          };
        }
      }

      default:
        return {
          content: [
            { type: 'text', text: trimResponseText(`Unknown tool: ${name}`) },
          ],
          isError: true,
        };
    }
  } catch (error) {
    // Log detailed error information
    safeLog('error', {
      message: `Request failed: ${
        error instanceof Error ? error.message : String(error)
      }`,
      tool: request.params.name,
      arguments: request.params.arguments,
      timestamp: new Date().toISOString(),
      duration: Date.now() - startTime,
    });
    return {
      content: [
        {
          type: 'text',
          text: trimResponseText(
            `Error: ${error instanceof Error ? error.message : String(error)}`
          ),
        },
      ],
      isError: true,
    };
  } finally {
    // Log request completion with performance metrics
    safeLog('info', `Request completed in ${Date.now() - startTime}ms`);
  }
});
/**
 * Render crawl/scrape documents as a human-readable digest.
 * Each entry shows the URL, the first 100 characters of content (markdown
 * preferred, then HTML, then raw HTML), and the page title when present.
 */
function formatResults(data: FirecrawlDocument[]): string {
  const sections = data.map((doc) => {
    const body = doc.markdown || doc.html || doc.rawHtml || 'No content';
    const preview =
      body.substring(0, 100) + (body.length > 100 ? '...' : '');
    const titleLine = doc.metadata?.title ? `Title: ${doc.metadata.title}` : '';
    return `URL: ${doc.url || 'Unknown URL'}
Content: ${preview}
${titleLine}`;
  });
  return sections.join('\n\n');
}
// Add type guard for credit usage
/**
 * True when `response` carries a numeric `creditsUsed` field (cloud API).
 *
 * Fix: the original applied the `in` operator directly to an `any` value,
 * which throws a TypeError when the value is null or a primitive; guard
 * that it is a non-null object first.
 */
function hasCredits(response: any): response is { creditsUsed: number } {
  return (
    typeof response === 'object' &&
    response !== null &&
    'creditsUsed' in response &&
    typeof response.creditsUsed === 'number'
  );
}
/**
 * Strip leading and trailing whitespace from outgoing text content.
 * The Claude API rejects final assistant content that ends with trailing
 * whitespace, so every text payload is passed through this helper.
 */
function trimResponseText(text: string): string {
  const cleaned = text.trim();
  return cleaned;
}
// Server startup
/**
 * Boot the MCP server over stdio. Pre-connect output goes to console.error
 * (stderr) because stdout carries protocol frames; once connected, safeLog
 * can use the protocol logging channel for non-stdio transports.
 */
async function runServer() {
  try {
    console.error('Initializing FireCrawl MCP Server...');

    const transport = new StdioServerTransport();

    // Detect if we're using stdio transport. (Currently always true: the
    // only transport constructed here is StdioServerTransport.)
    isStdioTransport = transport instanceof StdioServerTransport;
    if (isStdioTransport) {
      console.error(
        'Running in stdio mode, logging will be directed to stderr'
      );
    }

    await server.connect(transport);

    // Now that we're connected, we can send logging messages
    safeLog('info', 'FireCrawl MCP Server initialized successfully');
    safeLog(
      'info',
      `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`
    );

    console.error('FireCrawl MCP Server running on stdio');
  } catch (error) {
    console.error('Fatal error running server:', error);
    process.exit(1);
  }
}
// Entry point: any rejection escaping runServer is fatal to the process.
runServer().catch((error: any) => {
  console.error('Fatal error running server:', error);
  process.exit(1);
});