Firecrawl MCP Server

Verified
MIT License
9,757
Overview InspectNew Schema Related Servers Reviews Score
firecrawl-mcp-server
src
#!/usr/bin/env node

import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  Tool,
  CallToolRequestSchema,
  ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import FirecrawlApp, {
  type ScrapeParams,
  type MapParams,
  type CrawlParams,
  type FirecrawlDocument,
} from '@mendable/firecrawl-js';
import PQueue from 'p-queue';

import dotenv from 'dotenv';

dotenv.config();

// Tool definitions
/**
 * MCP tool descriptor for `firecrawl_scrape` (single-page scrape).
 *
 * Only `url` is required. The `firecrawl_scrape` case of the CallTool handler
 * splits `url` off and forwards every other property unchanged to
 * `client.scrapeUrl(url, options)`.
 *
 * NOTE(review): the visible handler only emits content for formats that are
 * explicitly listed in `formats` (markdown, html, rawHtml, links, screenshot,
 * extract). When `formats` is omitted the returned text is
 * 'No content available', which does not match the "default: ['markdown']"
 * wording below, and 'screenshot@fullPage' has no matching output branch.
 */
const SCRAPE_TOOL: Tool = {
  name: 'firecrawl_scrape',
  description:
    'Scrape a single webpage with advanced options for content extraction. ' +
    'Supports various formats including markdown, HTML, and screenshots. ' +
    'Can execute custom actions like clicking or scrolling before scraping.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL to scrape',
      },
      formats: {
        type: 'array',
        items: {
          type: 'string',
          enum: [
            'markdown',
            'html',
            'rawHtml',
            'screenshot',
            'links',
            'screenshot@fullPage',
            'extract',
          ],
        },
        description: "Content formats to extract (default: ['markdown'])",
      },
      onlyMainContent: {
        type: 'boolean',
        description:
          'Extract only the main content, filtering out navigation, footers, etc.',
      },
      includeTags: {
        type: 'array',
        items: { type: 'string' },
        description: 'HTML tags to specifically include in extraction',
      },
      excludeTags: {
        type: 'array',
        items: { type: 'string' },
        description: 'HTML tags to exclude from extraction',
      },
      waitFor: {
        type: 'number',
        description: 'Time in milliseconds to wait for dynamic content to load',
      },
      timeout: {
        type: 'number',
        description:
          'Maximum time in milliseconds to wait for the page to load',
      },
      // Browser actions; each item must carry a `type`, the remaining fields
      // are the per-action parameters named in their descriptions.
      actions: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            type: {
              type: 'string',
              enum: [
                'wait',
                'click',
                'screenshot',
                'write',
                'press',
                'scroll',
                'scrape',
                'executeJavascript',
              ],
              description: 'Type of action to perform',
            },
            selector: {
              type: 'string',
              description: 'CSS selector for the target element',
            },
            milliseconds: {
              type: 'number',
              description: 'Time to wait in milliseconds (for wait action)',
            },
            text: {
              type: 'string',
              description: 'Text to write (for write action)',
            },
            key: {
              type: 'string',
              description: 'Key to press (for press action)',
            },
            direction: {
              type: 'string',
              enum: ['up', 'down'],
              description: 'Scroll direction',
            },
            script: {
              type: 'string',
              description: 'JavaScript code to execute',
            },
            fullPage: {
              type: 'boolean',
              description: 'Take full page screenshot',
            },
          },
          required: ['type'],
        },
        description: 'List of actions to perform before scraping',
      },
      // Settings for the 'extract' format; no sub-property is marked required.
      extract: {
        type: 'object',
        properties: {
          schema: {
            type: 'object',
            description: 'Schema for structured data extraction',
          },
          systemPrompt: {
            type: 'string',
            description: 'System prompt for LLM extraction',
          },
          prompt: {
            type: 'string',
            description: 'User prompt for LLM extraction',
          },
        },
        description: 'Configuration for structured data extraction',
      },
      mobile: {
        type: 'boolean',
        description: 'Use mobile viewport',
      },
      skipTlsVerification: {
        type: 'boolean',
        description: 'Skip TLS certificate verification',
      },
      removeBase64Images: {
        type: 'boolean',
        description: 'Remove base64 encoded images from output',
      },
      location: {
        type: 'object',
        properties: {
          country: {
            type: 'string',
            description: 'Country code for geolocation',
          },
          languages: {
            type: 'array',
            items: { type: 'string' },
            description: 'Language codes for content',
          },
        },
        description: 'Location settings for scraping',
      },
    },
    required: ['url'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_map` (URL discovery).
 *
 * Only `url` is required. The `firecrawl_map` case of the CallTool handler
 * forwards every other property to `client.mapUrl(url, options)` and returns
 * the discovered links as newline-separated text.
 */
const MAP_TOOL: Tool = {
  name: 'firecrawl_map',
  description:
    'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'Starting URL for URL discovery',
      },
      search: {
        type: 'string',
        description: 'Optional search term to filter URLs',
      },
      // NOTE(review): the schema does not prevent setting both ignoreSitemap
      // and sitemapOnly; how the API resolves that is not visible here.
      ignoreSitemap: {
        type: 'boolean',
        description: 'Skip sitemap.xml discovery and only use HTML links',
      },
      sitemapOnly: {
        type: 'boolean',
        description: 'Only use sitemap.xml for discovery, ignore HTML links',
      },
      includeSubdomains: {
        type: 'boolean',
        description: 'Include URLs from subdomains in results',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of URLs to return',
      },
    },
    required: ['url'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_crawl` (asynchronous multi-page crawl).
 *
 * Only `url` is required. The `firecrawl_crawl` case of the CallTool handler
 * forwards every other property to `client.asyncCrawlUrl(url, options)` and
 * replies with the job ID from the response; progress is then polled through
 * the `firecrawl_check_crawl_status` tool.
 */
const CRAWL_TOOL: Tool = {
  name: 'firecrawl_crawl',
  description:
    'Start an asynchronous crawl of multiple pages from a starting URL. ' +
    'Supports depth control, path filtering, and webhook notifications.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'Starting URL for the crawl',
      },
      excludePaths: {
        type: 'array',
        items: { type: 'string' },
        description: 'URL paths to exclude from crawling',
      },
      includePaths: {
        type: 'array',
        items: { type: 'string' },
        description: 'Only crawl these URL paths',
      },
      maxDepth: {
        type: 'number',
        description: 'Maximum link depth to crawl',
      },
      ignoreSitemap: {
        type: 'boolean',
        description: 'Skip sitemap.xml discovery',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of pages to crawl',
      },
      allowBackwardLinks: {
        type: 'boolean',
        description: 'Allow crawling links that point to parent directories',
      },
      allowExternalLinks: {
        type: 'boolean',
        description: 'Allow crawling links to external domains',
      },
      // Either a bare webhook URL string or an object with a required `url`
      // and optional custom `headers`.
      webhook: {
        oneOf: [
          {
            type: 'string',
            description: 'Webhook URL to notify when crawl is complete',
          },
          {
            type: 'object',
            properties: {
              url: {
                type: 'string',
                description: 'Webhook URL',
              },
              headers: {
                type: 'object',
                description: 'Custom headers for webhook requests',
              },
            },
            required: ['url'],
          },
        ],
      },
      deduplicateSimilarURLs: {
        type: 'boolean',
        description: 'Remove similar URLs during crawl',
      },
      ignoreQueryParameters: {
        type: 'boolean',
        description: 'Ignore query parameters when comparing URLs',
      },
      // Per-page scrape settings; a subset of the firecrawl_scrape properties
      // (formats, onlyMainContent, includeTags, excludeTags, waitFor).
      scrapeOptions: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: [
                'markdown',
                'html',
                'rawHtml',
                'screenshot',
                'links',
                'screenshot@fullPage',
                'extract',
              ],
            },
          },
          onlyMainContent: {
            type: 'boolean',
          },
          includeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          excludeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          waitFor: {
            type: 'number',
          },
        },
        description: 'Options for scraping each page',
      },
    },
    required: ['url'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_batch_scrape`.
 *
 * Only `urls` is required. The `firecrawl_batch_scrape` case of the CallTool
 * handler does not call the API directly: it registers an in-memory operation
 * with a locally generated ID of the form `batch_<n>`, queues it on
 * `batchQueue`, and returns that ID. `options` is later passed as-is to
 * `client.asyncBatchScrapeUrls` by `processBatchOperation`.
 */
const BATCH_SCRAPE_TOOL: Tool = {
  name: 'firecrawl_batch_scrape',
  description:
    'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
  inputSchema: {
    type: 'object',
    properties: {
      urls: {
        type: 'array',
        items: { type: 'string' },
        description: 'List of URLs to scrape',
      },
      // Scrape settings applied to the batch; same property subset as the
      // crawl tool's scrapeOptions.
      options: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: [
                'markdown',
                'html',
                'rawHtml',
                'screenshot',
                'links',
                'screenshot@fullPage',
                'extract',
              ],
            },
          },
          onlyMainContent: {
            type: 'boolean',
          },
          includeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          excludeTags: {
            type: 'array',
            items: { type: 'string' },
          },
          waitFor: {
            type: 'number',
          },
        },
      },
    },
    required: ['urls'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_check_batch_status`.
 *
 * `id` is looked up in the in-memory `batchOperations` map, so it must be the
 * `batch_<n>` ID handed out by `firecrawl_batch_scrape` in this server process.
 */
const CHECK_BATCH_STATUS_TOOL: Tool = {
  name: 'firecrawl_check_batch_status',
  description: 'Check the status of a batch scraping job.',
  inputSchema: {
    type: 'object',
    properties: {
      id: {
        type: 'string',
        description: 'Batch job ID to check',
      },
    },
    required: ['id'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_check_crawl_status`.
 *
 * `id` is passed straight to `client.checkCrawlStatus(id)`; it is the job ID
 * reported by the `firecrawl_crawl` tool.
 */
const CHECK_CRAWL_STATUS_TOOL: Tool = {
  name: 'firecrawl_check_crawl_status',
  description: 'Check the status of a crawl job.',
  inputSchema: {
    type: 'object',
    properties: {
      id: {
        type: 'string',
        description: 'Crawl job ID to check',
      },
    },
    required: ['id'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_search`.
 *
 * Only `query` is required. The `firecrawl_search` case of the CallTool
 * handler calls `client.search(args.query, args)`, i.e. the whole argument
 * object (including `query`) is passed as the options parameter.
 *
 * NOTE(review): the visible handler renders URL, title, description and the
 * `markdown` field of each result only; content requested as 'html' or
 * 'rawHtml' via `scrapeOptions.formats` is not included in the reply text.
 */
const SEARCH_TOOL: Tool = {
  name: 'firecrawl_search',
  description:
    'Search and retrieve content from web pages with optional scraping. ' +
    'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.',
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query string',
      },
      limit: {
        type: 'number',
        description: 'Maximum number of results to return (default: 5)',
      },
      lang: {
        type: 'string',
        description: 'Language code for search results (default: en)',
      },
      country: {
        type: 'string',
        description: 'Country code for search results (default: us)',
      },
      tbs: {
        type: 'string',
        description: 'Time-based search filter',
      },
      filter: {
        type: 'string',
        description: 'Search filter',
      },
      location: {
        type: 'object',
        properties: {
          country: {
            type: 'string',
            description: 'Country code for geolocation',
          },
          languages: {
            type: 'array',
            items: { type: 'string' },
            description: 'Language codes for content',
          },
        },
        description: 'Location settings for search',
      },
      // Narrower than the scrape tool: only markdown/html/rawHtml formats.
      scrapeOptions: {
        type: 'object',
        properties: {
          formats: {
            type: 'array',
            items: {
              type: 'string',
              enum: ['markdown', 'html', 'rawHtml'],
            },
            description: 'Content formats to extract from search results',
          },
          onlyMainContent: {
            type: 'boolean',
            description: 'Extract only the main content from results',
          },
          waitFor: {
            type: 'number',
            description: 'Time in milliseconds to wait for dynamic content',
          },
        },
        description: 'Options for scraping search results',
      },
    },
    required: ['query'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_extract` (LLM-based structured extraction).
 *
 * Only `urls` is required. The `firecrawl_extract` case of the CallTool
 * handler forwards prompt, systemPrompt, schema, allowExternalLinks,
 * enableWebSearch and includeSubdomains to `client.extract`, adding a fixed
 * `origin: 'mcp-server'`, and returns the response data as pretty-printed JSON.
 */
const EXTRACT_TOOL: Tool = {
  name: 'firecrawl_extract',
  description:
    'Extract structured information from web pages using LLM. ' +
    'Supports both cloud AI and self-hosted LLM extraction.',
  inputSchema: {
    type: 'object',
    properties: {
      urls: {
        type: 'array',
        items: { type: 'string' },
        description: 'List of URLs to extract information from',
      },
      prompt: {
        type: 'string',
        description: 'Prompt for the LLM extraction',
      },
      systemPrompt: {
        type: 'string',
        description: 'System prompt for LLM extraction',
      },
      schema: {
        type: 'object',
        description: 'JSON schema for structured data extraction',
      },
      allowExternalLinks: {
        type: 'boolean',
        description: 'Allow extraction from external links',
      },
      enableWebSearch: {
        type: 'boolean',
        description: 'Enable web search for additional context',
      },
      includeSubdomains: {
        type: 'boolean',
        description: 'Include subdomains in extraction',
      },
    },
    required: ['urls'],
  },
};

/**
 * MCP tool descriptor for `firecrawl_deep_research`.
 *
 * Only `query` is required. The numeric ranges quoted in the property
 * descriptions are documentation only: the schema declares no
 * `minimum`/`maximum` keywords, so they are not enforced here.
 */
const DEEP_RESEARCH_TOOL: Tool = {
  name: 'firecrawl_deep_research',
  description: 'Conduct deep research on a query using web crawling, search, and AI analysis.',
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'The query to research',
      },
      maxDepth: {
        type: 'number',
        description: 'Maximum depth of research iterations (1-10)',
      },
      timeLimit: {
        type: 'number',
        description: 'Time limit in seconds (30-300)',
      },
      maxUrls: {
        type: 'number',
        description: 'Maximum number of URLs to analyze (1-1000)',
      }
    },
    required: ['query'],
  },
};

// Type definitions
/**
 * Arguments of the `firecrawl_batch_scrape` tool after validation by
 * `isBatchScrapeOptions`. Note that the guard only verifies `urls`;
 * `options` is typed here but not checked at runtime.
 */
interface BatchScrapeOptions {
  /** URLs to scrape as one batch. */
  urls: string[];
  /** Scrape parameters shared by every URL (everything except `url`). */
  options?: Omit<ScrapeParams, 'url'>;
}

/**
 * Arguments shared by the `firecrawl_check_batch_status` and
 * `firecrawl_check_crawl_status` tools.
 */
interface StatusCheckOptions {
  /** Job/operation identifier to look up. */
  id: string;
}

/**
 * Arguments of the `firecrawl_search` tool; mirrors the properties declared
 * in `SEARCH_TOOL.inputSchema`. `isSearchOptions` only verifies `query` at
 * runtime, the optional fields are not checked.
 */
interface SearchOptions {
  /** Search query string (the only required field). */
  query: string;
  limit?: number;
  lang?: string;
  country?: string;
  /** Time-based search filter. */
  tbs?: string;
  filter?: string;
  location?: {
    country?: string;
    languages?: string[];
  };
  /** When present, asks for page content in addition to SERP metadata. */
  scrapeOptions?: {
    formats?: string[];
    onlyMainContent?: boolean;
    waitFor?: number;
  };
}

/**
 * Local shape of the options object handed to `client.extract`; the
 * `firecrawl_extract` handler builds an object literal and casts it to this
 * type.
 *
 * NOTE(review): `T` defaults to `any`, which makes `schema` effectively
 * untyped — presumably to stay assignable to the SDK's own parameter type;
 * confirm before tightening.
 */
interface ExtractParams<T = any> {
  prompt?: string;
  systemPrompt?: string;
  schema?: T | object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
  /** Caller tag; the handler always sends 'mcp-server'. */
  origin?: string;
}

/**
 * Arguments of the `firecrawl_extract` tool after validation by
 * `isExtractOptions` (which checks `urls` only).
 */
interface ExtractArgs {
  /** URLs to extract information from. */
  urls: string[];
  prompt?: string;
  systemPrompt?: string;
  schema?: object;
  allowExternalLinks?: boolean;
  enableWebSearch?: boolean;
  includeSubdomains?: boolean;
  /** Declared here, but the visible handler ignores it and sends 'mcp-server'. */
  origin?: string;
}

/**
 * Shape the `firecrawl_extract` handler casts a successful `client.extract`
 * result to before reading `data`, `warning` and `creditsUsed`.
 */
interface ExtractResponse<T = any> {
  success: boolean;
  /** Extracted payload; serialized with JSON.stringify for the tool reply. */
  data: T;
  error?: string;
  /** Logged at 'warning' level by the handler when present. */
  warning?: string;
  creditsUsed?: number;
}

// Type guards
/**
 * Type guard for `firecrawl_scrape` arguments.
 *
 * Accepts any non-null object that has a string `url` property. No other
 * property is inspected.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args` can be treated as scrape parameters with a URL.
 */
function isScrapeOptions(
  args: unknown
): args is ScrapeParams & { url: string } {
  if (typeof args !== 'object' || args === null) {
    return false;
  }
  if (!('url' in args)) {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  return typeof candidate.url === 'string';
}

/**
 * Type guard for `firecrawl_map` arguments.
 *
 * Accepts any non-null object that has a string `url` property. No other
 * property is inspected.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args` can be treated as map parameters with a URL.
 */
function isMapOptions(args: unknown): args is MapParams & { url: string } {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  if (!('url' in args)) {
    return false;
  }
  const { url } = args as { url: unknown };
  return typeof url === 'string';
}

/**
 * Type guard for `firecrawl_crawl` arguments.
 *
 * Accepts any non-null object that has a string `url` property. No other
 * property is inspected.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args` can be treated as crawl parameters with a URL.
 */
function isCrawlOptions(args: unknown): args is CrawlParams & { url: string } {
  const isObject = typeof args === 'object' && args !== null;
  if (!isObject) {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  if (!('url' in candidate)) {
    return false;
  }
  return typeof candidate.url === 'string';
}

/**
 * Type guard for `firecrawl_batch_scrape` arguments.
 *
 * Accepts any non-null object whose `urls` property is an array containing
 * only strings (an empty array passes). `options` is not inspected.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args.urls` is a string array.
 */
function isBatchScrapeOptions(args: unknown): args is BatchScrapeOptions {
  if (typeof args !== 'object' || args === null || !('urls' in args)) {
    return false;
  }
  const { urls } = args as { urls: unknown };
  if (!Array.isArray(urls)) {
    return false;
  }
  // Valid exactly when no element is a non-string.
  return !urls.some((entry) => typeof entry !== 'string');
}

/**
 * Type guard for the status-check tools' arguments.
 *
 * Accepts any non-null object that has a string `id` property.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args.id` is a string.
 */
function isStatusCheckOptions(args: unknown): args is StatusCheckOptions {
  if (typeof args !== 'object' || args === null) {
    return false;
  }
  if (!('id' in args)) {
    return false;
  }
  const { id } = args as { id: unknown };
  return typeof id === 'string';
}

/**
 * Type guard for `firecrawl_search` arguments.
 *
 * Accepts any non-null object that has a string `query` property. The
 * optional search settings are not inspected.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args.query` is a string.
 */
function isSearchOptions(args: unknown): args is SearchOptions {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  if (!('query' in args)) {
    return false;
  }
  const candidate = args as Record<string, unknown>;
  return typeof candidate.query === 'string';
}

/**
 * Type guard for `firecrawl_extract` arguments.
 *
 * Accepts any non-null object whose `urls` property is an array containing
 * only strings (an empty array passes). Other properties are not inspected.
 *
 * @param args - Raw tool arguments.
 * @returns `true` when `args.urls` is a string array.
 */
function isExtractOptions(args: unknown): args is ExtractArgs {
  if (args === null || typeof args !== 'object') {
    return false;
  }
  const candidateUrls = (args as { urls?: unknown }).urls;
  if (!Array.isArray(candidateUrls)) {
    return false;
  }
  // Valid exactly when no element is a non-string.
  return !candidateUrls.some((entry) => typeof entry !== 'string');
}

// Server implementation
/**
 * The MCP server instance. It identifies itself as 'firecrawl-mcp' 1.3.2 and
 * declares the `tools` and `logging` capabilities; request handlers are
 * registered on it further down and `server.sendLoggingMessage` is used for
 * all log output in this file.
 */
const server = new Server(
  {
    name: 'firecrawl-mcp',
    version: '1.3.2',
  },
  {
    capabilities: {
      tools: {},
      logging: {},
    },
  }
);

// Optional base URL of a custom (self-hosted) Firecrawl API. When it is unset
// the client below is created without an explicit apiUrl.
const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
// API key. Mandatory only when FIRECRAWL_API_URL is not set (checked below).
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;

// Fail fast at module load: with neither a custom API URL nor an API key the
// process prints an error to stderr and exits with status 1.
if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) {
  console.error(
    'Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service'
  );
  process.exit(1);
}

// Initialize the Firecrawl client. A missing key is passed as an empty string
// (only reachable when FIRECRAWL_API_URL is set); apiUrl is included in the
// options only when FIRECRAWL_API_URL is non-empty.
const client = new FirecrawlApp({
  apiKey: FIRECRAWL_API_KEY || '',
  ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
});

/**
 * Reads a numeric setting from the environment.
 *
 * Unlike `Number(value) || fallback`, an explicit `0` is honoured: the
 * fallback is used only when the variable is unset, blank, or does not parse
 * as a number.
 *
 * @param name - Environment variable name.
 * @param fallback - Value to use when the variable is missing or invalid.
 * @returns The parsed number, or `fallback`.
 */
function readNumericEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Runtime configuration for retries and credit monitoring, read once at
 * module load from FIRECRAWL_* environment variables.
 *
 * - `retry.*` drives `withRetry`: maximum attempts, first delay in ms, delay
 *   cap in ms, and the exponential backoff multiplier.
 * - `credit.*` are the thresholds compared against the running credit total
 *   in `updateCreditUsage`.
 */
const CONFIG = {
  retry: {
    maxAttempts: readNumericEnv('FIRECRAWL_RETRY_MAX_ATTEMPTS', 3),
    initialDelay: readNumericEnv('FIRECRAWL_RETRY_INITIAL_DELAY', 1000),
    maxDelay: readNumericEnv('FIRECRAWL_RETRY_MAX_DELAY', 10000),
    backoffFactor: readNumericEnv('FIRECRAWL_RETRY_BACKOFF_FACTOR', 2),
  },
  credit: {
    warningThreshold: readNumericEnv(
      'FIRECRAWL_CREDIT_WARNING_THRESHOLD',
      1000
    ),
    criticalThreshold: readNumericEnv(
      'FIRECRAWL_CREDIT_CRITICAL_THRESHOLD',
      100
    ),
  },
};

/** In-memory credit bookkeeping for this server process. */
interface CreditUsage {
  /** Sum of all credits reported to `updateCreditUsage` so far. */
  total: number;
  /** Epoch milliseconds timestamp. */
  lastCheck: number;
}

// Starts at zero; `lastCheck` is set to module-load time and is not updated
// anywhere in the code visible here.
const creditUsage: CreditUsage = {
  total: 0,
  lastCheck: Date.now(),
};

/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 *
 * @param ms - Time to wait, in milliseconds (passed to `setTimeout`).
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Runs `operation`, retrying with exponential backoff when it fails with a
 * rate-limit error.
 *
 * An error counts as a rate limit when it is an `Error` whose message
 * contains 'rate limit' or '429'. Any other failure, or a rate-limit failure
 * once `CONFIG.retry.maxAttempts` is reached, is rethrown unchanged. The wait
 * before retry number k is `initialDelay * backoffFactor^(k - 1)`, capped at
 * `maxDelay`.
 *
 * @param operation - Async work to execute; invoked again on each retry.
 * @param context - Short label used in the retry log message.
 * @param attempt - 1-based attempt number to start from (defaults to 1).
 * @returns The value `operation` resolves with.
 * @throws Whatever `operation` threw on the final attempt.
 */
async function withRetry<T>(
  operation: () => Promise<T>,
  context: string,
  attempt = 1
): Promise<T> {
  for (let currentAttempt = attempt; ; currentAttempt += 1) {
    try {
      return await operation();
    } catch (error) {
      let rateLimited = false;
      if (error instanceof Error) {
        const { message } = error;
        rateLimited = message.includes('rate limit') || message.includes('429');
      }

      // Not retryable, or out of attempts: surface the original error.
      if (!rateLimited || currentAttempt >= CONFIG.retry.maxAttempts) {
        throw error;
      }

      const uncappedDelay =
        CONFIG.retry.initialDelay *
        CONFIG.retry.backoffFactor ** (currentAttempt - 1);
      const delayMs = Math.min(uncappedDelay, CONFIG.retry.maxDelay);

      server.sendLoggingMessage({
        level: 'warning',
        data: `Rate limit hit for ${context}. Attempt ${currentAttempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`,
      });

      await delay(delayMs);
    }
  }
}

/**
 * Adds `creditsUsed` to the running total and logs the new total, followed
 * by at most one threshold message.
 *
 * NOTE(review): the critical threshold is tested first and both comparisons
 * are `total >= threshold`. With the default configuration (critical 100,
 * warning 1000) the warning message can therefore never be emitted — every
 * total that reaches the warning threshold has already matched the critical
 * branch. Confirm whether the thresholds were meant to describe remaining
 * credits rather than credits used.
 *
 * @param creditsUsed - Credits consumed by the operation that just finished.
 */
async function updateCreditUsage(creditsUsed: number): Promise<void> {
  creditUsage.total += creditsUsed;
  const runningTotal = creditUsage.total;

  // Always report the running total.
  server.sendLoggingMessage({
    level: 'info',
    data: `Credit usage: ${runningTotal} credits used total`,
  });

  // Threshold checks, most severe first.
  if (runningTotal >= CONFIG.credit.criticalThreshold) {
    server.sendLoggingMessage({
      level: 'error',
      data: `CRITICAL: Credit usage has reached ${runningTotal}`,
    });
    return;
  }

  if (runningTotal >= CONFIG.credit.warningThreshold) {
    server.sendLoggingMessage({
      level: 'warning',
      data: `WARNING: Credit usage has reached ${runningTotal}`,
    });
  }
}

/**
 * In-memory record of one batch scrape request. It is created by the
 * `firecrawl_batch_scrape` handler, mutated by `processBatchOperation`, and
 * read back by `firecrawl_check_batch_status`.
 */
interface QueuedBatchOperation {
  /** Locally generated identifier of the form `batch_<n>`. */
  id: string;
  urls: string[];
  /** Scrape options forwarded untyped to `client.asyncBatchScrapeUrls`. */
  options?: any;
  status: 'pending' | 'processing' | 'completed' | 'failed';
  // `total` is set to urls.length on creation; `completed` starts at 0 and is
  // not advanced by any code visible here.
  progress: {
    completed: number;
    total: number;
  };
  /** Raw response of the batch call, stored on success. */
  result?: any;
  /** Error message, stored on failure. */
  error?: string;
}

// Initialize queue system.
// Created with concurrency 1, so queued batch operations are presumably
// executed one at a time (per p-queue's `concurrency` option).
const batchQueue = new PQueue({ concurrency: 1 });
// Registry of batch operations keyed by operation ID; no code visible here
// ever removes an entry.
const batchOperations = new Map<string, QueuedBatchOperation>();
// Incremented for every new operation to mint the `batch_<n>` ID.
let operationCounter = 0;

/**
 * Executes one queued batch operation and records the outcome on it.
 *
 * The operation is marked 'processing', the URLs are submitted through
 * `client.asyncBatchScrapeUrls` (wrapped in `withRetry`), and on success the
 * raw response is stored in `operation.result` with status 'completed'. Any
 * failure is caught: the status becomes 'failed', the message is stored in
 * `operation.error`, and an error is logged. This function never rejects.
 *
 * Credits are tracked only when no custom API URL is configured.
 *
 * NOTE(review): 'completed' is set as soon as the submit call succeeds, and
 * `operation.progress` is never updated here — confirm this is intended.
 *
 * @param operation - The queued operation to run; mutated in place.
 */
async function processBatchOperation(
  operation: QueuedBatchOperation
): Promise<void> {
  const trackCredits = !FIRECRAWL_API_URL;

  try {
    operation.status = 'processing';
    let totalCreditsUsed = 0;

    // Use library's built-in batch processing
    const submitBatch = async () =>
      client.asyncBatchScrapeUrls(operation.urls, operation.options);
    const response = await withRetry(
      submitBatch,
      `batch ${operation.id} processing`
    );

    if (!response.success) {
      throw new Error(response.error || 'Batch operation failed');
    }

    // Track credits if using cloud API
    if (trackCredits && hasCredits(response)) {
      totalCreditsUsed += response.creditsUsed;
      await updateCreditUsage(response.creditsUsed);
    }

    operation.status = 'completed';
    operation.result = response;

    // Log final credit usage for the batch
    if (trackCredits) {
      server.sendLoggingMessage({
        level: 'info',
        data: `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`,
      });
    }
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    operation.status = 'failed';
    operation.error = reason;

    server.sendLoggingMessage({
      level: 'error',
      data: `Batch ${operation.id} failed: ${reason}`,
    });
  }
}

// Tool handlers
// ListTools: advertise every tool descriptor defined above, in a fixed order.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return {
    tools: [
      SCRAPE_TOOL,
      MAP_TOOL,
      CRAWL_TOOL,
      BATCH_SCRAPE_TOOL,
      CHECK_BATCH_STATUS_TOOL,
      CHECK_CRAWL_STATUS_TOOL,
      SEARCH_TOOL,
      EXTRACT_TOOL,
      DEEP_RESEARCH_TOOL,
    ],
  };
});

server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const startTime = Date.now();
  try {
    const { name, arguments: args } = request.params;

    // Log incoming request with timestamp
    server.sendLoggingMessage({
      level: 'info',
      data: `[${new Date().toISOString()}] Received request for tool: ${name}`,
    });

    if (!args) {
      throw new Error('No arguments provided');
    }

    switch (name) {
      case 'firecrawl_scrape': {
        if (!isScrapeOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_scrape');
        }
        const { url, ...options } = args;
        try {
          const scrapeStartTime = Date.now();
          server.sendLoggingMessage({
            level: 'info',
            data: `Starting scrape for URL: ${url} with options: ${JSON.stringify(
              options
            )}`,
          });

          const response = await client.scrapeUrl(url, options);

          // Log performance metrics
          server.sendLoggingMessage({
            level: 'info',
            data: `Scrape completed in ${Date.now() - scrapeStartTime}ms`,
          });

          if ('success' in response && !response.success) {
            throw new Error(response.error || 'Scraping failed');
          }

          
          // Format content based on requested formats
          const contentParts = [];
          
          if (options.formats?.includes('markdown') && response.markdown) {
            contentParts.push(response.markdown);
          }
          if (options.formats?.includes('html') && response.html) {
            contentParts.push(response.html); 
          }
          if (options.formats?.includes('rawHtml') && response.rawHtml) {
            contentParts.push(response.rawHtml);
          }
          if (options.formats?.includes('links') && response.links) {
            contentParts.push(response.links.join('\n'));
          }
          if (options.formats?.includes('screenshot') && response.screenshot) {
            contentParts.push(response.screenshot);
          }
          if (options.formats?.includes('extract') && response.extract) {
            contentParts.push(JSON.stringify(response.extract, null, 2));
          }

          // Add warning to response if present
          if (response.warning) {
            server.sendLoggingMessage({
              level: 'warning', 
              data: response.warning,
            });
          }

          return {
            content: [
              { type: 'text', text: contentParts.join('\n\n') || 'No content available' },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: errorMessage }],
            isError: true,
          };
        }
      }

      case 'firecrawl_map': {
        if (!isMapOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_map');
        }
        const { url, ...options } = args;
        const response = await client.mapUrl(url, options);
        if ('error' in response) {
          throw new Error(response.error);
        }
        if (!response.links) {
          throw new Error('No links received from FireCrawl API');
        }
        return {
          content: [{ type: 'text', text: response.links.join('\n') }],
          isError: false,
        };
      }

      case 'firecrawl_batch_scrape': {
        if (!isBatchScrapeOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_batch_scrape');
        }

        try {
          const operationId = `batch_${++operationCounter}`;
          const operation: QueuedBatchOperation = {
            id: operationId,
            urls: args.urls,
            options: args.options,
            status: 'pending',
            progress: {
              completed: 0,
              total: args.urls.length,
            },
          };

          batchOperations.set(operationId, operation);

          // Queue the operation
          batchQueue.add(() => processBatchOperation(operation));

          server.sendLoggingMessage({
            level: 'info',
            data: `Queued batch operation ${operationId} with ${args.urls.length} URLs`,
          });

          return {
            content: [
              {
                type: 'text',
                text: `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`,
              },
            ],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error
              ? error.message
              : `Batch operation failed: ${JSON.stringify(error)}`;
          return {
            content: [{ type: 'text', text: errorMessage }],
            isError: true,
          };
        }
      }

      case 'firecrawl_check_batch_status': {
        if (!isStatusCheckOptions(args)) {
          throw new Error(
            'Invalid arguments for firecrawl_check_batch_status'
          );
        }

        const operation = batchOperations.get(args.id);
        if (!operation) {
          return {
            content: [
              {
                type: 'text',
                text: `No batch operation found with ID: ${args.id}`,
              },
            ],
            isError: true,
          };
        }

        const status = `Batch Status:
Status: ${operation.status}
Progress: ${operation.progress.completed}/${operation.progress.total}
${operation.error ? `Error: ${operation.error}` : ''}
${
  operation.result
    ? `Results: ${JSON.stringify(operation.result, null, 2)}`
    : ''
}`;

        return {
          content: [{ type: 'text', text: status }],
          isError: false,
        };
      }

      case 'firecrawl_crawl': {
        if (!isCrawlOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_crawl');
        }
        const { url, ...options } = args;

        const response = await withRetry(
          async () => client.asyncCrawlUrl(url, options),
          'crawl operation'
        );

        if (!response.success) {
          throw new Error(response.error);
        }

        // Monitor credits for cloud API
        if (!FIRECRAWL_API_URL && hasCredits(response)) {
          await updateCreditUsage(response.creditsUsed);
        }

        return {
          content: [
            {
              type: 'text',
              text: `Started crawl for ${url} with job ID: ${response.id}`,
            },
          ],
          isError: false,
        };
      }

      case 'firecrawl_check_crawl_status': {
        if (!isStatusCheckOptions(args)) {
          throw new Error(
            'Invalid arguments for firecrawl_check_crawl_status'
          );
        }
        const response = await client.checkCrawlStatus(args.id);
        if (!response.success) {
          throw new Error(response.error);
        }
        const status = `Crawl Status:
Status: ${response.status}
Progress: ${response.completed}/${response.total}
Credits Used: ${response.creditsUsed}
Expires At: ${response.expiresAt}
${
  response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''
}`;
        return {
          content: [{ type: 'text', text: status }],
          isError: false,
        };
      }

      case 'firecrawl_search': {
        if (!isSearchOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_search');
        }
        try {
          const response = await withRetry(
            async () => client.search(args.query, args),
            'search operation'
          );

          if (!response.success) {
            throw new Error(
              `Search failed: ${response.error || 'Unknown error'}`
            );
          }

          // Monitor credits for cloud API
          if (!FIRECRAWL_API_URL && hasCredits(response)) {
            await updateCreditUsage(response.creditsUsed);
          }

          // Format the results
          const results = response.data
            .map(
              (result) =>
                `URL: ${result.url}
Title: ${result.title || 'No title'}
Description: ${result.description || 'No description'}
${result.markdown ? `\nContent:\n${result.markdown}` : ''}`
            )
            .join('\n\n');

          return {
            content: [{ type: 'text', text: results }],
            isError: false,
          };
        } catch (error) {
          const errorMessage =
            error instanceof Error
              ? error.message
              : `Search failed: ${JSON.stringify(error)}`;
          return {
            content: [{ type: 'text', text: errorMessage }],
            isError: true,
          };
        }
      }

      case 'firecrawl_extract': {
        if (!isExtractOptions(args)) {
          throw new Error('Invalid arguments for firecrawl_extract');
        }

        try {
          const extractStartTime = Date.now();

          server.sendLoggingMessage({
            level: 'info',
            data: `Starting extraction for URLs: ${args.urls.join(', ')}`,
          });

          // Log if using self-hosted instance
          if (FIRECRAWL_API_URL) {
            server.sendLoggingMessage({
              level: 'info',
              data: 'Using self-hosted instance for extraction',
            });
          }

          const extractResponse = await withRetry(
            async () =>
              client.extract(args.urls, {
                prompt: args.prompt,
                systemPrompt: args.systemPrompt,
                schema: args.schema,
                allowExternalLinks: args.allowExternalLinks,
                enableWebSearch: args.enableWebSearch,
                includeSubdomains: args.includeSubdomains,
                origin: 'mcp-server',
              } as ExtractParams),
            'extract operation'
          );

          // Type guard for successful response
          if (!('success' in extractResponse) || !extractResponse.success) {
            throw new Error(extractResponse.error || 'Extraction failed');
          }

          const response = extractResponse as ExtractResponse;

          // Monitor credits for cloud API
          if (!FIRECRAWL_API_URL && hasCredits(response)) {
            await updateCreditUsage(response.creditsUsed || 0);
          }

          // Log performance metrics
          server.sendLoggingMessage({
            level: 'info',
            data: `Extraction completed in ${Date.now() - extractStartTime}ms`,
          });

          // Add warning to response if present
          const result = {
            content: [
              {
                type: 'text',
                text: JSON.stringify(response.data, null, 2),
              },
            ],
            isError: false,
          };

          if (response.warning) {
            server.sendLoggingMessage({
              level: 'warning',
              data: response.warning,
            });
          }

          return result;
        } catch (error) {
          const errorMessage =
            error instanceof Error ? error.message : String(error);

          // Special handling for self-hosted instance errors
          if (
            FIRECRAWL_API_URL &&
            errorMessage.toLowerCase().includes('not supported')
          ) {
            server.sendLoggingMessage({
              level: 'error',
              data: 'Extraction is not supported by this self-hosted instance',
            });
            return {
              content: [
                {
                  type: 'text',
                  text: 'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.',
                },
              ],
              isError: true,
            };
          }

          return {
            content: [{ type: 'text', text: errorMessage }],
            isError: true,
          };
        }
      }

      case 'firecrawl_deep_research': {
        if (!args || typeof args !== 'object' || !('query' in args)) {
          throw new Error('Invalid arguments for firecrawl_deep_research');
        }

        try {
          const researchStartTime = Date.now();
          server.sendLoggingMessage({
            level: 'info',
            data: `Starting deep research for query: ${args.query}`,
          });

          const response = await client.deepResearch(
            args.query as string,
            {
              maxDepth: args.maxDepth as number,
              timeLimit: args.timeLimit as number,
              maxUrls: args.maxUrls as number,
            },
            // Activity callback
            (activity) => {
              server.sendLoggingMessage({
                level: 'info',
                data: `Research activity: ${activity.message} (Depth: ${activity.depth})`,
              });
            },
            // Source callback
            (source) => {
              server.sendLoggingMessage({
                level: 'info',
                data: `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`,
              });
            }
          );

          // Log performance metrics
          server.sendLoggingMessage({
            level: 'info',
            data: `Deep research completed in ${Date.now() - researchStartTime}ms`,
          });

          if (!response.success) {
            throw new Error(response.error || 'Deep research failed');
          }

          // Format the results
          const formattedResponse = {
            finalAnalysis: response.data.finalAnalysis,
            activities: response.data.activities,
            sources: response.data.sources,
          };

          return {
            content: [{ type: 'text', text: formattedResponse.finalAnalysis }],
            isError: false,
          };
        } catch (error) {
          const errorMessage = error instanceof Error ? error.message : String(error);
          return {
            content: [{ type: 'text', text: errorMessage }],
            isError: true,
          };
        }
      }

      default:
        return {
          content: [{ type: 'text', text: `Unknown tool: ${name}` }],
          isError: true,
        };
    }
  } catch (error) {
    // Log detailed error information
    server.sendLoggingMessage({
      level: 'error',
      data: {
        message: `Request failed: ${
          error instanceof Error ? error.message : String(error)
        }`,
        tool: request.params.name,
        arguments: request.params.arguments,
        timestamp: new Date().toISOString(),
        duration: Date.now() - startTime,
      },
    });
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${
            error instanceof Error ? error.message : String(error)
          }`,
        },
      ],
      isError: true,
    };
  } finally {
    // Log request completion with performance metrics
    server.sendLoggingMessage({
      level: 'info',
      data: `Request completed in ${Date.now() - startTime}ms`,
    });
  }
});

// Builds a human-readable summary of scraped documents: one entry per
// document (URL, a content preview capped at 100 characters, and the title
// line when metadata provides one), with entries separated by a blank line.
function formatResults(data: FirecrawlDocument[]): string {
  const PREVIEW_LENGTH = 100;
  const entries: string[] = [];

  for (const doc of data) {
    // First non-empty representation wins: markdown, then html, then rawHtml.
    const body = doc.markdown || doc.html || doc.rawHtml || 'No content';
    const preview =
      body.length > PREVIEW_LENGTH
        ? body.substring(0, PREVIEW_LENGTH) + '...'
        : body;
    // The title slot is always emitted; it is simply empty when no title exists,
    // which leaves a trailing newline on that entry.
    const titleLine = doc.metadata?.title
      ? `Title: ${doc.metadata.title}`
      : '';

    entries.push(
      [`URL: ${doc.url || 'Unknown URL'}`, `Content: ${preview}`, titleLine].join(
        '\n'
      )
    );
  }

  return entries.join('\n\n');
}

// Server startup: connects the MCP server to a stdio transport and announces
// readiness. Progress goes to stderr via console.error; logging notifications
// are only sent once the transport is connected. Any failure is logged and
// terminates the process with exit code 1.
async function runServer() {
  try {
    console.error('Initializing FireCrawl MCP Server...');

    await server.connect(new StdioServerTransport());

    // Connected now, so logging messages can be delivered to the client.
    const startupMessages = [
      'FireCrawl MCP Server initialized successfully',
      `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`,
    ];
    for (const data of startupMessages) {
      server.sendLoggingMessage({ level: 'info', data });
    }

    console.error('FireCrawl MCP Server running on stdio');
  } catch (startupError) {
    console.error('Fatal error running server:', startupError);
    process.exit(1);
  }
}

// Entry point. Last-resort handler for any rejection that escapes runServer():
// report it on stderr and exit with a non-zero status.
runServer().catch((fatalError) => {
  console.error('Fatal error running server:', fatalError);
  process.exit(1);
});

// Type guard for credit usage: true only when `response` is a non-null object
// carrying a numeric `creditsUsed` property. The object check comes first
// because the `in` operator throws a TypeError on null, undefined and
// primitives, and a guard should answer `false` for those instead of crashing.
function hasCredits(response: unknown): response is { creditsUsed: number } {
  if (typeof response !== 'object' || response === null) {
    return false;
  }
  return (
    'creditsUsed' in response &&
    typeof (response as { creditsUsed: unknown }).creditsUsed === 'number'
  );
}