#!/usr/bin/env node
/**
* Firecrawl Agent MCP Server
* Provides AI-powered web data extraction through Firecrawl's Agent API
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema,
Tool,
} from '@modelcontextprotocol/sdk/types.js';
import express from 'express';
import * as dotenv from 'dotenv';
import { FirecrawlClient } from './services/firecrawl-client.js';
import { z } from 'zod';
// Load environment variables from a local .env file (no-op if absent).
dotenv.config();
// Validate required environment variables before doing anything else:
// the server is useless without an API key, so fail fast with a clear
// message on stderr (stdout is reserved for the stdio MCP transport).
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
if (!FIRECRAWL_API_KEY) {
  console.error('Error: FIRECRAWL_API_KEY environment variable is required');
  process.exit(1);
}
// Optional override of the Firecrawl API base URL (e.g. for self-hosted
// deployments); undefined lets the client fall back to its default.
const FIRECRAWL_API_BASE = process.env.FIRECRAWL_API_BASE;
// Initialize the shared Firecrawl client used by every tool handler below.
const firecrawl = new FirecrawlClient({
  apiKey: FIRECRAWL_API_KEY,
  apiBase: FIRECRAWL_API_BASE,
});
/**
 * Initialize the MCP server.
 *
 * Declares only the `tools` capability; the actual tool list and dispatch
 * are registered via request handlers further down in this file.
 */
const server = new Server(
  {
    name: 'firecrawl-agent',
    version: '1.0.0',
  },
  {
    capabilities: {
      tools: {},
    },
  }
);
/**
 * Tool Definitions
 */
// Input-schema properties shared verbatim by `agent_execute` and
// `agent_start` — both tools accept the exact same optional knobs.
const sharedAgentProperties = {
  urls: {
    type: 'array',
    items: { type: 'string' },
    description:
      'Optional: Specific URLs to search. If not provided, agent will search the web.',
  },
  schema: {
    type: 'object',
    description:
      'Optional: JSON schema for structured output. Define the exact structure you want the data returned in.',
  },
  maxCredits: {
    type: 'number',
    description:
      'Optional: Maximum credits to spend on this request. Use to control costs.',
  },
};

/** Tools advertised to MCP clients via the ListTools request. */
const TOOLS: Tool[] = [
  {
    name: 'agent_execute',
    description:
      'Execute Firecrawl Agent to search, navigate, and gather data from the web. The agent autonomously finds and extracts information based on your prompt. Waits for completion and returns results. Use this for immediate results.',
    inputSchema: {
      type: 'object',
      properties: {
        prompt: {
          type: 'string',
          description:
            'Describe what data you want to extract. Be specific about what information you need. Examples: "Find the founders of Anthropic", "Get pricing information for Claude API", "Extract contact emails from YCombinator companies"',
        },
        ...sharedAgentProperties,
      },
      required: ['prompt'],
    },
  },
  {
    name: 'agent_start',
    description:
      'Start a Firecrawl Agent job asynchronously. Returns a job ID immediately without waiting for completion. Use this for long-running research tasks. Poll with agent_status to check progress.',
    inputSchema: {
      type: 'object',
      properties: {
        prompt: {
          type: 'string',
          description:
            'Describe what data you want to extract. Be specific about what information you need.',
        },
        ...sharedAgentProperties,
      },
      required: ['prompt'],
    },
  },
  {
    name: 'agent_status',
    description:
      'Check the status of an asynchronous Firecrawl Agent job. Returns current status, progress, and results if completed. Job results are available for 24 hours after completion.',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: {
          type: 'string',
          description: 'The job ID returned from agent_start',
        },
      },
      required: ['jobId'],
    },
  },
  {
    name: 'scrape',
    description:
      'Scrape a single URL and extract content in various formats (markdown, html, links, screenshot). Use this for simple single-page scraping without AI agent capabilities.',
    inputSchema: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: 'The URL to scrape',
        },
        formats: {
          type: 'array',
          items: {
            type: 'string',
            enum: ['markdown', 'html', 'rawHtml', 'links', 'screenshot'],
          },
          description:
            'Output formats to return. Default: ["markdown"]. Can request multiple formats.',
        },
        onlyMainContent: {
          type: 'boolean',
          description:
            'Extract only main content, removing headers, footers, nav, etc. Default: true',
        },
        includeTags: {
          type: 'array',
          items: { type: 'string' },
          description: 'HTML tags to include (e.g., ["article", "main"])',
        },
        excludeTags: {
          type: 'array',
          items: { type: 'string' },
          description: 'HTML tags to exclude (e.g., ["nav", "footer"])',
        },
        waitFor: {
          type: 'number',
          description: 'Milliseconds to wait before scraping (for JS rendering)',
        },
        timeout: {
          type: 'number',
          description: 'Request timeout in milliseconds',
        },
      },
      required: ['url'],
    },
  },
  {
    name: 'search',
    description:
      'Search the web and scrape the results. Returns scraped content from multiple search results. Use this for finding and extracting data from multiple sources at once.',
    inputSchema: {
      type: 'object',
      properties: {
        query: {
          type: 'string',
          description: 'Search query (e.g., "best AI tools 2025")',
        },
        limit: {
          type: 'number',
          description: 'Maximum number of results to return. Default: 5',
        },
        formats: {
          type: 'array',
          items: {
            type: 'string',
            enum: ['markdown', 'html', 'rawHtml', 'links'],
          },
          description: 'Output formats for each result. Default: ["markdown"]',
        },
      },
      required: ['query'],
    },
  },
];
/**
 * Tool Handlers
 */
// Advertise the static tool list to clients.
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
/** Build an error result in the MCP tool-response shape. */
function errorResult(message: string) {
  return {
    content: [{ type: 'text' as const, text: message }],
    isError: true,
  };
}

/** Build a success result whose payload is pretty-printed JSON text. */
function jsonResult(payload: unknown) {
  return {
    content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
  };
}

/**
 * Dispatch tool calls to the Firecrawl client.
 *
 * Each case narrows `args` to the shape declared in the matching entry of
 * TOOLS, forwards the call, and wraps the outcome in the MCP content shape.
 * Any thrown error (network failure, malformed args, etc.) is converted to
 * an isError response rather than crashing the server.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  try {
    switch (name) {
      case 'agent_execute': {
        // `arguments` is optional in the MCP request schema; default to {}
        // so a missing-args call fails with a clear API error, not a
        // destructuring TypeError.
        const { prompt, urls, schema, maxCredits } = (args ?? {}) as {
          prompt: string;
          urls?: string[];
          schema?: Record<string, unknown>;
          maxCredits?: number;
        };
        const result = await firecrawl.executeAgent({
          prompt,
          urls,
          schema,
          maxCredits,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          data: result.data,
          creditsUsed: result.creditsUsed,
        });
      }
      case 'agent_start': {
        const { prompt, urls, schema, maxCredits } = (args ?? {}) as {
          prompt: string;
          urls?: string[];
          schema?: Record<string, unknown>;
          maxCredits?: number;
        };
        const result = await firecrawl.startAgent({
          prompt,
          urls,
          schema,
          maxCredits,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          jobId: result.id,
          message:
            'Agent job started. Use agent_status with this jobId to check progress.',
        });
      }
      case 'agent_status': {
        const { jobId } = (args ?? {}) as { jobId: string };
        // Status responses are returned verbatim (including failure states),
        // so no success check is needed here.
        const result = await firecrawl.getAgentStatus(jobId);
        return jsonResult(result);
      }
      case 'scrape': {
        const {
          url,
          formats,
          onlyMainContent,
          includeTags,
          excludeTags,
          waitFor,
          timeout,
        } = (args ?? {}) as {
          url: string;
          formats?: ('markdown' | 'html' | 'rawHtml' | 'links' | 'screenshot')[];
          onlyMainContent?: boolean;
          includeTags?: string[];
          excludeTags?: string[];
          waitFor?: number;
          timeout?: number;
        };
        const result = await firecrawl.scrape({
          url,
          formats,
          onlyMainContent,
          includeTags,
          excludeTags,
          waitFor,
          timeout,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          data: result.data,
        });
      }
      case 'search': {
        const { query, limit, formats } = (args ?? {}) as {
          query: string;
          limit?: number;
          formats?: ('markdown' | 'html' | 'rawHtml' | 'links')[];
        };
        const result = await firecrawl.search({
          query,
          limit,
          formats,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          data: result.data,
        });
      }
      default:
        return errorResult(`Unknown tool: ${name}`);
    }
  } catch (error) {
    return errorResult(
      `Error executing tool: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  }
});
/**
 * Start the server.
 *
 * The transport is selected via the TRANSPORT env var: 'sse' serves an
 * HTTP/SSE endpoint through Express; anything else (the default) speaks
 * MCP over stdio. All human-readable logging goes to stderr because
 * stdout belongs to the stdio transport.
 */
async function main(): Promise<void> {
  const transportKind = process.env.TRANSPORT || 'stdio';
  if (transportKind === 'sse') {
    // SSE transport for HTTP-based communication.
    const app = express();
    const port = parseInt(process.env.PORT || '3000', 10);
    // Live transports keyed by session ID so that client POSTs can be
    // routed back to the SSE connection that owns the session.
    const transports = new Map<string, SSEServerTransport>();
    app.get('/sse', async (_req, res) => {
      const transport = new SSEServerTransport('/message', res);
      transports.set(transport.sessionId, transport);
      // Drop the session when the SSE stream closes to avoid leaking
      // transports for disconnected clients.
      res.on('close', () => {
        transports.delete(transport.sessionId);
      });
      await server.connect(transport);
    });
    app.post('/message', async (req, res) => {
      // Fix: previously this replied 200 without delivering the message,
      // so SSE clients could never reach the server. Route the body to
      // the transport identified by the sessionId query parameter.
      const sessionId = req.query.sessionId as string | undefined;
      const transport = sessionId ? transports.get(sessionId) : undefined;
      if (!transport) {
        res.status(400).send('No active SSE session for the given sessionId');
        return;
      }
      await transport.handlePostMessage(req, res);
    });
    app.listen(port, () => {
      console.error(`Firecrawl Agent MCP server running on port ${port}`);
      console.error(`SSE endpoint: http://localhost:${port}/sse`);
    });
  } else {
    // stdio transport for direct process communication.
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error('Firecrawl Agent MCP server running on stdio');
  }
}
// Bootstrap: run the server; any unrecoverable startup error is fatal.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});