#!/usr/bin/env node
/**
* Firecrawl Agent MCP Server
* Provides AI-powered web data extraction through Firecrawl's Agent API
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import {
CallToolRequestSchema,
ListToolsRequestSchema,
Tool,
} from '@modelcontextprotocol/sdk/types.js';
import express from 'express';
import * as dotenv from 'dotenv';
import { FirecrawlClient } from './services/firecrawl-client.js';
import { z } from 'zod';
// Load environment variables from a local .env file (no-op if absent).
dotenv.config();
// Validate required environment variables before doing anything else:
// the server is useless without an API key, so fail fast with a clear
// message on stderr (stdout is reserved for the stdio MCP transport).
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
if (!FIRECRAWL_API_KEY) {
  console.error('Error: FIRECRAWL_API_KEY environment variable is required');
  process.exit(1);
}
// Optional override of the Firecrawl API base URL (e.g. for self-hosted
// deployments); undefined lets the client fall back to its default.
const FIRECRAWL_API_BASE = process.env.FIRECRAWL_API_BASE;
// Initialize the shared Firecrawl client used by every tool handler below.
const firecrawl = new FirecrawlClient({
  apiKey: FIRECRAWL_API_KEY,
  apiBase: FIRECRAWL_API_BASE,
});
/**
 * Initialize the MCP server.
 *
 * Declares only the `tools` capability; the actual tool list and dispatch
 * are registered via request handlers further down in this file.
 */
const server = new Server(
  {
    name: 'firecrawl-agent',
    version: '1.0.0',
  },
  {
    capabilities: {
      tools: {},
    },
  }
);
/**
 * Tool Definitions
 */
// Input-schema properties shared verbatim by `agent_execute` and
// `agent_start` — both tools accept the exact same optional knobs.
const sharedAgentProperties = {
  urls: {
    type: 'array',
    items: { type: 'string' },
    description:
      'Optional: Specific URLs to search. If not provided, agent will search the web.',
  },
  schema: {
    type: 'object',
    description:
      'Optional: JSON schema for structured output. Define the exact structure you want the data returned in.',
  },
  maxCredits: {
    type: 'number',
    description:
      'Optional: Maximum credits to spend on this request. Use to control costs.',
  },
};

/** Tools advertised to MCP clients via the ListTools request. */
const TOOLS: Tool[] = [
  {
    name: 'agent_execute',
    description:
      'Execute Firecrawl Agent to search, navigate, and gather data from the web. The agent autonomously finds and extracts information based on your prompt. Waits for completion and returns results. Use this for immediate results.',
    inputSchema: {
      type: 'object',
      properties: {
        prompt: {
          type: 'string',
          description:
            'Describe what data you want to extract. Be specific about what information you need. Examples: "Find the founders of Anthropic", "Get pricing information for Claude API", "Extract contact emails from YCombinator companies"',
        },
        ...sharedAgentProperties,
      },
      required: ['prompt'],
    },
  },
  {
    name: 'agent_start',
    description:
      'Start a Firecrawl Agent job asynchronously. Returns a job ID immediately without waiting for completion. Use this for long-running research tasks. Poll with agent_status to check progress.',
    inputSchema: {
      type: 'object',
      properties: {
        prompt: {
          type: 'string',
          description:
            'Describe what data you want to extract. Be specific about what information you need.',
        },
        ...sharedAgentProperties,
      },
      required: ['prompt'],
    },
  },
  {
    name: 'agent_status',
    description:
      'Check the status of an asynchronous Firecrawl Agent job. Returns current status, progress, and results if completed. Job results are available for 24 hours after completion.',
    inputSchema: {
      type: 'object',
      properties: {
        jobId: {
          type: 'string',
          description: 'The job ID returned from agent_start',
        },
      },
      required: ['jobId'],
    },
  },
  {
    name: 'scrape',
    description:
      'Scrape a single URL and extract content in various formats (markdown, html, links, screenshot). Use this for simple single-page scraping without AI agent capabilities.',
    inputSchema: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description: 'The URL to scrape',
        },
        formats: {
          type: 'array',
          items: {
            type: 'string',
            enum: ['markdown', 'html', 'rawHtml', 'links', 'screenshot'],
          },
          description:
            'Output formats to return. Default: ["markdown"]. Can request multiple formats.',
        },
        onlyMainContent: {
          type: 'boolean',
          description:
            'Extract only main content, removing headers, footers, nav, etc. Default: true',
        },
        includeTags: {
          type: 'array',
          items: { type: 'string' },
          description: 'HTML tags to include (e.g., ["article", "main"])',
        },
        excludeTags: {
          type: 'array',
          items: { type: 'string' },
          description: 'HTML tags to exclude (e.g., ["nav", "footer"])',
        },
        waitFor: {
          type: 'number',
          description: 'Milliseconds to wait before scraping (for JS rendering)',
        },
        timeout: {
          type: 'number',
          description: 'Request timeout in milliseconds',
        },
      },
      required: ['url'],
    },
  },
  {
    name: 'search',
    description:
      'Search the web and scrape the results. Returns scraped content from multiple search results. Use this for finding and extracting data from multiple sources at once.',
    inputSchema: {
      type: 'object',
      properties: {
        query: {
          type: 'string',
          description: 'Search query (e.g., "best AI tools 2025")',
        },
        limit: {
          type: 'number',
          description: 'Maximum number of results to return. Default: 5',
        },
        formats: {
          type: 'array',
          items: {
            type: 'string',
            enum: ['markdown', 'html', 'rawHtml', 'links'],
          },
          description: 'Output formats for each result. Default: ["markdown"]',
        },
      },
      required: ['query'],
    },
  },
];
/**
 * Tool Handlers
 */
// Advertise the static tool list to clients.
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
/** Build an error result in the MCP tool-response shape. */
function errorResult(message: string) {
  return {
    content: [{ type: 'text' as const, text: message }],
    isError: true,
  };
}

/** Build a success result whose payload is pretty-printed JSON text. */
function jsonResult(payload: unknown) {
  return {
    content: [{ type: 'text' as const, text: JSON.stringify(payload, null, 2) }],
  };
}

/**
 * Dispatch tool calls to the Firecrawl client.
 *
 * Each case narrows `args` to the shape declared in the matching entry of
 * TOOLS, forwards the call, and wraps the outcome in the MCP content shape.
 * Any thrown error (network failure, malformed args, etc.) is converted to
 * an isError response rather than crashing the server.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  try {
    switch (name) {
      case 'agent_execute': {
        // `arguments` is optional in the MCP request schema; default to {}
        // so a missing-args call fails with a clear API error, not a
        // destructuring TypeError.
        const { prompt, urls, schema, maxCredits } = (args ?? {}) as {
          prompt: string;
          urls?: string[];
          schema?: Record<string, unknown>;
          maxCredits?: number;
        };
        const result = await firecrawl.executeAgent({
          prompt,
          urls,
          schema,
          maxCredits,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          data: result.data,
          creditsUsed: result.creditsUsed,
        });
      }
      case 'agent_start': {
        const { prompt, urls, schema, maxCredits } = (args ?? {}) as {
          prompt: string;
          urls?: string[];
          schema?: Record<string, unknown>;
          maxCredits?: number;
        };
        const result = await firecrawl.startAgent({
          prompt,
          urls,
          schema,
          maxCredits,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          jobId: result.id,
          message:
            'Agent job started. Use agent_status with this jobId to check progress.',
        });
      }
      case 'agent_status': {
        const { jobId } = (args ?? {}) as { jobId: string };
        // Status responses are returned verbatim (including failure states),
        // so no success check is needed here.
        const result = await firecrawl.getAgentStatus(jobId);
        return jsonResult(result);
      }
      case 'scrape': {
        const {
          url,
          formats,
          onlyMainContent,
          includeTags,
          excludeTags,
          waitFor,
          timeout,
        } = (args ?? {}) as {
          url: string;
          formats?: ('markdown' | 'html' | 'rawHtml' | 'links' | 'screenshot')[];
          onlyMainContent?: boolean;
          includeTags?: string[];
          excludeTags?: string[];
          waitFor?: number;
          timeout?: number;
        };
        const result = await firecrawl.scrape({
          url,
          formats,
          onlyMainContent,
          includeTags,
          excludeTags,
          waitFor,
          timeout,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          data: result.data,
        });
      }
      case 'search': {
        const { query, limit, formats } = (args ?? {}) as {
          query: string;
          limit?: number;
          formats?: ('markdown' | 'html' | 'rawHtml' | 'links')[];
        };
        const result = await firecrawl.search({
          query,
          limit,
          formats,
        });
        if (!result.success) {
          return errorResult(`Error: ${result.error}`);
        }
        return jsonResult({
          success: true,
          data: result.data,
        });
      }
      default:
        return errorResult(`Unknown tool: ${name}`);
    }
  } catch (error) {
    return errorResult(
      `Error executing tool: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  }
});
/**
 * Start the server.
 *
 * The transport is selected via the TRANSPORT env var: 'sse' serves an
 * HTTP/SSE endpoint through Express; anything else (the default) speaks
 * MCP over stdio. All human-readable logging goes to stderr because
 * stdout belongs to the stdio transport.
 */
async function main(): Promise<void> {
  const transportKind = process.env.TRANSPORT || 'stdio';
  if (transportKind === 'sse') {
    // SSE transport for HTTP-based communication.
    const app = express();
    const port = parseInt(process.env.PORT || '3000', 10);
    // Live transports keyed by session ID so that client POSTs can be
    // routed back to the SSE connection that owns the session.
    const transports = new Map<string, SSEServerTransport>();
    app.get('/sse', async (_req, res) => {
      const transport = new SSEServerTransport('/message', res);
      transports.set(transport.sessionId, transport);
      // Drop the session when the SSE stream closes to avoid leaking
      // transports for disconnected clients.
      res.on('close', () => {
        transports.delete(transport.sessionId);
      });
      await server.connect(transport);
    });
    app.post('/message', async (req, res) => {
      // Fix: previously this replied 200 without delivering the message,
      // so SSE clients could never reach the server. Route the body to
      // the transport identified by the sessionId query parameter.
      const sessionId = req.query.sessionId as string | undefined;
      const transport = sessionId ? transports.get(sessionId) : undefined;
      if (!transport) {
        res.status(400).send('No active SSE session for the given sessionId');
        return;
      }
      await transport.handlePostMessage(req, res);
    });
    app.listen(port, () => {
      console.error(`Firecrawl Agent MCP server running on port ${port}`);
      console.error(`SSE endpoint: http://localhost:${port}/sse`);
    });
  } else {
    // stdio transport for direct process communication.
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error('Firecrawl Agent MCP server running on stdio');
  }
}
// Bootstrap: run the server; any unrecoverable startup error is fatal.
main().catch((err) => {
  console.error('Fatal error:', err);
  process.exit(1);
});