mcp-server-firecrawl
by vrknetha
fire_crawl_scrape
Scrape a single webpage with advanced options for content extraction. Supports various formats including markdown, HTML, and screenshots. Can execute custom actions like clicking or scrolling before scraping.
Input Schema
Name | Required | Description | Default |
---|---|---|---|
actions | No | List of actions to perform before scraping | |
excludeTags | No | HTML tags to exclude from extraction | |
extract | No | Configuration for structured data extraction | |
formats | No | Content formats to extract | ['markdown'] |
includeTags | No | HTML tags to specifically include in extraction | |
location | No | Location settings for scraping | |
mobile | No | Use mobile viewport | |
onlyMainContent | No | Extract only the main content, filtering out navigation, footers, etc. | |
removeBase64Images | No | Remove base64 encoded images from output | |
skipTlsVerification | No | Skip TLS certificate verification | |
timeout | No | Maximum time in milliseconds to wait for the page to load | |
url | Yes | The URL to scrape | |
waitFor | No | Time in milliseconds to wait for dynamic content to load | |
Input Schema (JSON Schema)
{
"properties": {
"actions": {
"description": "List of actions to perform before scraping",
"items": {
"properties": {
"direction": {
"description": "Scroll direction",
"enum": [
"up",
"down"
],
"type": "string"
},
"fullPage": {
"description": "Take full page screenshot",
"type": "boolean"
},
"key": {
"description": "Key to press (for press action)",
"type": "string"
},
"milliseconds": {
"description": "Time to wait in milliseconds (for wait action)",
"type": "number"
},
"script": {
"description": "JavaScript code to execute",
"type": "string"
},
"selector": {
"description": "CSS selector for the target element",
"type": "string"
},
"text": {
"description": "Text to write (for write action)",
"type": "string"
},
"type": {
"description": "Type of action to perform",
"enum": [
"wait",
"click",
"screenshot",
"write",
"press",
"scroll",
"scrape",
"executeJavascript"
],
"type": "string"
}
},
"required": [
"type"
],
"type": "object"
},
"type": "array"
},
"excludeTags": {
"description": "HTML tags to exclude from extraction",
"items": {
"type": "string"
},
"type": "array"
},
"extract": {
"description": "Configuration for structured data extraction",
"properties": {
"prompt": {
"description": "User prompt for LLM extraction",
"type": "string"
},
"schema": {
"description": "Schema for structured data extraction",
"type": "object"
},
"systemPrompt": {
"description": "System prompt for LLM extraction",
"type": "string"
}
},
"type": "object"
},
"formats": {
"description": "Content formats to extract (default: ['markdown'])",
"items": {
"enum": [
"markdown",
"html",
"rawHtml",
"screenshot",
"links",
"screenshot@fullPage",
"extract"
],
"type": "string"
},
"type": "array"
},
"includeTags": {
"description": "HTML tags to specifically include in extraction",
"items": {
"type": "string"
},
"type": "array"
},
"location": {
"description": "Location settings for scraping",
"properties": {
"country": {
"description": "Country code for geolocation",
"type": "string"
},
"languages": {
"description": "Language codes for content",
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"mobile": {
"description": "Use mobile viewport",
"type": "boolean"
},
"onlyMainContent": {
"description": "Extract only the main content, filtering out navigation, footers, etc.",
"type": "boolean"
},
"removeBase64Images": {
"description": "Remove base64 encoded images from output",
"type": "boolean"
},
"skipTlsVerification": {
"description": "Skip TLS certificate verification",
"type": "boolean"
},
"timeout": {
"description": "Maximum time in milliseconds to wait for the page to load",
"type": "number"
},
"url": {
"description": "The URL to scrape",
"type": "string"
},
"waitFor": {
"description": "Time in milliseconds to wait for dynamic content to load",
"type": "number"
}
},
"required": [
"url"
],
"type": "object"
}