[
{
"name": "scrape_website",
"description": "Extract content from a single URL. Supports multiple formats and JavaScript rendering.",
"arguments": [
{
"name": "url_to_scrape",
"type": "string",
"desc": "The URL of the website you want to scrape"
},
{
"name": "output_format",
"type": "string",
"desc": "Choose format: html, markdown, json, or text. Default: markdown",
"optional": true
},
{
"name": "country",
"type": "string",
"desc": "Optional country code (e.g., US, GB, CA) for location-specific scraping",
"optional": true
},
{
"name": "wait_before_scraping",
"type": "number",
"desc": "Wait time in milliseconds before scraping (0-10000)",
"optional": true
},
{
"name": "parser",
"type": "string",
"desc": "Optional parser ID for specialized extraction (e.g., @olostep/amazon-product)",
"optional": true
}
]
},
{
"name": "search_web",
"description": "Search the web for a given query and return structured results (non-AI, parser-based)",
"arguments": [
{
"name": "query",
"type": "string",
"desc": "Search query"
},
{
"name": "country",
"type": "string",
"desc": "Optional country code for localized results (e.g., US, GB)",
"optional": true
}
]
},
{
"name": "answers",
"description": "Search the web and return AI-powered answers in the JSON structure you want, with sources and citations",
"arguments": [
{
"name": "task",
"type": "string",
"desc": "Question or task to answer using web data"
},
{
"name": "json",
"type": "string",
"desc": "Optional JSON schema or JSON object, or a text description, specifying the desired output shape",
"optional": true
}
]
},
{
"name": "batch_scrape_urls",
"description": "Scrape up to 10,000 URLs at the same time. Perfect for large-scale data extraction",
"arguments": [
{
"name": "urls_to_scrape",
"type": "array",
"desc": "JSON array of objects, each with a url and an optional custom_id"
},
{
"name": "output_format",
"type": "string",
"desc": "Choose format for all URLs. Default: markdown",
"optional": true
},
{
"name": "country",
"type": "string",
"desc": "Optional country code for location-specific scraping",
"optional": true
},
{
"name": "wait_before_scraping",
"type": "number",
"desc": "Wait time in milliseconds before scraping each URL",
"optional": true
},
{
"name": "parser",
"type": "string",
"desc": "Optional parser ID for specialized extraction",
"optional": true
}
]
},
{
"name": "create_crawl",
"description": "Autonomously discover and scrape entire websites by following links from a start URL",
"arguments": [
{
"name": "start_url",
"type": "string",
"desc": "Starting URL for the crawl"
},
{
"name": "max_pages",
"type": "number",
"desc": "Maximum number of pages to crawl",
"optional": true
},
{
"name": "follow_links",
"type": "boolean",
"desc": "Whether to follow links found on pages",
"optional": true
},
{
"name": "output_format",
"type": "string",
"desc": "Format for scraped content. Default: markdown",
"optional": true
},
{
"name": "country",
"type": "string",
"desc": "Optional country code for location-specific crawling",
"optional": true
},
{
"name": "parser",
"type": "string",
"desc": "Optional parser ID for specialized content extraction",
"optional": true
}
]
},
{
"name": "create_map",
"description": "Get all URLs on a website. Extract URLs for discovery and site analysis",
"arguments": [
{
"name": "website_url",
"type": "string",
"desc": "Website URL to extract links from"
},
{
"name": "search_query",
"type": "string",
"desc": "Optional search query to filter URLs",
"optional": true
},
{
"name": "top_n",
"type": "number",
"desc": "Optional limit for number of URLs returned",
"optional": true
},
{
"name": "include_url_patterns",
"type": "array",
"desc": "Optional glob patterns to include (e.g., /blog/**)",
"optional": true
},
{
"name": "exclude_url_patterns",
"type": "array",
"desc": "Optional glob patterns to exclude (e.g., /admin/**)",
"optional": true
}
]
},
{
"name": "get_webpage_content",
"description": "Retrieve content of a webpage in markdown format",
"arguments": [
{
"name": "url_to_scrape",
"type": "string",
"desc": "The URL of the webpage to scrape"
},
{
"name": "wait_before_scraping",
"type": "number",
"desc": "Time to wait in milliseconds before starting the scrape",
"optional": true
},
{
"name": "country",
"type": "string",
"desc": "Residential country to load the request from (e.g., US, CA, GB)",
"optional": true
}
]
},
{
"name": "get_website_urls",
"description": "Search and retrieve relevant URLs from a website",
"arguments": [
{
"name": "url",
"type": "string",
"desc": "The URL of the website to map"
},
{
"name": "search_query",
"type": "string",
"desc": "The search query to sort URLs by"
}
]
},
{
"name": "google_search",
"description": "Retrieve structured data from Google search results",
"arguments": [
{
"name": "query",
"type": "string",
"desc": "The search query to perform"
},
{
"name": "country",
"type": "string",
"desc": "Country code for localized results (e.g., US, GB)",
"optional": true
}
]
}
]