Deep Research MCP

by ali-kh7

deep-research-tool

Conduct comprehensive web research using Tavily Search and Crawl, generating aggregated JSON data with detailed findings, search summaries, and markdown formatting instructions.

Instructions

Performs extensive web research using Tavily Search and Crawl. Returns aggregated JSON data including the query, search summary (if any), detailed research findings, and documentation instructions. The documentation instructions will guide you on how the user wants the research data to be formatted into markdown.

Input Schema

| Name | Required | Description | Default |
| --- | --- | --- | --- |
| chunks_per_source | No | For 'advanced' search: number of content chunks from each source (1-3). | |
| crawl_allow_external | No | Allow crawler to follow links to external domains. | |
| crawl_categories | No | Filter crawl URLs by categories (e.g., 'Blog', 'Documentation'). | |
| crawl_exclude_domains | No | Regex for domains/subdomains to exclude. | |
| crawl_exclude_paths | No | Regex for URL paths to exclude. | |
| crawl_extract_depth | No | Extraction depth for crawl ('basic' or 'advanced'). | basic |
| crawl_include_images | No | Extract image URLs from crawled pages. | |
| crawl_instructions | No | Natural language instructions for the crawler. | |
| crawl_limit | No | Total links crawler will process per root URL (1-20). | |
| crawl_max_breadth | No | Max links to follow per page level during crawl (1-10). | |
| crawl_max_depth | No | Max crawl depth from base URL (1-2). Higher values increase processing time significantly. | |
| crawl_select_domains | No | Regex for domains/subdomains to crawl (e.g., '^docs\.example\.com$'). Overrides auto-domain focus. | |
| crawl_select_paths | No | Regex for URLs paths to crawl (e.g., '/docs/.*'). | |
| crawl_timeout | No | Timeout in seconds for Tavily crawl requests. | |
| days | No | For 'news' topic: number of days back from current date to include results. | |
| documentation_prompt | No | Optional. Custom prompt for LLM documentation generation. Overrides 'DOCUMENTATION_PROMPT' env var and default. If none set, a comprehensive default is used. | |
| exclude_domains_search | No | List of domains to specifically exclude from search. | |
| hardware_acceleration | No | Try to use hardware acceleration (WebGPU) if available. | |
| include_answer | No | Include an LLM-generated answer from Tavily search (true implies 'basic'). | |
| include_domains_search | No | List of domains to specifically include in search. | |
| include_raw_content_search | No | Include cleaned HTML from initial search results. | |
| include_search_image_descriptions | No | Include image descriptions from initial search results. | |
| include_search_images | No | Include image URLs from initial search results. | |
| max_search_results | No | Max search results to retrieve for crawling (1-20). | |
| output_path | No | Optional. Path where generated research documents and images should be saved. If not provided, a default path in user's Documents folder with timestamp will be used. | |
| query | Yes | The main research topic or question. | |
| search_depth | No | Depth of the initial Tavily search ('basic' or 'advanced'). | advanced |
| search_timeout | No | Timeout in seconds for Tavily search requests. | |
| time_range | No | Time range for search results (e.g., 'd' for day, 'w' for week, 'm' for month, 'y' for year). | |
| topic | No | Category for the Tavily search ('general' or 'news'). | general |

Input Schema (JSON Schema)

{ "properties": { "chunks_per_source": { "default": 3, "description": "For 'advanced' search: number of content chunks from each source (1-3).", "maximum": 3, "minimum": 1, "type": "number" }, "crawl_allow_external": { "default": false, "description": "Allow crawler to follow links to external domains.", "type": "boolean" }, "crawl_categories": { "default": [], "description": "Filter crawl URLs by categories (e.g., 'Blog', 'Documentation').", "items": { "type": "string" }, "type": "array" }, "crawl_exclude_domains": { "default": [], "description": "Regex for domains/subdomains to exclude.", "items": { "type": "string" }, "type": "array" }, "crawl_exclude_paths": { "default": [], "description": "Regex for URL paths to exclude.", "items": { "type": "string" }, "type": "array" }, "crawl_extract_depth": { "default": "basic", "description": "Extraction depth for crawl ('basic' or 'advanced').", "enum": [ "basic", "advanced" ], "type": "string" }, "crawl_include_images": { "default": false, "description": "Extract image URLs from crawled pages.", "type": "boolean" }, "crawl_instructions": { "description": "Natural language instructions for the crawler.", "type": "string" }, "crawl_limit": { "default": 10, "description": "Total links crawler will process per root URL (1-20).", "type": "number" }, "crawl_max_breadth": { "default": 10, "description": "Max links to follow per page level during crawl (1-10).", "type": "number" }, "crawl_max_depth": { "default": 1, "description": "Max crawl depth from base URL (1-2). Higher values increase processing time significantly.", "type": "number" }, "crawl_select_domains": { "default": [], "description": "Regex for domains/subdomains to crawl (e.g., '^docs\\.example\\.com$'). Overrides auto-domain focus.", "items": { "type": "string" }, "type": "array" }, "crawl_select_paths": { "default": [], "description": "Regex for URLs paths to crawl (e.g., '/docs/.*').", "items": { "type": "string" }, "type": "array" }, "crawl_timeout": { "default": 180, "description": "Timeout in seconds for Tavily crawl requests.", "type": "number" }, "days": { "description": "For 'news' topic: number of days back from current date to include results.", "type": "number" }, "documentation_prompt": { "description": "Optional. Custom prompt for LLM documentation generation. Overrides 'DOCUMENTATION_PROMPT' env var and default. If none set, a comprehensive default is used.", "type": "string" }, "exclude_domains_search": { "default": [], "description": "List of domains to specifically exclude from search.", "items": { "type": "string" }, "type": "array" }, "hardware_acceleration": { "default": false, "description": "Try to use hardware acceleration (WebGPU) if available.", "type": "boolean" }, "include_answer": { "anyOf": [ { "type": "boolean" }, { "enum": [ "basic", "advanced" ], "type": "string" } ], "default": false, "description": "Include an LLM-generated answer from Tavily search (true implies 'basic')."
}, "include_domains_search": { "default": [], "description": "List of domains to specifically include in search.", "items": { "type": "string" }, "type": "array" }, "include_raw_content_search": { "default": false, "description": "Include cleaned HTML from initial search results.", "type": "boolean" }, "include_search_image_descriptions": { "default": false, "description": "Include image descriptions from initial search results.", "type": "boolean" }, "include_search_images": { "default": false, "description": "Include image URLs from initial search results.", "type": "boolean" }, "max_search_results": { "default": 7, "description": "Max search results to retrieve for crawling (1-20).", "maximum": 20, "minimum": 1, "type": "number" }, "output_path": { "description": "Optional. Path where generated research documents and images should be saved. If not provided, a default path in user's Documents folder with timestamp will be used.", "type": "string" }, "query": { "description": "The main research topic or question.", "type": "string" }, "search_depth": { "default": "advanced", "description": "Depth of the initial Tavily search ('basic' or 'advanced').", "enum": [ "basic", "advanced" ], "type": "string" }, "search_timeout": { "default": 60, "description": "Timeout in seconds for Tavily search requests.", "type": "number" }, "time_range": { "description": "Time range for search results (e.g., 'd' for day, 'w' for week, 'm' for month, 'y' for year).", "type": "string" }, "topic": { "default": "general", "description": "Category for the Tavily search ('general' or 'news').", "enum": [ "general", "news" ], "type": "string" } }, "required": [ "query" ], "type": "object" }

You must be authenticated.

Other Tools from Deep Research MCP

Related Tools

ID: djgfgo4ef9