"""
This type stub file was generated by pyright.
"""
import click
from typing import Any, Dict, List, Optional
from crawl4ai import BrowserConfig, BrowserProfiler, CrawlerRunConfig
from pathlib import Path
# Module-level console object for this CLI module. Its type is elided in this
# stub; presumably a rich Console (a helper below mentions rich tables)
# -- TODO confirm against the implementation.
console = ...
def get_global_config() -> dict:
    """Return the CLI's global configuration as a dict.

    NOTE(review): where the configuration is read from is not visible in
    this stub -- confirm against the implementation.
    """
    ...
def save_global_config(config: dict) -> None:
    """Persist the given global configuration dict.

    Args:
        config: Configuration mapping to save.

    NOTE(review): the storage location and format are not visible in this
    stub -- confirm against the implementation.
    """
    ...
def setup_llm_config() -> tuple[str, str]:
    """Set up the LLM configuration and return a pair of strings.

    Returns:
        A two-string tuple; presumably ``(provider, token)``, matching the
        ``provider`` and ``token`` parameters of ``stream_llm_response``
        -- TODO confirm the order against the implementation.
    """
    ...
async def stream_llm_response(url: str, markdown: str, query: str, provider: str, token: str) -> None:
    """Coroutine that streams an LLM response for a query; returns nothing.

    Args:
        url: URL associated with the content (presumably the crawled page).
        markdown: Markdown text (presumably the crawled page content).
        query: The question to put to the LLM.
        provider: LLM provider identifier.
        token: Credential for the provider.

    NOTE(review): the parameter roles above are inferred from their names;
    the output destination is not visible in this stub -- confirm.
    """
    ...
def parse_key_values(ctx: click.Context, param: click.Parameter, value: Optional[str]) -> Dict[str, Any]:
    """Click option callback that turns an option value into a dict.

    Used as ``callback=`` for the ``--browser`` and ``--crawler`` options,
    whose help text describes the input as ``key1=value1,key2=value2``.

    Args:
        ctx: Click context supplied by click when invoking the callback.
        param: The click parameter being processed.
        value: Raw option string; None when the option was not given.

    Returns:
        The parsed parameters as a dict.

    NOTE(review): value coercion rules and the result for a None input are
    not visible in this stub -- confirm against the implementation.
    """
    ...
def load_config_file(path: Optional[str]) -> dict:
    """Load a configuration file and return its contents as a dict.

    Args:
        path: Path of the file to load; may be None.

    NOTE(review): the handling of ``path=None`` and the supported formats
    are not visible in this stub (the CLI help text for config options
    mentions YAML/JSON) -- confirm against the implementation.
    """
    ...
def load_schema_file(path: Optional[str]) -> dict:
    """Load a schema file and return its contents as a dict.

    Args:
        path: Path of the schema file to load; may be None.

    NOTE(review): the handling of ``path=None`` is not visible in this stub
    (the ``--schema`` option describes the file as a JSON schema) -- confirm.
    """
    ...
async def run_crawler(url: str, browser_cfg: BrowserConfig, crawler_cfg: CrawlerRunConfig, verbose: bool):
    """Coroutine that runs a crawl of ``url`` with the given configurations.

    Args:
        url: Target URL.
        browser_cfg: Browser configuration object.
        crawler_cfg: Crawler run configuration object.
        verbose: Verbosity switch.

    NOTE(review): the return type is left unannotated in the generated stub;
    presumably the crawl result -- confirm against the implementation before
    adding an annotation.
    """
    ...
def show_examples() -> None:
    """Show usage examples; returns nothing.

    NOTE(review): presumably prints to the module-level ``console`` -- confirm.
    """
    ...
def get_directory_size(path: str) -> int:
    """Calculate the total size of a directory in bytes.

    Args:
        path: Directory path to measure.

    Returns:
        The total size in bytes.
    """
    ...
def display_profiles_table(profiles: List[Dict[str, Any]]) -> None:
    """Display a rich table of browser profiles.

    Args:
        profiles: One dict per browser profile. The expected keys are not
            visible in this stub -- confirm against the implementation.
    """
    ...
async def create_profile_interactive(profiler: BrowserProfiler) -> None:
    """Interactive profile creation wizard.

    Args:
        profiler: BrowserProfiler instance the wizard operates on.
    """
    ...
def delete_profile_interactive(profiler: BrowserProfiler) -> None:
    """Interactive profile deletion.

    Args:
        profiler: BrowserProfiler instance the deletion operates on.
    """
    ...
async def crawl_with_profile_cli(profile_path, url) -> None:
    """Use a profile to crawl a website via CLI.

    Args:
        profile_path: Location of the browser profile to use.
        url: Website to crawl.

    NOTE(review): both parameters are untyped in the generated stub;
    presumably strings -- confirm before annotating.
    """
    ...
async def use_profile_to_crawl() -> None:
    """Interactive profile selection for crawling."""
    ...
async def manage_profiles() -> None:
    """Interactive profile management menu."""
    ...
# Root command group; both -h and --help are registered as help options.
@click.group(context_settings={ "help_option_names": ["-h", "--help"] })
def cli() -> None:
    """Crawl4AI CLI - Web content extraction and browser profile management tool"""
    ...
@cli.group("browser")
def browser_cmd() -> None:
    """Manage browser instances for Crawl4AI

    Commands to manage browser instances for Crawl4AI, including:
    - status - Check status of the builtin browser
    - start - Start a new builtin browser
    - stop - Stop the running builtin browser
    - view - Open a visible window of the builtin browser
    - restart - Restart the builtin browser
    """
    ...
@browser_cmd.command("status")
def browser_status_cmd() -> None:
    """Show status of the builtin browser"""
    ...
@browser_cmd.command("start")
@click.option("--browser-type", "-b", type=click.Choice(["chromium", "firefox"]), default="chromium", help="Browser type (default: chromium)")
@click.option("--port", "-p", type=int, default=9222, help="Debugging port (default: 9222)")
@click.option("--headless/--no-headless", default=True, help="Run browser in headless mode")
def browser_start_cmd(browser_type: str, port: int, headless: bool) -> None:
    """Start a builtin browser instance

    This will start a persistent browser instance that can be used by Crawl4AI
    by setting browser_mode="builtin" in BrowserConfig.
    """
    ...
@browser_cmd.command("stop")
def browser_stop_cmd() -> None:
    """Stop the running builtin browser"""
    ...
@browser_cmd.command("view")
@click.option("--url", "-u", help="URL to navigate to (defaults to about:blank)")
def browser_view_cmd(url: Optional[str]) -> None:
    """
    Open a visible window of the builtin browser

    This command connects to the running builtin browser and opens a visible window,
    allowing you to see what the browser is currently viewing or navigate to a URL.
    """
    ...
@browser_cmd.command("restart")
# All three options default to None; per their help text, an omitted option
# means "same as the current browser".
@click.option("--browser-type", "-b", type=click.Choice(["chromium", "firefox"]), default=None, help="Browser type (defaults to same as current)")
@click.option("--port", "-p", type=int, default=None, help="Debugging port (defaults to same as current)")
@click.option("--headless/--no-headless", default=None, help="Run browser in headless mode")
def browser_restart_cmd(browser_type: Optional[str], port: Optional[int], headless: Optional[bool]) -> None:
    """Restart the builtin browser

    Stops the current builtin browser if running and starts a new one.
    By default, uses the same configuration as the current browser.
    """
    ...
@cli.command("cdp")
@click.option("--user-data-dir", "-d", help="Directory to use for browser data (will be created if it doesn't exist)")
# The short option for --port here is -P; the `browser start` and
# `browser restart` commands use -p.
@click.option("--port", "-P", type=int, default=9222, help="Debugging port (default: 9222)")
@click.option("--browser-type", "-b", type=click.Choice(["chromium", "firefox"]), default="chromium", help="Browser type (default: chromium)")
@click.option("--headless", is_flag=True, help="Run browser in headless mode")
@click.option("--incognito", is_flag=True, help="Run in incognito/private mode (ignores user-data-dir)")
def cdp_cmd(user_data_dir: Optional[str], port: int, browser_type: str, headless: bool, incognito: bool) -> None:
    """Launch a standalone browser with CDP debugging enabled

    This command launches a browser with Chrome DevTools Protocol (CDP) debugging enabled,
    prints the CDP URL, and keeps the browser running until you press 'q'.
    The CDP URL can be used for various automation and debugging tasks.

    Examples:
        # Launch Chromium with CDP on default port 9222
        crwl cdp

        # Use a specific directory for browser data and custom port
        crwl cdp --user-data-dir ~/browser-data --port 9223

        # Launch in headless mode
        crwl cdp --headless

        # Launch in incognito mode (ignores user-data-dir)
        crwl cdp --incognito
    """
    ...
@cli.command("crawl")
@click.argument("url", required=True)
@click.option("--browser-config", "-B", type=click.Path(exists=True), help="Browser config file (YAML/JSON)")
@click.option("--crawler-config", "-C", type=click.Path(exists=True), help="Crawler config file (YAML/JSON)")
@click.option("--filter-config", "-f", type=click.Path(exists=True), help="Content filter config file")
@click.option("--extraction-config", "-e", type=click.Path(exists=True), help="Extraction strategy config file")
@click.option("--json-extract", "-j", is_flag=False, flag_value="", default=None, help="Extract structured data using LLM with optional description")
@click.option("--schema", "-s", type=click.Path(exists=True), help="JSON schema for extraction")
# --browser / --crawler pass through parse_key_values, so the function
# receives dicts rather than the raw strings.
@click.option("--browser", "-b", type=str, callback=parse_key_values, help="Browser parameters as key1=value1,key2=value2")
@click.option("--crawler", "-c", type=str, callback=parse_key_values, help="Crawler parameters as key1=value1,key2=value2")
@click.option("--output", "-o", type=click.Choice(["all", "json", "markdown", "md", "markdown-fit", "md-fit"]), default="all")
@click.option("--output-file", "-O", type=click.Path(), help="Output file path (default: stdout)")
# NOTE(review): boolean flag declared with default=True -- confirm that
# passing the flag has the intended effect and that cache bypass can be
# turned off from the command line.
@click.option("--bypass-cache", "-bc", is_flag=True, default=True, help="Bypass cache when crawling")
@click.option("--question", "-q", help="Ask a question about the crawled content")
@click.option("--verbose", "-v", is_flag=True)
@click.option("--profile", "-p", help="Use a specific browser profile (by name)")
@click.option("--deep-crawl", type=click.Choice(["bfs", "dfs", "best-first"]), help="Enable deep crawling with specified strategy (bfs, dfs, or best-first)")
@click.option("--max-pages", type=int, default=10, help="Maximum number of pages to crawl in deep crawl mode")
# Options declared without a default are annotated Optional because click
# passes None for them when they are omitted.
def crawl_cmd(url: str, browser_config: Optional[str], crawler_config: Optional[str], filter_config: Optional[str], extraction_config: Optional[str], json_extract: Optional[str], schema: Optional[str], browser: Dict[str, Any], crawler: Dict[str, Any], output: str, output_file: Optional[str], bypass_cache: bool, question: Optional[str], verbose: bool, profile: Optional[str], deep_crawl: Optional[str], max_pages: int) -> None:
    """Crawl a website and extract content

    Simple Usage:
        crwl crawl https://example.com
    """
    ...
@cli.command("examples")
def examples_cmd() -> None:
    """Show usage examples"""
    ...
@cli.group("config")
def config_cmd() -> None:
    """Manage global configuration settings

    Commands to view and update global configuration settings:
    - list: Display all current configuration settings
    - get: Get the value of a specific setting
    - set: Set the value of a specific setting
    """
    ...
@config_cmd.command("list")
def config_list_cmd() -> None:
    """List all configuration settings"""
    ...
@config_cmd.command("get")
@click.argument("key", required=True)
def config_get_cmd(key: str) -> None:
    """Get a specific configuration setting"""
    ...
@config_cmd.command("set")
@click.argument("key", required=True)
# The value arrives as a string from the command line; any conversion to
# another type is not visible in this stub.
@click.argument("value", required=True)
def config_set_cmd(key: str, value: str) -> None:
    """Set a configuration setting"""
    ...
@cli.command("profiles")
def profiles_cmd() -> None:
    """Manage browser profiles interactively

    Launch an interactive browser profile manager where you can:
    - List all existing profiles
    - Create new profiles for authenticated browsing
    - Delete unused profiles
    """
    ...
# NOTE(review): registered under an empty command name; presumably the entry
# point routes bare `crwl <url>` invocations here -- confirm against main().
@cli.command(name="")
@click.argument("url", required=False)
@click.option("--example", is_flag=True, help="Show usage examples")
@click.option("--browser-config", "-B", type=click.Path(exists=True), help="Browser config file (YAML/JSON)")
@click.option("--crawler-config", "-C", type=click.Path(exists=True), help="Crawler config file (YAML/JSON)")
@click.option("--filter-config", "-f", type=click.Path(exists=True), help="Content filter config file")
@click.option("--extraction-config", "-e", type=click.Path(exists=True), help="Extraction strategy config file")
@click.option("--json-extract", "-j", is_flag=False, flag_value="", default=None, help="Extract structured data using LLM with optional description")
@click.option("--schema", "-s", type=click.Path(exists=True), help="JSON schema for extraction")
# --browser / --crawler pass through parse_key_values, so the function
# receives dicts rather than the raw strings.
@click.option("--browser", "-b", type=str, callback=parse_key_values, help="Browser parameters as key1=value1,key2=value2")
@click.option("--crawler", "-c", type=str, callback=parse_key_values, help="Crawler parameters as key1=value1,key2=value2")
@click.option("--output", "-o", type=click.Choice(["all", "json", "markdown", "md", "markdown-fit", "md-fit"]), default="all")
# NOTE(review): boolean flag declared with default=True -- confirm that
# passing the flag has the intended effect. Unlike `crwl crawl`, this command
# declares no short alias for it and no --output-file option.
@click.option("--bypass-cache", is_flag=True, default=True, help="Bypass cache when crawling")
@click.option("--question", "-q", help="Ask a question about the crawled content")
@click.option("--verbose", "-v", is_flag=True)
@click.option("--profile", "-p", help="Use a specific browser profile (by name)")
@click.option("--deep-crawl", type=click.Choice(["bfs", "dfs", "best-first"]), help="Enable deep crawling with specified strategy")
@click.option("--max-pages", type=int, default=10, help="Maximum number of pages to crawl in deep crawl mode")
# `url` is optional here (required=False), and options declared without a
# default are annotated Optional because click passes None when they are omitted.
def default(url: Optional[str], example: bool, browser_config: Optional[str], crawler_config: Optional[str], filter_config: Optional[str], extraction_config: Optional[str], json_extract: Optional[str], schema: Optional[str], browser: Dict[str, Any], crawler: Dict[str, Any], output: str, bypass_cache: bool, question: Optional[str], verbose: bool, profile: Optional[str], deep_crawl: Optional[str], max_pages: int) -> None:
    """Crawl4AI CLI - Web content extraction tool

    Simple Usage:
        crwl https://example.com

    Run with --example to see detailed usage examples.

    Other commands:
        crwl profiles - Manage browser profiles for identity-based crawling
        crwl crawl - Crawl a website with advanced options
        crwl cdp - Launch browser with CDP debugging enabled
        crwl browser - Manage builtin browser (start, stop, status, restart)
        crwl config - Manage global configuration settings
        crwl examples - Show more usage examples

    Configuration Examples:
        crwl config list - List all configuration settings
        crwl config get DEFAULT_LLM_PROVIDER - Show current LLM provider
        crwl config set VERBOSE true - Enable verbose mode globally
        crwl config set BROWSER_HEADLESS false - Default to visible browser
    """
    ...
def main() -> None:
    """Entry point of the CLI module; returns nothing.

    NOTE(review): presumably dispatches to the ``cli`` group -- the body is
    elided in this stub, confirm against the implementation.
    """
    ...
# Script entry guard; the guarded body is elided in this generated stub.
if __name__ == "__main__":
    ...