"""
This type stub file was generated by pyright.
"""
import subprocess
from typing import Any, Dict, List, Optional

from playwright.async_api import BrowserContext

from .async_configs import BrowserConfig, CrawlerRunConfig
# Module-level constant; its value is elided (`...`) by the stub generator.
BROWSER_DISABLE_OPTIONS = ...
class ManagedBrowser:
    """
    Manages the browser process and context. This class allows connecting to the browser using the CDP protocol.

    Attributes:
        browser_type (str): The type of browser to launch. Supported values: "chromium", "firefox", "webkit".
            Default: "chromium".
        user_data_dir (str or None): Path to a user data directory for persistent sessions. If None, a
            temporary directory may be used. Default: None.
        headless (bool): Whether to run the browser in headless mode (no visible GUI).
            Default: True.
        browser_process (subprocess.Popen): The process object for the browser.
        temp_dir (str): Temporary directory for user data if not provided.
        debugging_port (int): Port for debugging the browser.
        host (str): Host for debugging the browser.

    Methods:
        build_browser_flags(config): Static method returning the common CLI flags for launching Chromium.
        start(): Starts the browser process or returns the CDP endpoint URL.
        cleanup(): Cleans up the browser process and the temporary directory.
        create_profile(), list_profiles(), delete_profile(): Static methods that have been moved to the
            BrowserProfiler class; prefer the BrowserProfiler equivalents.

    Note:
        Underscore-prefixed helpers are not declared in this stub.
    """

    @staticmethod
    def build_browser_flags(config: BrowserConfig) -> List[str]:
        """Common CLI flags for launching Chromium"""
        ...

    browser_type: str
    # Optional to agree with the constructor parameter and the documented "str or None".
    user_data_dir: Optional[str]
    headless: bool
    browser_process: subprocess.Popen
    temp_dir: str
    debugging_port: int
    host: str

    def __init__(
        self,
        browser_type: str = ...,
        user_data_dir: Optional[str] = ...,
        headless: bool = ...,
        logger=...,
        host: str = ...,
        debugging_port: int = ...,
        cdp_url: Optional[str] = ...,
        browser_config: Optional[BrowserConfig] = ...,
    ) -> None:
        """
        Initialize the ManagedBrowser instance.

        Args:
            browser_type (str): The type of browser to launch. Supported values: "chromium", "firefox", "webkit".
                Default: "chromium".
            user_data_dir (str or None): Path to a user data directory for persistent sessions. If None, a
                temporary directory may be used. Default: None.
            headless (bool): Whether to run the browser in headless mode (no visible GUI).
                Default: True.
            logger (logging.Logger): Logger instance for logging messages. Default: None.
            host (str): Host for debugging the browser. Default: "localhost".
            debugging_port (int): Port for debugging the browser. Default: 9222.
            cdp_url (str or None): CDP URL to connect to the browser. Default: None.
            browser_config (BrowserConfig): Configuration object containing all browser settings. Default: None.
        """
        ...

    async def start(self) -> str:
        """
        Starts the browser process or returns CDP endpoint URL.

        If cdp_url is provided, returns it directly.
        If user_data_dir is not provided for local browser, creates a temporary directory.

        Returns:
            str: CDP endpoint URL
        """
        ...

    async def cleanup(self) -> None:
        """Cleanup browser process and temporary directory"""
        ...

    @staticmethod
    async def create_profile(browser_config=..., profile_name=..., logger=...) -> Optional[str]:
        """
        This method has been moved to the BrowserProfiler class.

        Creates a browser profile by launching a browser for interactive user setup
        and waits until the user closes it. The profile is stored in a directory that
        can be used later with BrowserConfig.user_data_dir.

        Please use BrowserProfiler.create_profile() instead.

        Example:
        ```python
        from crawl4ai.browser_profiler import BrowserProfiler
        profiler = BrowserProfiler()
        profile_path = await profiler.create_profile(profile_name="my-login-profile")
        ```
        """
        ...

    @staticmethod
    def list_profiles() -> List[Dict[str, Any]]:
        """
        This method has been moved to the BrowserProfiler class.

        Lists all available browser profiles in the Crawl4AI profiles directory.

        Please use BrowserProfiler.list_profiles() instead.

        Example:
        ```python
        from crawl4ai.browser_profiler import BrowserProfiler
        profiler = BrowserProfiler()
        profiles = profiler.list_profiles()
        ```
        """
        ...

    @staticmethod
    def delete_profile(profile_name_or_path) -> bool:
        """
        This method has been moved to the BrowserProfiler class.

        Delete a browser profile by name or path.

        Please use BrowserProfiler.delete_profile() instead.

        Example:
        ```python
        from crawl4ai.browser_profiler import BrowserProfiler
        profiler = BrowserProfiler()
        success = profiler.delete_profile("my-profile")
        ```
        """
        ...
async def clone_runtime_state(
    src: BrowserContext,
    dst: BrowserContext,
    crawlerRunConfig: Optional[CrawlerRunConfig] = ...,
    browserConfig: Optional[BrowserConfig] = ...,
) -> None:
    """
    Bring everything that *can* be changed at runtime from `src` → `dst`.

    1. Cookies
    2. localStorage (and sessionStorage, same API)
    3. Extra headers, permissions, geolocation if supplied in configs

    Args:
        src (BrowserContext): The context whose runtime state is copied.
        dst (BrowserContext): The context that receives the state.
        crawlerRunConfig (CrawlerRunConfig or None): Optional run configuration consulted for item 3.
        browserConfig (BrowserConfig or None): Optional browser configuration consulted for item 3.
    """
    ...
class BrowserManager:
    """
    Manages the browser instance and context.

    Attributes:
        config (BrowserConfig): Configuration object containing all browser settings
        logger: Logger instance for recording events and errors
        browser (Browser): The browser instance
        default_context (BrowserContext): The default browser context
        managed_browser (ManagedBrowser): The managed browser instance
        playwright (Playwright): The Playwright instance
        sessions (dict): Dictionary to store session information
        session_ttl (int): Session timeout in seconds
    """

    # Class-level attribute; presumably holds a shared Playwright instance — confirm against the implementation.
    _playwright_instance = ...

    @classmethod
    async def get_playwright(cls, use_undetected: bool = ...):  # -> patchright.async_api._generated.Playwright | playwright.async_api._generated.Playwright:
        """
        Return a Playwright driver instance.

        Per the inferred return type this is either a Patchright or a stock Playwright
        `Playwright` object; `use_undetected` presumably selects the Patchright variant.

        Args:
            use_undetected (bool): Whether to use undetected browser (Patchright)
        """
        ...

    def __init__(self, browser_config: BrowserConfig, logger=..., use_undetected: bool = ...) -> None:
        """
        Initialize the BrowserManager with a browser configuration.

        Args:
            browser_config (BrowserConfig): Configuration object containing all browser settings
            logger: Logger instance for recording events and errors
            use_undetected (bool): Whether to use undetected browser (Patchright)
        """
        ...

    async def start(self) -> None:
        """
        Start the browser instance and set up the default context.

        How it works:
        1. Check if Playwright is already initialized.
        2. If not, initialize Playwright.
        3. If managed browser is used, start it and connect to the CDP endpoint.
        4. If managed browser is not used, launch the browser and set up the default context.

        Note: This method should be called in a separate task to avoid blocking the main event loop.
        """
        ...

    async def setup_context(self, context: BrowserContext, crawlerRunConfig: CrawlerRunConfig = ..., is_default=...) -> None:
        """
        Set up a browser context with the configured options.

        How it works:
        1. Set extra HTTP headers if provided.
        2. Add cookies if provided.
        3. Load storage state if provided.
        4. Accept downloads if enabled.
        5. Set default timeouts for navigation and download.
        6. Set user agent if provided.
        7. Set browser hints if provided.
        8. Set proxy if provided.
        9. Set downloads path if provided.
        10. Set cache if provided.

        Args:
            context (BrowserContext): The browser context to set up
            crawlerRunConfig (CrawlerRunConfig): Configuration object for the crawl run
            is_default (bool): Flag indicating if this is the default context

        Returns:
            None
        """
        ...

    async def create_browser_context(self, crawlerRunConfig: CrawlerRunConfig = ...):  # -> patchright.async_api._generated.BrowserContext | playwright.async_api._generated.BrowserContext:
        """
        Creates and returns a new browser context with configured settings.
        Applies text-only mode settings if text_mode is enabled in config.

        Returns:
            Context: Browser context object with the specified configurations
        """
        ...

    async def get_page(self, crawlerRunConfig: CrawlerRunConfig):  # -> tuple[Any, Any] | tuple[Any | patchright.async_api._generated.Page | playwright.async_api._generated.Page, patchright.async_api._generated.BrowserContext | playwright.async_api._generated.BrowserContext | patchright.async_api._generated.Browser | playwright.async_api._generated.Browser | Any | None]:
        """
        Get a page for the given session ID, creating a new one if needed.

        Args:
            crawlerRunConfig (CrawlerRunConfig): Configuration object containing all browser settings

        Returns:
            (page, context): The Page and its BrowserContext
        """
        ...

    async def kill_session(self, session_id: str) -> None:
        """
        Kill a browser session and clean up resources.

        Args:
            session_id (str): The session ID to kill.
        """
        ...

    async def close(self) -> None:
        """Close all browser resources and clean up."""
        ...