from .async_configs import BrowserConfig, CrawlerRunConfig, HTTPCrawlerConfig
from .async_logger import AsyncLogger
from .browser_adapter import BrowserAdapter
from .models import AsyncCrawlResponse
from _typeshed import Incomplete
from abc import ABC, abstractmethod
from playwright.async_api import Page as Page
from typing import Any, Callable, Final
class AsyncCrawlerStrategy(ABC):
@abstractmethod
async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: ...
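# Usage sketch (illustrative comment, not part of the generated stub): any
# concrete strategy only needs to implement the async `crawl` method. A
# hypothetical minimal subclass might look like:
#
#     class StaticCrawlerStrategy(AsyncCrawlerStrategy):
#         async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse:
#             # Assumed AsyncCrawlResponse fields; see crawl4ai.models for
#             # the authoritative constructor signature.
#             return AsyncCrawlResponse(html="<html></html>", response_headers={}, status_code=200)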
class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
browser_config: Incomplete
logger: Incomplete
adapter: Incomplete
hooks: Incomplete
browser_manager: Incomplete
def __init__(self, browser_config: BrowserConfig | None = None, logger: AsyncLogger | None = None, browser_adapter: BrowserAdapter | None = None, **kwargs) -> None: ...
async def __aenter__(self) -> AsyncPlaywrightCrawlerStrategy: ...
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: ...
async def start(self) -> None: ...
async def close(self) -> None: ...
async def kill_session(self, session_id: str): ...
def set_hook(self, hook_type: str, hook: Callable): ...
async def execute_hook(self, hook_type: str, *args, **kwargs): ...
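# Hook usage sketch: hooks are registered by name and awaited at the matching
# point in the crawl lifecycle. The hook name "before_goto" and the callback
# signature below are assumptions drawn from common crawl4ai examples; verify
# them against your installed version.
#
#     async def log_navigation(page, **kwargs):
#         print(f"about to navigate, current page: {page.url}")
#
#     strategy.set_hook("before_goto", log_navigation)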
user_agent: Incomplete
def update_user_agent(self, user_agent: str): ...
headers: Incomplete
def set_custom_headers(self, headers: dict[str, str]): ...
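# Header/user-agent sketch: both setters mutate the strategy before the next
# crawl; the values shown are placeholders.
#
#     strategy.update_user_agent("Mozilla/5.0 (X11; Linux x86_64) ...")
#     strategy.set_custom_headers({"Accept-Language": "en-US"})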
async def smart_wait(self, page: Page, wait_for: str, timeout: float = 30000): ...
async def csp_compliant_wait(self, page: Page, user_wait_function: str, timeout: float = 30000): ...
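# Wait-helper sketch: `smart_wait` is expected to accept either a CSS selector
# or a JavaScript predicate (the "css:"/"js:" prefixes are an assumption based
# on crawl4ai's wait_for documentation), while `csp_compliant_wait` polls a
# user-supplied JS function without inline evaluation, for pages served with a
# restrictive Content-Security-Policy.
#
#     await strategy.smart_wait(page, "css:.results", timeout=10000)
#     await strategy.csp_compliant_wait(page, "() => document.readyState === 'complete'")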
async def process_iframes(self, page: Page): ...
async def create_session(self, **kwargs) -> str: ...
async def crawl(self, url: str, config: CrawlerRunConfig, **kwargs) -> AsyncCrawlResponse: ...
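# Session/crawl sketch: `create_session` returns a session id; reusing it on
# subsequent crawls (assumption: via CrawlerRunConfig.session_id) keeps the
# same page alive between calls, and `kill_session` releases it.
#
#     session_id = await strategy.create_session()
#     config = CrawlerRunConfig(session_id=session_id)
#     response = await strategy.crawl("https://example.com", config=config)
#     await strategy.kill_session(session_id)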
async def remove_overlay_elements(self, page: Page) -> None: ...
async def export_pdf(self, page: Page) -> bytes: ...
async def capture_mhtml(self, page: Page) -> str | None: ...
async def take_screenshot(self, page: Page, **kwargs) -> str: ...
async def take_screenshot_from_pdf(self, pdf_data: bytes) -> str: ...
async def take_screenshot_scroller(self, page: Page, **kwargs) -> str: ...
async def take_screenshot_naive(self, page: Page) -> str: ...
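# Capture sketch: the export helpers return serialized artifacts rather than
# writing files; base64 encoding for the screenshot string is an assumption.
#
#     pdf_bytes = await strategy.export_pdf(page)
#     screenshot_b64 = await strategy.take_screenshot(page)
#     mhtml = await strategy.capture_mhtml(page)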
async def export_storage_state(self, path: str | None = None) -> dict: ...
async def robust_execute_user_script(self, page: Page, js_code: str | list[str]) -> dict[str, Any]: ...
async def execute_user_script(self, page: Page, js_code: str | list[str]) -> dict[str, Any]: ...
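# Script-execution sketch: `js_code` may be a single snippet or a list of
# snippets; `robust_execute_user_script` is assumed to be the fault-tolerant
# wrapper around `execute_user_script`, returning per-snippet results.
#
#     results = await strategy.robust_execute_user_script(
#         page, ["window.scrollTo(0, document.body.scrollHeight);"]
#     )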
async def check_visibility(self, page: Page): ...
async def safe_scroll(self, page: Page, x: int, y: int, delay: float = 0.1): ...
async def csp_scroll_to(self, page: Page, x: int, y: int) -> dict[str, Any]: ...
async def get_page_dimensions(self, page: Page): ...
async def page_need_scroll(self, page: Page) -> bool: ...
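# End-to-end sketch for the Playwright strategy (BrowserConfig and
# CrawlerRunConfig are re-exported from the crawl4ai package root):
#
#     import asyncio
#     from crawl4ai import BrowserConfig, CrawlerRunConfig
#
#     async def main() -> None:
#         strategy = AsyncPlaywrightCrawlerStrategy(browser_config=BrowserConfig(headless=True))
#         async with strategy:
#             response = await strategy.crawl("https://example.com", config=CrawlerRunConfig())
#             print(response.status_code)
#
#     asyncio.run(main())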
class HTTPCrawlerError(Exception): ...
class ConnectionTimeoutError(HTTPCrawlerError): ...
class HTTPStatusError(HTTPCrawlerError):
status_code: Incomplete
def __init__(self, status_code: int, message: str) -> None: ...
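# Error-handling sketch: HTTPStatusError carries the offending status code so
# callers can branch on it; ConnectionTimeoutError covers connect/read
# timeouts. Both derive from HTTPCrawlerError, which catches either.
#
#     try:
#         response = await http_strategy.crawl("https://example.com/missing")
#     except HTTPStatusError as exc:
#         print(f"server returned {exc.status_code}")
#     except ConnectionTimeoutError:
#         print("request timed out")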
class AsyncHTTPCrawlerStrategy(AsyncCrawlerStrategy):
DEFAULT_TIMEOUT: Final[int]
DEFAULT_CHUNK_SIZE: Final[int]
DEFAULT_MAX_CONNECTIONS: Final[int]
DEFAULT_DNS_CACHE_TTL: Final[int]
VALID_SCHEMES: Final[Incomplete]
browser_config: Incomplete
logger: Incomplete
max_connections: Incomplete
dns_cache_ttl: Incomplete
chunk_size: Incomplete
hooks: Incomplete
def __init__(self, browser_config: HTTPCrawlerConfig | None = None, logger: AsyncLogger | None = None, max_connections: int = ..., dns_cache_ttl: int = ..., chunk_size: int = ...) -> None: ...
async def __aenter__(self) -> AsyncHTTPCrawlerStrategy: ...
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: ...
def set_hook(self, hook_type: str, hook_func: Callable) -> None: ...
async def start(self) -> None: ...
async def close(self) -> None: ...
async def crawl(self, url: str, config: CrawlerRunConfig | None = None, **kwargs) -> AsyncCrawlResponse: ...
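# HTTP strategy sketch: a lightweight, browser-free fetch path. Constructor
# defaults (timeout, chunk size, pool size, DNS TTL) come from the Final class
# attributes above; the override below is illustrative.
#
#     async with AsyncHTTPCrawlerStrategy(max_connections=20) as http_strategy:
#         response = await http_strategy.crawl("https://example.com")
#         print(response.status_code)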