"""
This type stub file was generated by pyright.
"""
import asyncio
from typing import List, Optional, Tuple, Union
from .async_configs import CrawlerRunConfig
from .models import CrawlerTaskResult
from .components.crawler_monitor import CrawlerMonitor
from .types import AsyncWebCrawler
from collections.abc import AsyncGenerator
from abc import ABC, abstractmethod
class RateLimiter:
    """Stub for the per-domain rate limiter used by dispatchers.

    Judging from the signatures, it tracks a delay window per domain
    (``base_delay``/``max_delay``), retries up to ``max_retries`` times,
    and reacts to HTTP status codes listed in ``rate_limit_codes``
    (e.g. 429/503) — TODO confirm against the implementation.

    NOTE: generated type stub — all bodies are placeholders (``...``).
    """

    def __init__(self, base_delay: Tuple[float, float] = ..., max_delay: float = ..., max_retries: int = ..., rate_limit_codes: List[int] = ...) -> None:
        ...

    def get_domain(self, url: str) -> str:
        """Return the domain key for *url* used to bucket rate-limit state."""
        ...

    async def wait_if_needed(self, url: str) -> None:
        """Sleep for the current per-domain delay before requesting *url*."""
        ...

    def update_delay(self, url: str, status_code: int) -> bool:
        """Update the stored delay for *url*'s domain based on *status_code*.

        Returns:
            A bool — presumably whether the request may be retried;
            TODO confirm against the implementation.
        """
        ...
class BaseDispatcher(ABC):
    """Abstract base for crawl dispatch strategies.

    A dispatcher takes a batch of URLs plus one or more
    :class:`CrawlerRunConfig` objects and schedules crawl tasks on an
    :class:`AsyncWebCrawler`, optionally throttled by a
    :class:`RateLimiter` and observed by a :class:`CrawlerMonitor`.

    NOTE: generated type stub — all bodies are placeholders (``...``).
    """

    def __init__(self, rate_limiter: Optional[RateLimiter] = ..., monitor: Optional[CrawlerMonitor] = ...) -> None:
        ...

    def select_config(self, url: str, configs: Union[CrawlerRunConfig, List[CrawlerRunConfig]]) -> Optional[CrawlerRunConfig]:
        """Select the appropriate config for a given URL.

        Args:
            url: The URL to match against
            configs: Single config or list of configs to choose from

        Returns:
            The matching config, or None if no match found
        """
        ...

    @abstractmethod
    async def crawl_url(self, url: str, config: Union[CrawlerRunConfig, List[CrawlerRunConfig]], task_id: str, monitor: Optional[CrawlerMonitor] = ...) -> CrawlerTaskResult:
        """Crawl a single *url* and return its :class:`CrawlerTaskResult`."""
        ...

    @abstractmethod
    async def run_urls(self, urls: List[str], crawler: AsyncWebCrawler, config: Union[CrawlerRunConfig, List[CrawlerRunConfig]], monitor: Optional[CrawlerMonitor] = ...) -> List[CrawlerTaskResult]:
        """Crawl every URL in *urls* with *crawler* and return all results."""
        ...
class MemoryAdaptiveDispatcher(BaseDispatcher):
    """Dispatcher stub whose concurrency appears to adapt to memory pressure
    (``memory_threshold_percent`` / ``critical_threshold_percent`` /
    ``recovery_threshold_percent``) — TODO confirm against the implementation.

    NOTE(review): the concrete ``crawl_url``/``run_urls`` signatures below
    deviate from the abstract base (``retry_count`` instead of ``monitor``;
    ``run_urls`` drops ``monitor``). This mirrors the generated stub; verify
    it matches the runtime implementation rather than "fixing" it here.

    NOTE: generated type stub — all bodies are placeholders (``...``).
    """

    def __init__(self, memory_threshold_percent: float = ..., critical_threshold_percent: float = ..., recovery_threshold_percent: float = ..., check_interval: float = ..., max_session_permit: int = ..., fairness_timeout: float = ..., memory_wait_timeout: Optional[float] = ..., rate_limiter: Optional[RateLimiter] = ..., monitor: Optional[CrawlerMonitor] = ...) -> None:
        ...

    async def crawl_url(self, url: str, config: Union[CrawlerRunConfig, List[CrawlerRunConfig]], task_id: str, retry_count: int = ...) -> CrawlerTaskResult:
        """Crawl a single *url*; ``retry_count`` presumably tracks re-dispatch attempts."""
        ...

    async def run_urls(self, urls: List[str], crawler: AsyncWebCrawler, config: Union[CrawlerRunConfig, List[CrawlerRunConfig]]) -> List[CrawlerTaskResult]:
        """Crawl all *urls* and return the collected results as a list."""
        ...

    async def run_urls_stream(self, urls: List[str], crawler: AsyncWebCrawler, config: Union[CrawlerRunConfig, List[CrawlerRunConfig]]) -> AsyncGenerator[CrawlerTaskResult, None]:
        """Crawl all *urls*, yielding each :class:`CrawlerTaskResult` as it completes."""
        ...
class SemaphoreDispatcher(BaseDispatcher):
    """Dispatcher stub that bounds concurrency with an :class:`asyncio.Semaphore`
    (``semaphore_count`` permits) — TODO confirm against the implementation.

    NOTE(review): ``run_urls`` below takes ``(crawler, urls, config)`` while
    the abstract base declares ``(urls, crawler, config)`` — a positional-order
    mismatch with ``BaseDispatcher.run_urls``. The stub is kept faithful to
    what the generator emitted; flag upstream rather than reordering here.

    NOTE: generated type stub — all bodies are placeholders (``...``).
    """

    def __init__(self, semaphore_count: int = ..., max_session_permit: int = ..., rate_limiter: Optional[RateLimiter] = ..., monitor: Optional[CrawlerMonitor] = ...) -> None:
        ...

    async def crawl_url(self, url: str, config: Union[CrawlerRunConfig, List[CrawlerRunConfig]], task_id: str, semaphore: asyncio.Semaphore = ...) -> CrawlerTaskResult:
        """Crawl a single *url*, presumably gated by *semaphore* when provided."""
        ...

    async def run_urls(self, crawler: AsyncWebCrawler, urls: List[str], config: Union[CrawlerRunConfig, List[CrawlerRunConfig]]) -> List[CrawlerTaskResult]:
        """Crawl all *urls* with *crawler* and return the collected results."""
        ...