import logging
from _typeshed import Incomplete
from abc import ABC, abstractmethod
from crawl4ai.async_configs import CrawlerRunConfig
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.deep_crawling.filters import FilterChain
from crawl4ai.models import CrawlResult
from typing import AsyncGenerator, Awaitable, Callable, Generic, TypeVar
CrawlResultT = TypeVar('CrawlResultT', bound='CrawlResult')
PriorityT = TypeVar('PriorityT')
P = TypeVar('P')
deep_crawl_ctx: Incomplete
class TraversalContext:
    visited: BloomFilter
    frontier: PriorityQueue
    depths: dict[str, int]
    priority_fn: Callable[[str], Awaitable[float]]
    current_depth: int
    def __init__(self, priority_fn: Callable[[str], Awaitable[float]] = ...) -> None: ...
    def clone_for_level(self) -> TraversalContext: ...
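# TraversalContext bundles per-crawl traversal state: the BloomFilter of
# visited URLs, the PriorityQueue frontier, and per-URL depth bookkeeping.
# A minimal usage sketch (the scoring function is hypothetical; the stub
# does not fix any runtime behavior):
#
#     async def score(url: str) -> float:
#         return 1.0 / (len(url) + 1)
#
#     ctx = TraversalContext(priority_fn=score)
#     next_ctx = ctx.clone_for_level()  # fresh context for the next depth level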
class PriorityQueue(Generic[PriorityT, P]):
    def __init__(self) -> None: ...
    def insert(self, priority: PriorityT, item: P) -> None: ...
    def extract(self, top_n: int = 1) -> P: ...
    def is_empty(self) -> bool: ...
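# Sketch of the PriorityQueue protocol, assuming extract() returns the
# highest-priority item (what it does on an empty queue is unspecified here):
#
#     pq: PriorityQueue[float, str] = PriorityQueue()
#     pq.insert(0.9, "https://example.com/docs")
#     pq.insert(0.2, "https://example.com/blog")
#     if not pq.is_empty():
#         url = pq.extract()  # presumably "https://example.com/docs"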
class BloomFilter:
    size: Incomplete
    hashes: Incomplete
    bits: Incomplete
    def __init__(self, capacity: int, error_rate: float) -> None: ...
    def add(self, item: str) -> None: ...
    def __contains__(self, item: str) -> bool: ...
    def copy(self) -> BloomFilter: ...
    def __len__(self) -> int: ...
    def bit_count(self) -> int: ...
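# BloomFilter is a probabilistic set: membership tests may return false
# positives but never false negatives, so a URL reported as unseen is always
# safe to enqueue. Usage, with illustrative capacity/error-rate values:
#
#     seen = BloomFilter(capacity=100_000, error_rate=0.001)
#     seen.add("https://example.com")
#     "https://example.com" in seen  # True
#     "https://example.org" in seen  # False, except for a rare false positive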
class DeepCrawlDecorator:
    crawler: AsyncWebCrawler
    def __init__(self, crawler: AsyncWebCrawler) -> None: ...
    def __call__(self, original_arun: Callable) -> Callable: ...
async def collect_results(url: str, crawler: AsyncWebCrawler, config: CrawlerRunConfig): ...
async def collect_many_results(url: str, crawler: AsyncWebCrawler, config: CrawlerRunConfig): ...
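# DeepCrawlDecorator wraps AsyncWebCrawler.arun so that a run whose config
# carries a deep-crawl strategy is routed through that strategy rather than a
# single-page fetch. A hedged sketch of the wiring (the exact hook point is
# an implementation detail of crawl4ai):
#
#     crawler = AsyncWebCrawler()
#     crawler.arun = DeepCrawlDecorator(crawler)(crawler.arun)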
RunManyReturn = CrawlResultT | list[CrawlResultT] | AsyncGenerator[CrawlResultT, None]
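# RunManyReturn covers the three shapes a deep crawl may produce: one result,
# a fully materialized list, or an async generator when streaming is enabled.
# Callers typically branch on the config (assuming CrawlerRunConfig.stream):
#
#     if config.stream:
#         async for res in await crawler.arun(url, config=config): ...
#     else:
#         results = await crawler.arun(url, config=config)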
class DeepCrawlStrategy(ABC):
    @abstractmethod
    async def traverse(self, start_url: str, crawler: AsyncWebCrawler, config: CrawlerRunConfig) -> RunManyReturn: ...
    @abstractmethod
    def precompute_priority(self, url: str) -> Awaitable[float]: ...
    @abstractmethod
    async def link_hypercube(self, result: CrawlResult) -> AsyncGenerator[str, None]: ...
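# Subclasses provide three hooks: traverse() drives the whole crawl,
# precompute_priority() scores a URL before it is enqueued, and
# link_hypercube() yields outbound links from a finished CrawlResult.
# A skeletal subclass, sketched under the assumption that CrawlResult.links
# holds {"internal": [{"href": ...}, ...], ...}:
#
#     class OneShotStrategy(DeepCrawlStrategy):
#         async def traverse(self, start_url, crawler, config):
#             return await crawler.arun(start_url, config=config)
#         def precompute_priority(self, url):
#             async def score() -> float:
#                 return 0.5
#             return score()
#         async def link_hypercube(self, result):
#             for link in (result.links or {}).get("internal", []):
#                 yield link["href"]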
def calculate_quantum_batch_size(depth: int, max_depth: int, frontier_size: int, visited_size: int) -> int: ...
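# Free function that sizes the next fetch batch from traversal state; the
# stub fixes only the signature, so the call below is purely illustrative:
#
#     batch = calculate_quantum_batch_size(
#         depth=2, max_depth=5, frontier_size=120, visited_size=4800,
#     )  # -> an int batch size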
class BFSDeepCrawlStrategy(DeepCrawlStrategy):
    max_depth: int
    filter_chain: FilterChain
    priority_fn: Callable[[str], Awaitable[float]]
    stats: Incomplete
    semaphore: Incomplete
    def __init__(self, max_depth: int, filter_chain: FilterChain = ..., priority_fn: Callable[[str], Awaitable[float]] = ..., logger: logging.Logger | None = None) -> None: ...
    async def traverse(self, start_url: str, crawler: AsyncWebCrawler, config: CrawlerRunConfig) -> RunManyReturn: ...
    async def validate_url(self, url: str) -> bool: ...
    async def link_hypercube(self, result: CrawlResult) -> AsyncGenerator[str, None]: ...
    def __aiter__(self) -> AsyncGenerator[CrawlResult, None]: ...
    async def __anext__(self) -> CrawlResult: ...
    async def precompute_priority(self, url: str) -> float: ...
    async def shutdown(self) -> None: ...
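# BFSDeepCrawlStrategy walks the link graph level by level up to max_depth,
# filtering candidates through the FilterChain and ordering work with
# priority_fn. An end-to-end sketch (parameter values are illustrative, and
# CrawlerRunConfig.deep_crawl_strategy is assumed to be the hand-off point):
#
#     strategy = BFSDeepCrawlStrategy(max_depth=2)
#     config = CrawlerRunConfig(deep_crawl_strategy=strategy)
#     async with AsyncWebCrawler() as crawler:
#         results = await crawler.arun("https://example.com", config=config)
#     await strategy.shutdown()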
async def main() -> None: ...