"""
This type stub file was generated by pyright.
"""
import logging
from typing import Dict, List, Optional, Set, Tuple
from .filters import FilterChain
from .scorers import URLScorer
from . import DeepCrawlStrategy
from ..types import CrawlResult
class BFSDeepCrawlStrategy(DeepCrawlStrategy):
    """Deep crawling strategy that traverses discovered links breadth-first.

    All pages at the current depth are handled before any page at the next
    depth is visited.  Key entry points:

    - ``arun``: main entry point; dispatches to batch or stream execution.
    - ``link_discovery``: extracts, filters, and (when configured) scores
      the outgoing URLs of a crawled page.
    - ``can_process_url``: validates URL format and applies the filter chain.
    """

    def __init__(
        self,
        max_depth: int,
        filter_chain: FilterChain = ...,
        url_scorer: Optional[URLScorer] = ...,
        include_external: bool = ...,
        score_threshold: float = ...,
        max_pages: int = ...,
        logger: Optional[logging.Logger] = ...,
    ) -> None:
        ...

    async def can_process_url(self, url: str, depth: int) -> bool:
        """Return whether *url* is well-formed and passes the filter chain.

        Filtering is bypassed for the start URL (depth 0).
        """
        ...

    async def link_discovery(
        self,
        result: CrawlResult,
        source_url: str,
        current_depth: int,
        visited: Set[str],
        next_level: List[Tuple[str, Optional[str]]],
        depths: Dict[str, int],
    ) -> None:
        """Extract, validate, and score links from *result*.

        Every accepted URL is appended to *next_level* as a
        ``(url, parent_url)`` tuple, and its depth is recorded in *depths*.
        """
        ...

    async def shutdown(self) -> None:
        """Release resources and signal cancellation of the crawl."""
        ...