"""
This type stub file was generated by pyright.
"""
from typing import Dict, List, Optional, Set, Tuple
from ..models import CrawlResult
from .bfs_strategy import BFSDeepCrawlStrategy
class DFSDeepCrawlStrategy(BFSDeepCrawlStrategy):
    """
    Depth-first deep crawling with the familiar BFS rules.

    The filters, scoring, and page limits are reused from
    :class:`BFSDeepCrawlStrategy`; only the traversal order differs. Pending
    URLs are walked with a stack, so one branch is fully explored before the
    crawl hops to the next.

    DFS also keeps its own ``_dfs_seen`` set so duplicate links can be dropped
    at discovery time without accidentally marking them as "already crawled".
    """

    def __init__(self, *args, **kwargs) -> None:
        """
        Create the strategy.

        Accepts arbitrary positional and keyword arguments.
        NOTE(review): presumably these are forwarded to
        ``BFSDeepCrawlStrategy.__init__`` -- the stub does not show it; confirm
        against the implementation.
        """
        ...

    async def link_discovery(
        self,
        result: CrawlResult,
        source_url: str,
        current_depth: int,
        _visited: Set[str],
        next_level: List[Tuple[str, Optional[str]]],
        depths: Dict[str, int],
    ) -> None:
        """
        Find the next URLs that should be pushed onto the DFS stack.

        Parameters
        ----------
        result : CrawlResult
            Output of the page that was just crawled; its ``links`` block is
            the raw material for discovery.
        source_url : str
            URL of the parent page; stored so callers can track ancestry.
        current_depth : int
            Depth of the parent; children sit at ``current_depth + 1``.
        _visited : Set[str]
            Present only to match the BFS signature; ``_dfs_seen`` is relied
            on instead.
        next_level : list of tuples
            The stack buffer supplied by the caller; new ``(url, parent)``
            items are appended here.
        depths : dict
            Shared depth map so later metadata tagging knows how deep each URL
            lives.

        Notes
        -----
        - ``_dfs_seen`` prevents pushing duplicates without touching the
          traversal guard.
        - Validation, scoring, and capacity trimming mirror the BFS version so
          behaviour stays consistent.
        """
        ...