from _typeshed import Incomplete
from abc import ABC, abstractmethod
from collections import Counter as Counter
from crawl4ai.async_configs import LLMConfig
from crawl4ai.async_webcrawler import AsyncWebCrawler
from crawl4ai.models import CrawlResult, Link
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

@dataclass
class CrawlState:
    crawled_urls: set[str] = field(default_factory=set)
    knowledge_base: list[CrawlResult] = field(default_factory=list)
    pending_links: list[Link] = field(default_factory=list)
    query: str = ...
    metrics: dict[str, float] = field(default_factory=dict)
    term_frequencies: dict[str, int] = field(default_factory=dict)
    document_frequencies: dict[str, int] = field(default_factory=dict)
    documents_with_terms: dict[str, set[int]] = field(default_factory=dict)
    total_documents: int = ...
    new_terms_history: list[int] = field(default_factory=list)
    crawl_order: list[str] = field(default_factory=list)
    kb_embeddings: Any | None = ...
    query_embeddings: Any | None = ...
    expanded_queries: list[str] = field(default_factory=list)
    coverage_shape: Any | None = ...
    semantic_gaps: list[tuple[list[float], float]] = field(default_factory=list)
    embedding_model: str = ...
    def save(self, path: str | Path) -> None: ...
    @classmethod
    def load(cls, path: str | Path) -> CrawlState: ...
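
# Usage sketch (hedged): CrawlState is a plain dataclass, so it can be built
# directly and round-tripped through save()/load(). The field values and the
# on-disk filename below are illustrative assumptions, not confirmed by this stub.
#
#     state = CrawlState(query="async web crawling")
#     state.crawled_urls.add("https://example.com/docs")
#     state.save("crawl_state.json")
#     restored = CrawlState.load("crawl_state.json")
#     assert restored.query == state.query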

@dataclass
class AdaptiveConfig:
    confidence_threshold: float = ...
    max_depth: int = ...
    max_pages: int = ...
    top_k_links: int = ...
    min_gain_threshold: float = ...
    strategy: str = ...
    saturation_threshold: float = ...
    consistency_threshold: float = ...
    coverage_weight: float = ...
    consistency_weight: float = ...
    saturation_weight: float = ...
    relevance_weight: float = ...
    novelty_weight: float = ...
    authority_weight: float = ...
    save_state: bool = ...
    state_path: str | None = ...
    embedding_model: str = ...
    embedding_llm_config: LLMConfig | dict | None = ...
    n_query_variations: int = ...
    coverage_threshold: float = ...
    alpha_shape_alpha: float = ...
    embedding_min_confidence_threshold: float = ...
    embedding_coverage_radius: float = ...
    embedding_k_exp: float = ...
    embedding_nearest_weight: float = ...
    embedding_top_k_weight: float = ...
    embedding_overlap_threshold: float = ...
    embedding_min_relative_improvement: float = ...
    embedding_validation_min_score: float = ...
    embedding_quality_min_confidence: float = ...
    embedding_quality_max_confidence: float = ...
    embedding_quality_scale_factor: float = ...
    def validate(self) -> None: ...
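
# Configuration sketch (hedged): the concrete values below are illustrative
# assumptions; validate() is expected to raise on inconsistent settings
# (assumed behavior, e.g. a threshold outside [0, 1]).
#
#     config = AdaptiveConfig(
#         confidence_threshold=0.8,   # stop once coverage looks sufficient
#         max_pages=50,               # hard budget on fetched pages
#         strategy="statistical",     # or "embedding", per the classes below
#         save_state=True,
#         state_path="crawl_state.json",
#     )
#     config.validate()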

class CrawlStrategy(ABC):
    @abstractmethod
    async def calculate_confidence(self, state: CrawlState) -> float: ...
    @abstractmethod
    async def rank_links(self, state: CrawlState, config: AdaptiveConfig) -> list[tuple[Link, float]]: ...
    @abstractmethod
    async def should_stop(self, state: CrawlState, config: AdaptiveConfig) -> bool: ...
    @abstractmethod
    async def update_state(self, state: CrawlState, new_results: list[CrawlResult]) -> None: ...
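
# Extension sketch (hedged): a custom strategy only needs the four abstract
# coroutines. FixedBudgetStrategy and its scoring logic are placeholders of
# mine, not part of the library.
#
#     class FixedBudgetStrategy(CrawlStrategy):
#         async def calculate_confidence(self, state: CrawlState) -> float:
#             return min(1.0, len(state.knowledge_base) / 20)
#
#         async def rank_links(self, state, config):
#             return [(link, 1.0) for link in state.pending_links[: config.top_k_links]]
#
#         async def should_stop(self, state, config) -> bool:
#             return await self.calculate_confidence(state) >= config.confidence_threshold
#
#         async def update_state(self, state, new_results) -> None:
#             state.knowledge_base.extend(new_results)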

class StatisticalStrategy(CrawlStrategy):
    idf_cache: Incomplete
    bm25_k1: float
    bm25_b: float
    def __init__(self) -> None: ...
    async def calculate_confidence(self, state: CrawlState) -> float: ...
    async def rank_links(self, state: CrawlState, config: AdaptiveConfig) -> list[tuple[Link, float]]: ...
    async def should_stop(self, state: CrawlState, config: AdaptiveConfig) -> bool: ...
    async def update_state(self, state: CrawlState, new_results: list[CrawlResult]) -> None: ...
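
# Note (hedged): bm25_k1 and bm25_b suggest standard Okapi BM25 term scoring,
# where a term t in document d scores roughly
#     idf(t) * tf(t, d) * (k1 + 1) / (tf(t, d) + k1 * (1 - b + b * |d| / avgdl))
# with k1 controlling term-frequency saturation and b length normalization.
# How it is used inside rank_links/calculate_confidence is not visible here.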

class EmbeddingStrategy(CrawlStrategy):
    embedding_model: Incomplete
    llm_config: Incomplete
    config: Incomplete
    def __init__(self, embedding_model: str | None = None, llm_config: LLMConfig | dict | None = None) -> None: ...
    async def map_query_semantic_space(self, query: str, n_synthetic: int = 10) -> Any: ...
    def compute_coverage_shape(self, query_points: Any, alpha: float = 0.5): ...
    def find_coverage_gaps(self, kb_embeddings: Any, query_embeddings: Any) -> list[tuple[Any, float]]: ...
    async def select_links_for_expansion(self, candidate_links: list[Link], gaps: list[tuple[Any, float]], kb_embeddings: Any) -> list[tuple[Link, float]]: ...
    async def calculate_confidence(self, state: CrawlState) -> float: ...
    async def rank_links(self, state: CrawlState, config: AdaptiveConfig) -> list[tuple[Link, float]]: ...
    async def validate_coverage(self, state: CrawlState) -> float: ...
    async def should_stop(self, state: CrawlState, config: AdaptiveConfig) -> bool: ...
    def get_quality_confidence(self, state: CrawlState) -> float: ...
    async def update_state(self, state: CrawlState, new_results: list[CrawlResult]) -> None: ...
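
# Usage sketch (hedged): constructing an EmbeddingStrategy with an explicit
# model name. The model string is an illustrative assumption; consult
# crawl4ai.async_configs.LLMConfig for the real embedding-backend parameters.
#
#     strategy = EmbeddingStrategy(
#         embedding_model="sentence-transformers/all-MiniLM-L6-v2",
#         llm_config=None,  # or an LLMConfig / dict for API-backed embeddings
#     )
#     gaps = strategy.find_coverage_gaps(kb_embeddings, query_embeddings)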

class AdaptiveCrawler:
    crawler: Incomplete
    config: Incomplete
    strategy: Incomplete
    state: CrawlState | None
    def __init__(self, crawler: AsyncWebCrawler | None = None, config: AdaptiveConfig | None = None, strategy: CrawlStrategy | None = None) -> None: ...
    async def digest(self, start_url: str, query: str, resume_from: str | None = None) -> CrawlState: ...
    @property
    def confidence(self) -> float: ...
    @property
    def coverage_stats(self) -> dict[str, Any]: ...
    @property
    def is_sufficient(self) -> bool: ...
    def print_stats(self, detailed: bool = False) -> None: ...
    def export_knowledge_base(self, filepath: str | Path, format: str = 'jsonl') -> None: ...
    def import_knowledge_base(self, filepath: str | Path, format: str = 'jsonl') -> None: ...
    def get_relevant_content(self, top_k: int = 5) -> list[dict[str, Any]]: ...
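
# End-to-end sketch (hedged): a typical adaptive crawl. The URL, query, and
# budget are illustrative; digest() is assumed to keep fetching until the
# strategy's should_stop() fires or max_pages/max_depth are exhausted.
#
#     import asyncio
#
#     async def main() -> None:
#         async with AsyncWebCrawler() as crawler:
#             adaptive = AdaptiveCrawler(crawler, AdaptiveConfig(max_pages=30))
#             state = await adaptive.digest(
#                 start_url="https://example.com/docs",
#                 query="session management",
#             )
#             adaptive.print_stats(detailed=True)
#             if adaptive.is_sufficient:
#                 adaptive.export_knowledge_base("kb.jsonl")
#
#     asyncio.run(main())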