"""
This type stub file was generated by pyright.
"""
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any, AsyncGenerator, Awaitable, Callable, Dict, Generic, Iterator, List, Optional, TypeVar, Union

from pydantic import BaseModel, HttpUrl

from .ssl_certificate import SSLCertificate
@dataclass
class DomainState:
    """Stub declaration of the ``DomainState`` dataclass.

    All three fields are optional at construction time. Their default
    values are elided as ``...`` in this generated stub.
    """

    last_request_time: float = ...
    current_delay: float = ...
    fail_count: int = ...
@dataclass
class CrawlerTaskResult:
    """Stub declaration of the ``CrawlerTaskResult`` dataclass.

    The first seven fields are required; ``error_message``, ``retry_count``
    and ``wait_time`` have defaults, elided as ``...`` in this generated stub.
    """

    task_id: str
    url: str
    result: CrawlResult
    memory_usage: float
    peak_memory: float
    # Either a ``datetime`` or a plain float is accepted for both timestamps.
    start_time: Union[datetime, float]
    end_time: Union[datetime, float]
    error_message: str = ...
    retry_count: int = ...
    wait_time: float = ...

    @property
    def success(self) -> bool:
        """Read-only boolean; how it is computed is not visible in this stub."""
        ...
class CrawlStatus(Enum):
    """Stub declaration of the ``CrawlStatus`` enumeration.

    Member values are elided as ``...`` in this generated stub.
    """

    QUEUED = ...
    IN_PROGRESS = ...
    COMPLETED = ...
    FAILED = ...
@dataclass
class CrawlStats:
    """Stub declaration of the ``CrawlStats`` dataclass.

    ``task_id``, ``url`` and ``status`` are required; every other field has a
    default, elided as ``...`` in this generated stub.
    """

    task_id: str
    url: str
    status: CrawlStatus
    # Timestamps may be absent, a ``datetime``, or a plain float.
    start_time: Optional[Union[datetime, float]] = ...
    end_time: Optional[Union[datetime, float]] = ...
    memory_usage: float = ...
    peak_memory: float = ...
    error_message: str = ...
    wait_time: float = ...
    retry_count: int = ...
    counted_requeue: bool = ...

    @property
    def duration(self) -> str:
        """Read-only string; its exact format is not visible in this stub."""
        ...
class DisplayMode(Enum):
    """Stub declaration of the ``DisplayMode`` enumeration.

    Member values are elided as ``...`` in this generated stub.
    """

    DETAILED = ...
    AGGREGATED = ...
@dataclass
class TokenUsage:
    """Stub declaration of the ``TokenUsage`` dataclass.

    Three integer token counters plus two optional detail dictionaries.
    Every field has a default, elided as ``...`` in this generated stub.
    """

    completion_tokens: int = ...
    prompt_tokens: int = ...
    total_tokens: int = ...
    completion_tokens_details: Optional[dict] = ...
    prompt_tokens_details: Optional[dict] = ...
class UrlModel(BaseModel):
    """Stub declaration of the ``UrlModel`` pydantic model.

    ``url`` is required and typed as ``HttpUrl``; ``forced`` has a default,
    elided as ``...`` in this generated stub.
    """

    url: HttpUrl
    forced: bool = ...
@dataclass
class TraversalStats:
    """Statistics for the traversal process.

    Every field has a default, elided as ``...`` in this generated stub.
    """

    start_time: datetime = ...
    urls_processed: int = ...
    urls_failed: int = ...
    urls_skipped: int = ...
    total_depth_reached: int = ...
    current_depth: int = ...
class DispatchResult(BaseModel):
    """Stub declaration of the ``DispatchResult`` pydantic model.

    All fields are required except ``error_message``, whose default is
    elided as ``...`` in this generated stub.
    """

    task_id: str
    memory_usage: float
    peak_memory: float
    # Either a ``datetime`` or a plain float is accepted for both timestamps.
    start_time: Union[datetime, float]
    end_time: Union[datetime, float]
    error_message: str = ...
class MarkdownGenerationResult(BaseModel):
    """Stub declaration of the ``MarkdownGenerationResult`` pydantic model.

    Three required markdown strings plus two optional "fit" variants whose
    defaults are elided as ``...`` in this generated stub.
    """

    raw_markdown: str
    markdown_with_citations: str
    references_markdown: str
    fit_markdown: Optional[str] = ...
    fit_html: Optional[str] = ...

    def __str__(self) -> str:
        """Return a string form; which field it uses is not visible in this stub."""
        ...
class CrawlResult(BaseModel):
    """Stub declaration of the ``CrawlResult`` pydantic model.

    Declares the model's fields, the ``markdown`` property and setter, three
    deprecated properties, and a ``model_dump`` override. ``url``, ``html``
    and ``success`` are required; every other field has a default, elided as
    ``...`` in this generated stub.
    """

    url: str
    html: str
    # NOTE(review): ``fit_html`` is declared again further down as a
    # deprecated property, so this name is redeclared within the class body.
    # Confirm against the implementation which declaration is intended.
    fit_html: Optional[str] = ...
    success: bool
    cleaned_html: Optional[str] = ...
    media: Dict[str, List[Dict]] = ...
    links: Dict[str, List[Dict]] = ...
    downloaded_files: Optional[List[str]] = ...
    js_execution_result: Optional[Dict[str, Any]] = ...
    screenshot: Optional[str] = ...
    pdf: Optional[bytes] = ...
    mhtml: Optional[str] = ...
    # Backing storage for the ``markdown`` property (see ``model_dump`` docs).
    _markdown: Optional[MarkdownGenerationResult] = ...
    extracted_content: Optional[str] = ...
    metadata: Optional[dict] = ...
    error_message: Optional[str] = ...
    session_id: Optional[str] = ...
    response_headers: Optional[dict] = ...
    status_code: Optional[int] = ...
    ssl_certificate: Optional[SSLCertificate] = ...
    dispatch_result: Optional[DispatchResult] = ...
    redirected_url: Optional[str] = ...
    network_requests: Optional[List[Dict[str, Any]]] = ...
    console_messages: Optional[List[Dict[str, Any]]] = ...
    tables: List[Dict] = ...

    class Config:
        arbitrary_types_allowed = ...

    def __init__(self, **data) -> None:
        ...

    @property
    def markdown(self) -> "Optional[StringCompatibleMarkdown]":
        """
        Property that returns a StringCompatibleMarkdown object that behaves like
        a string but also provides access to MarkdownGenerationResult attributes.
        This approach allows backward compatibility with code that expects 'markdown'
        to be a string, while providing access to the full MarkdownGenerationResult.
        """
        ...

    @markdown.setter
    def markdown(self, value) -> None:
        """
        Setter for the markdown property.
        """
        ...

    @property
    def markdown_v2(self):
        """
        Deprecated property that raises an AttributeError when accessed.
        This property exists to inform users that 'markdown_v2' has been
        deprecated and they should use 'markdown' instead.
        """
        ...

    @property
    def fit_markdown(self):
        """
        Deprecated property that raises an AttributeError when accessed.
        """
        ...

    @property
    def fit_html(self):
        """
        Deprecated property that raises an AttributeError when accessed.
        """
        ...

    def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Override model_dump to include the _markdown private attribute in serialization.
        This override is necessary because:
        1. PrivateAttr fields are excluded from serialization by default
        2. We need to maintain backward compatibility by including the 'markdown' field
           in the serialized output
        3. We're transitioning from 'markdown_v2' to enhancing 'markdown' to hold
           the same type of data
        Future developers: This method ensures that the markdown content is properly
        serialized despite being stored in a private attribute. If the serialization
        requirements change, this is where you would update the logic.
        """
        ...
class StringCompatibleMarkdown(str):
    """A string subclass that also provides access to MarkdownGenerationResult attributes"""

    # pyright inferred ``Self`` as the return type; it is left as a comment
    # because ``Self`` is not imported by this file.
    def __new__(cls, markdown_result):  # -> Self:
        ...

    def __init__(self, markdown_result) -> None:
        ...

    def __getattr__(self, name: str) -> Any:
        """Fallback attribute lookup; the lookup target is not visible in this stub."""
        ...
# Type variable bounded by ``CrawlResult`` (accepts it or any subclass).
CrawlResultT = TypeVar("CrawlResultT", bound=CrawlResult)
class CrawlResultContainer(Generic[CrawlResultT]):
    """Stub declaration of a generic container of crawl results.

    Built from either a single result or a list of results. Declares
    iteration, indexing, ``len()``, an attribute-lookup fallback and
    ``repr()``. Their implementations are not visible in this stub.
    """

    def __init__(self, results: Union[CrawlResultT, List[CrawlResultT]]) -> None:
        ...

    def __iter__(self) -> Iterator[CrawlResultT]:
        ...

    # No return type was inferred by the stub generator for indexing.
    def __getitem__(self, index):
        ...

    def __len__(self) -> int:
        ...

    def __getattr__(self, attr: str) -> Any:
        ...

    def __repr__(self) -> str:
        ...
# Alias: either a container of results or an async generator yielding results.
RunManyReturn = Union[
    CrawlResultContainer[CrawlResultT],
    AsyncGenerator[CrawlResultT, None],
]
class AsyncCrawlResponse(BaseModel):
    """Stub declaration of the ``AsyncCrawlResponse`` pydantic model.

    ``html``, ``response_headers`` and ``status_code`` are required; every
    other field has a default, elided as ``...`` in this generated stub.
    """

    html: str
    response_headers: Dict[str, str]
    js_execution_result: Optional[Dict[str, Any]] = ...
    status_code: int
    screenshot: Optional[str] = ...
    pdf_data: Optional[bytes] = ...
    mhtml_data: Optional[str] = ...
    # Optional async callback: takes an optional float, resolves to a string.
    get_delayed_content: Optional[Callable[[Optional[float]], Awaitable[str]]] = ...
    downloaded_files: Optional[List[str]] = ...
    ssl_certificate: Optional[SSLCertificate] = ...
    redirected_url: Optional[str] = ...
    network_requests: Optional[List[Dict[str, Any]]] = ...
    console_messages: Optional[List[Dict[str, Any]]] = ...

    class Config:
        arbitrary_types_allowed = ...
class MediaItem(BaseModel):
    """Stub declaration of the ``MediaItem`` pydantic model.

    Every field has a default, elided as ``...`` in this generated stub.
    """

    src: Optional[str] = ...
    data: Optional[str] = ...
    alt: Optional[str] = ...
    desc: Optional[str] = ...
    score: Optional[int] = ...
    type: str = ...
    group_id: Optional[int] = ...
    format: Optional[str] = ...
    width: Optional[int] = ...
class Link(BaseModel):
    """Stub declaration of the ``Link`` pydantic model.

    Every field is optional with a default, elided as ``...`` in this
    generated stub.
    """

    href: Optional[str] = ...
    text: Optional[str] = ...
    title: Optional[str] = ...
    base_domain: Optional[str] = ...
    head_data: Optional[Dict[str, Any]] = ...
    head_extraction_status: Optional[str] = ...
    head_extraction_error: Optional[str] = ...
    intrinsic_score: Optional[float] = ...
    contextual_score: Optional[float] = ...
    total_score: Optional[float] = ...
class Media(BaseModel):
    """Stub declaration of the ``Media`` pydantic model.

    Three lists of ``MediaItem`` plus a list of table dictionaries. Every
    field has a default, elided as ``...`` in this generated stub.
    """

    images: List[MediaItem] = ...
    videos: List[MediaItem] = ...
    audios: List[MediaItem] = ...
    tables: List[Dict] = ...
class Links(BaseModel):
    """Stub declaration of the ``Links`` pydantic model.

    Two lists of ``Link``. Both have defaults, elided as ``...`` in this
    generated stub.
    """

    internal: List[Link] = ...
    external: List[Link] = ...
class ScrapingResult(BaseModel):
    """Stub declaration of the ``ScrapingResult`` pydantic model.

    ``cleaned_html`` and ``success`` are required; ``media``, ``links`` and
    ``metadata`` have defaults, elided as ``...`` in this generated stub.
    """

    cleaned_html: str
    success: bool
    media: Media = ...
    links: Links = ...
    metadata: Dict[str, Any] = ...