import abc
from .models import ScrapingResult
from .utils import extract_metadata as extract_metadata
from _typeshed import Incomplete
from abc import ABC, abstractmethod
from bs4 import BeautifulSoup as BeautifulSoup, Comment as Comment, NavigableString as NavigableString, PageElement as PageElement, Tag as Tag
from lxml import html as lhtml
from typing import Any
# Precompiled regex patterns; concrete values were erased to ``Incomplete`` by stubgen.
OG_REGEX: Incomplete  # presumably matches Open Graph meta names ("og:...") — confirm in implementation
TWITTER_REGEX: Incomplete  # presumably matches Twitter Card meta names ("twitter:...") — confirm in implementation
DIMENSION_REGEX: Incomplete  # presumably parses width/height dimension strings (see parse_dimension) — confirm in implementation
def parse_srcset(s: str) -> list[dict]: ...
def parse_dimension(dimension): ...
def fetch_image_file_size(img, base_url): ...
class ContentScrapingStrategy(ABC):
    """Abstract interface for strategies that turn fetched HTML into a ScrapingResult.

    Cleanup: the original declaration was ``class ContentScrapingStrategy(ABC,
    metaclass=abc.ABCMeta)``.  Spelling out the metaclass is redundant —
    ``abc.ABC`` already has ``ABCMeta`` as its metaclass — so it is dropped
    here with no behavioral change.
    """

    @abstractmethod
    def scrap(self, url: str, html: str, **kwargs) -> ScrapingResult:
        """Synchronously scrape *html* (fetched from *url*) into a ScrapingResult."""
        ...

    @abstractmethod
    async def ascrap(self, url: str, html: str, **kwargs) -> ScrapingResult:
        """Asynchronous counterpart of :meth:`scrap`; same contract."""
        ...
class LXMLWebScrapingStrategy(ContentScrapingStrategy):
    """Concrete ContentScrapingStrategy built on ``lxml.html`` element trees."""

    # Attribute types were erased to ``Incomplete`` by stubgen:
    logger: Incomplete  # logger object accepted by __init__ (default None)
    DIMENSION_REGEX: Incomplete  # class-level dimension pattern — presumably mirrors the module constant; confirm
    BASE64_PATTERN: Incomplete  # presumably matches inline base64 data URIs in src attributes — confirm

    def __init__(self, logger=None) -> None:
        """Initialize the strategy with an optional *logger*."""
        ...
    def scrap(self, url: str, html: str, **kwargs) -> ScrapingResult:
        """Synchronously scrape *html* fetched from *url* into a ScrapingResult."""
        ...
    async def ascrap(self, url: str, html: str, **kwargs) -> ScrapingResult:
        """Asynchronous counterpart of :meth:`scrap`."""
        ...
    def process_element(self, url, element: lhtml.HtmlElement, **kwargs) -> dict[str, Any]:
        """Process a single lxml element into a result dict; key schema undeclared here — see implementation."""
        ...
    def find_closest_parent_with_useful_text(self, element: lhtml.HtmlElement, **kwargs) -> str | None:
        """Walk up from *element* looking for an ancestor with usable text; None when none is found."""
        ...
    def flatten_nested_elements(self, element: lhtml.HtmlElement) -> lhtml.HtmlElement:
        """Collapse redundant nesting in *element*'s subtree; returns an HtmlElement."""
        ...
    def process_image(self, img: lhtml.HtmlElement, url: str, index: int, total_images: int, **kwargs) -> list[dict] | None:
        """Extract metadata for image *index* of *total_images* on the page at *url*; None when skipped."""
        ...
    def remove_empty_elements_fast(self, root, word_count_threshold: int = 5):
        """Strip elements under *root* whose text falls below *word_count_threshold* words — presumed from name; confirm."""
        ...
    def remove_unwanted_attributes_fast(self, root: lhtml.HtmlElement, important_attrs=None, keep_data_attributes: bool = False) -> lhtml.HtmlElement:
        """Drop attributes not in *important_attrs*; ``data-*`` attributes kept only when *keep_data_attributes* is True."""
        ...
WebScrapingStrategy = LXMLWebScrapingStrategy