"""
This type stub file was generated by pyright.
"""
from typing import Optional
from .async_logger import AsyncLogger
from .async_configs import CrawlerRunConfig
from .models import Links
"""
Link Extractor for Crawl4AI
Extracts head content from links discovered during crawling using URLSeeder's
efficient parallel processing and caching infrastructure.
"""
class LinkPreview:
    """
    Extracts head content from links using URLSeeder's parallel processing infrastructure.

    This class provides intelligent link filtering and head content extraction with:
    - Pattern-based inclusion/exclusion filtering
    - Parallel processing with configurable concurrency
    - Caching for performance
    - BM25 relevance scoring
    - Memory-safe processing for large link sets
    """

    def __init__(self, logger: Optional[AsyncLogger] = ...) -> None:
        """
        Initialize the LinkPreview.

        Args:
            logger: Optional logger instance for recording events.
        """
        ...

    # pyright inferred ``Self`` here; it is spelled as a quoted forward
    # reference so that no additional typing import is required.
    async def __aenter__(self) -> "LinkPreview":
        """
        Async context manager entry.

        Returns:
            The object bound by ``async with ... as``.
        """
        ...

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """
        Async context manager exit.

        Args:
            exc_type: Exception class raised inside the ``async with`` body,
                or None when the body completed normally.
            exc_val: The exception instance, or None.
            exc_tb: Traceback associated with the exception, or None.

        Returns:
            None, so an exception raised inside the body is not suppressed.
        """
        ...

    async def start(self) -> None:
        """Initialize the URLSeeder instance."""
        ...

    async def close(self) -> None:
        """Clean up resources."""
        ...

    async def extract_link_heads(self, links: Links, config: CrawlerRunConfig) -> Links:
        """
        Extract head content for filtered links and attach to Link objects.

        Args:
            links: Links object containing internal and external links.
            config: CrawlerRunConfig with link_preview_config settings.

        Returns:
            Links object with head_data attached to filtered Link objects.
        """
        ...