"""
This type stub file was generated by pyright.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Tuple
from .models import MarkdownGenerationResult
from .content_filter_strategy import RelevantContentFilter
# Module-level constant; its value is elided in this stub.
# NOTE(review): presumably the compiled regex used to locate markdown links
# when converting them to citations -- confirm against the implementation.
LINK_PATTERN = ...
def fast_urljoin(base: str, url: str) -> str:
    """Fast URL joining for common cases.

    Stub declaration only: the body is elided, so which cases count as
    "common" is not visible here.

    Args:
        base (str): Base URL that ``url`` is joined against.
        url (str): URL to join onto ``base``.

    Returns:
        str: The joined URL.
    """
    ...
class MarkdownGenerationStrategy(ABC):
    """Abstract base class for markdown generation strategies.

    Subclasses must implement ``generate_markdown``. This is a type stub:
    method bodies and default values are elided (``...``).
    """

    def __init__(self, content_filter: Optional[RelevantContentFilter] = ..., options: Optional[Dict[str, Any]] = ..., verbose: bool = ..., content_source: str = ...) -> None:
        """Declare the configuration accepted by every strategy.

        Args:
            content_filter (Optional[RelevantContentFilter]): Optional content filter.
            options (Optional[Dict[str, Any]]): Optional dictionary of generation options.
            verbose (bool): Verbosity flag; its exact effect is not visible in this stub.
            content_source (str): Name of the content source to generate markdown from.
                NOTE(review): the accepted values are not visible here -- confirm
                against the implementation.
        """
        ...

    @abstractmethod
    def generate_markdown(self, input_html: str, base_url: str = ..., html2text_options: Optional[Dict[str, Any]] = ..., content_filter: Optional[RelevantContentFilter] = ..., citations: bool = ..., **kwargs) -> MarkdownGenerationResult:
        """Generate markdown from the selected input HTML.

        Args:
            input_html (str): The HTML content to process.
            base_url (str): Base URL; presumably used to resolve relative links -- confirm.
            html2text_options (Optional[Dict[str, Any]]): Optional dictionary of HTML-to-text options.
            content_filter (Optional[RelevantContentFilter]): Optional content filter.
            citations (bool): Citation flag; presumably toggles citation generation -- confirm.
            **kwargs: Additional keyword arguments; their handling is not visible in this stub.

        Returns:
            MarkdownGenerationResult: The result of the markdown generation.
        """
        ...
class DefaultMarkdownGenerator(MarkdownGenerationStrategy):
    """
    Default implementation of markdown generation strategy.

    How it works:
    1. Generate raw markdown from cleaned HTML.
    2. Convert links to citations.
    3. Generate fit markdown if content filter is provided.
    4. Return MarkdownGenerationResult.

    Args:
        content_filter (Optional[RelevantContentFilter]): Content filter for generating fit markdown.
        options (Optional[Dict[str, Any]]): Additional options for markdown generation. Defaults to None.
        content_source (str): Source of content to generate markdown from. Options: "cleaned_html", "raw_html", "fit_html". Defaults to "cleaned_html".

    Returns:
        MarkdownGenerationResult: Result containing raw markdown, fit markdown, fit HTML, and references markdown.
    """

    def __init__(self, content_filter: Optional[RelevantContentFilter] = ..., options: Optional[Dict[str, Any]] = ..., content_source: str = ...) -> None:
        """Declare the generator's configuration.

        Default values are elided (``...``) in this stub; see the class
        docstring for the documented defaults.

        NOTE(review): unlike ``MarkdownGenerationStrategy.__init__``, no
        ``verbose`` parameter is declared here.

        Args:
            content_filter (Optional[RelevantContentFilter]): Content filter for generating fit markdown.
            options (Optional[Dict[str, Any]]): Additional options for markdown generation.
            content_source (str): Source of content to generate markdown from.
        """
        ...

    def convert_links_to_citations(self, markdown: str, base_url: str = ...) -> Tuple[str, str]:
        """
        Convert links in markdown to citations.

        How it works:
        1. Find all links in the markdown.
        2. Convert links to citations.
        3. Return converted markdown and references markdown.

        Note:
            This function uses a regex pattern to find links in markdown.

        Args:
            markdown (str): Markdown text.
            base_url (str): Base URL for URL joins.

        Returns:
            Tuple[str, str]: Converted markdown and references markdown.
        """
        ...

    def generate_markdown(self, input_html: str, base_url: str = ..., html2text_options: Optional[Dict[str, Any]] = ..., options: Optional[Dict[str, Any]] = ..., content_filter: Optional[RelevantContentFilter] = ..., citations: bool = ..., **kwargs) -> MarkdownGenerationResult:
        """
        Generate markdown with citations from the provided input HTML.

        How it works:
        1. Generate raw markdown from the input HTML.
        2. Convert links to citations.
        3. Generate fit markdown if content filter is provided.
        4. Return MarkdownGenerationResult.

        NOTE(review): ``options`` sits between ``html2text_options`` and
        ``content_filter``, so the positional order differs from the abstract
        ``MarkdownGenerationStrategy.generate_markdown``. Pass arguments after
        ``base_url`` by keyword to stay compatible with both signatures.

        Args:
            input_html (str): The HTML content to process (selected based on content_source).
            base_url (str): Base URL for URL joins.
            html2text_options (Optional[Dict[str, Any]]): HTML2Text options.
            options (Optional[Dict[str, Any]]): Additional options for markdown generation.
            content_filter (Optional[RelevantContentFilter]): Content filter for generating fit markdown.
            citations (bool): Whether to generate citations.
            **kwargs: Additional keyword arguments; their handling is not visible in this stub.

        Returns:
            MarkdownGenerationResult: Result containing raw markdown, fit markdown, fit HTML, and references markdown.
        """
        ...