"""
This type stub file was generated by pyright.
"""
from abc import ABC, abstractmethod
class ChunkingStrategy(ABC):
    """
    Abstract base class for chunking strategies.

    Subclasses must override :meth:`chunk`. Because that method is declared
    with ``@abstractmethod`` on an ``ABC`` subclass, this class cannot be
    instantiated directly.
    """

    @abstractmethod
    def chunk(self, text: str) -> list:
        """
        Abstract method to chunk the given text.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks.
        """
        ...
class IdentityChunking(ChunkingStrategy):
    """
    Chunking strategy that returns the input text as a single chunk.
    """

    def chunk(self, text: str) -> list:
        """
        Return the input text as a single chunk.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks; per the class description, the input
                text is its only chunk.
        """
        ...
class RegexChunking(ChunkingStrategy):
    """
    Chunking strategy that splits text based on regular expression patterns.
    """

    # NOTE(review): `patterns` is documented as a list but is left unannotated
    # here; the stub elides the default, so it is unclear whether None is also
    # an accepted value -- confirm against the implementation before typing it.
    def __init__(self, patterns=..., **kwargs) -> None:
        """
        Initialize the RegexChunking object.

        Args:
            patterns (list): A list of regular expression patterns to split text.
                The default value is elided (``...``) in this stub.
            **kwargs: Additional keyword arguments. The stub does not show
                how they are used.
        """
        ...

    def chunk(self, text: str) -> list:
        """
        Split the given text using the regular expression patterns.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks.
        """
        ...
class NlpSentenceChunking(ChunkingStrategy):
    """
    Chunking strategy that splits text into sentences using NLTK's sentence tokenizer.
    """

    def __init__(self, **kwargs) -> None:
        """
        Initialize the NlpSentenceChunking object.

        Args:
            **kwargs: Additional keyword arguments. The stub does not show
                how they are used.
        """
        ...

    def chunk(self, text: str) -> list:
        """
        Split the given text into sentences.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks (presumably one per sentence -- confirm
                against the implementation).
        """
        ...
class TopicSegmentationChunking(ChunkingStrategy):
    """
    Chunking strategy that segments text into topics using NLTK's TextTilingTokenizer.

    How it works:
    1. Segment the text into topics using TextTilingTokenizer
    2. Extract keywords for each topic segment
    """

    def __init__(self, num_keywords: int = ..., **kwargs) -> None:
        """
        Initialize the TopicSegmentationChunking object.

        Args:
            num_keywords (int): The number of keywords to extract for each topic segment.
                The default value is elided (``...``) in this stub.
            **kwargs: Additional keyword arguments. The stub does not show
                how they are used.
        """
        ...

    def chunk(self, text: str) -> list:
        """
        Segment the given text into topic chunks.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks.
        """
        ...

    def extract_keywords(self, text: str) -> list:
        """
        Extract keywords from the given text.

        Args:
            text (str): The text to extract keywords from.

        Returns:
            list: A list of keywords (presumably at most ``num_keywords``
                entries -- TODO confirm against the implementation).
        """
        ...

    def chunk_with_topics(self, text: str) -> list:
        """
        Segment the given text and extract keywords for each topic segment.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list with one entry per topic segment.
                NOTE(review): the structure of each entry (e.g. how a segment
                is paired with its keywords) is not visible from this stub.
        """
        ...
class FixedLengthWordChunking(ChunkingStrategy):
    """
    Chunking strategy that splits text into fixed-length word chunks.

    How it works:
    1. Split the text into words
    2. Create chunks of fixed length
    3. Return the list of chunks
    """

    def __init__(self, chunk_size: int = ..., **kwargs) -> None:
        """
        Initialize the fixed-length word chunking strategy with the given chunk size.

        Args:
            chunk_size (int): The size of each chunk in words.
                The default value is elided (``...``) in this stub.
            **kwargs: Additional keyword arguments. The stub does not show
                how they are used.
        """
        ...

    def chunk(self, text: str) -> list:
        """
        Split the given text into chunks of a fixed number of words.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks.
        """
        ...
class SlidingWindowChunking(ChunkingStrategy):
    """
    Chunking strategy that splits text into overlapping word chunks.

    How it works:
    1. Split the text into words
    2. Create chunks of ``window_size`` words, sliding the window forward
       by ``step`` words between chunks
    3. Return the list of chunks
    """

    def __init__(self, window_size: int = ..., step: int = ..., **kwargs) -> None:
        """
        Initialize the sliding window chunking strategy with the given window size and
        step size.

        Args:
            window_size (int): The size of the sliding window in words.
            step (int): The step size for sliding the window in words.
            **kwargs: Additional keyword arguments. The stub does not show
                how they are used.

        The default values are elided (``...``) in this stub.
        """
        ...

    def chunk(self, text: str) -> list:
        """
        Split the given text into sliding-window word chunks.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks.
        """
        ...
class OverlappingWindowChunking(ChunkingStrategy):
    """
    Chunking strategy that splits text into overlapping word chunks.

    How it works:
    1. Split the text into words using whitespace
    2. Create chunks of fixed length equal to the window size
    3. Slide the window so that consecutive chunks overlap by the overlap size
    4. Return the list of chunks

    NOTE(review): step 3 previously read "slide the window by the overlap
    size", which conflicts with ``overlap`` being documented as the overlap
    between consecutive chunks -- confirm against the implementation.
    """

    def __init__(self, window_size: int = ..., overlap: int = ..., **kwargs) -> None:
        """
        Initialize the overlapping window chunking strategy with the given window size and
        overlap size.

        Args:
            window_size (int): The size of the window in words.
            overlap (int): The size of the overlap between consecutive chunks in words.
            **kwargs: Additional keyword arguments. The stub does not show
                how they are used.

        The default values are elided (``...``) in this stub.
        """
        ...

    def chunk(self, text: str) -> list:
        """
        Split the given text into overlapping word chunks.

        Args:
            text (str): The text to chunk.

        Returns:
            list: A list of chunks.
        """
        ...