text_utils.py•1.69 kB
"""
Copyright 2024, Zep Software, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import re
# Maximum length for entity/node summaries
MAX_SUMMARY_CHARS = 500
def truncate_at_sentence(text: str, max_chars: int) -> str:
"""
Truncate text at or about max_chars while respecting sentence boundaries.
Attempts to truncate at the last complete sentence before max_chars.
If no sentence boundary is found before max_chars, truncates at max_chars.
Args:
text: The text to truncate
max_chars: Maximum number of characters
Returns:
Truncated text
"""
if not text or len(text) <= max_chars:
return text
# Find all sentence boundaries (., !, ?) up to max_chars
truncated = text[:max_chars]
# Look for sentence boundaries: period, exclamation, or question mark followed by space or end
sentence_pattern = r'[.!?](?:\s|$)'
matches = list(re.finditer(sentence_pattern, truncated))
if matches:
# Truncate at the last sentence boundary found
last_match = matches[-1]
return text[: last_match.end()].rstrip()
# No sentence boundary found, truncate at max_chars
return truncated.rstrip()