# fuzzy_matcher.py
"""Simple fuzzy matching utilities for search improvements."""
from typing import List, Tuple
import difflib
class SimpleFuzzyMatcher:
    """Lightweight fuzzy matching for author/title searches.

    Every method is a ``@staticmethod``; the class holds no instance state
    and is used purely as a namespace.
    """

    # Known misspelling -> correction. Looked up per whitespace-separated,
    # lower-cased word in ``expand_search_terms``.
    _COMMON_TYPOS = {
        'teh': 'the',
        'adn': 'and',
        'taht': 'that',
        'hte': 'the',
        'nad': 'and',
        'ot': 'to',
        'fo': 'of',
        'jsut': 'just',
        'cna': 'can',
        'woudl': 'would',
        'shoudl': 'should',
        'seperate': 'separate',
        'recieve': 'receive',
        'occured': 'occurred',
        'definately': 'definitely',
    }

    @staticmethod
    def expand_search_terms(term: str, threshold: float = 0.7) -> List[str]:
        """Generate variations of a search term for fuzzy matching.

        The result contains the original term, a typo-corrected lower-case
        form (when it differs from the plain lower-cased term), and the
        lower/upper/title/capitalized case variants.

        Args:
            term: Original search term.
            threshold: Similarity threshold (0.0 to 1.0). Accepted for
                interface compatibility; the current implementation does
                not use it.

        Returns:
            De-duplicated list of variations in a deterministic order, with
            the original ``term`` always first.
        """
        lowered = term.lower()
        variations = [term]

        # split()/join() also collapses runs of whitespace, so the corrected
        # form can differ from ``lowered`` even when no typo was replaced.
        corrected_term = ' '.join(
            SimpleFuzzyMatcher._COMMON_TYPOS.get(word, word)
            for word in lowered.split()
        )
        if corrected_term != lowered:
            variations.append(corrected_term)

        # Case variations.
        variations.extend([
            lowered,
            term.upper(),
            term.title(),
            term.capitalize(),
        ])

        # dict.fromkeys removes duplicates while keeping first-seen order;
        # a plain set() would make the output order vary between runs.
        return list(dict.fromkeys(variations))

    @staticmethod
    def calculate_similarity(term1: str, term2: str) -> float:
        """Calculate the case-insensitive similarity between two terms.

        Both terms are lower-cased and compared with
        ``difflib.SequenceMatcher`` (no junk heuristic function).

        Args:
            term1: First term.
            term2: Second term.

        Returns:
            Similarity ratio (0.0 to 1.0); 1.0 means the lower-cased terms
            are identical.
        """
        return difflib.SequenceMatcher(None, term1.lower(), term2.lower()).ratio()

    @staticmethod
    def suggest_corrections(
        term: str,
        candidates: List[str],
        max_suggestions: int = 3,
        min_similarity: float = 0.6,
    ) -> List[Tuple[str, float]]:
        """Suggest corrections from a list of candidates.

        Args:
            term: Search term with potential typos.
            candidates: List of possible corrections.
            max_suggestions: Maximum number of suggestions to return. Zero
                or a negative value yields an empty list.
            min_similarity: Exclusive lower bound on the similarity score a
                candidate must exceed to be suggested.

        Returns:
            List of ``(suggestion, similarity_score)`` tuples, best match
            first. Candidates with equal scores keep their input order.
        """
        # Guard against negative limits: slicing with a negative bound would
        # drop items from the end instead of returning nothing.
        if max_suggestions <= 0:
            return []

        scored = (
            (candidate, SimpleFuzzyMatcher.calculate_similarity(term, candidate))
            for candidate in candidates
        )
        suggestions = [pair for pair in scored if pair[1] > min_similarity]

        # Sort by similarity score, descending (list.sort is stable).
        suggestions.sort(key=lambda pair: pair[1], reverse=True)
        return suggestions[:max_suggestions]