# query_builder.py
"""Composable Query System for Calibre Library API
Provides a unified interface for building complex, performant queries
across metadata, content, and discovery operations.
"""
import hashlib
import json
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Union, Set
class SortOrder(Enum):
    """Result orderings understood by the query API.

    Each member's value is the plain string used when a query is
    serialized to, or parsed from, a dict.
    """

    # Title, ascending / descending
    TITLE_ASC = "title"
    TITLE_DESC = "title_desc"

    # Author, ascending / descending
    AUTHOR_ASC = "author"
    AUTHOR_DESC = "author_desc"

    # Date, ascending / descending
    DATE_ASC = "date"
    DATE_DESC = "date_desc"

    # Orderings that are not a simple column direction
    RELEVANCE = "relevance"
    SERIES_ORDER = "series"
    RANDOM = "random"
    ADDED_DESC = "added_desc"
class QueryType(Enum):
    """Category tag attached to a query so it can be routed for optimization.

    The member values are the string forms used in serialized queries.
    """

    METADATA_ONLY = "metadata"
    WITH_CONTENT = "content"
    DISCOVERY = "discovery"
    ANALYTICS = "analytics"
@dataclass
class Pagination:
    """Offset/limit window for one page of results, clamped to safe bounds."""

    offset: int = 0
    limit: int = 20

    def __post_init__(self):
        """Normalize the window right after construction.

        A negative ``offset`` becomes 0, and ``limit`` is forced into the
        range 1..1000.
        """
        non_negative_offset = max(0, self.offset)
        self.offset = non_negative_offset

        at_least_one = max(1, self.limit)
        # The upper bound of 1000 rows per page is a performance safeguard.
        self.limit = min(at_least_one, 1000)

    @property
    def sql_params(self) -> tuple:
        """Return ``(limit, offset)``, in that order, for a LIMIT/OFFSET clause."""
        window = (self.limit, self.offset)
        return window
@dataclass
class TextSearch:
    """Free-text search settings.

    ``query`` is the raw user input. ``fuzzy``, ``case_sensitive`` and
    ``exact_phrase`` are stored flags; of these, only ``exact_phrase`` is
    interpreted by this class (in :meth:`get_search_terms`).
    """

    query: str = ""
    fuzzy: bool = False
    case_sensitive: bool = False
    exact_phrase: bool = False

    def is_empty(self) -> bool:
        """Return True when the query is empty or whitespace only."""
        return not self.query.strip()

    def get_search_terms(self) -> List[str]:
        """Return the terms to search for.

        Returns:
            ``[]`` for a blank query. Otherwise a single-element list holding
            the stripped query when ``exact_phrase`` is set, or the
            whitespace-separated words of the query.
        """
        phrase = self.query.strip()
        if not phrase:
            # A blank exact phrase must not yield [''], which would be an
            # empty search term rather than "no terms".
            return []
        if self.exact_phrase:
            return [phrase]
        # str.split() with no separator already drops empty pieces and
        # surrounding whitespace, so no per-term cleanup is needed.
        return phrase.split()
@dataclass
class MetadataFilters:
    """Metadata-based filtering options.

    List fields default to empty, and optional bounds default to ``None``;
    both mean "no constraint". Values are stored as given, and nothing here
    validates them.
    """

    authors: List[str] = field(default_factory=list)
    titles: List[str] = field(default_factory=list)
    series: List[str] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    formats: List[str] = field(default_factory=list)
    languages: List[str] = field(default_factory=list)
    publishers: List[str] = field(default_factory=list)

    # Date ranges (strings; the expected format is not checked here)
    published_after: Optional[str] = None
    published_before: Optional[str] = None
    added_after: Optional[str] = None
    added_before: Optional[str] = None

    # Numeric filters
    rating_min: Optional[float] = None
    rating_max: Optional[float] = None

    def is_empty(self) -> bool:
        """Return True when no filter is applied.

        A rating bound of ``0`` is a real constraint, so the rating fields
        are compared against ``None`` rather than tested for truthiness.
        Empty lists and empty date strings count as "not set".
        """
        list_filters = (
            self.authors, self.titles, self.series, self.tags,
            self.formats, self.languages, self.publishers,
        )
        date_bounds = (
            self.published_after, self.published_before,
            self.added_after, self.added_before,
        )
        rating_bounds = (self.rating_min, self.rating_max)

        if any(list_filters) or any(date_bounds):
            return False
        return all(bound is None for bound in rating_bounds)
@dataclass
class ResponseOptions:
    """Switches and size limits describing what a response should contain.

    This class only stores the settings; the code that acts on them is not
    part of this definition.
    """

    # Optional sections, each toggled independently
    include_content_preview: bool = False
    include_file_paths: bool = False
    include_full_metadata: bool = True
    include_format_details: bool = False
    include_series_info: bool = True
    include_tags: bool = True
    include_similar_books: bool = False

    # Size limits (units are not defined here; presumably characters and a tag count)
    preview_length: int = 500
    max_tags: int = 20
@dataclass
class LibraryQuery:
    """Unified query description for all library operations.

    Bundles the text search, metadata filters, pagination, sort order,
    response options and an optional explicit list of book ids.
    """

    # Core search components
    text_search: TextSearch = field(default_factory=TextSearch)
    metadata_filters: MetadataFilters = field(default_factory=MetadataFilters)

    # Result configuration
    pagination: Pagination = field(default_factory=Pagination)
    sort_order: SortOrder = SortOrder.TITLE_ASC
    response_options: ResponseOptions = field(default_factory=ResponseOptions)

    # Query behavior
    query_type: QueryType = QueryType.METADATA_ONLY
    book_ids: List[int] = field(default_factory=list)  # For specific book queries

    def __post_init__(self):
        """Auto-detect the query type when it is still ``METADATA_ONLY``.

        A non-empty text search upgrades the type to ``WITH_CONTENT``;
        otherwise a random sort upgrades it to ``DISCOVERY``. An explicitly
        passed ``METADATA_ONLY`` is indistinguishable from the default and
        is upgraded the same way.
        """
        if self.query_type == QueryType.METADATA_ONLY:
            if not self.text_search.is_empty():
                self.query_type = QueryType.WITH_CONTENT
            elif self.sort_order == SortOrder.RANDOM:
                self.query_type = QueryType.DISCOVERY

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'LibraryQuery':
        """Create a query from its JSON/dict representation.

        Missing sections, and sections explicitly set to ``None`` (JSON
        ``null``), fall back to defaults. Unrecognized ``sort_order`` or
        ``query_type`` values also fall back to the defaults instead of
        failing.

        Raises:
            TypeError: if a section contains a key that is not a field of
                the corresponding dataclass.
        """
        def section(name: str) -> Dict[str, Any]:
            # `or {}` also covers an explicit null, not just a missing key.
            return data.get(name) or {}

        text_search = TextSearch(**section('text_search'))
        metadata_filters = MetadataFilters(**section('metadata_filters'))
        pagination = Pagination(**section('pagination'))
        response_options = ResponseOptions(**section('response_options'))

        sort_order = SortOrder.TITLE_ASC
        if 'sort_order' in data:
            try:
                sort_order = SortOrder(data['sort_order'])
            except ValueError:
                pass  # Deliberate best-effort: unknown value keeps the default

        query_type = QueryType.METADATA_ONLY
        if 'query_type' in data:
            try:
                query_type = QueryType(data['query_type'])
            except ValueError:
                pass  # Deliberate best-effort: unknown value keeps the default

        return cls(
            text_search=text_search,
            metadata_filters=metadata_filters,
            pagination=pagination,
            sort_order=sort_order,
            response_options=response_options,
            query_type=query_type,
            # Copy so later edits to the query never touch the caller's list.
            book_ids=list(data.get('book_ids') or []),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the query for logging/caching.

        Unset metadata filters (``None`` or empty list) are omitted.
        ``response_options`` is included so that the result round-trips
        through :meth:`from_dict` and so that cache keys distinguish queries
        that differ only in what the response should contain. List values
        are copied, so the result shares no mutable state with the query.
        """
        return {
            'text_search': {
                'query': self.text_search.query,
                'fuzzy': self.text_search.fuzzy,
                'case_sensitive': self.text_search.case_sensitive,
                'exact_phrase': self.text_search.exact_phrase,
            },
            'metadata_filters': {
                name: list(value) if isinstance(value, list) else value
                for name, value in vars(self.metadata_filters).items()
                if value is not None and (not isinstance(value, list) or value)
            },
            'pagination': {
                'offset': self.pagination.offset,
                'limit': self.pagination.limit,
            },
            'response_options': dict(vars(self.response_options)),
            'sort_order': self.sort_order.value,
            'query_type': self.query_type.value,
            'book_ids': list(self.book_ids),
        }

    def is_simple_lookup(self) -> bool:
        """Return True for a pure book-id lookup: ids given, no search, no filters."""
        return (bool(self.book_ids) and
                self.text_search.is_empty() and
                self.metadata_filters.is_empty())

    def estimate_complexity(self) -> str:
        """Estimate query complexity for routing/optimization.

        Scoring: text search adds 1 (3 when fuzzy); each non-empty list
        filter adds 1; a rating bound adds 1; any date bound adds 2; a page
        size above 100 adds 1.

        Returns:
            ``"simple"`` for a score of 0, ``"moderate"`` up to 3,
            otherwise ``"complex"``.
        """
        score = 0

        # Text search complexity
        if not self.text_search.is_empty():
            score += 3 if self.text_search.fuzzy else 1

        # Filter complexity: every list filter counts, including languages
        # and publishers.
        filters = self.metadata_filters
        list_filters = (
            filters.authors, filters.titles, filters.series, filters.tags,
            filters.formats, filters.languages, filters.publishers,
        )
        score += sum(1 for values in list_filters if values)

        # Rating range: 0 is a valid bound, so compare against None.
        if filters.rating_min is not None or filters.rating_max is not None:
            score += 1

        # Date range complexity
        if any((filters.published_after, filters.published_before,
                filters.added_after, filters.added_before)):
            score += 2

        # Result size impact
        if self.pagination.limit > 100:
            score += 1

        if score == 0:
            return "simple"
        if score <= 3:
            return "moderate"
        return "complex"

    def get_cache_key(self) -> str:
        """Generate a cache key that is stable across processes.

        The builtin ``hash()`` of a ``str`` is salted per interpreter
        process, so it cannot be used for keys shared between workers or
        kept across restarts. A SHA-256 digest of the canonical JSON form
        is used instead.
        """
        canonical = json.dumps(self.to_dict(), sort_keys=True)
        digest = hashlib.sha256(canonical.encode('utf-8')).hexdigest()
        return f"query_{digest}"
# Query builder helpers for common patterns
class QueryBuilder:
    """Static factory helpers that build ``LibraryQuery`` objects for common patterns."""

    @staticmethod
    def simple_search(query: str, limit: int = 20) -> LibraryQuery:
        """Create a plain text search for ``query`` returning up to ``limit`` results."""
        return LibraryQuery(
            text_search=TextSearch(query=query),
            pagination=Pagination(limit=limit),
        )

    @staticmethod
    def author_books(author: str, limit: int = 50) -> LibraryQuery:
        """Create a query for one author's books, sorted in series order."""
        return LibraryQuery(
            metadata_filters=MetadataFilters(authors=[author]),
            pagination=Pagination(limit=limit),
            sort_order=SortOrder.SERIES_ORDER,
        )

    @staticmethod
    def series_books(series: str) -> LibraryQuery:
        """Create a query for the books of one series, sorted in series order.

        The default pagination applies, so a single page holds at most the
        default page size.
        """
        return LibraryQuery(
            metadata_filters=MetadataFilters(series=[series]),
            sort_order=SortOrder.SERIES_ORDER,
        )

    @staticmethod
    def recent_additions(limit: int = 20) -> LibraryQuery:
        """Create a discovery query sorted by ``SortOrder.ADDED_DESC``."""
        return LibraryQuery(
            pagination=Pagination(limit=limit),
            sort_order=SortOrder.ADDED_DESC,
            query_type=QueryType.DISCOVERY,
        )

    @staticmethod
    def random_discovery(limit: int = 10, formats: Optional[List[str]] = None) -> LibraryQuery:
        """Create a random-order discovery query.

        Args:
            limit: Maximum number of books to return.
            formats: Optional list of formats to restrict to; ``None`` or an
                empty list means no format filter.
        """
        # Copy so the query never shares a list with the caller.
        filters = MetadataFilters(formats=list(formats)) if formats else MetadataFilters()
        return LibraryQuery(
            metadata_filters=filters,
            pagination=Pagination(limit=limit),
            sort_order=SortOrder.RANDOM,
            query_type=QueryType.DISCOVERY,
        )

    @staticmethod
    def book_details(book_ids: List[int], include_content: bool = False) -> LibraryQuery:
        """Create a detail lookup for specific books.

        Full metadata and file paths are always requested; a content preview
        is requested only when ``include_content`` is true. The page size is
        set to the number of ids, which ``Pagination`` then clamps to
        1..1000.
        """
        options = ResponseOptions(
            include_content_preview=include_content,
            include_full_metadata=True,
            include_file_paths=True,
        )
        return LibraryQuery(
            # Copy so the query never shares a list with the caller.
            book_ids=list(book_ids),
            response_options=options,
            pagination=Pagination(limit=len(book_ids)),
        )