# calibre_client.py (14.6 kB)
"""Calibre Library Client - Enhanced with Full-Text Search
High-performance client for accessing Calibre library functionality.
Provides direct database access for fast searches, metadata retrieval, content reading,
and full-text search across book content.
"""
import logging
from typing import Dict, Any, Optional, List
import difflib
from calibre_config import CalibreConfig
from calibre_database import CalibreDatabase
from calibre_content_reader import CalibreContentReader
from calibre_full_text_search import CalibreFullTextSearch
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class CalibreClient:
    """Professional Calibre library client with full-text search capabilities.

    Provides high-performance access to Calibre library operations including:

    - Connection testing and health checks
    - Library statistics and metadata
    - Author and title search functionality
    - Individual book detail retrieval
    - Book content extraction and analysis
    - Full-text search across all book content

    Uses direct SQLite database access for optimal performance.

    Response convention: every public method returns a dict produced by (or
    modelled on) the underlying components. This class only inspects the
    ``"status"`` key of those dicts, treating the value ``"success"`` as
    success and anything else as a failure response to hand back unchanged.
    """

    # Single source of truth for the "FTS disabled" error text.
    _FTS_UNAVAILABLE_MESSAGE = (
        "Full-text search is not available. "
        "FTS database not found or not initialized."
    )

    # analysis_type -> (sample_type, factor applied to max_length).
    # "themes" doubles the sample size because it wants a broader overview.
    _ANALYSIS_PLANS = {
        "summary": ("overview", 1),
        "beginning": ("beginning", 1),
        "themes": ("overview", 2),
        "characters": ("beginning", 1),
        "quotes": ("overview", 1),
    }

    def __init__(self, config_path: str = "config.json"):
        """Initialize Calibre client with configuration.

        Args:
            config_path: Path handed to ``CalibreConfig``.

        Full-text search is optional: it is enabled only when the config
        reports an FTS database, and a failure to open it is logged and
        leaves the client usable with FTS features disabled.
        """
        self.config = CalibreConfig(config_path)
        # Pass config path to database for performance optimization
        self.database = CalibreDatabase(
            self.config.get_metadata_db_path(),
            config_path=self.config.config_path,
        )
        self.content_reader = CalibreContentReader(self.config.library_path)

        # Stays None whenever FTS is unavailable; FTS methods check this.
        self._full_text_search = None
        if self.config.has_fts_database():
            try:
                fts_db_path = self.config.get_fts_db_path()
                self._full_text_search = CalibreFullTextSearch(str(fts_db_path))
                logger.info("Full-text search enabled with database: %s", fts_db_path)
            except Exception as e:
                # Deliberately broad: FTS is an optional feature and must
                # never prevent the client from starting.
                logger.warning("Could not initialize full-text search: %s", e)
        else:
            logger.info("Full-text search database not found - FTS features disabled")

        logger.info("Calibre client initialized for library: %s", self.config.library_path)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _fts_unavailable(self) -> Dict[str, Any]:
        """Build a fresh error response for calls made while FTS is disabled."""
        return {"status": "error", "message": self._FTS_UNAVAILABLE_MESSAGE}

    def _with_formats(self, book_id: int, action) -> Dict[str, Any]:
        """Resolve a book's formats, then run ``action(formats)``.

        If the format lookup does not report ``"success"``, its response is
        returned unchanged and ``action`` is never called.
        """
        formats_result = self.get_book_formats(book_id)
        if formats_result["status"] != "success":
            return formats_result
        return action(formats_result["formats"])

    @staticmethod
    def _attach_book_metadata(target: Dict[str, Any], book_id: int,
                              book_details: Dict[str, Any], include_tags: bool) -> None:
        """Copy title/authors/published (and optionally tags) onto ``target``.

        When ``book_details`` is not a success response, placeholder values
        are written instead so every enriched dict has the same keys.
        """
        if book_details["status"] == "success":
            book_info = book_details["result"]
            target["book_title"] = book_info["title"]
            target["authors"] = book_info["authors"]
            target["published"] = book_info.get("published")
            if include_tags:
                target["tags"] = book_info.get("tags", [])
        else:
            target["book_title"] = f"Book {book_id}"
            target["authors"] = []
            target["published"] = None
            if include_tags:
                target["tags"] = []

    @staticmethod
    def _normalize_formats(formats: Any) -> set:
        """Return the requested formats as a set of upper-cased names.

        A bare string is treated as a single format rather than being
        iterated character by character. Falsy input yields an empty set.
        """
        if not formats:
            return set()
        if isinstance(formats, str):
            formats = [formats]
        return {str(fmt).upper() for fmt in formats}

    def _has_any_format(self, book_id: int, wanted: set) -> bool:
        """Tell whether the book offers at least one of the ``wanted`` formats.

        Comparison is case-insensitive (``wanted`` must already be
        upper-cased). If the format lookup fails the book is kept: the
        filter is best-effort and does not drop books it cannot inspect.
        """
        book_formats = self.get_book_formats(book_id)
        if book_formats["status"] != "success":
            return True
        # NOTE(review): assumes format names are string-like keys such as
        # "EPUB" - confirm against CalibreDatabase.get_book_formats.
        available = {str(fmt).upper() for fmt in book_formats["formats"]}
        return not wanted.isdisjoint(available)

    # ------------------------------------------------------------------
    # Configuration and health check methods
    # ------------------------------------------------------------------
    def ping(self) -> Dict[str, Any]:
        """Test connection to Calibre library (delegates to the config object)."""
        return self.config.ping()

    # ------------------------------------------------------------------
    # Database query methods
    # ------------------------------------------------------------------
    def get_total_books(self) -> Dict[str, Any]:
        """Get total number of books in the library."""
        return self.database.get_total_books()

    def search_by_author(self, author_name: str, offset: int = 0, limit: int = 50,
                         fuzzy: bool = False) -> Dict[str, Any]:
        """Search for books by author name with pagination and optional fuzzy matching."""
        return self.database.search_by_author(author_name, offset, limit, fuzzy)

    def search_by_title(self, title: str, offset: int = 0, limit: int = 50,
                        fuzzy: bool = False) -> Dict[str, Any]:
        """Search for books by title with pagination and optional fuzzy matching."""
        return self.database.search_by_title(title, offset, limit, fuzzy)

    def get_book_details(self, book_id: int) -> Dict[str, Any]:
        """Get detailed metadata for a specific book by book_id."""
        return self.database.get_book_details(book_id)

    def get_book_formats(self, book_id: int) -> Dict[str, Any]:
        """Get all available formats and their file paths for a book."""
        return self.database.get_book_formats(book_id, self.config.library_path)

    # ------------------------------------------------------------------
    # Content reading methods
    # ------------------------------------------------------------------
    def get_book_content(self, book_id: int, preferred_format: Optional[str] = None,
                         max_length: int = 50000) -> Dict[str, Any]:
        """Extract text content from a book file.

        Returns the format-lookup response unchanged if that lookup fails;
        otherwise returns whatever the content reader produces.
        """
        return self._with_formats(
            book_id,
            lambda formats: self.content_reader.get_book_content(
                formats, preferred_format, max_length
            ),
        )

    def get_book_sample(self, book_id: int, sample_type: str = "beginning",
                        sample_size: int = 5000) -> Dict[str, Any]:
        """Get a sample of book content for LLM analysis.

        Returns the format-lookup response unchanged if that lookup fails;
        otherwise returns whatever the content reader produces.
        """
        return self._with_formats(
            book_id,
            lambda formats: self.content_reader.get_book_sample(
                formats, sample_type, sample_size
            ),
        )

    def analyze_book_content(self, book_id: int, analysis_type: str = "summary",
                             max_length: int = 5000) -> Dict[str, Any]:
        """Get book content prepared for LLM analysis.

        Args:
            book_id: Book to sample.
            analysis_type: One of ``summary``, ``beginning``, ``themes``,
                ``characters``, ``quotes``.
            max_length: Base sample size; ``themes`` requests twice this.

        Returns:
            The result of ``get_book_sample`` for the selected plan, or an
            error dict when ``analysis_type`` is not recognised.
        """
        # The isinstance guard keeps unhashable inputs on the error path
        # instead of raising from the dict lookup.
        plan = self._ANALYSIS_PLANS.get(analysis_type) if isinstance(analysis_type, str) else None
        if plan is None:
            return {
                "status": "error",
                "message": f"Unknown analysis type: {analysis_type}. Use: summary, beginning, themes, characters, quotes"
            }
        sample_type, factor = plan
        return self.get_book_sample(book_id, sample_type, max_length * factor)

    def search_content(self, book_id: int, query: str, case_sensitive: bool = False) -> Dict[str, Any]:
        """Search for text within a book's content.

        Returns the format-lookup response unchanged if that lookup fails;
        otherwise returns whatever the content reader produces.
        """
        return self._with_formats(
            book_id,
            lambda formats: self.content_reader.search_content(
                formats, query, case_sensitive
            ),
        )

    def search_multiple_books(self, book_ids: List[int], query: str,
                              case_sensitive: bool = False) -> Dict[str, Any]:
        """Search for text within multiple books' content.

        Each book is processed independently: a failed metadata lookup, a
        failed search, or an exception for one book is recorded under
        ``failed_searches`` and does not stop the remaining books.

        Returns:
            A ``"success"`` dict with the combined ``results`` (each match
            tagged with ``book_id``, ``book_title`` and ``authors``), match
            counters, and the ``failed_searches`` list.
        """
        all_results: List[Dict[str, Any]] = []
        failed_searches: List[Dict[str, Any]] = []

        for book_id in book_ids:
            try:
                # Book details supply the title/authors added to each match.
                book_details = self.get_book_details(book_id)
                if book_details["status"] != "success":
                    failed_searches.append({
                        "book_id": book_id,
                        "error": "Could not get book details"
                    })
                    continue

                search_result = self.search_content(book_id, query, case_sensitive)
                if search_result["status"] != "success":
                    failed_searches.append({
                        "book_id": book_id,
                        "error": search_result.get("message", "Search failed")
                    })
                    continue

                matches = search_result["results"]
                if not matches:
                    # Searched fine, nothing found: neither a hit nor a failure.
                    continue

                book_info = book_details["result"]
                for match in matches:
                    match["book_id"] = book_id
                    match["book_title"] = book_info["title"]
                    match["authors"] = book_info["authors"]
                    all_results.append(match)
            except Exception as e:
                # Best-effort batch: isolate the failure to this one book.
                failed_searches.append({
                    "book_id": book_id,
                    "error": str(e)
                })

        return {
            "status": "success",
            "query": query,
            "case_sensitive": case_sensitive,
            "searched_books": len(book_ids),
            "books_with_matches": len({match["book_id"] for match in all_results}),
            "total_matches": len(all_results),
            "results": all_results,
            "failed_searches": failed_searches
        }

    # ------------------------------------------------------------------
    # Full-text search methods
    # ------------------------------------------------------------------
    def full_text_search(self, query: str, case_sensitive: bool = False,
                         max_results: int = 100) -> Dict[str, Any]:
        """Search for text content across all books using full-text search database.

        Each hit is enriched with ``book_title``, ``authors``, ``published``
        and ``tags``. Book metadata is looked up once per distinct
        ``book_id``, so hits from the same book share one lookup (and the
        same metadata objects).

        Returns:
            The FTS response with enriched ``results``; the FTS response
            unchanged if it is not a success; or an error dict when FTS is
            not available.
        """
        if not self._full_text_search:
            return self._fts_unavailable()

        fts_result = self._full_text_search.search_content(query, case_sensitive, max_results)
        if fts_result["status"] != "success":
            return fts_result

        details_by_book: Dict[Any, Dict[str, Any]] = {}
        enhanced_results: List[Dict[str, Any]] = []
        for hit in fts_result["results"]:
            book_id = hit["book_id"]
            if book_id not in details_by_book:
                details_by_book[book_id] = self.get_book_details(book_id)
            self._attach_book_metadata(hit, book_id, details_by_book[book_id], include_tags=True)
            enhanced_results.append(hit)

        fts_result["results"] = enhanced_results
        return fts_result

    def full_text_search_book(self, book_id: int, query: str,
                              case_sensitive: bool = False) -> Dict[str, Any]:
        """Search for text content within a specific book using full-text search database.

        On success the response itself (not the individual hits) gains
        ``book_title``, ``authors`` and ``published``.
        """
        if not self._full_text_search:
            return self._fts_unavailable()

        fts_result = self._full_text_search.search_specific_book(book_id, query, case_sensitive)
        if fts_result["status"] != "success":
            return fts_result

        self._attach_book_metadata(
            fts_result, book_id, self.get_book_details(book_id), include_tags=False
        )
        return fts_result

    def get_full_text_search_stats(self) -> Dict[str, Any]:
        """Get statistics about the full-text search database."""
        if not self._full_text_search:
            return self._fts_unavailable()
        return self._full_text_search.get_search_statistics()

    # ------------------------------------------------------------------
    # New unified and batch methods
    # ------------------------------------------------------------------
    def unified_search(self, query: str = "", filters: Optional[Dict[str, Any]] = None,
                       pagination: Optional[Dict[str, int]] = None,
                       options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Unified search with comprehensive filters and options.

        Args:
            query: Free-text query forwarded to the database layer.
            filters: Optional keys ``author``, ``title``, ``series``,
                ``formats`` and ``date_range``.
            pagination: Optional keys ``offset`` (default 0) and ``limit``
                (default 20).
            options: Optional key ``sort_by`` (default ``"title"``).
        """
        # Missing dicts and missing keys fall back to the same defaults.
        filters = filters or {}
        pagination = pagination or {}
        options = options or {}

        return self.database.unified_search(
            query=query,
            author=filters.get("author", ""),
            title=filters.get("title", ""),
            series=filters.get("series", ""),
            formats=filters.get("formats", []),
            date_range=filters.get("date_range"),
            offset=pagination.get("offset", 0),
            limit=pagination.get("limit", 20),
            sort_by=options.get("sort_by", "title")
        )

    def get_books_batch(self, book_ids: List[int]) -> Dict[str, Any]:
        """Get detailed metadata for multiple books in a single operation."""
        return self.database.get_books_batch(book_ids)

    def get_random_books(self, count: int = 10,
                         filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Get random books for discovery, with optional filters.

        Args:
            count: Number of books requested from the database layer.
            filters: Optional client-side filters:
                ``formats`` - a format name or list of names; a book is kept
                if it has at least one of them (case-insensitive);
                ``series_only`` - keep only books with a truthy ``series``.

        Returns:
            The database response. When filters are given and the response
            is a success, ``results`` and ``count`` reflect the filtered
            list and ``applied_filters`` echoes the filters. Because the
            filtering happens after sampling, fewer than ``count`` books may
            be returned.
        """
        result = self.database.get_random_books(count)
        if result["status"] != "success" or not filters:
            return result

        wanted_formats = self._normalize_formats(filters.get("formats"))
        series_only = bool(filters.get("series_only"))

        filtered_results = []
        for book in result["results"]:
            # Cheap in-memory check first, so books it rejects never
            # trigger a format lookup.
            if series_only and not book.get("series"):
                continue
            if wanted_formats and not self._has_any_format(book["book_id"], wanted_formats):
                continue
            filtered_results.append(book)
            if len(filtered_results) >= count:
                break

        result["results"] = filtered_results
        result["count"] = len(filtered_results)
        result["applied_filters"] = filters
        return result