
Neolibrarian MCP

by pshap
query_engine.py (20.4 kB)
"""High-Performance Query Engine for Calibre Library Handles query execution, optimization, and result assembly for large-scale library operations (107k+ books). """ import logging import sqlite3 import time import uuid from typing import List, Dict, Any, Optional, Set, Tuple from pathlib import Path from query_builder import LibraryQuery, QueryType, SortOrder, TextSearch, MetadataFilters from response_models import ( LibraryResponse, BookSummary, BookDetails, ContentMatch, SeriesInfo, PaginationInfo, QueryMetadata, ResponseStatus, ResponseBuilder ) from fuzzy_matcher import SimpleFuzzyMatcher logger = logging.getLogger(__name__) class QueryOptimizer: """Optimizes queries for performance on large datasets.""" @staticmethod def should_use_index(query: LibraryQuery) -> Dict[str, bool]: """Determine which indexes to prefer for this query.""" recommendations = { "author_index": bool(query.metadata_filters.authors), "title_index": bool(query.metadata_filters.titles), "series_index": bool(query.metadata_filters.series), "date_index": any([ query.metadata_filters.published_after, query.metadata_filters.published_before, query.sort_order in [SortOrder.DATE_ASC, SortOrder.DATE_DESC] ]), "format_index": bool(query.metadata_filters.formats) } return recommendations @staticmethod def estimate_result_size(query: LibraryQuery, total_books: int) -> int: """Estimate result set size for query planning.""" if query.book_ids: return len(query.book_ids) # Apply rough filtering estimates estimated_size = total_books # Text search typically reduces by 90-95% if not query.text_search.is_empty(): estimated_size *= 0.1 # Author filter reduces by ~99% (assuming ~100 books per author) if query.metadata_filters.authors: estimated_size = min(estimated_size, len(query.metadata_filters.authors) * 100) # Series filter is very selective if query.metadata_filters.series: estimated_size = min(estimated_size, len(query.metadata_filters.series) * 20) # Format filter moderately selective if query.metadata_filters.formats: estimated_size *= 0.7 return max(1, int(estimated_size)) @staticmethod def build_optimal_sql(query: LibraryQuery, total_books: int) -> Tuple[str, List, str, List]: """Build optimized SQL queries for count and data retrieval.""" estimated_size = QueryOptimizer.estimate_result_size(query, total_books) use_indexes = QueryOptimizer.should_use_index(query) # Build JOIN strategy based on query base_from = "books b" joins = [] where_conditions = [] count_conditions = [] params = [] count_params = [] # Add JOINs based on what we need if (query.metadata_filters.authors or query.sort_order in [SortOrder.AUTHOR_ASC, SortOrder.AUTHOR_DESC] or not query.text_search.is_empty()): joins.append("LEFT JOIN books_authors_link bal ON b.id = bal.book") joins.append("LEFT JOIN authors a ON bal.author = a.id") if query.metadata_filters.series or query.sort_order == SortOrder.SERIES_ORDER: joins.append("LEFT JOIN books_series_link bsl ON b.id = bsl.book") joins.append("LEFT JOIN series s ON bsl.series = s.id") if query.metadata_filters.tags: joins.append("LEFT JOIN books_tags_link btl ON b.id = btl.book") joins.append("LEFT JOIN tags t ON btl.tag = t.id") if query.metadata_filters.formats: joins.append("LEFT JOIN data d ON b.id = d.book") # Build WHERE conditions if query.book_ids: placeholders = ','.join(['?' 
for _ in query.book_ids]) where_conditions.append(f"b.id IN ({placeholders})") count_conditions.append(f"b.id IN ({placeholders})") params.extend(query.book_ids) count_params.extend(query.book_ids) # Text search conditions if not query.text_search.is_empty(): search_terms = query.text_search.get_search_terms() if query.text_search.fuzzy: # Add fuzzy variations expanded_terms = [] for term in search_terms: expanded_terms.extend(SimpleFuzzyMatcher.expand_search_terms(term)) search_terms = list(set(expanded_terms)) text_conditions = [] for term in search_terms[:5]: # Limit to 5 terms for performance if query.text_search.case_sensitive: text_conditions.append("(b.title LIKE ? OR a.name LIKE ?)") params.extend([f"%{term}%", f"%{term}%"]) count_params.extend([f"%{term}%", f"%{term}%"]) else: text_conditions.append("(b.title LIKE ? COLLATE NOCASE OR a.name LIKE ? COLLATE NOCASE)") params.extend([f"%{term}%", f"%{term}%"]) count_params.extend([f"%{term}%", f"%{term}%"]) if text_conditions: combined_condition = " OR ".join(text_conditions) where_conditions.append(f"({combined_condition})") count_conditions.append(f"({combined_condition})") # Metadata filter conditions filters = query.metadata_filters if filters.authors: author_conditions = [] for author in filters.authors: author_conditions.append("a.name LIKE ? COLLATE NOCASE") params.append(f"%{author}%") count_params.append(f"%{author}%") if author_conditions: where_conditions.append(f"({' OR '.join(author_conditions)})") count_conditions.append(f"({' OR '.join(author_conditions)})") if filters.titles: title_conditions = [] for title in filters.titles: title_conditions.append("b.title LIKE ? COLLATE NOCASE") params.append(f"%{title}%") count_params.append(f"%{title}%") if title_conditions: where_conditions.append(f"({' OR '.join(title_conditions)})") count_conditions.append(f"({' OR '.join(title_conditions)})") if filters.series: series_conditions = [] for series in filters.series: series_conditions.append("s.name LIKE ? 
COLLATE NOCASE") params.append(f"%{series}%") count_params.append(f"%{series}%") if series_conditions: where_conditions.append(f"({' OR '.join(series_conditions)})") count_conditions.append(f"({' OR '.join(series_conditions)})") if filters.formats: format_conditions = [] for fmt in filters.formats: format_conditions.append("d.format = ?") params.append(fmt.upper()) count_params.append(fmt.upper()) if format_conditions: where_conditions.append(f"({' OR '.join(format_conditions)})") count_conditions.append(f"({' OR '.join(format_conditions)})") # Date range conditions if filters.published_after: where_conditions.append("b.pubdate >= ?") count_conditions.append("b.pubdate >= ?") params.append(filters.published_after) count_params.append(filters.published_after) if filters.published_before: where_conditions.append("b.pubdate <= ?") count_conditions.append("b.pubdate <= ?") params.append(filters.published_before) count_params.append(filters.published_before) # Build the queries join_clause = " ".join(joins) where_clause = " WHERE " + " AND ".join(where_conditions) if where_conditions else "" count_where_clause = " WHERE " + " AND ".join(count_conditions) if count_conditions else "" # Count query (simplified) count_query = f""" SELECT COUNT(DISTINCT b.id) FROM {base_from} {join_clause} {count_where_clause} """ # Main data query select_fields = """ b.id, b.title, b.pubdate, b.timestamp, GROUP_CONCAT(DISTINCT a.name, ', ') as authors, s.name as series_name, b.series_index, GROUP_CONCAT(DISTINCT d.format, ', ') as formats """ # Add GROUP BY and ORDER BY group_by = "GROUP BY b.id, b.title, b.pubdate, b.timestamp, s.name, b.series_index" # Build ORDER BY clause order_mapping = { SortOrder.TITLE_ASC: "b.title ASC", SortOrder.TITLE_DESC: "b.title DESC", SortOrder.AUTHOR_ASC: "MIN(a.name) ASC, b.title ASC", SortOrder.AUTHOR_DESC: "MIN(a.name) DESC, b.title ASC", SortOrder.DATE_ASC: "b.pubdate ASC, b.title ASC", SortOrder.DATE_DESC: "b.pubdate DESC, b.title ASC", SortOrder.SERIES_ORDER: "s.name ASC, b.series_index ASC, b.title ASC", SortOrder.ADDED_DESC: "b.timestamp DESC", SortOrder.RANDOM: "RANDOM()" } order_by = f"ORDER BY {order_mapping.get(query.sort_order, 'b.title ASC')}" # Add LIMIT and OFFSET limit_clause = f"LIMIT ? OFFSET ?" 
params.extend([query.pagination.limit, query.pagination.offset]) data_query = f""" SELECT {select_fields} FROM {base_from} {join_clause} {where_clause} {group_by} {order_by} {limit_clause} """ # Debug logging logger.debug(f"Generated count query: {count_query}") logger.debug(f"Generated data query: {data_query}") logger.debug(f"Query parameters: {len(params)} params") return count_query, count_params, data_query, params class LibraryQueryEngine: """High-performance query engine for the Calibre library.""" def __init__(self, db_path: Path): """Initialize the query engine.""" self.db_path = db_path self.total_books = self._get_total_books() logger.info(f"Query engine initialized for {self.total_books} books") def _get_total_books(self) -> int: """Get total book count for optimization.""" try: with sqlite3.connect(str(self.db_path)) as conn: cursor = conn.cursor() cursor.execute("SELECT COUNT(*) FROM books") return cursor.fetchone()[0] except Exception as e: logger.error(f"Failed to get total book count: {e}") return 100000 # Safe default for optimization def execute_query(self, query: LibraryQuery) -> LibraryResponse: """Execute a library query and return standardized response.""" start_time = time.time() query_id = str(uuid.uuid4())[:8] logger.debug(f"Executing query {query_id}: {query.estimate_complexity()} complexity") try: # Handle simple book ID lookups efficiently if query.is_simple_lookup(): return self._execute_book_lookup(query, query_id, start_time) # Execute general search query return self._execute_search_query(query, query_id, start_time) except Exception as e: execution_time = (time.time() - start_time) * 1000 logger.error(f"Query {query_id} failed after {execution_time:.2f}ms: {e}") return LibraryResponse.error(f"Query execution failed: {str(e)}") def _execute_book_lookup(self, query: LibraryQuery, query_id: str, start_time: float) -> LibraryResponse: """Execute optimized book ID lookup.""" with sqlite3.connect(str(self.db_path)) as conn: cursor = conn.cursor() placeholders = ','.join(['?' 
for _ in query.book_ids]) # Build query based on response requirements if query.response_options.include_full_metadata: sql = f""" SELECT b.id, b.title, b.pubdate, b.timestamp, b.isbn, b.publisher, GROUP_CONCAT(DISTINCT a.name, ', ') as authors, s.name as series_name, b.series_index, c.text as description, GROUP_CONCAT(DISTINCT d.format, ', ') as formats, GROUP_CONCAT(DISTINCT t.name, ', ') as tags FROM books b LEFT JOIN books_authors_link bal ON b.id = bal.book LEFT JOIN authors a ON bal.author = a.id LEFT JOIN books_series_link bsl ON b.id = bsl.book LEFT JOIN series s ON bsl.series = s.id LEFT JOIN comments c ON b.id = c.book LEFT JOIN data d ON b.id = d.book LEFT JOIN books_tags_link btl ON b.id = btl.book LEFT JOIN tags t ON btl.tag = t.id WHERE b.id IN ({placeholders}) GROUP BY b.id ORDER BY b.title """ else: sql = f""" SELECT b.id, b.title, b.pubdate, GROUP_CONCAT(DISTINCT a.name, ', ') as authors, s.name as series_name, b.series_index, GROUP_CONCAT(DISTINCT d.format, ', ') as formats FROM books b LEFT JOIN books_authors_link bal ON b.id = bal.book LEFT JOIN authors a ON bal.author = a.id LEFT JOIN books_series_link bsl ON b.id = bsl.book LEFT JOIN series s ON bsl.series = s.id LEFT JOIN data d ON b.id = d.book WHERE b.id IN ({placeholders}) GROUP BY b.id ORDER BY b.title """ cursor.execute(sql, query.book_ids) rows = cursor.fetchall() execution_time = (time.time() - start_time) * 1000 # Convert to response objects if query.response_options.include_full_metadata: books = [] for row in rows: (book_id, title, pubdate, timestamp, isbn, publisher, authors, series_name, series_index, description, formats, tags) = row series = None if series_name: series = SeriesInfo(name=series_name, index=series_index) book = BookDetails( book_id=book_id, title=title, authors=authors.split(', ') if authors else [], series=series, published_date=pubdate, publisher=publisher, isbn=isbn, description=description, formats={fmt: "" for fmt in (formats or "").split(', ') if fmt}, tags=(tags or "").split(', ')[:20] if tags else [] ) books.append(book) query_metadata = QueryMetadata( query_id=query_id, execution_time_ms=execution_time, complexity="simple", database_queries=1, rows_examined=len(rows) ) return ResponseBuilder.book_details_response(books, query_metadata) else: books = [] for row in rows: book_id, title, pubdate, authors, series_name, series_index, formats = row series = None if series_name: series = SeriesInfo(name=series_name, index=series_index) book = BookSummary( book_id=book_id, title=title, authors=authors.split(', ') if authors else [], series=series, published_date=pubdate, formats=(formats or "").split(', ') if formats else [] ) books.append(book) pagination = PaginationInfo( offset=0, limit=len(books), total_results=len(books), has_more=False ) query_metadata = QueryMetadata( query_id=query_id, execution_time_ms=execution_time, complexity="simple", database_queries=1, rows_examined=len(rows) ) return ResponseBuilder.book_search_response(books, pagination, query_metadata) def _execute_search_query(self, query: LibraryQuery, query_id: str, start_time: float) -> LibraryResponse: """Execute general search query with optimization.""" with sqlite3.connect(str(self.db_path)) as conn: cursor = conn.cursor() # Build optimized SQL count_query, count_params, data_query, data_params = QueryOptimizer.build_optimal_sql( query, self.total_books ) database_queries = 0 # Get total count cursor.execute(count_query, count_params) total_results = cursor.fetchone()[0] database_queries += 1 # Get data if there 
are results books = [] if total_results > 0: cursor.execute(data_query, data_params) rows = cursor.fetchall() database_queries += 1 # Convert rows to BookSummary objects for row in rows: book_id, title, pubdate, timestamp, authors, series_name, series_index, formats = row series = None if series_name: series = SeriesInfo(name=series_name, index=series_index) book = BookSummary( book_id=book_id, title=title, authors=authors.split(', ') if authors else [], series=series, published_date=pubdate, formats=(formats or "").split(', ') if formats else [] ) books.append(book) execution_time = (time.time() - start_time) * 1000 # Build response pagination = PaginationInfo( offset=query.pagination.offset, limit=query.pagination.limit, total_results=total_results, has_more=(query.pagination.offset + query.pagination.limit) < total_results ) optimizations = [] if query.estimate_complexity() == "complex" and execution_time < 100: optimizations.append("query_optimization_applied") query_metadata = QueryMetadata( query_id=query_id, execution_time_ms=execution_time, complexity=query.estimate_complexity(), database_queries=database_queries, rows_examined=total_results, optimizations_applied=optimizations ) return ResponseBuilder.book_search_response(books, pagination, query_metadata)
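
For context, here is a minimal sketch of how this engine might be driven. LibraryQuery is defined in query_builder.py, which is not shown on this page, so the attribute-style construction below (metadata_filters, pagination, sort_order) and the response.data field are assumptions inferred from how execute_query reads the query object; the database path is illustrative.

from pathlib import Path

from query_builder import LibraryQuery, SortOrder
from query_engine import LibraryQueryEngine

# Open the engine against a Calibre metadata database (path is illustrative).
engine = LibraryQueryEngine(Path("~/Calibre Library/metadata.db").expanduser())

# Hypothetical query: books by authors matching "Le Guin", newest first.
# LibraryQuery's real constructor may differ; defaults are assumed here.
query = LibraryQuery()
query.metadata_filters.authors = ["Le Guin"]
query.sort_order = SortOrder.DATE_DESC
query.pagination.limit = 20
query.pagination.offset = 0

response = engine.execute_query(query)

# The exact shape of LibraryResponse is defined in response_models.py;
# a list of BookSummary objects under a field such as `data` is assumed.
for book in response.data:
    print(book.book_id, book.title, ", ".join(book.authors))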
