Skip to main content
Glama

Neolibrarian MCP

by pshap
calibre_full_text_search.py (13.2 kB)
"""Calibre Full-Text Search Module

Provides full-text search functionality for Calibre library content.
Connects to the Calibre full-text search database to search within book content.
"""

import logging
import os
import re
import sqlite3
from contextlib import closing
from typing import Dict, Any, List, Optional, Tuple

logger = logging.getLogger(__name__)


class CalibreFullTextSearch:
    """Full-text search functionality for Calibre library content.

    Provides search capabilities across the content of books in the Calibre
    library using the full-text search database generated by Calibre.

    Every public method opens a short-lived SQLite connection, always closes
    it (also on error paths), and reports failures as a
    ``{"status": "error", "message": ...}`` dictionary instead of raising.
    Only the constructor raises.
    """

    def __init__(self, fts_db_path: str):
        """Initialize full-text search with database path.

        Args:
            fts_db_path: Filesystem path of the full-text search database.

        Raises:
            FileNotFoundError: If ``fts_db_path`` does not exist.
            ValueError: If the database lacks the ``books_text`` table, one
                of its required columns, or cannot be read by SQLite.
        """
        self.fts_db_path = fts_db_path
        self._validate_database()
        logger.info(f"Full-text search initialized with database: {fts_db_path}")

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection to the FTS database."""
        return sqlite3.connect(self.fts_db_path)

    def _validate_database(self) -> None:
        """Validate that the FTS database exists and has expected structure.

        Raises:
            FileNotFoundError: If the database file is missing.
            ValueError: If the ``books_text`` table or one of the columns
                ``book``, ``searchable_text``, ``format`` is missing, or if
                SQLite reports an error while inspecting the file.
        """
        if not os.path.exists(self.fts_db_path):
            raise FileNotFoundError(f"FTS database not found: {self.fts_db_path}")

        try:
            # closing() guarantees the connection is released even when one
            # of the ValueErrors below is raised from inside the block.
            with closing(self._connect()) as conn:
                cursor = conn.cursor()

                # Check for required table
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name='books_text'"
                )
                if not cursor.fetchone():
                    raise ValueError("books_text table not found in FTS database")

                # Check table structure (column name is field 1 of each row)
                cursor.execute("PRAGMA table_info(books_text)")
                columns = {col[1] for col in cursor.fetchall()}
                for col in ("book", "searchable_text", "format"):
                    if col not in columns:
                        raise ValueError(
                            f"Required column '{col}' not found in books_text table"
                        )
        except sqlite3.Error as e:
            raise ValueError(f"Error validating FTS database: {e}") from e

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE wildcards so *text* matches literally with ESCAPE '\\'."""
        # The escape character itself must be doubled first.
        return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    @staticmethod
    def _escape_glob(text: str) -> str:
        """Escape GLOB metacharacters so *text* matches literally."""
        # GLOB has no escape character; a one-character set such as "[*]"
        # is the way to match a metacharacter literally.
        return re.sub(r"([*?\[])", r"[\1]", text)

    def _build_text_filter(self, query: str, case_sensitive: bool) -> Tuple[str, str]:
        """Return the SQL condition and bound parameter for a substring search.

        The query is treated as literal text in both modes, which keeps the
        SQL row filter consistent with the regex used to extract matches.
        """
        if case_sensitive:
            return "searchable_text GLOB ?", f"*{self._escape_glob(query)}*"
        return "searchable_text LIKE ? ESCAPE '\\'", f"%{self._escape_like(query)}%"

    def search_content(self, query: str, case_sensitive: bool = False,
                       max_results: int = 100, context_chars: int = 200) -> Dict[str, Any]:
        """Search for text content across all books.

        Args:
            query: Search query string, matched as a literal substring.
            case_sensitive: Whether to perform case-sensitive search
            max_results: Maximum number of book/format rows fetched from the
                database (passed to SQL ``LIMIT``).
            context_chars: Number of characters to include around matches for context

        Returns:
            Dictionary containing search results and metadata. On success
            ``results`` holds one entry per book/format row that has at least
            one match, ``books_with_matches`` counts distinct book ids and
            ``total_matches`` sums the matches over all entries. On failure
            the dictionary has ``status == "error"`` and a ``message``.
        """
        if not query or not query.strip():
            return {
                "status": "error",
                "message": "Search query cannot be empty"
            }

        try:
            condition, search_param = self._build_text_filter(query, case_sensitive)
            search_sql = (
                "SELECT book, format, searchable_text FROM books_text "
                f"WHERE {condition} LIMIT ?"
            )
            with closing(self._connect()) as conn:
                rows = conn.execute(search_sql, (search_param, max_results)).fetchall()

            # Process results to extract context
            processed_results = []
            for book_id, format_type, content in rows:
                matches = self._extract_matches_with_context(
                    content, query, context_chars, case_sensitive
                )
                if not matches:
                    # Never report a row as a hit when nothing can be shown for it.
                    continue
                processed_results.append({
                    "book_id": book_id,
                    "format": format_type,
                    "match_count": len(matches),
                    "matches": matches
                })

            return {
                "status": "success",
                "query": query,
                "case_sensitive": case_sensitive,
                "total_books_searched": self._get_total_indexed_books(),
                # A book indexed in several formats is still a single book.
                "books_with_matches": len({r["book_id"] for r in processed_results}),
                "total_matches": sum(r["match_count"] for r in processed_results),
                "results": processed_results
            }

        except sqlite3.Error as e:
            logger.error("Database error during search: %s", e)
            return {
                "status": "error",
                "message": f"Database error: {e}"
            }
        except Exception as e:
            logger.exception("Unexpected error during search: %s", e)
            return {
                "status": "error",
                "message": f"Search error: {e}"
            }

    def search_specific_book(self, book_id: int, query: str, case_sensitive: bool = False,
                             context_chars: int = 200) -> Dict[str, Any]:
        """Search for text content within a specific book.

        Args:
            book_id: ID of the book to search within
            query: Search query string, matched as a literal substring.
            case_sensitive: Whether to perform case-sensitive search
            context_chars: Number of characters to include around matches for context

        Returns:
            Dictionary containing search results and metadata. ``results``
            holds one entry per format of the book that has at least one
            match; an unknown book or a query without hits yields an empty
            list with ``status == "success"``. On failure the dictionary has
            ``status == "error"`` and a ``message``.
        """
        if not query or not query.strip():
            return {
                "status": "error",
                "message": "Search query cannot be empty"
            }

        try:
            condition, search_param = self._build_text_filter(query, case_sensitive)
            search_sql = (
                "SELECT format, searchable_text FROM books_text "
                f"WHERE book = ? AND {condition}"
            )
            with closing(self._connect()) as conn:
                rows = conn.execute(search_sql, (book_id, search_param)).fetchall()

            # Process results; an empty row set simply produces empty results.
            processed_results = []
            for format_type, content in rows:
                matches = self._extract_matches_with_context(
                    content, query, context_chars, case_sensitive
                )
                if matches:  # Only include formats that have matches
                    processed_results.append({
                        "format": format_type,
                        "match_count": len(matches),
                        "matches": matches
                    })

            return {
                "status": "success",
                "book_id": book_id,
                "query": query,
                "case_sensitive": case_sensitive,
                "formats_with_matches": len(processed_results),
                "total_matches": sum(r["match_count"] for r in processed_results),
                "results": processed_results
            }

        except sqlite3.Error as e:
            logger.error("Database error during book search: %s", e)
            return {
                "status": "error",
                "message": f"Database error: {e}"
            }
        except Exception as e:
            logger.exception("Unexpected error during book search: %s", e)
            return {
                "status": "error",
                "message": f"Search error: {e}"
            }

    def get_search_statistics(self) -> Dict[str, Any]:
        """Get statistics about the full-text search database.

        Returns:
            Dictionary with the number of distinct indexed books, the number
            of rows in ``books_text``, the summed ``text_size`` (raw and in
            MiB rounded to two decimals), the number of rows with a non-empty
            ``err_msg`` and the per-format row counts ordered by descending
            count. On failure the dictionary has ``status == "error"``.
        """
        try:
            with closing(self._connect()) as conn:
                cursor = conn.cursor()

                # Get total indexed books
                cursor.execute("SELECT COUNT(DISTINCT book) FROM books_text")
                total_books = cursor.fetchone()[0]

                # Get total text entries (book+format combinations)
                cursor.execute("SELECT COUNT(*) FROM books_text")
                total_entries = cursor.fetchone()[0]

                # Get format distribution
                cursor.execute(
                    "SELECT format, COUNT(*) FROM books_text GROUP BY format ORDER BY COUNT(*) DESC"
                )
                format_stats = cursor.fetchall()

                # Get total text size; SUM over zero rows is NULL, hence "or 0"
                cursor.execute(
                    "SELECT SUM(text_size) FROM books_text WHERE text_size IS NOT NULL"
                )
                total_text_size = cursor.fetchone()[0] or 0

                # Get books with errors
                cursor.execute(
                    "SELECT COUNT(*) FROM books_text WHERE err_msg IS NOT NULL AND err_msg != ''"
                )
                books_with_errors = cursor.fetchone()[0]

            return {
                "status": "success",
                "database_path": self.fts_db_path,
                "total_indexed_books": total_books,
                "total_text_entries": total_entries,
                "total_text_size_bytes": total_text_size,
                "total_text_size_mb": round(total_text_size / (1024 * 1024), 2),
                "books_with_extraction_errors": books_with_errors,
                "format_distribution": [
                    {"format": fmt, "count": count} for fmt, count in format_stats
                ]
            }

        except sqlite3.Error as e:
            logger.error("Database error getting statistics: %s", e)
            return {
                "status": "error",
                "message": f"Database error: {e}"
            }
        except Exception as e:
            logger.exception("Unexpected error getting statistics: %s", e)
            return {
                "status": "error",
                "message": f"Statistics error: {e}"
            }

    def _extract_matches_with_context(self, content: str, query: str,
                                      context_chars: int, case_sensitive: bool) -> List[Dict[str, Any]]:
        """Extract matches with surrounding context from content.

        Args:
            content: Text to scan; empty or ``None`` yields no matches.
            query: Literal text to look for; an empty query yields no matches.
            context_chars: Characters of context kept on each side of a
                match. Negative values are treated as 0.
            case_sensitive: Whether the comparison is case-sensitive.

        Returns:
            One dictionary per non-overlapping occurrence, in document order,
            with the matched text, its absolute position, the context slice
            and the match offsets relative to that slice.
        """
        # An empty pattern would "match" at every offset of the content.
        if not content or not query:
            return []

        # A negative window would invert the slice bounds below.
        context_chars = max(0, context_chars)

        # The query is literal text, so escape it before compiling.
        flags = 0 if case_sensitive else re.IGNORECASE
        pattern = re.compile(re.escape(query), flags)
        content_length = len(content)

        matches = []
        for match in pattern.finditer(content):
            start_pos, end_pos = match.span()

            # Calculate context boundaries, clipped to the content
            context_start = max(0, start_pos - context_chars)
            context_end = min(content_length, end_pos + context_chars)

            matches.append({
                "match_text": match.group(),
                "position": start_pos,
                "context": content[context_start:context_end],
                "context_start": context_start,
                "context_end": context_end,
                "match_start_in_context": start_pos - context_start,
                "match_end_in_context": end_pos - context_start
            })

        return matches

    def _get_total_indexed_books(self) -> int:
        """Get total number of indexed books, or 0 if the database errors."""
        try:
            with closing(self._connect()) as conn:
                row = conn.execute("SELECT COUNT(DISTINCT book) FROM books_text").fetchone()
            return row[0]
        except sqlite3.Error:
            # Best effort: this figure is informational only.
            return 0

    def ping(self) -> Dict[str, Any]:
        """Test connection to FTS database.

        Returns:
            Dictionary with ``status`` ("success" or "error"), a ``message``
            and the ``database_path`` that was probed.
        """
        try:
            with closing(self._connect()) as conn:
                # Touch the table without counting every row.
                conn.execute("SELECT 1 FROM books_text LIMIT 1").fetchone()
            return {
                "status": "success",
                "message": "FTS database connection successful",
                "database_path": self.fts_db_path
            }
        except sqlite3.Error as e:
            return {
                "status": "error",
                "message": f"FTS database connection failed: {e}",
                "database_path": self.fts_db_path
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Unexpected error: {e}",
                "database_path": self.fts_db_path
            }

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/pshap/mcp-neolibrarian'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.