Skip to main content
Glama
metadata_sqlite.py9.81 kB
import os import sqlite3 from pathlib import Path from typing import List, Optional, Tuple from ..core.models import FulltextHit class MetadataRepository(object): """Access Calibre's metadata.db using SQLite. This repository focuses on read-only operations that are needed for research: basic search and looking up books by ISBN. """ def __init__(self, library_root: str): self._root = library_root self._db_path = os.path.join(library_root, "metadata.db") def _connect(self) -> sqlite3.Connection: """Open a new read-only SQLite connection to metadata.db.""" db_file = Path(self._db_path) if not db_file.exists(): raise FileNotFoundError( f"metadata.db wurde nicht gefunden unter: {db_file}. " "Bitte CALIBRE_LIBRARY_PATH korrekt setzen." ) # Use URI mode to enforce read-only access. uri = f"file:{self._db_path}?mode=ro" try: return sqlite3.connect(uri, uri=True) except sqlite3.OperationalError as exc: # pragma: no cover raise RuntimeError( f"metadata.db konnte nicht geoeffnet werden ({db_file}): {exc}" ) from exc def _parse_boolean_query(self, raw: str) -> List[List[str]]: """Parse a simple boolean query string into OR-of-AND keyword groups. Beispiel: - "fahrzeug AND bussysteme" -> [["fahrzeug", "bussysteme"]] - "fahrzeug AND bussysteme OR ethernet" -> [["fahrzeug", "bussysteme"], ["ethernet"]] Unterstuetzt nur die Operatoren AND/OR (case-insensitive) ohne Klammern und NOT. Alles andere wird als normales Suchwort behandelt. """ import re raw = (raw or "").strip() if not raw: return [] # Tokenize: Worte und Operatoren AND/OR # Wir splitten zunaechst auf Whitespace/Komma/Semikolon und behandeln # dann explizit 'and'/'or' als Operatoren. rough_tokens = [t for t in re.split(r"[\s,;]+", raw) if t] tokens: List[str] = [] for tok in rough_tokens: if tok.upper() in ("AND", "OR"): tokens.append(tok.upper()) else: tokens.append(tok) if not tokens: return [] groups: List[List[str]] = [[]] current = groups[0] last_op = "AND" for tok in tokens: if tok == "AND": last_op = "AND" continue if tok == "OR": last_op = "OR" continue # Normales Suchwort if last_op == "OR" and current: # Neue OR-Gruppe beginnen current = [tok] groups.append(current) else: # AND oder Start current.append(tok) last_op = "AND" # Leere Gruppen entfernen groups = [g for g in groups if g] return groups def search_fulltext(self, query: str, limit: int) -> List[FulltextHit]: """Search in title, ISBN and comments using simple LIKE matching. Unterstuetzt eine kleine Teilmenge der Calibre-FT-Sprache: - Mehrere Suchbegriffe - Operatoren AND / OR (case-insensitive), ohne Klammern/NOT Beispiele: - "fahrzeug AND bussysteme" - "fahrzeug OR auto" - "fahrzeug bussysteme" (enthaelt implizites AND ueber alle Worte) """ raw = (query or "").strip() if not raw: return [] groups = self._parse_boolean_query(raw) if not groups: return [] hits: List[FulltextHit] = [] # Einzelne LIKE-Klausel fuer ein Suchwort base_clause = "(" + " OR ".join( [ "lower(b.title) LIKE lower(?)", "lower(COALESCE(b.isbn, '')) LIKE lower(?)", "lower(COALESCE(c.text, '')) LIKE lower(?)", ] ) + ")" # OR ueber Gruppen, AND ueber Begriffe innerhalb einer Gruppe or_clauses = [] params: List[str] = [] for group in groups: # implizites AND ueber alle Begriffe in der Gruppe and_clauses = [base_clause for _ in group] or_clauses.append("(" + " AND ".join(and_clauses) + ")") for term in group: pattern = f"%{term}%" params.extend([pattern, pattern, pattern]) where_sql = " OR ".join(or_clauses) sql = f""" SELECT b.id AS book_id, b.title AS title, b.isbn AS isbn, COALESCE(c.text, '') AS comments FROM books b LEFT JOIN comments c ON c.book = b.id WHERE {where_sql} ORDER BY b.id LIMIT ? """ params.append(limit) with self._connect() as conn: conn.row_factory = sqlite3.Row cur = conn.cursor() cur.execute(sql, params) rows = cur.fetchall() for row in rows: comments = row["comments"] or "" snippet = self._build_snippet(comments, raw) if not snippet: snippet = row["title"] or "" # ISBN bevorzugt aus identifiers/normalisiert nachladen isbn = row["isbn"] if not isbn: isbn = self._lookup_isbn_for_book(conn, row["book_id"]) or None hits.append( FulltextHit( book_id=row["book_id"], title=row["title"], isbn=isbn, snippet=snippet, ) ) return hits from typing import List, Optional, Tuple # ... Rest der Importe bleibt def get_book_by_isbn(self, isbn: str) -> Optional[Tuple[int, str, Optional[str], str]]: """Look up a single book by ISBN and return its metadata plus comments. The lookup tries both books.isbn and the identifiers table. This is necessary because Calibre often stores ISBNs only in identifiers. """ # Normalize requested ISBN: keep digits and 'X' only normalized_chars = [] for ch in isbn: if ch.isdigit() or ch.upper() == "X": normalized_chars.append(ch) normalized = "".join(normalized_chars) if not normalized: return None # 1) Try identifiers table (isbn / isbn13) – das ist in deiner DB der Fall sql_ident = """ SELECT b.id AS book_id, b.title AS title, COALESCE(b.isbn, i.val) AS isbn, COALESCE(c.text, '') AS comments FROM books b JOIN identifiers i ON i.book = b.id LEFT JOIN comments c ON c.book = b.id WHERE lower(i.type) IN ('isbn', 'isbn13') AND REPLACE(REPLACE(i.val, '-', ''), ' ', '') = ? LIMIT 1 """ with self._connect() as conn: conn.row_factory = sqlite3.Row cur = conn.cursor() cur.execute(sql_ident, (normalized,)) row = cur.fetchone() if row is not None: return ( row["book_id"], row["title"], row["isbn"], row["comments"], ) # 2) Fallback: direct match on books.isbn (für andere Bücher) sql_books = """ SELECT b.id AS book_id, b.title AS title, b.isbn AS isbn, COALESCE(c.text, '') AS comments FROM books b LEFT JOIN comments c ON c.book = b.id WHERE REPLACE(REPLACE(COALESCE(b.isbn, ''), '-', ''), ' ', '') = ? LIMIT 1 """ cur.execute(sql_books, (normalized,)) row = cur.fetchone() if row is None: return None return ( row["book_id"], row["title"], row["isbn"], row["comments"], ) @staticmethod def _build_snippet(text: str, query: str, window: int = 200) -> str: """Build a small snippet around the first occurrence of query. If the query is not found, return the first window characters. """ cleaned = text.strip() if not cleaned: return "" lower_text = cleaned.lower() lower_query = query.lower() idx = lower_text.find(lower_query) if idx < 0: return cleaned[:window] start = max(idx - window // 2, 0) end = start + window return cleaned[start:end] def _lookup_isbn_for_book(self, conn: sqlite3.Connection, book_id: int) -> Optional[str]: """Versuche, eine "beste" ISBN fuer ein Buch zu ermitteln. Reihenfolge: 1) identifiers (isbn / isbn13), bereinigt von '-', ' ' 2) books.isbn, bereinigt """ cur = conn.cursor() # 1) identifiers sql_ident = """ SELECT i.val FROM identifiers i WHERE i.book = ? AND lower(i.type) IN ('isbn', 'isbn13') LIMIT 1 """ cur.execute(sql_ident, (book_id,)) row = cur.fetchone() if row and row[0]: val = str(row[0]) return val.strip() # 2) books.isbn sql_book = "SELECT isbn FROM books WHERE id = ? LIMIT 1" cur.execute(sql_book, (book_id,)) row = cur.fetchone() if row and row[0]: return str(row[0]).strip() return None

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Miguel0888/mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server