"""
Embedding engine for Ember V3.
Primary: sentence-transformers (all-MiniLM-L6-v2, 384 dims, ~90MB)
Fallback: FTS5 keyword search when sentence-transformers not installed.
The engine gracefully degrades — Ember always works, just with
reduced retrieval quality in FTS5-only mode.
"""
from __future__ import annotations
import sys
import logging
from typing import Optional, List
import numpy as np
# Module-level logger for the embeddings subsystem.
logger = logging.getLogger("ember.embeddings")
# Model name passed to SentenceTransformer; per the module docstring this is
# a ~90MB download producing 384-dimensional vectors.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
# Dimensionality of the vectors produced by EMBEDDING_MODEL.
EMBEDDING_DIM = 384
class EmbeddingEngine:
    """Manages embedding generation with graceful fallback to FTS5.

    Modes:
        "semantic"  -- sentence-transformers model loaded; ``embed`` returns
                       serialized float32 vectors.
        "fts5_only" -- model unavailable; ``embed`` returns ``None`` and
                       callers should fall back to FTS5 keyword search.

    Construction never raises: both a missing package and a failed model
    load degrade to FTS5-only mode.
    """

    def __init__(self):
        # Operating mode: "semantic" or "fts5_only" once _initialize runs;
        # "unknown" only transiently during construction.
        self.mode: str = "unknown"
        # Loaded SentenceTransformer instance, or None in fallback mode.
        self.model = None
        self._initialize()

    def _initialize(self) -> None:
        """Load the sentence-transformers model, degrading gracefully.

        Handles two distinct failure modes: the package not being installed
        (ImportError) and the model itself failing to load (e.g. download or
        cache errors). Either way the engine ends up in "fts5_only" mode
        instead of raising out of the constructor.
        """
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            self.mode = "fts5_only"
            logger.warning(
                "sentence-transformers not found — using FTS5 keyword search. "
                "Install with: pip install sentence-transformers"
            )
            return
        try:
            self.model = SentenceTransformer(EMBEDDING_MODEL)
        except Exception:
            # Model load can fail even with the package installed
            # (network/cache problems); degrade rather than crash.
            self.model = None
            self.mode = "fts5_only"
            logger.exception(
                "Failed to load embedding model — using FTS5 keyword search."
            )
            return
        self.mode = "semantic"
        logger.info("Embedding engine: semantic mode (all-MiniLM-L6-v2)")

    @property
    def is_semantic(self) -> bool:
        """True when a real embedding model is loaded and usable."""
        return self.mode == "semantic"

    def embed(self, text: str) -> Optional[bytes]:
        """Embed text and return serialized float32 bytes, or None in FTS5 mode."""
        if not self.is_semantic or self.model is None:
            return None
        # normalize_embeddings=True yields unit-length vectors, so cosine
        # similarity downstream reduces to a plain dot product.
        vector = self.model.encode(text, normalize_embeddings=True)
        return vector.astype(np.float32).tobytes()

    def embed_batch(self, texts: list[str]) -> list[Optional[bytes]]:
        """Embed multiple texts. Returns list of serialized vectors.

        In FTS5-only mode returns a list of None placeholders of the same
        length, so callers can zip results against inputs either way.
        """
        if not self.is_semantic or self.model is None:
            return [None] * len(texts)
        if not texts:
            # Avoid a pointless model call on an empty batch.
            return []
        vectors = self.model.encode(texts, normalize_embeddings=True, batch_size=32)
        return [v.astype(np.float32).tobytes() for v in vectors]

    def deserialize(self, blob: bytes) -> np.ndarray:
        """Deserialize an embedding blob back to a float32 numpy array.

        The .copy() detaches the result from the read-only bytes buffer so
        callers may safely mutate it.
        """
        return np.frombuffer(blob, dtype=np.float32).copy()

    def similarity(self, vec_a: bytes, vec_b: bytes) -> float:
        """Cosine similarity between two serialized vectors.

        Stored vectors are L2-normalized at encode time, but norms are
        re-checked defensively so a zero vector yields 0.0 instead of NaN.
        """
        a = self.deserialize(vec_a)
        b = self.deserialize(vec_b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))

    def bulk_similarity(self, query_blob: bytes, candidate_blobs: list[bytes]) -> list[float]:
        """Cosine similarity between the query and each candidate, in order.

        Zero-norm vectors (query or candidate) score 0.0 rather than NaN.
        """
        if not candidate_blobs:
            return []
        query = self.deserialize(query_blob)
        norm_q = np.linalg.norm(query)
        if norm_q == 0:
            return [0.0] * len(candidate_blobs)
        # Normalize the query once; each candidate then needs only one
        # norm computation and one dot product.
        query_normed = query / norm_q
        scores: list[float] = []
        for blob in candidate_blobs:
            cand = self.deserialize(blob)
            norm_c = np.linalg.norm(cand)
            if norm_c == 0:
                scores.append(0.0)
            else:
                scores.append(float(np.dot(query_normed, cand / norm_c)))
        return scores