"""
Vector Embeddings - Semantic understanding with sentence-transformers.
This module provides:
- Vector embeddings for memories (a semantic complement to TF-IDF's lexical matching)
- Hybrid search combining TF-IDF + vector similarity
- Efficient storage of vectors in SQLite
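
Typical usage (a minimal sketch; the doc ID and texts are illustrative):

    index = get_vector_index()
    index.add(1, "the deploy script lives in scripts/")
    hits = index.search("where is the deploy script?")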
"""
import logging
import struct
from typing import Dict, List, Optional, Tuple
from sentence_transformers import SentenceTransformer
import numpy as np
from .config import settings
logger = logging.getLogger(__name__)
# Global model instance (lazy loaded, shared across all contexts)
_model: Optional[SentenceTransformer] = None
def is_available() -> bool:
"""Check if vector embeddings are available. Always True since deps are core."""
return True
def _get_model() -> SentenceTransformer:
"""Get or create the embedding model (lazy loading, shared across contexts)."""
global _model
if _model is None:
logger.info(f"Loading embedding model ({settings.embedding_model})...")
_model = SentenceTransformer(settings.embedding_model)
logger.info("Embedding model loaded.")
return _model
def encode(text: str) -> Optional[bytes]:
"""
    Encode text to a vector embedding using the configured model.

    Returns the vector packed as float32 bytes (native byte order) for
    storage in a SQLite BLOB; returns None if no model is available.
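
    Example (loads the configured model on first call; the exact
    output size depends on settings.embedding_model):

        blob = encode("hello world")   # bytes, 4 * embedding_dim long
        vec = decode(blob)             # back to List[float]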
"""
model = _get_model()
if model is None:
return None
# Generate embedding
embedding = model.encode(text, convert_to_numpy=True)
# Pack as bytes for SQLite storage
return struct.pack(f'{len(embedding)}f', *embedding)
def decode(data: bytes) -> Optional[List[float]]:
"""Decode vector bytes back to a list of floats."""
if not data:
return None
num_floats = len(data) // 4 # 4 bytes per float
return list(struct.unpack(f'{num_floats}f', data))
def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
"""Compute cosine similarity between two vectors."""
a = np.array(vec1)
b = np.array(vec2)
dot_product = np.dot(a, b)
norm_a = np.linalg.norm(a)
norm_b = np.linalg.norm(b)
if norm_a == 0 or norm_b == 0:
return 0.0
return float(dot_product / (norm_a * norm_b))
class VectorIndex:
"""
In-memory vector index for fast similarity search.
    Stores vectors keyed by document ID and supports brute-force
    cosine-similarity search over all stored vectors.
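
    Typical usage (a minimal sketch; doc IDs and texts are illustrative):

        index = VectorIndex()
        index.add(1, "remember to rotate the API keys quarterly")
        index.add(2, "notes from the storage architecture review")
        hits = index.search("key rotation schedule")  # [(doc_id, similarity), ...]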
"""
def __init__(self):
self.vectors: Dict[int, List[float]] = {}
def add(self, doc_id: int, text: str) -> bool:
"""Add a document to the index."""
model = _get_model()
embedding = model.encode(text, convert_to_numpy=True)
self.vectors[doc_id] = embedding.tolist()
return True
def add_from_bytes(self, doc_id: int, data: bytes) -> bool:
"""Add a pre-computed vector from bytes."""
vec = decode(data)
if vec:
self.vectors[doc_id] = vec
return True
return False
def remove(self, doc_id: int) -> None:
"""Remove a document from the index."""
self.vectors.pop(doc_id, None)
def search(self, query: str, top_k: int = 10, threshold: float = 0.3) -> List[Tuple[int, float]]:
"""
Search for similar documents.
Args:
query: Query text
top_k: Maximum results
threshold: Minimum similarity score
Returns:
List of (doc_id, similarity) tuples, sorted by similarity descending.
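
        Example (scores are illustrative):

            hits = index.search("database migration", top_k=3)
            # -> e.g. [(7, 0.81), (2, 0.44)]; entries below `threshold` are dropped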
"""
if not self.vectors:
return []
        model = _get_model()
        # Encode the query once and convert once, outside the loop
        query_vec = model.encode(query, convert_to_numpy=True).tolist()
        # Compute similarity against every indexed vector
        results = []
        for doc_id, doc_vec in self.vectors.items():
            sim = cosine_similarity(query_vec, doc_vec)
if sim >= threshold:
results.append((doc_id, sim))
# Sort by similarity descending
results.sort(key=lambda x: x[1], reverse=True)
return results[:top_k]
def __len__(self) -> int:
return len(self.vectors)
class HybridSearch:
"""
Hybrid search combining TF-IDF and vector similarity.
    Uses a weighted sum of TF-IDF and vector scores, with TF-IDF alone
    as the fallback when no vectors are indexed.
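
    Typical wiring (a sketch; the TF-IDF index is assumed to expose a
    search(query, top_k=..., threshold=...) method, as used below):

        hybrid = HybridSearch(tfidf_index, get_vector_index())
        results = hybrid.search("error handling decisions", top_k=10)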
"""
def __init__(self, tfidf_index, vector_index: Optional[VectorIndex] = None):
self.tfidf = tfidf_index
self.vectors = vector_index or VectorIndex()
self.vector_weight = settings.hybrid_vector_weight # Configurable via DAEM0NMCP_HYBRID_VECTOR_WEIGHT
def search(
self,
query: str,
top_k: int = 10,
tfidf_threshold: float = 0.1,
vector_threshold: float = 0.3
) -> List[Tuple[int, float]]:
"""
Hybrid search combining TF-IDF and vector similarity.
Combines scores: final_score = (1 - weight) * tfidf_score + weight * vector_score
Falls back to TF-IDF only if vector index is empty.
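
        Worked example (assuming vector_weight = 0.5): a document with
        tfidf_score = 0.4 and vector_score = 0.8 receives
        0.5 * 0.4 + 0.5 * 0.8 = 0.6. A document matched by only one
        index still scores; the missing side contributes 0.0.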
"""
# Get TF-IDF results
tfidf_results = self.tfidf.search(query, top_k=top_k * 2, threshold=tfidf_threshold)
tfidf_scores = {doc_id: score for doc_id, score in tfidf_results}
# If vectors available, get vector results
if len(self.vectors) > 0:
vector_results = self.vectors.search(query, top_k=top_k * 2, threshold=vector_threshold)
vector_scores = {doc_id: score for doc_id, score in vector_results}
# Combine scores
all_docs = set(tfidf_scores.keys()) | set(vector_scores.keys())
combined = []
for doc_id in all_docs:
tfidf_score = tfidf_scores.get(doc_id, 0.0)
vector_score = vector_scores.get(doc_id, 0.0)
# Weighted combination
final_score = (
(1 - self.vector_weight) * tfidf_score +
self.vector_weight * vector_score
)
combined.append((doc_id, final_score))
combined.sort(key=lambda x: x[1], reverse=True)
return combined[:top_k]
# Fall back to TF-IDF only if no vectors indexed
return tfidf_results[:top_k]
# Global vector index instance
_global_vector_index: Optional[VectorIndex] = None
def get_vector_index() -> VectorIndex:
"""Get or create the global vector index."""
global _global_vector_index
if _global_vector_index is None:
_global_vector_index = VectorIndex()
return _global_vector_index
def reset_vector_index() -> None:
"""Reset the global vector index (useful for testing)."""
global _global_vector_index
_global_vector_index = None