BuildAutomata Memory MCP Server

by brucepro
embeddings.py (4.96 kB)
""" Embedding generation for BuildAutomata Memory System Copyright 2025 Jurden Bruce """ import hashlib import logging import traceback from typing import List, Dict, Any from datetime import datetime logger = logging.getLogger("buildautomata-memory.embeddings") # Check availability without importing the heavy library try: import importlib.util EMBEDDINGS_AVAILABLE = importlib.util.find_spec("sentence_transformers") is not None except Exception: EMBEDDINGS_AVAILABLE = False if not EMBEDDINGS_AVAILABLE: logger.warning("SentenceTransformers not available - using fallback embeddings") class EmbeddingGenerator: """Handles text embedding generation with caching and fallback""" def __init__(self, config: Dict[str, Any], embedding_cache, error_log: List[Dict[str, Any]], lazy_load: bool = False): """ Initialize embedding generator Args: config: Configuration dict with 'vector_size' key embedding_cache: LRUCache for caching embeddings error_log: Shared error log list lazy_load: If True, delay encoder initialization until first use """ self.config = config self.embedding_cache = embedding_cache self.error_log = error_log self.lazy_load = lazy_load self.encoder = None self._encoder_initialized = False if not lazy_load: self._init_encoder() self._encoder_initialized = True def _ensure_encoder(self): """Ensure encoder is initialized (lazy loading support)""" if self._encoder_initialized or not self.lazy_load: return import time start = time.perf_counter() self._init_encoder() self._encoder_initialized = True logger.info(f"[LAZY] Encoder loaded on-demand in {(time.perf_counter() - start)*1000:.2f}ms") def _init_encoder(self): """Initialize sentence encoder""" if not EMBEDDINGS_AVAILABLE: logger.warning("SentenceTransformers not available, using fallback") return try: # Import only when actually needed (lazy loading) from sentence_transformers import SentenceTransformer self.encoder = SentenceTransformer("all-mpnet-base-v2", device="cpu") # Model dimension is fixed at 768 for all-mpnet-base-v2 # Only test if config disagrees (first-time init or model change) expected_size = 768 if self.config["vector_size"] != expected_size: logger.info(f"Verifying encoder dimension (config mismatch: {self.config['vector_size']} != {expected_size})") test_embedding = self.encoder.encode("test") actual_size = len(test_embedding) if actual_size != self.config["vector_size"]: logger.warning(f"Encoder size {actual_size} != config {self.config['vector_size']}, updating config") self.config["vector_size"] = actual_size logger.info(f"Encoder initialized with dimension {actual_size}") else: logger.info(f"Encoder initialized with dimension {expected_size}") except Exception as e: logger.error(f"Encoder initialization failed: {e}") self._log_error("encoder_init", e) self.encoder = None def _log_error(self, operation: str, error: Exception): """Log detailed error information""" error_entry = { "timestamp": datetime.now().isoformat(), "operation": operation, "error_type": type(error).__name__, "error_msg": str(error), "traceback": traceback.format_exc(), } self.error_log.append(error_entry) if len(self.error_log) > 100: self.error_log = self.error_log[-100:] def generate_embedding(self, text: str) -> List[float]: """Generate embedding with caching""" self._ensure_encoder() text_hash = hashlib.md5(text.encode()).hexdigest() if text_hash in self.embedding_cache: return self.embedding_cache[text_hash] if self.encoder: embedding = self.encoder.encode(text).tolist() else: # Fallback using repeated hash embedding = [] hash_input = 
text.encode() while len(embedding) < self.config["vector_size"]: hash_obj = hashlib.sha256(hash_input) hash_bytes = hash_obj.digest() embedding.extend([float(b) / 255.0 for b in hash_bytes]) hash_input = hash_bytes embedding = embedding[:self.config["vector_size"]] self.embedding_cache[text_hash] = embedding return embedding def is_available(self) -> bool: """Check if real embeddings (not fallback) are available""" return self.encoder is not None
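
For context, here is a minimal usage sketch. The dict cache, config values, and variable names are illustrative stand-ins, not the server's actual wiring (the server passes its own LRUCache and configuration):

# Minimal usage sketch -- hypothetical wiring, assuming a dict-like cache
config = {"vector_size": 768}
cache = {}      # stands in for the server's LRUCache
errors = []     # shared error log, trimmed in place by _log_error

gen = EmbeddingGenerator(config, cache, errors, lazy_load=True)
vec = gen.generate_embedding("hello world")  # encoder loads on first call
assert len(vec) == config["vector_size"]
print("real embeddings" if gen.is_available() else "hash fallback")

A second call with the same text hits the MD5-keyed cache and skips the encoder entirely; if sentence-transformers is not installed, the SHA-256 chaining fallback still yields a deterministic vector of the configured size.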
