backfill_embeddings.py
"""Backfill embeddings tool - generate embeddings for memories that lack them.""" from typing import TYPE_CHECKING, Any from ..context import db, mcp from ..security.validators import validate_positive_int if TYPE_CHECKING: from sentence_transformers import SentenceTransformer # Optional dependency for embeddings _SentenceTransformer: "type[SentenceTransformer] | None" try: from sentence_transformers import SentenceTransformer _SentenceTransformer = SentenceTransformer SENTENCE_TRANSFORMERS_AVAILABLE = True except ImportError: _SentenceTransformer = None SENTENCE_TRANSFORMERS_AVAILABLE = False @mcp.tool() def backfill_embeddings( model: str = "all-MiniLM-L6-v2", limit: int | None = None, force: bool = False, dry_run: bool = False, ) -> dict[str, Any]: """ Generate embeddings for memories that don't have them. This tool backfills embedding vectors for existing memories, enabling semantic search and improved clustering. Embeddings are generated using sentence-transformers models (default: all-MiniLM-L6-v2). **When to use:** - After importing memories without embeddings - When switching from text-only to semantic search - Before running consolidation with embedding-based clustering - To enable similarity-based features **Safety features:** - dry_run: Preview what would be processed without making changes - limit: Process only N memories (useful for testing or incremental backfill) - force: Regenerate embeddings even if they exist (for model upgrades) Args: model: Sentence-transformers model name (default: "all-MiniLM-L6-v2"). Common alternatives: "all-mpnet-base-v2" (higher quality, slower), "paraphrase-MiniLM-L6-v2" (good for paraphrase detection). limit: Maximum number of memories to process (1-10,000). If None, processes all. force: If True, regenerate embeddings even if they exist (for model upgrades). dry_run: If True, show what would be done without actually doing it. Returns: Result dictionary with: - success: Whether operation completed successfully - dry_run: Whether this was a dry run - processed: Number of memories processed (0 if dry_run) - errors: Number of errors encountered - model: Model name used - total_memories: Total memories in database - memories_without_embeddings: Count of memories lacking embeddings - would_process: Number of memories that would be processed (dry_run only) - message: Human-readable summary Raises: ValueError: If limit is out of valid range. ImportError: If sentence-transformers is not installed. 
Example: # Preview what would be backfilled backfill_embeddings(dry_run=True) # Backfill 10 memories for testing backfill_embeddings(limit=10) # Backfill all memories without embeddings backfill_embeddings() # Force regenerate with better model backfill_embeddings(model="all-mpnet-base-v2", force=True) """ # Check if sentence-transformers is available if not SENTENCE_TRANSFORMERS_AVAILABLE: return { "success": False, "error": "sentence-transformers not installed", "hint": "Install with: pip install sentence-transformers", } # Input validation if limit is not None: limit = validate_positive_int(limit, "limit", max_value=10000) # Get all memories memories = db.list_memories() total_count = len(memories) # Filter to those without embeddings (or all if force=True) if force: targets = memories[:limit] if limit else memories message_prefix = "Would regenerate" if dry_run else "Regenerating" else: targets = [m for m in memories if m.embed is None] if limit: targets = targets[:limit] message_prefix = "Would backfill" if dry_run else "Backfilled" without_embeddings = len([m for m in memories if m.embed is None]) # Handle case where nothing needs processing if not targets: return { "success": True, "dry_run": dry_run, "processed": 0, "errors": 0, "model": model, "total_memories": total_count, "memories_without_embeddings": without_embeddings, "message": "No memories need embeddings backfill", } # Dry run - return preview if dry_run: return { "success": True, "dry_run": True, "processed": 0, "errors": 0, "model": model, "total_memories": total_count, "memories_without_embeddings": without_embeddings, "would_process": len(targets), "message": f"Dry run: Would process {len(targets)} memories with model {model}", } # Load embedding model if _SentenceTransformer is None: return { "success": False, "error": "sentence-transformers not available", "model": model, } try: embedding_model = _SentenceTransformer(model) except Exception as e: return { "success": False, "error": f"Failed to load model {model}", "details": str(e), "hint": "Check model name or try default: all-MiniLM-L6-v2", } # Process memories processed = 0 errors = 0 error_details = [] for memory in targets: try: # Generate embedding embedding = embedding_model.encode(memory.content, convert_to_numpy=True) memory.embed = embedding.tolist() # Save back to storage db.save_memory(memory) processed += 1 except Exception as e: errors += 1 error_details.append( { "memory_id": memory.id, "error": str(e), } ) # Continue processing other memories continue # Build result result = { "success": errors == 0, "dry_run": False, "processed": processed, "errors": errors, "model": model, "total_memories": total_count, "memories_without_embeddings": without_embeddings - processed, # Updated count "message": f"{message_prefix} embeddings for {processed} memories", } # Include error details if any errors occurred if errors > 0: result["error_details"] = error_details[:10] # Limit to first 10 errors if errors > 10: result["additional_errors"] = errors - 10 return result
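
For reference, a minimal standalone sketch of the embedding step the processing loop performs, using the same default model as the tool. It assumes sentence-transformers is installed; the sample memory text is invented for illustration.

from sentence_transformers import SentenceTransformer

# Same default model the tool uses.
model = SentenceTransformer("all-MiniLM-L6-v2")

# encode() returns a numpy array when convert_to_numpy=True.
vector = model.encode("Prefers dark mode in all editors", convert_to_numpy=True)
print(vector.shape)  # (384,) - all-MiniLM-L6-v2 produces 384-dimensional vectors

# The tool stores the plain-list form on memory.embed before saving.
stored = vector.tolist()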
