maintenance.py
"""Maintenance CLI for JSONL storage. Expose storage statistics, compaction, and embeddings backfill operations. """ from __future__ import annotations import argparse import json from pathlib import Path from typing import TYPE_CHECKING from .jsonl_storage import JSONLStorage if TYPE_CHECKING: from sentence_transformers import SentenceTransformer # Optional dependency for embeddings _SentenceTransformer: type[SentenceTransformer] | None try: from sentence_transformers import SentenceTransformer _SentenceTransformer = SentenceTransformer SENTENCE_TRANSFORMERS_AVAILABLE = True except ImportError: _SentenceTransformer = None SENTENCE_TRANSFORMERS_AVAILABLE = False def cmd_stats(storage_path: Path | None) -> int: storage = JSONLStorage(storage_path=storage_path) storage.connect() stats = storage.get_storage_stats() print(json.dumps(stats, indent=2)) return 0 def cmd_compact(storage_path: Path | None, *, quiet: bool = False) -> int: storage = JSONLStorage(storage_path=storage_path) storage.connect() before = storage.get_storage_stats() result = storage.compact() after = storage.get_storage_stats() if quiet: print(json.dumps({"result": result}, indent=2)) else: print("Before:") print(json.dumps(before, indent=2)) print("\nCompaction:") print(json.dumps(result, indent=2)) print("\nAfter:") print(json.dumps(after, indent=2)) return 0 def cmd_backfill_embeddings( storage_path: Path | None, *, model: str = "all-MiniLM-L6-v2", limit: int | None = None, force: bool = False, dry_run: bool = False, ) -> int: """Backfill embeddings for memories that don't have them.""" if not SENTENCE_TRANSFORMERS_AVAILABLE: print(json.dumps({"error": "sentence-transformers not installed"}, indent=2)) return 1 storage = JSONLStorage(storage_path=storage_path) storage.connect() # Get all memories memories = storage.list_memories() # Filter to those without embeddings (or all if force=True) if force: targets = memories[:limit] if limit else memories else: targets = [m for m in memories if m.embed is None] if limit: targets = targets[:limit] if not targets: print(json.dumps({"message": "No memories need embeddings backfill"}, indent=2)) return 0 print(f"{'DRY RUN: ' if dry_run else ''}Processing {len(targets)} memories...") if dry_run: print( json.dumps( { "dry_run": True, "total_memories": len(memories), "memories_without_embeddings": len([m for m in memories if m.embed is None]), "would_process": len(targets), }, indent=2, ) ) return 0 # Load model if _SentenceTransformer is None: print("ERROR: sentence-transformers not available") return 1 print(f"Loading model: {model}...") embedding_model = _SentenceTransformer(model) # Process memories processed = 0 errors = 0 for i, memory in enumerate(targets, 1): try: # Generate embedding embedding = embedding_model.encode(memory.content, convert_to_numpy=True) memory.embed = embedding.tolist() # Save back to storage storage.save_memory(memory) processed += 1 if i % 10 == 0: print(f" Processed {i}/{len(targets)}...") except Exception as e: errors += 1 print(f" Error processing memory {memory.id}: {e}") result = { "success": True, "processed": processed, "errors": errors, "model": model, "message": f"Backfilled embeddings for {processed} memories", } print(json.dumps(result, indent=2)) return 0 if errors == 0 else 1 def main() -> int: parser = argparse.ArgumentParser(description="JSONL storage maintenance") parser.add_argument( "--storage-path", type=Path, help="Override storage path (defaults to STM_STORAGE_PATH or config)", ) sub = parser.add_subparsers(dest="command", 
required=True) p_stats = sub.add_parser("stats", help="Show storage stats") p_stats.set_defaults(func=lambda args: cmd_stats(args.storage_path)) p_compact = sub.add_parser("compact", help="Compact JSONL files") p_compact.add_argument("--quiet", action="store_true", help="Only print compaction result") p_compact.set_defaults(func=lambda args: cmd_compact(args.storage_path, quiet=args.quiet)) p_backfill = sub.add_parser( "backfill-embeddings", help="Generate embeddings for memories without them" ) p_backfill.add_argument("--model", default="all-MiniLM-L6-v2", help="Embedding model to use") p_backfill.add_argument("--limit", type=int, help="Maximum number of memories to process") p_backfill.add_argument( "--force", action="store_true", help="Regenerate embeddings even if they exist" ) p_backfill.add_argument( "--dry-run", action="store_true", help="Show what would be done without doing it" ) p_backfill.set_defaults( func=lambda args: cmd_backfill_embeddings( args.storage_path, model=args.model, limit=args.limit, force=args.force, dry_run=args.dry_run, ) ) args = parser.parse_args() return int(args.func(args) or 0) if __name__ == "__main__": raise SystemExit(main())
