MCP Memory Service

Apache 2.0

689

Overview InspectNew Endpoints Schema Related Servers Reviews Score

mcp-memory-service
scripts

#!/usr/bin/env python3
"""
Repair script to fix existing SQLite-vec databases without full migration.

This script attempts to repair the database in-place by:
1. Checking the current state
2. Regenerating missing embeddings
3. Fixing dimension mismatches if possible
"""

import asyncio
import os
import sys
import sqlite3
import logging
from typing import List, Tuple

# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

try:
    import sqlite_vec
    from sqlite_vec import serialize_float32
    SQLITE_VEC_AVAILABLE = True
except ImportError:
    SQLITE_VEC_AVAILABLE = False

try:
    from sentence_transformers import SentenceTransformer
    SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
    SENTENCE_TRANSFORMERS_AVAILABLE = False

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class SqliteVecRepair:
    """Repair SQLite-vec database embeddings."""
    
    def __init__(self, db_path: str, model_name: str = "all-MiniLM-L6-v2"):
        self.db_path = db_path
        self.model_name = model_name
        self.conn = None
        self.model = None
        self.embedding_dimension = 384  # Default for all-MiniLM-L6-v2
        
    def check_dependencies(self):
        """Check required dependencies."""
        print("Checking dependencies...")
        
        if not SQLITE_VEC_AVAILABLE:
            print("❌ sqlite-vec not installed. Run: pip install sqlite-vec")
            return False
            
        if not SENTENCE_TRANSFORMERS_AVAILABLE:
            print("❌ sentence-transformers not installed. Run: pip install sentence-transformers torch")
            return False
            
        print("✅ All dependencies available")
        return True
        
    def connect_database(self):
        """Connect to the database."""
        print(f"\nConnecting to database: {self.db_path}")
        
        if not os.path.exists(self.db_path):
            raise FileNotFoundError(f"Database not found: {self.db_path}")
            
        self.conn = sqlite3.connect(self.db_path)
        self.conn.enable_load_extension(True)
        sqlite_vec.load(self.conn)
        self.conn.enable_load_extension(False)
        
        print("✅ Connected to database")
        
    def analyze_database(self) -> dict:
        """Analyze the current state of the database."""
        print("\nAnalyzing database...")
        
        analysis = {
            "memory_count": 0,
            "embedding_count": 0,
            "missing_embeddings": 0,
            "embedding_dimension": None,
            "issues": []
        }
        
        # Count memories
        cursor = self.conn.execute("SELECT COUNT(*) FROM memories")
        analysis["memory_count"] = cursor.fetchone()[0]
        
        # Count embeddings
        cursor = self.conn.execute("SELECT COUNT(*) FROM memory_embeddings")
        analysis["embedding_count"] = cursor.fetchone()[0]
        
        # Check embedding dimension
        cursor = self.conn.execute("""
            SELECT sql FROM sqlite_master 
            WHERE type='table' AND name='memory_embeddings'
        """)
        schema = cursor.fetchone()
        if schema:
            # Extract dimension from schema
            import re
            match = re.search(r'FLOAT\[(\d+)\]', schema[0])
            if match:
                analysis["embedding_dimension"] = int(match.group(1))
                
        # Find memories without embeddings
        cursor = self.conn.execute("""
            SELECT COUNT(*) FROM memories m
            WHERE NOT EXISTS (
                SELECT 1 FROM memory_embeddings e WHERE e.rowid = m.id
            )
        """)
        analysis["missing_embeddings"] = cursor.fetchone()[0]
        
        # Identify issues
        if analysis["memory_count"] != analysis["embedding_count"]:
            analysis["issues"].append(
                f"Mismatch: {analysis['memory_count']} memories vs {analysis['embedding_count']} embeddings"
            )
            
        if analysis["missing_embeddings"] > 0:
            analysis["issues"].append(
                f"Missing embeddings: {analysis['missing_embeddings']} memories have no embeddings"
            )
            
        print(f"  Memories: {analysis['memory_count']}")
        print(f"  Embeddings: {analysis['embedding_count']}")
        print(f"  Missing embeddings: {analysis['missing_embeddings']}")
        print(f"  Embedding dimension: {analysis['embedding_dimension']}")
        
        if analysis["issues"]:
            print("\n⚠️  Issues found:")
            for issue in analysis["issues"]:
                print(f"  - {issue}")
        else:
            print("\n✅ No issues found")
            
        return analysis
        
    def load_model(self):
        """Load the embedding model."""
        print(f"\nLoading embedding model: {self.model_name}")
        
        self.model = SentenceTransformer(self.model_name)
        
        # Get actual dimension
        test_embedding = self.model.encode(["test"], convert_to_numpy=True)
        self.embedding_dimension = test_embedding.shape[1]
        
        print(f"✅ Model loaded (dimension: {self.embedding_dimension})")
        
    def generate_missing_embeddings(self, analysis: dict) -> int:
        """Generate embeddings for memories that don't have them."""
        if analysis["missing_embeddings"] == 0:
            return 0
            
        print(f"\nGenerating {analysis['missing_embeddings']} missing embeddings...")
        
        # Check if dimensions match
        if analysis["embedding_dimension"] and analysis["embedding_dimension"] != self.embedding_dimension:
            print(f"⚠️  WARNING: Database expects dimension {analysis['embedding_dimension']}, "
                  f"but model produces dimension {self.embedding_dimension}")
            print("   This may cause errors. Consider full migration instead.")
            response = input("\nContinue anyway? (y/N): ").strip().lower()
            if response != 'y':
                return 0
                
        # Find memories without embeddings
        cursor = self.conn.execute("""
            SELECT m.id, m.content FROM memories m
            WHERE NOT EXISTS (
                SELECT 1 FROM memory_embeddings e WHERE e.rowid = m.id
            )
        """)
        
        memories_to_fix = cursor.fetchall()
        fixed_count = 0
        
        for memory_id, content in memories_to_fix:
            try:
                # Generate embedding
                embedding = self.model.encode([content], convert_to_numpy=True)[0]
                
                # Insert embedding
                self.conn.execute(
                    "INSERT INTO memory_embeddings (rowid, content_embedding) VALUES (?, ?)",
                    (memory_id, serialize_float32(embedding))
                )
                
                fixed_count += 1
                
                # Show progress
                if fixed_count % 10 == 0:
                    print(f"  ... {fixed_count}/{len(memories_to_fix)} embeddings generated")
                    
            except Exception as e:
                logger.error(f"Failed to generate embedding for memory {memory_id}: {e}")
                
        self.conn.commit()
        print(f"✅ Generated {fixed_count} embeddings")
        
        return fixed_count
        
    def verify_search(self) -> bool:
        """Test if semantic search works."""
        print("\nTesting semantic search...")
        
        try:
            # Generate a test query embedding
            test_query = "test query"
            query_embedding = self.model.encode([test_query], convert_to_numpy=True)[0]
            
            # Try to search
            cursor = self.conn.execute("""
                SELECT COUNT(*) FROM memory_embeddings 
                WHERE content_embedding MATCH ?
                LIMIT 1
            """, (serialize_float32(query_embedding),))
            
            cursor.fetchone()
            print("✅ Semantic search is working")
            return True
            
        except Exception as e:
            print(f"❌ Semantic search failed: {e}")
            return False
            
    def run_repair(self):
        """Run the repair process."""
        print("\n" + "="*60)
        print("SQLite-vec Embedding Repair Tool")
        print("="*60)
        
        try:
            # Check dependencies
            if not self.check_dependencies():
                return
                
            # Connect to database
            self.connect_database()
            
            # Analyze current state
            analysis = self.analyze_database()
            
            if not analysis["issues"]:
                print("\n✅ Database appears healthy, no repair needed")
                return
                
            # Load model
            self.load_model()
            
            # Fix missing embeddings
            fixed = self.generate_missing_embeddings(analysis)
            
            # Verify search works
            self.verify_search()
            
            # Re-analyze
            print("\nRe-analyzing database after repair...")
            new_analysis = self.analyze_database()
            
            print("\n" + "="*60)
            print("Repair Summary")
            print("="*60)
            print(f"Fixed {fixed} missing embeddings")
            
            if new_analysis["issues"]:
                print("\n⚠️  Some issues remain:")
                for issue in new_analysis["issues"]:
                    print(f"  - {issue}")
                print("\nConsider running the full migration script instead.")
            else:
                print("\n✅ All issues resolved!")
                
        except Exception as e:
            print(f"\n❌ Repair failed: {e}")
            logger.exception("Repair failed")
            
        finally:
            if self.conn:
                self.conn.close()
                

def main():
    """Run the repair tool."""
    if len(sys.argv) < 2:
        print("Usage: python repair_sqlite_vec_embeddings.py <database_path>")
        print("\nExample:")
        print("  python repair_sqlite_vec_embeddings.py ~/.mcp_memory/sqlite_vec.db")
        print("\nThis tool will:")
        print("  - Check for missing embeddings")
        print("  - Generate embeddings for memories that don't have them")
        print("  - Verify semantic search functionality")
        print("\nFor more complex issues (dimension mismatches, schema problems),")
        print("use migrate_sqlite_vec_embeddings.py instead.")
        sys.exit(1)
        
    db_path = sys.argv[1]
    
    repair = SqliteVecRepair(db_path)
    repair.run_repair()
    

if __name__ == "__main__":
    main()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/doobidoo/mcp-memory-service'

If you have feedback or need assistance with the MCP directory API, please join our Discord server