M.I.M.I.R - Multi-agent Intelligent Memory & Insight Repository

Overview Schema Related Servers Score Discussions

Mimir
scripts

check-and-reset-embeddings.js•23.6 KiB

#!/usr/bin/env node
/**
 * Check and Reset Embeddings
 *
 * This script:
 * 1. Checks Neo4j vector index configuration
 * 2. Checks existing embeddings and their dimensions
 * 3. Compares with currently configured dimensions from environment
 * 4. Offers to reset the embedding space if mismatches are found
 *
 * Usage (via npm):
 *   npm run embeddings:check             # Check only
 *   npm run embeddings:reset             # Reset and regenerate embeddings
 *   npm run embeddings:force-reset       # Force reset without asking
 *   npm run embeddings:clear             # Clear embeddings without regenerating
 *   npm run embeddings:generate-missing  # Generate embeddings only for items without them
 *
 * Usage (direct):
 *   node scripts/check-and-reset-embeddings.js                       # Check only
 *   node scripts/check-and-reset-embeddings.js --reset               # Reset and regenerate embeddings
 *   node scripts/check-and-reset-embeddings.js --force               # Force reset without asking
 *   node scripts/check-and-reset-embeddings.js --reset --clear-only  # Clear embeddings without regenerating
 *   node scripts/check-and-reset-embeddings.js --generate-missing    # Generate embeddings only for items without them
 */

import neo4j from 'neo4j-driver';
import readline from 'readline';
import { EmbeddingsService } from '../build/indexing/EmbeddingsService.js';

// Configuration from environment or defaults
const NEO4J_URI = process.env.NEO4J_URI || 'bolt://localhost:7687';
const NEO4J_USER = process.env.NEO4J_USER || 'neo4j';
const NEO4J_PASSWORD = process.env.NEO4J_PASSWORD || 'password';
const CONFIGURED_DIMENSIONS = parseInt(process.env.MIMIR_EMBEDDINGS_DIMENSIONS || '1024', 10);
const CONFIGURED_MODEL = process.env.MIMIR_EMBEDDINGS_MODEL || 'mxbai-embed-large';

// Fail fast on an unusable dimension setting. This value is later written back
// to the environment and interpolated into the CREATE VECTOR INDEX statement,
// which only runs after the existing index has been dropped.
if (!Number.isInteger(CONFIGURED_DIMENSIONS) || CONFIGURED_DIMENSIONS <= 0) {
  console.error(
    `❌ Invalid MIMIR_EMBEDDINGS_DIMENSIONS: "${process.env.MIMIR_EMBEDDINGS_DIMENSIONS}" (expected a positive integer)`
  );
  process.exit(1);
}

// Ensure embeddings are enabled for regeneration
process.env.MIMIR_EMBEDDINGS_ENABLED = 'true';
process.env.MIMIR_FEATURE_VECTOR_EMBEDDINGS = 'true';
process.env.MIMIR_EMBEDDINGS_PROVIDER = process.env.MIMIR_EMBEDDINGS_PROVIDER || 'llama.cpp';
process.env.MIMIR_EMBEDDINGS_MODEL = CONFIGURED_MODEL;
process.env.MIMIR_EMBEDDINGS_DIMENSIONS = CONFIGURED_DIMENSIONS.toString();
process.env.OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL || 'http://localhost:11434/v1';

// Parse command line arguments
const args = process.argv.slice(2);
const shouldReset = args.includes('--reset') || args.includes('--force');
const forceReset = args.includes('--force');
const clearOnly = args.includes('--clear-only');
const generateMissing = args.includes('--generate-missing');

/**
 * Normalise a value read from a query record to a plain JS value.
 *
 * Objects exposing a `toNumber()` method are converted through it; `null` and
 * `undefined` become `null`; anything else is returned unchanged.
 *
 * @param {*} value - Raw value from `record.get(...)`.
 * @returns {*} The converted value, or `null` when the input is nullish.
 */
function toNumber(value) {
  if (value == null) {
    return null;
  }
  if (typeof value === 'object' && typeof value.toNumber === 'function') {
    return value.toNumber();
  }
  return value;
}

/**
 * Pull the embedding vector out of whatever `generateEmbedding` returned.
 *
 * Accepts either a bare array or an object carrying the array under
 * `embedding`.
 *
 * @param {*} embeddingResult - Return value of `generateEmbedding`.
 * @returns {Array} The non-empty embedding array.
 * @throws {Error} If no non-empty array can be extracted.
 */
function extractEmbeddingVector(embeddingResult) {
  const embedding = Array.isArray(embeddingResult)
    ? embeddingResult
    : (embeddingResult?.embedding ?? embeddingResult);

  if (!Array.isArray(embedding) || embedding.length === 0) {
    throw new Error(`Invalid embedding format: expected array, got ${typeof embedding}`);
  }
  return embedding;
}

/**
 * Create and initialise an EmbeddingsService, failing if it reports disabled.
 *
 * @returns {Promise<EmbeddingsService>} The initialised service.
 * @throws {Error} If `isEnabled()` returns false after initialisation.
 */
async function createEmbeddingsService() {
  const embeddingsService = new EmbeddingsService();
  await embeddingsService.initialize();

  if (!embeddingsService.isEnabled()) {
    console.error('❌ Embeddings service is not enabled. Check your configuration.');
    throw new Error('Embeddings service not enabled');
  }
  return embeddingsService;
}

/**
 * Prompt user for confirmation
 *
 * @param {string} query - Text shown to the user.
 * @returns {Promise<string>} The line the user typed.
 */
function askQuestion(query) {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise(resolve => rl.question(query, ans => {
    rl.close();
    resolve(ans);
  }));
}

/**
 * Check vector index configuration
 *
 * Looks up the index named `node_embedding_index` and reports its type,
 * dimensions and similarity function.
 *
 * @param {object} session - Open Neo4j session.
 * @returns {Promise<{exists: boolean, dimensions: (number|null), error?: string}>}
 *   `dimensions` is `null` when the index is missing or its dimension setting
 *   cannot be read. Query errors are logged and reported via `error`, not thrown.
 */
async function checkVectorIndex(session) {
  console.log('\n🔍 Checking vector index configuration...');

  try {
    const result = await session.run(`
      SHOW INDEXES
      YIELD name, type, labelsOrTypes, properties, options
      WHERE name = 'node_embedding_index'
      RETURN name, type, options
    `);

    if (result.records.length === 0) {
      console.log('⚠️ No vector index found (node_embedding_index does not exist)');
      return { exists: false, dimensions: null };
    }

    const record = result.records[0];
    const options = record.get('options');
    const indexConfig = options?.indexConfig || {};

    // Normalise to a plain integer; a missing or unparsable value becomes null
    // rather than NaN so downstream comparisons and logs stay predictable.
    const parsed = Number.parseInt(String(toNumber(indexConfig['vector.dimensions'])), 10);
    const dimensions = Number.isNaN(parsed) ? null : parsed;

    console.log(`✅ Vector index exists: node_embedding_index`);
    console.log(` Type: ${record.get('type')}`);
    console.log(` Dimensions: ${dimensions ?? '?'}`);
    console.log(` Similarity: ${indexConfig['vector.similarity_function'] || 'cosine'}`);

    return { exists: true, dimensions };
  } catch (error) {
    console.error('❌ Error checking vector index:', error.message);
    return { exists: false, dimensions: null, error: error.message };
  }
}

/**
 * Check existing embeddings in database
 *
 * Counts every node (any label) that has an `embedding` property and groups
 * them by stored model / dimensions / actual vector size.
 *
 * @param {object} session - Open Neo4j session.
 * @returns {Promise<{count: number, models: Array<{model: string, dimensions: (number|null), count: number}>, dimensions: number[], error?: string}>}
 *   Query errors are logged and reported via `error`, not thrown.
 */
async function checkExistingEmbeddings(session) {
  console.log('\n🔍 Checking existing embeddings...');

  try {
    // Count nodes with embeddings (any label)
    const countResult = await session.run(`
      MATCH (n)
      WHERE n.embedding IS NOT NULL
      RETURN count(n) as count
    `);
    const totalCount = toNumber(countResult.records[0].get('count'));

    if (totalCount === 0) {
      console.log('ℹ️ No embeddings found in database');
      return { count: 0, models: [], dimensions: [] };
    }

    // Get embedding statistics (any label)
    const statsResult = await session.run(`
      MATCH (n)
      WHERE n.embedding IS NOT NULL
      RETURN n.embedding_model as model,
             n.embedding_dimensions as dimensions,
             size(n.embedding) as actualDims,
             count(n) as count
      ORDER BY count DESC
    `);

    console.log(`✅ Found ${totalCount} nodes with embeddings:`);

    const models = [];
    const dimensions = [];

    for (const record of statsResult.records) {
      const model = record.get('model') || 'unknown';
      // Prefer stored dimensions, fall back to actual size of embedding array
      const dims = toNumber(record.get('dimensions')) ?? toNumber(record.get('actualDims'));
      const count = toNumber(record.get('count'));

      models.push({ model, dimensions: dims, count });
      if (dims) {
        dimensions.push(dims);
      }

      console.log(` ${model} (${dims || '?'} dims): ${count} nodes`);
    }

    return { count: totalCount, models, dimensions };
  } catch (error) {
    console.error('❌ Error checking embeddings:', error.message);
    return { count: 0, models: [], dimensions: [], error: error.message };
  }
}

/**
 * Check for chunk embeddings
 *
 * Inspects `FileChunk` nodes reached via `HAS_CHUNK` that carry an embedding.
 * The query returns one row per distinct (stored dimensions, actual size)
 * pair, so all rows are folded together: counts are summed and every distinct
 * dimension is collected.
 *
 * @param {object} session - Open Neo4j session.
 * @returns {Promise<{count: number, dimensions: (number|null), allDimensions?: number[], error?: string}>}
 *   `dimensions` is the dimension of the largest group; `allDimensions` lists
 *   every distinct dimension found. Query errors are logged and reported via
 *   `error`, not thrown.
 */
async function checkChunkEmbeddings(session) {
  console.log('\n🔍 Checking chunk embeddings...');

  try {
    const result = await session.run(`
      MATCH (n)-[:HAS_CHUNK]->(c:FileChunk)
      WHERE c.embedding IS NOT NULL
      RETURN c.embedding_dimensions as dimensions,
             size(c.embedding) as actualDims,
             count(c) as count
      ORDER BY count DESC
    `);

    if (result.records.length === 0) {
      console.log('ℹ️ No chunk embeddings found');
      return { count: 0, dimensions: null };
    }

    let count = 0;
    const allDimensions = [];

    for (const record of result.records) {
      count += toNumber(record.get('count'));
      // Prefer stored dimensions, fall back to actual size of embedding array
      const dims = toNumber(record.get('dimensions')) ?? toNumber(record.get('actualDims'));
      if (!allDimensions.includes(dims)) {
        allDimensions.push(dims);
      }
    }

    // Rows are ordered by count, so the first entry is the dominant dimension.
    const dimensions = allDimensions[0] ?? null;
    const dimsLabel = allDimensions.map(d => d ?? '?').join(', ');

    console.log(`✅ Found ${count} chunks with embeddings (${dimsLabel} dims)`);

    return { count, dimensions, allDimensions };
  } catch (error) {
    console.error('❌ Error checking chunk embeddings:', error.message);
    return { count: 0, dimensions: null, error: error.message };
  }
}

/**
 * Analyze mismatches and report
 *
 * Compares the index, node-embedding and chunk-embedding dimensions against
 * `CONFIGURED_DIMENSIONS` and logs the outcome of each comparison.
 *
 * @param {{exists: boolean, dimensions: (number|null)}} indexInfo - Result of checkVectorIndex.
 * @param {{dimensions: number[]}} embeddingsInfo - Result of checkExistingEmbeddings.
 * @param {{count: number, dimensions: (number|null), allDimensions?: number[]}} chunksInfo - Result of checkChunkEmbeddings.
 * @returns {string[]} Any of `'index'`, `'embeddings'`, `'chunks'` that mismatch.
 */
function analyzeMismatches(indexInfo, embeddingsInfo, chunksInfo) {
  console.log('\n📊 Analysis:');
  console.log(` Configured dimensions: ${CONFIGURED_DIMENSIONS}`);
  console.log(` Configured model: ${CONFIGURED_MODEL}`);

  const mismatches = [];

  // Check index dimensions
  if (indexInfo.exists && indexInfo.dimensions !== CONFIGURED_DIMENSIONS) {
    console.log(` ⚠️ Index dimensions (${indexInfo.dimensions}) != configured (${CONFIGURED_DIMENSIONS})`);
    mismatches.push('index');
  } else if (indexInfo.exists) {
    console.log(` ✅ Index dimensions match configured`);
  }

  // Check embedding dimensions
  const uniqueDims = [...new Set(embeddingsInfo.dimensions)];
  if (uniqueDims.length > 0) {
    const allMatch = uniqueDims.every(d => d === CONFIGURED_DIMENSIONS);
    if (!allMatch) {
      console.log(` ⚠️ Embedding dimensions ${JSON.stringify(uniqueDims)} != configured (${CONFIGURED_DIMENSIONS})`);
      mismatches.push('embeddings');
    } else {
      console.log(` ✅ All embedding dimensions match configured`);
    }
  }

  // Check chunk dimensions (every distinct dimension, not just the dominant one)
  if (chunksInfo.count > 0) {
    const chunkDims = chunksInfo.allDimensions ?? [chunksInfo.dimensions];
    if (chunkDims.every(d => d === CONFIGURED_DIMENSIONS)) {
      console.log(` ✅ Chunk dimensions match configured`);
    } else {
      console.log(` ⚠️ Chunk dimensions (${chunkDims.map(String).join(', ')}) != configured (${CONFIGURED_DIMENSIONS})`);
      mismatches.push('chunks');
    }
  }

  return mismatches;
}

/**
 * Find nodes with mismatched embeddings
 *
 * A node (any label) is selected when it has an embedding and its stored
 * dimensions or model are missing or differ from the configured values, or the
 * actual vector size differs from the configured dimensions.
 *
 * @param {object} session - Open Neo4j session.
 * @returns {Promise<Array<{id: *, type: *, title: *, content: *, text: *, currentDims: (number|null), currentModel: *}>>}
 * @throws Rethrows any query error after logging it.
 */
async function findMismatchedNodes(session) {
  console.log('\n🔍 Finding nodes with mismatched embeddings...');

  try {
    const result = await session.run(`
      MATCH (n)
      WHERE n.embedding IS NOT NULL
        AND (n.embedding_dimensions IS NULL
          OR n.embedding_dimensions <> $configuredDims
          OR n.embedding_model IS NULL
          OR n.embedding_model <> $configuredModel
          OR size(n.embedding) <> $configuredDims)
      RETURN n.id as id,
             n.type as type,
             n.title as title,
             n.content as content,
             n.text as text,
             n.embedding_dimensions as currentDims,
             n.embedding_model as currentModel,
             size(n.embedding) as actualDims
      ORDER BY n.type, n.id
    `, {
      configuredDims: CONFIGURED_DIMENSIONS,
      configuredModel: CONFIGURED_MODEL
    });

    const nodes = result.records.map(r => ({
      id: r.get('id'),
      type: r.get('type'),
      title: r.get('title'),
      content: r.get('content'),
      text: r.get('text'),
      currentDims: toNumber(r.get('currentDims')),
      currentModel: r.get('currentModel')
    }));

    if (nodes.length === 0) {
      console.log(' ✅ No mismatched nodes found');
    } else {
      console.log(` ⚠️ Found ${nodes.length} nodes with mismatched embeddings`);
    }

    return nodes;
  } catch (error) {
    console.error('❌ Error finding mismatched nodes:', error.message);
    throw error;
  }
}

/**
 * Find nodes without embeddings
 *
 * Selects nodes that have a `type` property or the `FileChunk` label, have
 * `content` or `text`, and whose `embedding` is missing or empty. At most
 * 10000 rows are returned per call.
 *
 * @param {object} session - Open Neo4j session.
 * @returns {Promise<Array<{id: *, type: *, content: *}>>} `type` is the node's first label.
 */
async function findNodesWithoutEmbeddings(session) {
  console.log('\n🔍 Finding nodes and chunks without embeddings...');

  // Find both regular nodes and FileChunks without embeddings
  const result = await session.run(`
    MATCH (n)
    WHERE (n.type IS NOT NULL OR n:FileChunk)
      AND (n.embedding IS NULL OR size(n.embedding) = 0)
      AND (n.content IS NOT NULL OR n.text IS NOT NULL)
    RETURN n.id as id, labels(n)[0] as type, coalesce(n.content, n.text) as content
    LIMIT 10000
  `);

  const nodes = result.records.map(record => ({
    id: record.get('id'),
    type: record.get('type'),
    content: record.get('content')
  }));

  console.log(` Found ${nodes.length} nodes without embeddings`);
  return nodes;
}

/**
 * Generate embeddings for nodes that don't have them
 *
 * Each generated vector is stored together with its model name and dimension
 * count. Without that metadata the node would be selected by
 * findMismatchedNodes (which treats missing `embedding_model` /
 * `embedding_dimensions` as a mismatch) on the very next check.
 *
 * @param {object} session - Open Neo4j session.
 * @returns {Promise<void>}
 * @throws {Error} If the embeddings service is not enabled. Per-node failures
 *   are logged and counted, not thrown.
 */
async function generateMissingEmbeddings(session) {
  console.log('\n🔄 Generating missing embeddings...');

  // Initialize embeddings service
  const embeddingsService = await createEmbeddingsService();

  // Find nodes without embeddings
  const nodes = await findNodesWithoutEmbeddings(session);

  if (nodes.length === 0) {
    console.log(' ✅ All nodes already have embeddings');
    return;
  }

  console.log(` Model: ${CONFIGURED_MODEL}`);
  console.log(` Dimensions: ${CONFIGURED_DIMENSIONS}`);
  console.log(` Processing ${nodes.length} nodes...`);

  let processed = 0;
  let errors = 0;

  for (const node of nodes) {
    try {
      if (!node.content) {
        continue;
      }

      // Generate embedding and unwrap it if the service returned an object
      const embeddingResult = await embeddingsService.generateEmbedding(node.content);
      const embedding = extractEmbeddingVector(embeddingResult);

      // Update node with embedding and the metadata the mismatch check reads
      await session.run(
        `MATCH (n {id: $id})
         SET n.embedding = $embedding,
             n.embedding_model = $model,
             n.embedding_dimensions = $dimensions`,
        {
          id: node.id,
          embedding,
          model: embeddingResult.model || CONFIGURED_MODEL,
          dimensions: embedding.length
        }
      );

      processed++;

      if (processed % 10 === 0) {
        console.log(` Progress: ${processed}/${nodes.length}`);
      }
    } catch (error) {
      errors++;
      console.error(` ⚠️ Failed for node ${node.id} (${node.type}): ${error.message}`);
    }
  }

  console.log(`\n✅ Generated embeddings for ${processed} nodes (${errors} errors)`);

  // Verify embeddings were stored
  console.log('\n🔍 Verifying embeddings in database...');
  const verifyResult = await session.run(`
    MATCH (n)
    WHERE n.embedding IS NOT NULL
    RETURN count(n) as total,
           avg(size(n.embedding)) as avgDimensions,
           min(size(n.embedding)) as minDimensions,
           max(size(n.embedding)) as maxDimensions
  `);

  const verify = verifyResult.records[0];
  console.log(` Total nodes with embeddings: ${verify.get('total')}`);
  console.log(` Embedding dimensions: min=${verify.get('minDimensions')}, max=${verify.get('maxDimensions')}, avg=${Math.round(verify.get('avgDimensions'))}`);

  if (errors > 0) {
    console.log(`\n⚠️ ${errors} nodes failed to generate embeddings`);
  }
}

/**
 * Regenerate embeddings for mismatched nodes
 *
 * For every node a new vector is generated from `content`, `text` or `title`
 * (first non-empty) and written to the `:Node` with the same id. Nodes with no
 * text, failed generations, and updates that matched no `:Node` are all
 * counted as errors.
 *
 * @param {object} session - Open Neo4j session.
 * @param {Array<object>} nodes - Nodes as returned by findMismatchedNodes.
 * @returns {Promise<{successCount: number, errorCount: number}>}
 * @throws {Error} If the embeddings service is not enabled.
 */
async function regenerateEmbeddings(session, nodes) {
  console.log('\n🔄 Regenerating embeddings...');

  // Initialize embeddings service
  const embeddingsService = await createEmbeddingsService();

  console.log(` Model: ${CONFIGURED_MODEL}`);
  console.log(` Dimensions: ${CONFIGURED_DIMENSIONS}`);
  console.log(` Processing ${nodes.length} nodes...\n`);

  let successCount = 0;
  let errorCount = 0;

  for (let i = 0; i < nodes.length; i++) {
    const node = nodes[i];
    const progress = `[${i + 1}/${nodes.length}]`;

    console.log(`${progress} Processing: ${node.type} - ${node.id}`);
    if (node.title) {
      console.log(` Title: ${node.title.substring(0, 60)}${node.title.length > 60 ? '...' : ''}`);
    }
    if (node.currentModel) {
      console.log(` Current: ${node.currentModel} (${node.currentDims || '?'} dims)`);
    }

    // Get text content for embedding
    const textContent = node.content || node.text || node.title || '';

    if (!textContent) {
      console.log(' ⚠️ No text content found, skipping');
      errorCount++;
      continue;
    }

    try {
      // Generate new embedding and unwrap it if the service returned an object
      const embeddingResult = await embeddingsService.generateEmbedding(textContent);
      const embedding = extractEmbeddingVector(embeddingResult);

      // Update node with new embedding. The count is returned so that an
      // update matching no node is reported instead of silently "succeeding":
      // candidates come from a label-agnostic query, this update is :Node-only.
      const updateResult = await session.run(`
        MATCH (n:Node {id: $id})
        SET n.embedding = $embedding,
            n.embedding_model = $model,
            n.embedding_dimensions = $dimensions,
            n.has_embedding = true
        REMOVE n.needs_embedding
        RETURN count(n) as updated
      `, {
        id: node.id,
        embedding,
        model: embeddingResult.model || CONFIGURED_MODEL,
        dimensions: embedding.length
      });

      const updated = toNumber(updateResult.records[0]?.get('updated')) ?? 0;
      if (updated === 0) {
        throw new Error(`no :Node with id ${node.id} was found to update`);
      }

      // Report the real vector length; the service result may be a bare array
      // with no `dimensions` property.
      console.log(` ✅ Updated (${embedding.length} dims)`);
      successCount++;

      // Small delay to avoid overwhelming the service
      if (i < nodes.length - 1) {
        await new Promise(resolve => setTimeout(resolve, 100));
      }
    } catch (error) {
      console.error(` ❌ Failed: ${error.message}`);
      errorCount++;
    }
  }

  console.log(`\n✅ Regeneration complete!`);
  console.log(` Success: ${successCount} nodes`);
  if (errorCount > 0) {
    console.log(` Errors: ${errorCount} nodes`);
  }

  return { successCount, errorCount };
}

/**
 * Reset embedding space (full reset)
 *
 * Drops and recreates `node_embedding_index` with the configured dimensions,
 * then either regenerates mismatched embeddings (`regenerate = true`) or
 * removes all embedding properties (`regenerate = false`).
 *
 * @param {object} session - Open Neo4j session.
 * @param {boolean} [regenerate=true] - Regenerate instead of only clearing.
 * @returns {Promise<void>}
 * @throws {Error} If the configured dimensions are not a positive integer, or
 *   if any database / embedding step fails (rethrown after logging).
 */
async function resetEmbeddingSpace(session, regenerate = true) {
  console.log('\n🔄 Resetting embedding space...');

  try {
    // Validate before dropping anything: the dimension is interpolated into
    // the CREATE statement, and a bad value would leave the database with no
    // vector index at all.
    if (!Number.isInteger(CONFIGURED_DIMENSIONS) || CONFIGURED_DIMENSIONS <= 0) {
      throw new Error(`Invalid configured dimensions: ${CONFIGURED_DIMENSIONS}`);
    }

    // Step 1: Drop vector index
    console.log('\n1️⃣ Dropping vector index...');
    await session.run('DROP INDEX node_embedding_index IF EXISTS');
    console.log(' ✅ Vector index dropped');

    // Step 2: Create new vector index with configured dimensions
    console.log(`\n2️⃣ Creating vector index with ${CONFIGURED_DIMENSIONS} dimensions...`);
    await session.run(`
      CREATE VECTOR INDEX node_embedding_index IF NOT EXISTS
      FOR (n:Node) ON (n.embedding)
      OPTIONS {indexConfig: {
        \`vector.dimensions\`: ${CONFIGURED_DIMENSIONS},
        \`vector.similarity_function\`: 'cosine'
      }}
    `);
    console.log(' ✅ Vector index created');

    if (!regenerate) {
      // Step 3a: Clear embeddings from nodes (if not regenerating)
      console.log('\n3️⃣ Clearing embeddings from nodes (any label)...');
      const nodeResult = await session.run(`
        MATCH (n)
        WHERE n.embedding IS NOT NULL
        REMOVE n.embedding, n.embedding_model, n.embedding_dimensions, n.has_embedding
        RETURN count(n) as count
      `);
      const nodeCount = toNumber(nodeResult.records[0].get('count'));
      console.log(` ✅ Cleared embeddings from ${nodeCount} nodes`);

      // Step 4: Clear embeddings from chunks
      console.log('\n4️⃣ Clearing embeddings from FileChunks...');
      const chunkResult = await session.run(`
        MATCH (c:FileChunk)
        WHERE c.embedding IS NOT NULL
        REMOVE c.embedding, c.embedding_model, c.embedding_dimensions
        RETURN count(c) as count
      `);
      const chunkCount = toNumber(chunkResult.records[0].get('count'));
      console.log(` ✅ Cleared embeddings from ${chunkCount} chunks`);

      console.log('\n✅ Embedding space reset complete!');
      console.log('\n📝 Next steps:');
      console.log(' Re-index your files to generate new embeddings:');
      console.log(' npm run index-docs');
    } else {
      // Step 3b: Find and regenerate mismatched embeddings
      const nodes = await findMismatchedNodes(session);

      if (nodes.length > 0) {
        await regenerateEmbeddings(session, nodes);
      }

      console.log('\n✅ Embedding space reset complete!');
    }
  } catch (error) {
    console.error('\n❌ Error resetting embedding space:', error.message);
    throw error;
  }
}

/**
 * Main function
 *
 * Opens a Neo4j session, then either generates missing embeddings
 * (`--generate-missing`) or runs the check / optional reset flow driven by
 * `--reset`, `--force` and `--clear-only`.
 *
 * @returns {Promise<void>}
 * @throws Rethrows any error after logging it; the session and driver are
 *   closed in all cases.
 */
async function main() {
  console.log('🔧 Mimir Embedding Space Checker\n');
  console.log('Configuration:');
  console.log(` Neo4j URI: ${NEO4J_URI}`);
  console.log(` Dimensions: ${CONFIGURED_DIMENSIONS}`);
  console.log(` Model: ${CONFIGURED_MODEL}`);

  const driver = neo4j.driver(
    NEO4J_URI,
    neo4j.auth.basic(NEO4J_USER, NEO4J_PASSWORD)
  );

  const session = driver.session();

  try {
    // Handle generate-missing mode separately
    if (generateMissing) {
      console.log('\n📝 Generate Missing Mode: Will only generate embeddings for items without them\n');
      await generateMissingEmbeddings(session);
      // The final "Done!" is printed by the caller at the bottom of the script.
      return;
    }

    // Check current state
    const indexInfo = await checkVectorIndex(session);
    const embeddingsInfo = await checkExistingEmbeddings(session);
    const chunksInfo = await checkChunkEmbeddings(session);

    // Analyze mismatches
    const mismatches = analyzeMismatches(indexInfo, embeddingsInfo, chunksInfo);

    // With no reset requested (--reset or --force) and nothing mismatched,
    // there is nothing to do. --force implies shouldReset.
    if (!shouldReset && mismatches.length === 0) {
      console.log('\n✅ All embeddings match configured dimensions. No action needed.');
      return;
    }

    // Report mismatches (if any)
    if (mismatches.length > 0) {
      console.log('\n⚠️ Mismatches detected:');
      for (const m of mismatches) {
        console.log(` - ${m}`);
      }
    } else {
      // Reaching here with no mismatches means a reset was explicitly requested.
      console.log('\n✅ No mismatches detected, but proceeding with reset as requested...');
    }

    // Decide whether to reset
    let doReset = false;

    if (forceReset) {
      console.log('\n🔨 Force reset enabled. Proceeding without confirmation...');
      doReset = true;
    } else if (shouldReset) {
      if (clearOnly) {
        console.log('\n⚠️ WARNING: This will:');
        console.log(' 1. Drop the existing vector index');
        console.log(' 2. Clear all embeddings from nodes and chunks');
        console.log(' 3. Create a new vector index with configured dimensions');
        console.log(' 4. You will need to re-index files to generate new embeddings');
      } else {
        console.log('\n⚠️ WARNING: This will:');
        console.log(' 1. Drop the existing vector index');
        console.log(' 2. Create a new vector index with configured dimensions');
        console.log(' 3. Find all nodes with mismatched embeddings');
        console.log(' 4. Regenerate embeddings for those nodes (this may take a while)');
      }

      const answer = await askQuestion('\nProceed with reset? (yes/no): ');
      const normalized = answer.toLowerCase();
      doReset = normalized === 'yes' || normalized === 'y';
    } else {
      console.log('\n💡 Tip: Run with --reset to fix mismatches automatically');
      console.log(' npm run embeddings:reset');
      console.log(' or: node scripts/check-and-reset-embeddings.js --reset');
      return;
    }

    if (doReset) {
      await resetEmbeddingSpace(session, !clearOnly);
    } else {
      console.log('\n❌ Reset cancelled by user.');
    }
  } catch (error) {
    console.error('\n❌ Fatal error:', error.message);
    throw error;
  } finally {
    // Close the driver even if closing the session throws.
    try {
      await session.close();
    } finally {
      await driver.close();
    }
  }
}

// Run the script
main()
  .then(() => {
    console.log('\n✅ Done!');
    process.exit(0);
  })
  .catch((error) => {
    console.error('\n❌ Failed:', error);
    process.exit(1);
  });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/orneryd/Mimir'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

check-and-reset-embeddings.js•23.6 KiB