/**
* Resource tests for memory leak detection
* Tests long-running operation stability
*/
import { describe, it, expect, beforeEach, vi } from 'vitest'
import {
createEmbeddingMock,
BATCH_SIZE,
EMBEDDING_DIM
} from '../helpers/indexing-mocks.js'
import { generateTestEmails } from '../helpers/test-data-generators.js'
import {
measureMemory,
PerformanceTracker,
forceGC
} from '../helpers/performance-utils.js'
/**
 * Memory-leak regression suite for the indexing pipeline.
 *
 * Each test drives the mocked embedder through realistic batching patterns
 * and asserts that heap usage stays bounded. Because embeddings come from a
 * vitest mock, the suite exercises allocation/retention patterns (cycles,
 * closures, listeners, caches) rather than a real model.
 *
 * NOTE: heap measurements are only meaningful when the runner exposes GC
 * (forceGC() is best-effort), so the thresholds are deliberately generous.
 */
describe('Memory Leak Detection', () => {
  let mockEmbedder

  beforeEach(() => {
    vi.clearAllMocks()
    const embedding = createEmbeddingMock()
    mockEmbedder = embedding.mockEmbedder
  })

  /**
   * Embed `texts` through the mock embedder in BATCH_SIZE chunks, discarding
   * the results. Mirrors the batching loop used by the real pipeline.
   * @param {string[]} texts
   */
  const embedInBatches = async (texts) => {
    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
      const batch = texts.slice(i, i + BATCH_SIZE)
      await mockEmbedder(batch, { pooling: 'mean', normalize: true })
    }
  }

  describe('repeated indexing cycles', () => {
    it('should not leak memory over multiple index cycles', async () => {
      const cycleCount = 5
      const itemsPerCycle = 200
      const memoryPerCycle = []

      forceGC() // Try to GC before the test so the baseline is clean
      const baselineMemory = measureMemory().heapUsed

      for (let cycle = 0; cycle < cycleCount; cycle++) {
        const texts = generateTestEmails(itemsPerCycle).map(e => e.content)
        await embedInBatches(texts)
        forceGC() // Try to GC after each cycle
        memoryPerCycle.push(measureMemory().heapUsed)
      }

      console.log(`Baseline: ${baselineMemory.toFixed(2)}MB`)
      console.log('Memory per cycle:', memoryPerCycle.map(m => m.toFixed(2) + 'MB'))

      // Check that memory doesn't grow significantly between cycles.
      // The first cycle is excluded (warmup/JIT allocations inflate it).
      const laterCycles = memoryPerCycle.slice(1)
      const avgLaterMemory = laterCycles.reduce((a, b) => a + b, 0) / laterCycles.length
      const maxLaterMemory = Math.max(...laterCycles)

      // Max should not exceed average by more than 50%
      expect(maxLaterMemory).toBeLessThan(avgLaterMemory * 1.5)
    })

    it('should stabilize memory after initial cycles', async () => {
      const warmupCycles = 2
      const testCycles = 5
      const itemsPerCycle = 100

      // Warmup: let lazy allocations (mock state, string tables) settle.
      for (let i = 0; i < warmupCycles; i++) {
        await embedInBatches(generateTestEmails(itemsPerCycle).map(e => e.content))
      }
      forceGC()
      const postWarmupMemory = measureMemory().heapUsed

      // Test cycles: memory after each cycle should hover near the
      // post-warmup level.
      const testMemory = []
      for (let i = 0; i < testCycles; i++) {
        await embedInBatches(generateTestEmails(itemsPerCycle).map(e => e.content))
        forceGC()
        testMemory.push(measureMemory().heapUsed)
      }

      const finalMemory = testMemory[testMemory.length - 1]
      const memoryGrowth = finalMemory - postWarmupMemory
      console.log(`Post-warmup: ${postWarmupMemory.toFixed(2)}MB`)
      console.log(`Final: ${finalMemory.toFixed(2)}MB`)
      console.log(`Growth: ${memoryGrowth.toFixed(2)}MB`)

      // Memory should not grow significantly after warmup
      expect(memoryGrowth).toBeLessThan(20)
    })
  })

  describe('vector accumulation', () => {
    it('should not accumulate vectors in memory', async () => {
      const iterations = 10
      const itemsPerIteration = 100
      const tracker = new PerformanceTracker('Vector Accumulation').start()

      for (let iter = 0; iter < iterations; iter++) {
        const texts = generateTestEmails(itemsPerIteration).map(e => e.content)
        for (let i = 0; i < texts.length; i += BATCH_SIZE) {
          const batch = texts.slice(i, i + BATCH_SIZE)
          const result = await mockEmbedder(batch, { pooling: 'mean', normalize: true })
          // Materialize the vectors, then let them fall out of scope —
          // they should be used and discarded, never accumulated.
          const _vectors = Array.from(result.data)
        }
        tracker.sample(`iteration ${iter}`)
      }

      tracker.stop()
      const growth = tracker.getMemoryGrowth()
      console.log(`Memory growth over ${iterations} iterations: ${growth.toFixed(2)}MB`)

      // Growth should be minimal (vectors not accumulating)
      expect(growth).toBeLessThan(30)
    })
  })

  describe('closure leaks', () => {
    it('should not leak through closures', async () => {
      const iterations = 10

      forceGC()
      const startMemory = measureMemory().heapUsed

      for (let i = 0; i < iterations; i++) {
        const largeData = generateTestEmails(100)
        // Create a closure that captures `largeData`, execute it, and drop
        // every reference so both the closure and the data are collectable.
        const callback = async () => {
          await embedInBatches(largeData.map(e => e.content))
        }
        await callback()
        // No reference to `callback` or `largeData` survives this iteration.
      }

      forceGC()
      const endMemory = measureMemory().heapUsed
      const growth = endMemory - startMemory
      console.log(`Memory growth: ${growth.toFixed(2)}MB`)

      // Closures should be garbage collected
      expect(growth).toBeLessThan(20)
    })
  })

  describe('event listener cleanup', () => {
    it('should simulate proper listener cleanup', async () => {
      const iterations = 5
      const listeners = new Map()

      for (let i = 0; i < iterations; i++) {
        // Register a listener for the duration of one processing pass...
        const listener = vi.fn()
        listeners.set(`listener-${i}`, listener)

        await embedInBatches(generateTestEmails(50).map(e => e.content))

        // ...then remove it (simulated cleanup).
        listeners.delete(`listener-${i}`)
      }

      // All listeners should be cleaned up
      expect(listeners.size).toBe(0)
    })
  })

  describe('buffer reuse', () => {
    it('should demonstrate buffer reuse pattern', async () => {
      const iterations = 5
      const itemsPerIteration = BATCH_SIZE

      // Pre-allocate one buffer and reuse it across iterations instead of
      // allocating a fresh array per batch.
      const reusableBuffer = new Float32Array(BATCH_SIZE * EMBEDDING_DIM)

      for (let i = 0; i < iterations; i++) {
        const texts = generateTestEmails(itemsPerIteration).map(e => e.content)
        const result = await mockEmbedder(texts, { pooling: 'mean', normalize: true })
        // subarray() is a zero-copy view (unlike slice(), which allocates a
        // new array and would defeat the reuse pattern being demonstrated).
        reusableBuffer.set(result.data.subarray(0, reusableBuffer.length))
      }

      // Buffer should still be valid
      expect(reusableBuffer.length).toBe(BATCH_SIZE * EMBEDDING_DIM)
    })
  })

  describe('string interning', () => {
    it('should not duplicate identical strings', async () => {
      // Generate emails whose subjects repeat from a small pool, so the
      // engine can share the duplicate strings rather than copy them.
      const uniqueSubjects = ['Meeting', 'Update', 'Review', 'Sync', 'Call']
      const count = 100
      const emails = []
      for (let i = 0; i < count; i++) {
        emails.push({
          subject: uniqueSubjects[i % uniqueSubjects.length],
          content: `Email ${i}`
        })
      }

      const texts = emails.map(e => `${e.subject}: ${e.content}`)

      forceGC()
      const startMemory = measureMemory().heapUsed
      await embedInBatches(texts)
      forceGC()
      const endMemory = measureMemory().heapUsed

      const growth = endMemory - startMemory
      // Should not grow much despite processing 100 items
      expect(growth).toBeLessThan(10)
    })
  })

  describe('long-running simulation', () => {
    it('should maintain stable memory over extended processing', async () => {
      const totalItems = 1000
      const memoryAtQuartiles = []
      const texts = generateTestEmails(totalItems).map(e => e.content)

      // Sample heap usage near 0%, 25%, 50% and 75% progress — at most once
      // per quartile, even when several batches fall inside the same window.
      const quartiles = [0, 0.25, 0.5, 0.75]
      const sampled = new Set()

      for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE)
        await mockEmbedder(batch, { pooling: 'mean', normalize: true })

        const progress = i / texts.length
        for (const q of quartiles) {
          if (!sampled.has(q) && Math.abs(progress - q) < 0.05) {
            sampled.add(q)
            memoryAtQuartiles.push({
              progress: Math.round(q * 100),
              memory: measureMemory().heapUsed
            })
          }
        }
      }

      // Final measurement
      memoryAtQuartiles.push({
        progress: 100,
        memory: measureMemory().heapUsed
      })
      console.table(memoryAtQuartiles)

      // Memory at 100% should not be more than 2x the earliest post-start
      // sample (the 25% reading when available). `??` rather than `||` so a
      // theoretical 0-MB reading is not silently skipped.
      const earlyMemory = memoryAtQuartiles[1]?.memory ?? memoryAtQuartiles[0].memory
      const finalMemory = memoryAtQuartiles[memoryAtQuartiles.length - 1].memory
      expect(finalMemory).toBeLessThan(earlyMemory * 2)
    })
  })

  describe('WeakRef simulation', () => {
    it('should demonstrate weak reference pattern', async () => {
      // Simulate a weakly-held cache: entries are evicted periodically so
      // the cache cannot grow without bound. (A production implementation
      // would use WeakRef + FinalizationRegistry instead of manual eviction.)
      const weakCache = new Map()
      let evictedCount = 0

      for (let i = 0; i < 5; i++) {
        const texts = generateTestEmails(50).map(e => e.content)
        weakCache.set(`batch-${i}`, {
          texts,
          timestamp: Date.now()
        })

        await embedInBatches(texts)

        // Every second iteration, evict all but the two most recent batches.
        // (A wall-clock cutoff like Date.now() - 1000 never matches entries
        // created milliseconds earlier, so nothing would ever be evicted.)
        if (i % 2 === 1) {
          for (const key of weakCache.keys()) {
            const index = Number(key.slice('batch-'.length))
            if (index < i - 1) {
              weakCache.delete(key)
              evictedCount++
            }
          }
        }
      }

      // Eviction must actually have happened, and the cache stays bounded.
      expect(evictedCount).toBeGreaterThan(0)
      expect(weakCache.size).toBeLessThanOrEqual(5)
    })
  })
})