/**
* Resource tests for memory usage
* Tests peak heap usage and memory bounds
*/
import { describe, it, expect, beforeEach, vi } from 'vitest'
import {
createEmbeddingMock,
BATCH_SIZE,
EMBEDDING_DIM
} from '../helpers/indexing-mocks.js'
import {
generateTestEmails,
generateTestMessages,
generateCalendarEvents
} from '../helpers/test-data-generators.js'
import {
measureMemory,
PerformanceTracker,
assertMemory
} from '../helpers/performance-utils.js'
describe('Memory Usage', () => {
  let mockEmbedder

  beforeEach(() => {
    vi.clearAllMocks()
    const embedding = createEmbeddingMock()
    mockEmbedder = embedding.mockEmbedder
  })

  /**
   * Run `texts` through `embedder` in BATCH_SIZE chunks.
   *
   * @param {string[]} texts - documents to embed
   * @param {Function} embedder - mock embedder, called as embedder(batch, opts)
   * @param {(batchIndex: number) => void} [onBatchDone] - invoked after each
   *   batch completes, so callers can sample memory or record snapshots
   *   without duplicating this loop.
   */
  async function embedInBatches(texts, embedder, onBatchDone) {
    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
      const batch = texts.slice(i, i + BATCH_SIZE)
      await embedder(batch, { pooling: 'mean', normalize: true })
      // `i` is always a multiple of BATCH_SIZE, so this is the batch index.
      onBatchDone?.(Math.floor(i / BATCH_SIZE))
    }
  }

  describe('peak heap usage', () => {
    it('should stay under 300MB during email indexing', async () => {
      const count = 1000
      const texts = generateTestEmails(count).map((e) => e.content)
      const tracker = new PerformanceTracker('Email Indexing Memory').start()

      // Sample every 5 batches to bound tracker overhead.
      await embedInBatches(texts, mockEmbedder, (batchIndex) => {
        if (batchIndex % 5 === 0) {
          tracker.sample(`batch ${batchIndex}`)
        }
      })

      tracker.stop()
      const peakHeap = tracker.getPeakMemory()
      console.log(`Peak heap: ${peakHeap.toFixed(2)}MB`)
      // Peak heap should be under 300MB
      expect(peakHeap).toBeLessThan(300)
    })

    it('should stay under 300MB during message indexing', async () => {
      const count = 2000
      const texts = generateTestMessages(count).map((m) => m.text)
      const tracker = new PerformanceTracker('Message Indexing Memory').start()

      await embedInBatches(texts, mockEmbedder, (batchIndex) => {
        if (batchIndex % 5 === 0) {
          tracker.sample(`batch ${batchIndex}`)
        }
      })

      tracker.stop()
      const peakHeap = tracker.getPeakMemory()
      console.log(`Peak heap: ${peakHeap.toFixed(2)}MB`)
      expect(peakHeap).toBeLessThan(300)
    })

    it('should stay under 300MB during combined indexing', async () => {
      const emails = generateTestEmails(500)
      const messages = generateTestMessages(1000)
      const events = generateCalendarEvents(300)
      const tracker = new PerformanceTracker('Combined Indexing Memory').start()

      // Index emails
      await embedInBatches(emails.map((e) => e.content), mockEmbedder)
      tracker.sample('emails done')

      // Index messages
      await embedInBatches(messages.map((m) => m.text), mockEmbedder)
      tracker.sample('messages done')

      // Index events
      await embedInBatches(events.map((e) => `${e.title} ${e.location}`), mockEmbedder)
      tracker.sample('events done')

      tracker.stop()
      const peakHeap = tracker.getPeakMemory()
      console.log(`Peak heap: ${peakHeap.toFixed(2)}MB`)
      expect(peakHeap).toBeLessThan(300)
    })
  })

  describe('memory growth per batch', () => {
    it('should not grow memory significantly per batch', async () => {
      const count = 640 // 20 batches (assumes BATCH_SIZE = 32)
      const texts = generateTestEmails(count).map((e) => e.content)

      // Snapshot heap before the first batch and after every batch.
      const memorySnapshots = [measureMemory().heapUsed]
      await embedInBatches(texts, mockEmbedder, () => {
        memorySnapshots.push(measureMemory().heapUsed)
      })

      // Calculate growth between first and last snapshot
      const totalGrowth = memorySnapshots[memorySnapshots.length - 1] - memorySnapshots[0]
      const batchCount = Math.ceil(count / BATCH_SIZE)
      const growthPerBatch = totalGrowth / batchCount
      console.log(`Total memory growth: ${totalGrowth.toFixed(2)}MB`)
      console.log(`Growth per batch: ${growthPerBatch.toFixed(4)}MB`)
      // Should not grow more than 50MB total for mocked operations
      expect(totalGrowth).toBeLessThan(50)
    })

    it('should limit heap growth to < 50MB during full indexing', async () => {
      const count = 500
      const texts = generateTestEmails(count).map((e) => e.content)
      const tracker = new PerformanceTracker('Heap Growth').start()

      // Sample on every batch: this test measures growth, not peak.
      await embedInBatches(texts, mockEmbedder, (batchIndex) => {
        tracker.sample(`batch ${batchIndex}`)
      })

      tracker.stop()
      const heapGrowth = tracker.getMemoryGrowth()
      console.log(`Heap growth: ${heapGrowth.toFixed(2)}MB`)
      expect(heapGrowth).toBeLessThan(50)
    })
  })

  describe('embedding vector memory', () => {
    it('should calculate correct memory for embedding vectors', () => {
      const count = 1000
      // Each vector is 384 float32s * 4 bytes = 1536 bytes
      const bytesPerVector = EMBEDDING_DIM * 4
      const totalVectorMemory = (count * bytesPerVector) / (1024 * 1024) // MB
      console.log(`Expected vector memory for ${count} items: ${totalVectorMemory.toFixed(2)}MB`)
      // 1000 vectors * 384 dims * 4 bytes = ~1.46MB
      expect(totalVectorMemory).toBeLessThan(2)
    })

    it('should verify EMBEDDING_DIM = 384', () => {
      expect(EMBEDDING_DIM).toBe(384)
    })
  })

  describe('memory bounds assertions', () => {
    it('should throw when memory exceeds threshold', () => {
      expect(() => assertMemory(350, 300, 'test')).toThrow('Memory exceeded')
    })

    it('should pass when memory is within threshold', () => {
      expect(() => assertMemory(250, 300, 'test')).not.toThrow()
    })

    it('should pass when memory equals threshold', () => {
      // Threshold is inclusive: exactly at the limit must not throw.
      expect(() => assertMemory(300, 300, 'test')).not.toThrow()
    })
  })

  describe('batch disposal', () => {
    it('should not retain batch data after processing', async () => {
      // Kept inline (not via embedInBatches): this test is specifically about
      // the push/await/clear in-flight pattern around each batch.
      const count = 320
      const texts = generateTestEmails(count).map((e) => e.content)
      let maxInFlightBatches = 0
      let currentBatches = []

      for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE)
        currentBatches.push(batch)
        await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        maxInFlightBatches = Math.max(maxInFlightBatches, currentBatches.length)
        // Simulate disposal
        currentBatches = []
      }

      // Should only ever have 1 batch in flight at a time
      expect(maxInFlightBatches).toBe(1)
    })
  })

  describe('large text handling', () => {
    it('should handle large email bodies without excessive memory', async () => {
      const count = 50
      // Generate emails with large bodies (10KB each)
      const emails = generateTestEmails(count, { bodySize: 10000 })
      const texts = emails.map((e) => e.content)

      const beforeMemory = measureMemory()
      await embedInBatches(texts, mockEmbedder)
      const afterMemory = measureMemory()

      const growth = afterMemory.heapUsed - beforeMemory.heapUsed
      console.log(`Memory growth for large texts: ${growth.toFixed(2)}MB`)
      // Even with large texts, growth should be reasonable
      // Large bodies (10KB each * 50 = 500KB input) can cause GC pressure
      expect(growth).toBeLessThan(200)
    })
  })
})