scaling.test.js
/**
 * Performance tests for scaling behavior
 * Tests linear O(n) complexity and throughput at varying sizes
 */
import { describe, it, expect, beforeEach, vi } from 'vitest'
import { createEmbeddingMock, createLanceDBMock, BATCH_SIZE, EMBEDDING_DIM } from '../helpers/indexing-mocks.js'
import { generateTestEmails, generateTestMessages, generateCalendarEvents } from '../helpers/test-data-generators.js'
import { measureTime, calculateThroughput, ThroughputTracker } from '../helpers/performance-utils.js'

describe('Scaling Behavior', () => {
  let mockEmbedder
  let mockDb

  beforeEach(() => {
    vi.clearAllMocks()
    const embedding = createEmbeddingMock()
    mockEmbedder = embedding.mockEmbedder
    mockDb = createLanceDBMock()
  })

  describe('linear O(n) time complexity', () => {
    it('should scale linearly with email count', async () => {
      const sizes = [100, 200, 400]
      const results = []

      for (const size of sizes) {
        const emails = generateTestEmails(size)
        const texts = emails.map(e => e.content)

        const { duration } = await measureTime(async () => {
          // Simulate batched embedding
          for (let i = 0; i < texts.length; i += BATCH_SIZE) {
            const batch = texts.slice(i, i + BATCH_SIZE)
            await mockEmbedder(batch, { pooling: 'mean', normalize: true })
          }
        })

        results.push({ size, duration, timePerItem: duration / size })
      }

      console.table(results)

      // Time per item should be relatively constant (within 3x tolerance)
      const avgTimePerItem = results.reduce((s, r) => s + r.timePerItem, 0) / results.length
      for (const result of results) {
        expect(result.timePerItem).toBeLessThan(avgTimePerItem * 3)
      }
    })

    it('should scale linearly with message count', async () => {
      const sizes = [200, 400, 800]
      const results = []

      for (const size of sizes) {
        const messages = generateTestMessages(size)
        const texts = messages.map(m => m.text)

        const { duration } = await measureTime(async () => {
          for (let i = 0; i < texts.length; i += BATCH_SIZE) {
            const batch = texts.slice(i, i + BATCH_SIZE)
            await mockEmbedder(batch, { pooling: 'mean', normalize: true })
          }
        })

        results.push({ size, duration, timePerItem: duration / size })
      }

      console.table(results)

      // Verify linear scaling
      const avgTimePerItem = results.reduce((s, r) => s + r.timePerItem, 0) / results.length
      for (const result of results) {
        expect(result.timePerItem).toBeLessThan(avgTimePerItem * 3)
      }
    })

    it('should scale linearly with calendar event count', async () => {
      const sizes = [150, 300, 600]
      const results = []

      for (const size of sizes) {
        const events = generateCalendarEvents(size)
        const texts = events.map(e => `${e.title} ${e.location} ${e.notes}`)

        const { duration } = await measureTime(async () => {
          for (let i = 0; i < texts.length; i += BATCH_SIZE) {
            const batch = texts.slice(i, i + BATCH_SIZE)
            await mockEmbedder(batch, { pooling: 'mean', normalize: true })
          }
        })

        results.push({ size, duration, timePerItem: duration / size })
      }

      console.table(results)

      const avgTimePerItem = results.reduce((s, r) => s + r.timePerItem, 0) / results.length
      for (const result of results) {
        expect(result.timePerItem).toBeLessThan(avgTimePerItem * 3)
      }
    })
  })

  describe('throughput at varying sizes', () => {
    it('should maintain throughput - Small (350 items)', async () => {
      const count = 350
      const emails = generateTestEmails(count)
      const texts = emails.map(e => e.content)

      const tracker = new ThroughputTracker().start()
      for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE)
        await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        tracker.recordBatch(batch.length)
      }

      const summary = tracker.getSummary()
      console.log(`Small (${count}): ${summary.overallThroughput.toFixed(0)} items/sec`)

      // With mocked embeddings, should achieve good throughput
      expect(summary.overallThroughput).toBeGreaterThan(50)
    })

    it('should maintain throughput - Medium (1700 items)', async () => {
      const count = 1700
      const emails = generateTestEmails(count)
      const texts = emails.map(e => e.content)

      const tracker = new ThroughputTracker().start()
      for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE)
        await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        tracker.recordBatch(batch.length)
      }

      const summary = tracker.getSummary()
      console.log(`Medium (${count}): ${summary.overallThroughput.toFixed(0)} items/sec`)

      expect(summary.overallThroughput).toBeGreaterThan(50)
    })

    it('should maintain throughput - Large (7500 items)', async () => {
      const count = 7500
      const emails = generateTestEmails(count)
      const texts = emails.map(e => e.content)

      const tracker = new ThroughputTracker().start()
      for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE)
        await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        tracker.recordBatch(batch.length)
      }

      const summary = tracker.getSummary()
      console.log(`Large (${count}): ${summary.overallThroughput.toFixed(0)} items/sec`)

      // Throughput should stay reasonable even at scale
      expect(summary.overallThroughput).toBeGreaterThan(50)
    })
  })

  describe('throughput degradation', () => {
    it('should not degrade more than 50% as size increases 10x', async () => {
      const smallCount = 100
      const largeCount = 1000

      // Small dataset
      const smallTexts = generateTestEmails(smallCount).map(e => e.content)
      const { duration: smallDuration } = await measureTime(async () => {
        for (let i = 0; i < smallTexts.length; i += BATCH_SIZE) {
          const batch = smallTexts.slice(i, i + BATCH_SIZE)
          await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        }
      })
      const smallThroughput = calculateThroughput(smallCount, smallDuration)

      // Large dataset (10x)
      const largeTexts = generateTestEmails(largeCount).map(e => e.content)
      const { duration: largeDuration } = await measureTime(async () => {
        for (let i = 0; i < largeTexts.length; i += BATCH_SIZE) {
          const batch = largeTexts.slice(i, i + BATCH_SIZE)
          await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        }
      })
      const largeThroughput = calculateThroughput(largeCount, largeDuration)

      console.log(`Small (${smallCount}): ${smallThroughput.toFixed(0)} items/sec`)
      console.log(`Large (${largeCount}): ${largeThroughput.toFixed(0)} items/sec`)
      console.log(`Degradation: ${((1 - largeThroughput / smallThroughput) * 100).toFixed(1)}%`)

      // Throughput should not degrade by more than 50%
      expect(largeThroughput).toBeGreaterThan(smallThroughput * 0.5)
    })
  })

  describe('batch count scaling', () => {
    it('should use correct number of batches for any size', () => {
      const sizes = [1, 31, 32, 33, 64, 100, 1000]

      for (const size of sizes) {
        const expectedBatches = Math.ceil(size / BATCH_SIZE)
        let actualBatches = 0
        for (let i = 0; i < size; i += BATCH_SIZE) {
          actualBatches++
        }
        expect(actualBatches).toBe(expectedBatches)
      }
    })

    it('should handle edge case batch boundaries', () => {
      // Exactly one batch
      expect(Math.ceil(32 / BATCH_SIZE)).toBe(1)
      // One more than batch size
      expect(Math.ceil(33 / BATCH_SIZE)).toBe(2)
      // Multiple full batches
      expect(Math.ceil(96 / BATCH_SIZE)).toBe(3)
      // Multiple with remainder
      expect(Math.ceil(100 / BATCH_SIZE)).toBe(4)
    })
  })

  describe('mixed source scaling', () => {
    it('should handle combined email + message + calendar indexing', async () => {
      const emailCount = 200
      const messageCount = 300
      const eventCount = 100
      const totalCount = emailCount + messageCount + eventCount

      const emails = generateTestEmails(emailCount)
      const messages = generateTestMessages(messageCount)
      const events = generateCalendarEvents(eventCount)

      const allTexts = [
        ...emails.map(e => e.content),
        ...messages.map(m => m.text),
        ...events.map(e => `${e.title} ${e.location}`)
      ]

      const tracker = new ThroughputTracker().start()
      const { duration } = await measureTime(async () => {
        for (let i = 0; i < allTexts.length; i += BATCH_SIZE) {
          const batch = allTexts.slice(i, i + BATCH_SIZE)
          await mockEmbedder(batch, { pooling: 'mean', normalize: true })
          tracker.recordBatch(batch.length)
        }
      })

      const throughput = calculateThroughput(totalCount, duration)
      console.log(`Mixed sources (${totalCount} total): ${throughput.toFixed(0)} items/sec`)

      expect(throughput).toBeGreaterThan(50)
      expect(tracker.getTotalItems()).toBe(totalCount)
    })
  })

  describe('consistent batch throughput', () => {
    it('should have consistent per-batch throughput', async () => {
      const count = 320 // 10 batches
      const texts = generateTestEmails(count).map(e => e.content)

      const batchDurations = []
      for (let i = 0; i < texts.length; i += BATCH_SIZE) {
        const batch = texts.slice(i, i + BATCH_SIZE)
        const { duration } = await measureTime(async () => {
          await mockEmbedder(batch, { pooling: 'mean', normalize: true })
        })
        batchDurations.push(duration)
      }

      // Calculate variance in batch durations
      const avgDuration = batchDurations.reduce((a, b) => a + b, 0) / batchDurations.length
      const variance = batchDurations.reduce((sum, d) => sum + Math.pow(d - avgDuration, 2), 0) / batchDurations.length
      const stdDev = Math.sqrt(variance)
      const cv = stdDev / avgDuration // Coefficient of variation

      console.log(`Avg batch duration: ${avgDuration.toFixed(2)}ms`)
      console.log(`Std dev: ${stdDev.toFixed(2)}ms`)
      console.log(`CV: ${(cv * 100).toFixed(1)}%`)

      // Coefficient of variation should stay reasonable; allow up to 200%
      // since mocked operations can have significant timing jitter
      expect(cv).toBeLessThan(2)
    })
  })
})
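The tests above import measureTime, calculateThroughput, and ThroughputTracker from ../helpers/performance-utils.js, which is not shown on this page. The following is a minimal sketch of what those helpers might look like, inferred only from how the tests call them; the real implementations may differ.

// Hypothetical reconstruction of ../helpers/performance-utils.js,
// inferred from usage in scaling.test.js. Not the actual source.

// Run an async function and report how long it took, in milliseconds.
export async function measureTime(fn) {
  const start = performance.now()
  const result = await fn()
  return { result, duration: performance.now() - start }
}

// Convert an item count and a duration in milliseconds to items/sec.
export function calculateThroughput(count, durationMs) {
  return count / (durationMs / 1000)
}

// Accumulates per-batch item counts and reports overall throughput.
export class ThroughputTracker {
  start() {
    this.startTime = performance.now()
    this.totalItems = 0
    return this
  }

  recordBatch(itemCount) {
    this.totalItems += itemCount
  }

  getTotalItems() {
    return this.totalItems
  }

  getSummary() {
    const elapsedMs = performance.now() - this.startTime
    return {
      totalItems: this.totalItems,
      overallThroughput: calculateThroughput(this.totalItems, elapsedMs)
    }
  }
}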

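Similarly, createEmbeddingMock, createLanceDBMock, BATCH_SIZE, and EMBEDDING_DIM come from ../helpers/indexing-mocks.js, also not shown. A plausible sketch follows; the constants and return shapes are assumptions consistent with the tests (the "320 items = 10 batches" comment implies BATCH_SIZE is 32, and EMBEDDING_DIM is a guess).

// Hypothetical reconstruction of ../helpers/indexing-mocks.js.
// Values are inferred from scaling.test.js, not taken from the real file.
import { vi } from 'vitest'

export const BATCH_SIZE = 32
export const EMBEDDING_DIM = 384 // assumed; common for MiniLM-style models

// Returns a vi.fn() mimicking an embedder: it accepts an array of texts
// plus options and resolves to one fake vector per input text.
export function createEmbeddingMock() {
  const mockEmbedder = vi.fn(async (texts, _options) =>
    texts.map(() => new Float32Array(EMBEDDING_DIM))
  )
  return { mockEmbedder }
}

// Minimal stand-in for a LanceDB table/connection used by indexing code.
export function createLanceDBMock() {
  return {
    add: vi.fn(async rows => rows.length),
    search: vi.fn(async () => [])
  }
}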