Skip to main content
Glama
incremental-indexing.test.js15.3 kB
/**
 * Edge case tests for incremental indexing.
 * Covers timestamp boundaries (Mac Absolute Time conversions), batch sizing,
 * ID validation/escaping, data-size truncation, deduplication, and the
 * single-flight guard for concurrent indexing requests.
 */
import { describe, it, expect } from 'vitest'

// Mac Absolute Time epoch: Jan 1, 2001 00:00:00 UTC
const MAC_ABSOLUTE_EPOCH = 978307200
const ONE_HOUR_MS = 60 * 60 * 1000
const BATCH_SIZE = 32
const BATCH_DELETE_SIZE = 100

describe('Incremental Indexing Edge Cases', () => {
  describe('timestamp edge cases', () => {
    // Convert Unix ms to Mac Absolute nanoseconds (like in indexer.js)
    const unixMsToMacAbsoluteNs = (unixMs) => {
      return (unixMs / 1000 - MAC_ABSOLUTE_EPOCH) * 1000000000
    }

    // Convert Mac Absolute nanoseconds to Unix ms
    const macAbsoluteNsToUnixMs = (macNs) => {
      return (macNs / 1000000000 + MAC_ABSOLUTE_EPOCH) * 1000
    }

    it('should handle Mac Absolute Time at epoch (Jan 1, 2001 00:00:00)', () => {
      // At Mac epoch, Mac Absolute Time is 0
      const macEpochUnixMs = MAC_ABSOLUTE_EPOCH * 1000
      const macAbsoluteNs = unixMsToMacAbsoluteNs(macEpochUnixMs)
      expect(macAbsoluteNs).toBe(0)

      // Round trip
      const backToUnixMs = macAbsoluteNsToUnixMs(macAbsoluteNs)
      expect(backToUnixMs).toBe(macEpochUnixMs)
    })

    it('should handle timestamps before Mac epoch (pre-2001)', () => {
      // Year 2000 - before Mac epoch
      const year2000 = new Date('2000-06-15T12:00:00Z').getTime()
      const macAbsoluteNs = unixMsToMacAbsoluteNs(year2000)
      expect(macAbsoluteNs).toBeLessThan(0)

      // Verify this creates a valid SQL filter (negative number)
      const sqlFilter = `AND m.date >= ${macAbsoluteNs}`
      expect(sqlFilter).toContain('-')
    })

    it('should handle future timestamps (clock skew)', () => {
      // 1 year in the future
      const futureTimestamp = Date.now() + 365 * 24 * 60 * 60 * 1000
      const macAbsoluteNs = unixMsToMacAbsoluteNs(futureTimestamp)
      expect(macAbsoluteNs).toBeGreaterThan(0)

      // Should still produce valid SQL
      const sqlFilter = `AND m.date >= ${macAbsoluteNs}`
      expect(sqlFilter).not.toContain('NaN')
      expect(sqlFilter).not.toContain('Infinity')
    })

    it('should handle message timestamp exactly at buffer boundary', () => {
      // Scenario: lastIndexTime = T, buffer = 1 hour
      // Message timestamp = T - 1 hour exactly
      const lastIndexTime = Date.now()
      const effectiveTimestamp = lastIndexTime - ONE_HOUR_MS

      // Message at exact boundary
      const messageTimestamp = effectiveTimestamp

      // Should be included (>= comparison)
      const isIncluded = messageTimestamp >= effectiveTimestamp
      expect(isIncluded).toBe(true)
    })

    it('should handle message 1ms before buffer boundary', () => {
      const lastIndexTime = Date.now()
      const effectiveTimestamp = lastIndexTime - ONE_HOUR_MS

      // Message 1ms before boundary
      const messageTimestamp = effectiveTimestamp - 1

      // Should NOT be included
      const isIncluded = messageTimestamp >= effectiveTimestamp
      expect(isIncluded).toBe(false)
    })

    it('should handle Unix epoch (1970-01-01)', () => {
      const unixEpoch = 0
      const macAbsoluteNs = unixMsToMacAbsoluteNs(unixEpoch)

      // Should be negative (1970 is before 2001)
      expect(macAbsoluteNs).toBeLessThan(0)

      // Should be approximately -31 years in nanoseconds
      const expectedNs = -MAC_ABSOLUTE_EPOCH * 1000000000
      expect(macAbsoluteNs).toBe(expectedNs)
    })

    it('should handle very large timestamps (year 3000)', () => {
      const year3000 = new Date('3000-01-01T00:00:00Z').getTime()
      const macAbsoluteNs = unixMsToMacAbsoluteNs(year3000)
      expect(Number.isFinite(macAbsoluteNs)).toBe(true)

      // Verify no precision loss (double granularity at ~3e19 ns is only a
      // few microseconds, well inside the 1ms tolerance)
      const roundTrip = macAbsoluteNsToUnixMs(macAbsoluteNs)
      expect(Math.abs(roundTrip - year3000)).toBeLessThan(1) // Within 1ms
    })
  })

  describe('batch boundary edge cases', () => {
    // Chunk an array into consecutive slices of at most batchSize items
    const createBatches = (items, batchSize) => {
      const batches = []
      for (let i = 0; i < items.length; i += batchSize) {
        batches.push(items.slice(i, i + batchSize))
      }
      return batches
    }

    it('should handle exactly BATCH_SIZE items', () => {
      const items = Array.from({ length: BATCH_SIZE }, (_, i) => ({ id: i }))
      const batches = createBatches(items, BATCH_SIZE)
      expect(batches).toHaveLength(1)
      expect(batches[0]).toHaveLength(BATCH_SIZE)
    })

    it('should handle BATCH_SIZE + 1 items', () => {
      const items = Array.from({ length: BATCH_SIZE + 1 }, (_, i) => ({ id: i }))
      const batches = createBatches(items, BATCH_SIZE)
      expect(batches).toHaveLength(2)
      expect(batches[0]).toHaveLength(BATCH_SIZE)
      expect(batches[1]).toHaveLength(1)
    })

    it('should handle exactly BATCH_DELETE_SIZE items for calendar', () => {
      const items = Array.from({ length: BATCH_DELETE_SIZE }, (_, i) => `id-${i}`)
      const batches = createBatches(items, BATCH_DELETE_SIZE)
      expect(batches).toHaveLength(1)
      expect(batches[0]).toHaveLength(BATCH_DELETE_SIZE)
    })

    it('should handle 101 stale calendar entries (tests chunking)', () => {
      const items = Array.from({ length: 101 }, (_, i) => `stale-${i}`)
      const batches = createBatches(items, BATCH_DELETE_SIZE)
      expect(batches).toHaveLength(2)
      expect(batches[0]).toHaveLength(100)
      expect(batches[1]).toHaveLength(1)
    })

    it('should handle empty batch after dedup filtering', () => {
      const allItems = [{ id: 1 }, { id: 2 }, { id: 3 }]
      const indexedIds = new Set(['1', '2', '3'])

      const toIndex = allItems.filter(item => !indexedIds.has(String(item.id)))
      expect(toIndex).toHaveLength(0)

      const batches = createBatches(toIndex, BATCH_SIZE)
      expect(batches).toHaveLength(0)
    })

    it('should handle single item batch', () => {
      const items = [{ id: 1 }]
      const batches = createBatches(items, BATCH_SIZE)
      expect(batches).toHaveLength(1)
      expect(batches[0]).toHaveLength(1)
    })

    it('should handle items that fill exactly N batches', () => {
      const numBatches = 5
      const items = Array.from({ length: BATCH_SIZE * numBatches }, (_, i) => ({ id: i }))
      const batches = createBatches(items, BATCH_SIZE)
      expect(batches).toHaveLength(numBatches)
      batches.forEach(batch => {
        expect(batch).toHaveLength(BATCH_SIZE)
      })
    })
  })

  describe('ID edge cases', () => {
    // Validate ID for LanceDB (like validateLanceDBId in indexer.js)
    const validateLanceDBId = (id) => {
      if (typeof id !== 'string') return null
      if (id.length === 0 || id.length > 1000) return null
      // Only allow safe characters
      const safePattern = /^[a-zA-Z0-9._@<>:\-\/\s]+$/
      if (!safePattern.test(id)) return null
      return id
    }

    // Escape SQL string (like escapeSQL in indexer.js)
    const escapeSQL = (str) => {
      if (typeof str !== 'string') return ''
      return str.replace(/'/g, "''")
    }

    it('should handle calendar ID with hyphen in dbId', () => {
      // Calendar ID format: "${dbId}-${startTimestamp}"
      const dbId = 'abc-123-def'
      const startTimestamp = 1234567890
      const calendarId = `${dbId}-${startTimestamp}`

      // Should be valid
      const validated = validateLanceDBId(calendarId)
      expect(validated).toBe(calendarId)

      // Should parse correctly
      const lastHyphen = calendarId.lastIndexOf('-')
      const extractedDbId = calendarId.substring(0, lastHyphen)
      const extractedTimestamp = calendarId.substring(lastHyphen + 1)
      expect(extractedDbId).toBe(dbId)
      expect(extractedTimestamp).toBe(String(startTimestamp))
    })

    it('should handle very large message ROWID', () => {
      // SQLite max integer is 9223372036854775807
      const largeRowId = '9223372036854775807'
      const validated = validateLanceDBId(largeRowId)
      expect(validated).toBe(largeRowId)
    })

    it('should handle calendar event with negative startTimestamp', () => {
      // Events before 1970
      const dbId = '12345'
      const negativeTimestamp = -86400000 // 1 day before Unix epoch
      const calendarId = `${dbId}-${negativeTimestamp}`

      // Should be valid (hyphen is allowed)
      const validated = validateLanceDBId(calendarId)
      expect(validated).toBe(calendarId)
    })

    it('should reject ID with SQL injection characters', () => {
      const maliciousIds = [
        "'; DROP TABLE emails; --",
        "1' OR '1'='1",
        "id); DELETE FROM messages; --"
      ]

      for (const id of maliciousIds) {
        const validated = validateLanceDBId(id)
        // Should be rejected due to semicolon, quotes, etc.
        expect(validated).toBeNull()
      }
    })

    it('should escape single quotes in SQL', () => {
      const idWithQuote = "O'Brien"
      const escaped = escapeSQL(idWithQuote)
      expect(escaped).toBe("O''Brien")

      // Build delete condition
      const condition = `id = '${escaped}'`
      expect(condition).toBe("id = 'O''Brien'")
    })

    it('should handle empty ID', () => {
      const validated = validateLanceDBId('')
      expect(validated).toBeNull()
    })

    it('should handle ID at max length boundary', () => {
      const maxLength = 1000
      const atLimit = 'a'.repeat(maxLength)
      const overLimit = 'a'.repeat(maxLength + 1)

      expect(validateLanceDBId(atLimit)).toBe(atLimit)
      expect(validateLanceDBId(overLimit)).toBeNull()
    })

    it('should handle ID with email address format', () => {
      // Message IDs often contain email addresses
      const messageId = '<abc123.456@mail.gmail.com>'
      const validated = validateLanceDBId(messageId)
      expect(validated).toBe(messageId)
    })
  })

  describe('data size edge cases', () => {
    // Truncate text like in indexer.js
    const truncateText = (text, maxLength) => {
      if (!text || typeof text !== 'string') return ''
      return text.substring(0, maxLength)
    }

    it('should handle email body exactly at 500 char truncation boundary', () => {
      const exactlyAtLimit = 'a'.repeat(500)
      const truncated = truncateText(exactlyAtLimit, 500)
      expect(truncated).toHaveLength(500)
      expect(truncated).toBe(exactlyAtLimit)
    })

    it('should truncate email body at 501 chars', () => {
      const overLimit = 'a'.repeat(501)
      const truncated = truncateText(overLimit, 500)
      expect(truncated).toHaveLength(500)
    })

    it('should handle empty message text with valid attributedBody', () => {
      const processMessage = (msg) => {
        // If text is empty but attributedBody exists, we'd extract from attributedBody
        const text = msg.text || msg.extractedFromAttributedBody || ''
        return { id: msg.id, text, hasText: text.length > 0 }
      }

      const message = {
        id: 1,
        text: '', // Empty
        extractedFromAttributedBody: 'Hello from attributedBody!'
      }

      const result = processMessage(message)
      expect(result.hasText).toBe(true)
      expect(result.text).toBe('Hello from attributedBody!')
    })

    it('should handle calendar event with empty title', () => {
      const processEvent = (event) => {
        const title = event.title || '(No Title)'
        return { id: event.id, title, hasTitle: event.title && event.title.length > 0 }
      }

      const eventWithEmptyTitle = { id: 1, title: '' }
      const eventWithNullTitle = { id: 2, title: null }
      const eventWithUndefinedTitle = { id: 3 }

      expect(processEvent(eventWithEmptyTitle).title).toBe('(No Title)')
      expect(processEvent(eventWithNullTitle).title).toBe('(No Title)')
      expect(processEvent(eventWithUndefinedTitle).title).toBe('(No Title)')
    })

    it('should handle message with 0 attachments', () => {
      const msg = { id: 1, attachmentCount: 0 }
      expect(msg.attachmentCount).toBe(0)
      expect(msg.attachmentCount > 0).toBe(false)
    })

    it('should handle message with many attachments', () => {
      const msg = { id: 1, attachmentCount: 100 }
      expect(msg.attachmentCount).toBe(100)
      expect(msg.attachmentCount > 0).toBe(true)
    })

    it('should handle very long email subject', () => {
      const longSubject = 'Re: Fw: '.repeat(100) + 'Original Subject'
      const truncatedSubject = truncateText(longSubject, 200)
      expect(truncatedSubject.length).toBeLessThanOrEqual(200)
    })

    it('should handle null text gracefully', () => {
      expect(truncateText(null, 500)).toBe('')
      expect(truncateText(undefined, 500)).toBe('')
    })
  })

  describe('deduplication edge cases', () => {
    it('should handle duplicate IDs in same batch', () => {
      const items = [
        { id: '1', text: 'first' },
        { id: '1', text: 'duplicate' },
        { id: '2', text: 'second' }
      ]

      // Deduplicate by keeping first occurrence
      const seen = new Set()
      const unique = items.filter(item => {
        if (seen.has(item.id)) return false
        seen.add(item.id)
        return true
      })

      expect(unique).toHaveLength(2)
      expect(unique[0].text).toBe('first')
    })

    it('should handle ID type coercion (number vs string)', () => {
      const indexedIds = new Set(['1', '2', '3'])

      // SQLite might return number, but we store as string
      const numericId = 1
      const stringId = '1'

      expect(indexedIds.has(numericId)).toBe(false) // Different type
      expect(indexedIds.has(String(numericId))).toBe(true)
      expect(indexedIds.has(stringId)).toBe(true)
    })

    it('should handle very large Set of indexed IDs', () => {
      // Test performance with 100k IDs (typical large mailbox)
      const largeSet = new Set()
      for (let i = 0; i < 100000; i++) {
        largeSet.add(`id-${i}`)
      }

      // Lookup should still be O(1)
      const start = performance.now()
      for (let i = 0; i < 1000; i++) {
        largeSet.has(`id-${Math.floor(Math.random() * 100000)}`)
      }
      const duration = performance.now() - start

      // 1000 lookups should be very fast (< 10ms)
      expect(duration).toBeLessThan(10)
    })
  })

  describe('concurrent indexing edge cases', () => {
    it('should handle rapid indexing requests', async () => {
      let indexingInProgress = false
      const completedIndexes = []

      const startIndexing = async (id) => {
        // Guard is set synchronously before the first await, so racing
        // callers observe it and skip
        if (indexingInProgress) {
          return { skipped: true, reason: 'already_in_progress' }
        }
        indexingInProgress = true
        try {
          await new Promise(r => setTimeout(r, 10))
          completedIndexes.push(id)
          return { success: true, id }
        } finally {
          indexingInProgress = false
        }
      }

      // Fire multiple requests rapidly
      const results = await Promise.all([
        startIndexing(1),
        startIndexing(2),
        startIndexing(3)
      ])

      // Only one should succeed, others skipped
      const successful = results.filter(r => r.success)
      const skipped = results.filter(r => r.skipped)

      expect(successful.length).toBe(1)
      expect(skipped.length).toBe(2)
    })
  })
})

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.