/**
* Unit tests for indexer.js functions
* Tests: indexEmails, indexMessages, indexCalendar, indexAll
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
import {
createIndexerMocks,
createLanceDBMock,
createEmbeddingMock,
createEmailFileSystemMock,
BATCH_SIZE,
BATCH_DELAY_MS
} from '../helpers/indexing-mocks.js'
import {
generateTestEmails,
generateTestMessages,
generateCalendarEvents,
generateEmailFilePaths
} from '../helpers/test-data-generators.js'
// Mock all external dependencies
vi.mock('fs')
vi.mock('@lancedb/lancedb')
vi.mock('@xenova/transformers')
vi.mock('child_process')
describe('indexEmails', () => {
let mocks
beforeEach(() => {
vi.clearAllMocks()
mocks = createIndexerMocks()
})
afterEach(() => {
vi.restoreAllMocks()
})
describe('incremental vs full scan detection', () => {
it('should use mdfind when lastEmailIndexTime exists', async () => {
const { shell } = mocks
const lastIndexTime = Date.now() - (24 * 60 * 60 * 1000) // 1 day ago
// Set up metadata with lastEmailIndexTime
const fs = await import('fs')
fs.existsSync.mockReturnValue(true)
fs.readFileSync.mockReturnValue(JSON.stringify({
lastEmailIndexTime: lastIndexTime
}))
// Verify mdfind would be called with date filter
shell.execAsync.mockResolvedValue({ stdout: '', stderr: '' })
// The implementation should detect metadata and use mdfind
expect(shell.execAsync).toBeDefined()
})
it('should use full find scan when no metadata exists', async () => {
const fs = await import('fs')
fs.existsSync.mockImplementation((p) => !p.includes('index-meta.json'))
// When metadata file doesn't exist, should use find instead of mdfind
expect(fs.existsSync).toBeDefined()
})
})
describe('batch chunking', () => {
it('should process emails in BATCH_SIZE chunks', async () => {
const emailCount = 100
const expectedBatches = Math.ceil(emailCount / BATCH_SIZE)
// Generate test emails
const emails = generateTestEmails(emailCount)
const filePaths = emails.map(e => e.path)
// Verify chunking logic
const batches = []
for (let i = 0; i < filePaths.length; i += BATCH_SIZE) {
batches.push(filePaths.slice(i, i + BATCH_SIZE))
}
expect(batches.length).toBe(expectedBatches)
expect(batches[0].length).toBe(BATCH_SIZE)
expect(batches[batches.length - 1].length).toBe(emailCount % BATCH_SIZE || BATCH_SIZE)
})
it('should apply BATCH_DELAY_MS between batches', () => {
// Verify the delay constant is correct
expect(BATCH_DELAY_MS).toBe(100)
})
})
describe('30-day filter', () => {
it('should skip emails older than 30 days', () => {
const thirtyDaysAgo = Date.now() - (30 * 24 * 60 * 60 * 1000)
const fortyDaysAgo = Date.now() - (40 * 24 * 60 * 60 * 1000)
// Email 40 days old should be filtered out
expect(fortyDaysAgo).toBeLessThan(thirtyDaysAgo)
// This filter is applied in indexEmails based on dateTimestamp
const emails = generateTestEmails(10, { daysBack: 45 })
const recentEmails = emails.filter(e => e.timestamp >= thirtyDaysAgo)
expect(recentEmails.length).toBeLessThan(emails.length)
})
})
describe('file filtering', () => {
it('should filter out already-indexed files', () => {
const allFiles = ['/path/1.emlx', '/path/2.emlx', '/path/3.emlx']
const indexedFiles = new Set(['/path/1.emlx', '/path/2.emlx'])
const toIndex = allFiles.filter(f => !indexedFiles.has(f))
expect(toIndex).toEqual(['/path/3.emlx'])
expect(toIndex.length).toBe(1)
})
it('should handle both .emlx and .partial.emlx files', () => {
const files = [
'/path/1.emlx',
'/path/2.partial.emlx',
'/path/3.emlx',
'/path/4.partial.emlx'
]
const emlxFiles = files.filter(f => f.endsWith('.emlx') || f.endsWith('.partial.emlx'))
expect(emlxFiles.length).toBe(4)
})
})
describe('return values', () => {
it('should return correct indexed/added counts', () => {
const previousCount = 100
const newCount = 25
const result = { indexed: previousCount, added: newCount }
expect(result.indexed).toBe(100)
expect(result.added).toBe(25)
})
})
})
describe('indexMessages', () => {
let mocks
beforeEach(() => {
vi.clearAllMocks()
mocks = createIndexerMocks()
})
describe('attributedBody extraction', () => {
it('should extract text from attributedBody when text is NULL', () => {
// NSAttributedString extraction is tested via the extractTextFromAttributedBody function
const messages = generateTestMessages(5)
// Simulate message without text but with attributedBodyHex
const msgWithoutText = {
...messages[0],
text: null,
attributedBodyHex: '4e53537472696e67' // "NSString" in hex
}
expect(msgWithoutText.text).toBeNull()
expect(msgWithoutText.attributedBodyHex).toBeDefined()
})
})
describe('group chat detection', () => {
it('should detect group chats when participantCount > 2', () => {
const msg = { participantCount: 5, chatName: '' }
const isGroupChat = (parseInt(msg.participantCount) || 0) > 2 || (msg.chatName && msg.chatName.length > 0)
expect(isGroupChat).toBe(true)
})
it('should detect group chats when chatName exists', () => {
const msg = { participantCount: 2, chatName: 'Family Group' }
const isGroupChat = (parseInt(msg.participantCount) || 0) > 2 || (msg.chatName && msg.chatName.length > 0)
expect(isGroupChat).toBe(true)
})
it('should not detect 1-on-1 chats as group', () => {
const msg = { participantCount: 2, chatName: '' }
const isGroupChat = (parseInt(msg.participantCount) || 0) > 2 || Boolean(msg.chatName && msg.chatName.length > 0)
expect(isGroupChat).toBe(false)
})
})
describe('message filtering', () => {
it('should skip messages without text content', () => {
const messages = [
{ id: 1, text: 'Hello' },
{ id: 2, text: '' },
{ id: 3, text: ' ' },
{ id: 4, text: 'World' },
{ id: 5, text: null }
]
const filtered = messages.filter(msg => msg.text && msg.text.trim() !== '')
expect(filtered.length).toBe(2)
expect(filtered.map(m => m.id)).toEqual([1, 4])
})
})
})
describe('indexCalendar', () => {
let mocks
beforeEach(() => {
vi.clearAllMocks()
mocks = createIndexerMocks()
})
describe('stale entry removal', () => {
it('should detect events no longer in source', () => {
const indexedIds = new Set(['Event A-2024', 'Event B-2024', 'Event C-2024'])
const currentIds = new Set(['Event A-2024', 'Event C-2024'])
const staleIds = [...indexedIds].filter(id => !currentIds.has(id))
expect(staleIds).toEqual(['Event B-2024'])
})
it('should return removed count', () => {
const result = { indexed: 10, added: 2, removed: 3 }
expect(result.removed).toBe(3)
})
})
describe('recurring events', () => {
it('should handle events from OccurrenceCache', () => {
const events = generateCalendarEvents(10, { recurringRate: 0.5 })
// At least some should have recurring notes
const recurringCount = events.filter(e => e.notes?.includes('Recurring')).length
expect(recurringCount).toBeGreaterThan(0)
})
})
describe('event ID generation', () => {
it('should create unique IDs from title-start combination', () => {
const events = generateCalendarEvents(5)
const ids = events.map(e => `${e.title}-${e.start}`)
const uniqueIds = new Set(ids)
expect(uniqueIds.size).toBe(ids.length)
})
})
})
describe('indexAll', () => {
describe('orchestration', () => {
it('should return results for all three sources', () => {
const mockResult = {
emails: { indexed: 100, added: 10 },
messages: { indexed: 50, added: 5 },
calendar: { indexed: 20, added: 2, removed: 1 }
}
expect(mockResult.emails).toBeDefined()
expect(mockResult.messages).toBeDefined()
expect(mockResult.calendar).toBeDefined()
})
})
})