Skip to main content
Glama
indexer-functions.test.js8.37 kB
/** * Unit tests for indexer.js functions * Tests: indexEmails, indexMessages, indexCalendar, indexAll */ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest' import { createIndexerMocks, createLanceDBMock, createEmbeddingMock, createEmailFileSystemMock, BATCH_SIZE, BATCH_DELAY_MS } from '../helpers/indexing-mocks.js' import { generateTestEmails, generateTestMessages, generateCalendarEvents, generateEmailFilePaths } from '../helpers/test-data-generators.js' // Mock all external dependencies vi.mock('fs') vi.mock('@lancedb/lancedb') vi.mock('@xenova/transformers') vi.mock('child_process') describe('indexEmails', () => { let mocks beforeEach(() => { vi.clearAllMocks() mocks = createIndexerMocks() }) afterEach(() => { vi.restoreAllMocks() }) describe('incremental vs full scan detection', () => { it('should use mdfind when lastEmailIndexTime exists', async () => { const { shell } = mocks const lastIndexTime = Date.now() - (24 * 60 * 60 * 1000) // 1 day ago // Set up metadata with lastEmailIndexTime const fs = await import('fs') fs.existsSync.mockReturnValue(true) fs.readFileSync.mockReturnValue(JSON.stringify({ lastEmailIndexTime: lastIndexTime })) // Verify mdfind would be called with date filter shell.execAsync.mockResolvedValue({ stdout: '', stderr: '' }) // The implementation should detect metadata and use mdfind expect(shell.execAsync).toBeDefined() }) it('should use full find scan when no metadata exists', async () => { const fs = await import('fs') fs.existsSync.mockImplementation((p) => !p.includes('index-meta.json')) // When metadata file doesn't exist, should use find instead of mdfind expect(fs.existsSync).toBeDefined() }) }) describe('batch chunking', () => { it('should process emails in BATCH_SIZE chunks', async () => { const emailCount = 100 const expectedBatches = Math.ceil(emailCount / BATCH_SIZE) // Generate test emails const emails = generateTestEmails(emailCount) const filePaths = emails.map(e => e.path) // Verify chunking logic const batches = [] for (let i = 0; i < filePaths.length; i += BATCH_SIZE) { batches.push(filePaths.slice(i, i + BATCH_SIZE)) } expect(batches.length).toBe(expectedBatches) expect(batches[0].length).toBe(BATCH_SIZE) expect(batches[batches.length - 1].length).toBe(emailCount % BATCH_SIZE || BATCH_SIZE) }) it('should apply BATCH_DELAY_MS between batches', () => { // Verify the delay constant is correct expect(BATCH_DELAY_MS).toBe(100) }) }) describe('30-day filter', () => { it('should skip emails older than 30 days', () => { const thirtyDaysAgo = Date.now() - (30 * 24 * 60 * 60 * 1000) const fortyDaysAgo = Date.now() - (40 * 24 * 60 * 60 * 1000) // Email 40 days old should be filtered out expect(fortyDaysAgo).toBeLessThan(thirtyDaysAgo) // This filter is applied in indexEmails based on dateTimestamp const emails = generateTestEmails(10, { daysBack: 45 }) const recentEmails = emails.filter(e => e.timestamp >= thirtyDaysAgo) expect(recentEmails.length).toBeLessThan(emails.length) }) }) describe('file filtering', () => { it('should filter out already-indexed files', () => { const allFiles = ['/path/1.emlx', '/path/2.emlx', '/path/3.emlx'] const indexedFiles = new Set(['/path/1.emlx', '/path/2.emlx']) const toIndex = allFiles.filter(f => !indexedFiles.has(f)) expect(toIndex).toEqual(['/path/3.emlx']) expect(toIndex.length).toBe(1) }) it('should handle both .emlx and .partial.emlx files', () => { const files = [ '/path/1.emlx', '/path/2.partial.emlx', '/path/3.emlx', '/path/4.partial.emlx' ] const emlxFiles = files.filter(f => f.endsWith('.emlx') || f.endsWith('.partial.emlx')) expect(emlxFiles.length).toBe(4) }) }) describe('return values', () => { it('should return correct indexed/added counts', () => { const previousCount = 100 const newCount = 25 const result = { indexed: previousCount, added: newCount } expect(result.indexed).toBe(100) expect(result.added).toBe(25) }) }) }) describe('indexMessages', () => { let mocks beforeEach(() => { vi.clearAllMocks() mocks = createIndexerMocks() }) describe('attributedBody extraction', () => { it('should extract text from attributedBody when text is NULL', () => { // NSAttributedString extraction is tested via the extractTextFromAttributedBody function const messages = generateTestMessages(5) // Simulate message without text but with attributedBodyHex const msgWithoutText = { ...messages[0], text: null, attributedBodyHex: '4e53537472696e67' // "NSString" in hex } expect(msgWithoutText.text).toBeNull() expect(msgWithoutText.attributedBodyHex).toBeDefined() }) }) describe('group chat detection', () => { it('should detect group chats when participantCount > 2', () => { const msg = { participantCount: 5, chatName: '' } const isGroupChat = (parseInt(msg.participantCount) || 0) > 2 || (msg.chatName && msg.chatName.length > 0) expect(isGroupChat).toBe(true) }) it('should detect group chats when chatName exists', () => { const msg = { participantCount: 2, chatName: 'Family Group' } const isGroupChat = (parseInt(msg.participantCount) || 0) > 2 || (msg.chatName && msg.chatName.length > 0) expect(isGroupChat).toBe(true) }) it('should not detect 1-on-1 chats as group', () => { const msg = { participantCount: 2, chatName: '' } const isGroupChat = (parseInt(msg.participantCount) || 0) > 2 || Boolean(msg.chatName && msg.chatName.length > 0) expect(isGroupChat).toBe(false) }) }) describe('message filtering', () => { it('should skip messages without text content', () => { const messages = [ { id: 1, text: 'Hello' }, { id: 2, text: '' }, { id: 3, text: ' ' }, { id: 4, text: 'World' }, { id: 5, text: null } ] const filtered = messages.filter(msg => msg.text && msg.text.trim() !== '') expect(filtered.length).toBe(2) expect(filtered.map(m => m.id)).toEqual([1, 4]) }) }) }) describe('indexCalendar', () => { let mocks beforeEach(() => { vi.clearAllMocks() mocks = createIndexerMocks() }) describe('stale entry removal', () => { it('should detect events no longer in source', () => { const indexedIds = new Set(['Event A-2024', 'Event B-2024', 'Event C-2024']) const currentIds = new Set(['Event A-2024', 'Event C-2024']) const staleIds = [...indexedIds].filter(id => !currentIds.has(id)) expect(staleIds).toEqual(['Event B-2024']) }) it('should return removed count', () => { const result = { indexed: 10, added: 2, removed: 3 } expect(result.removed).toBe(3) }) }) describe('recurring events', () => { it('should handle events from OccurrenceCache', () => { const events = generateCalendarEvents(10, { recurringRate: 0.5 }) // At least some should have recurring notes const recurringCount = events.filter(e => e.notes?.includes('Recurring')).length expect(recurringCount).toBeGreaterThan(0) }) }) describe('event ID generation', () => { it('should create unique IDs from title-start combination', () => { const events = generateCalendarEvents(5) const ids = events.map(e => `${e.title}-${e.start}`) const uniqueIds = new Set(ids) expect(uniqueIds.size).toBe(ids.length) }) }) }) describe('indexAll', () => { describe('orchestration', () => { it('should return results for all three sources', () => { const mockResult = { emails: { indexed: 100, added: 10 }, messages: { indexed: 50, added: 5 }, calendar: { indexed: 20, added: 2, removed: 1 } } expect(mockResult.emails).toBeDefined() expect(mockResult.messages).toBeDefined() expect(mockResult.calendar).toBeDefined() }) }) })

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server