/**
* Accuracy tests for data correctness in indexing
* Tests field mapping, extraction, and transformation accuracy
*/
import { describe, it, expect, beforeEach, vi } from 'vitest'
import {
generateTestEmails,
generateTestMessages,
generateCalendarEvents,
generateEmailContent
} from '../helpers/test-data-generators.js'
import { MAC_ABSOLUTE_EPOCH } from '../helpers/indexing-mocks.js'
/**
 * Email field-mapping checks: sender/recipient address extraction, attachment
 * detection, date parsing, mailbox naming and sent-folder detection.
 *
 * The helpers are defined once at suite scope and exercised directly by the
 * tests; nothing here imports an implementation from elsewhere. Sharing one
 * helper per concern keeps every test checking the same logic instead of
 * per-test copies that can drift apart.
 */
describe('Email Field Mapping', () => {
  /**
   * Extracts a single address from a header value.
   *
   * @param {string|null|undefined} str - Header value such as
   *   `Name <user@host>` or a bare `user@host`.
   * @returns {string} The text inside the first `<...>` pair if present,
   *   otherwise the whole value; trimmed and lowercased. Empty string for
   *   empty/nullish input.
   */
  const extractEmail = (str) => {
    if (!str) return ''
    const match = str.match(/<([^>]+)>/)
    return (match ? match[1] : str).trim().toLowerCase()
  }

  /**
   * Extracts every address from a comma-separated recipient header.
   *
   * @param {string|null|undefined} str - Header value holding one or more
   *   recipients, each either `Name <user@host>` or a bare `user@host`.
   * @returns {string[]} Lowercased addresses in header order. Fragments that
   *   contain no `@` after extraction are skipped. Empty array for
   *   empty/nullish input.
   */
  const extractEmails = (str) => {
    if (!str) return []
    return str
      .split(',')
      .map(extractEmail)
      .filter((address) => address.includes('@'))
  }

  // Individual attachment signals. None of these use the `g` flag, so the
  // regex objects are stateless and safe to reuse across tests.
  const ATTACHMENT_PATTERNS = {
    disposition: /Content-Disposition:\s*attachment/i,
    multipartMixed: /multipart\/mixed/i,
    filename: /filename=/i
  }

  /**
   * @param {string} content - Raw header/body text.
   * @returns {boolean} True when any attachment signal matches.
   */
  const hasAttachment = (content) =>
    Object.values(ATTACHMENT_PATTERNS).some((pattern) => pattern.test(content))

  /**
   * Converts a date string to epoch milliseconds.
   *
   * @param {string} dateStr - Date text handed to the `Date` constructor.
   * @returns {number} Epoch milliseconds, or 0 when the string does not parse.
   */
  const toTimestamp = (dateStr) => {
    // An unparseable string yields an Invalid Date (NaN time value) rather
    // than an exception, so a NaN check is all that is needed.
    const ms = new Date(dateStr).getTime()
    return Number.isNaN(ms) ? 0 : ms
  }

  /**
   * @param {string} path - File path of a message.
   * @returns {string} Name of the first `<name>.mbox` path segment, or
   *   'Unknown' when the path has none.
   */
  const extractMailbox = (path) => {
    const match = path.match(/([^/]+)\.mbox/)
    return match ? match[1] : 'Unknown'
  }

  /**
   * @param {string} mailbox - Mailbox name.
   * @returns {boolean} True when the name contains "sent" (case-insensitive).
   */
  const isSentMailbox = (mailbox) => mailbox.toLowerCase().includes('sent')

  describe('fromEmail extraction', () => {
    it('should extract email from "Name <email>" format', () => {
      const fromHeader = 'John Doe <john@example.com>'
      expect(extractEmail(fromHeader)).toBe('john@example.com')
    })

    it('should handle plain email without name', () => {
      const fromHeader = 'jane@example.com'
      expect(extractEmail(fromHeader)).toBe('jane@example.com')
    })

    it('should lowercase email addresses', () => {
      const fromHeader = 'John <JOHN@EXAMPLE.COM>'
      expect(extractEmail(fromHeader)).toBe('john@example.com')
    })
  })

  describe('toEmails extraction', () => {
    it('should extract multiple recipients', () => {
      const toHeader = 'Jane <jane@example.com>, Bob <bob@example.com>'
      expect(extractEmails(toHeader)).toEqual(['jane@example.com', 'bob@example.com'])
    })

    it('should handle single recipient', () => {
      const toHeader = 'recipient@example.com'
      expect(extractEmails(toHeader)).toEqual(['recipient@example.com'])
    })
  })

  describe('attachment detection', () => {
    it('should detect Content-Disposition: attachment', () => {
      const content = 'Content-Disposition: attachment; filename="doc.pdf"'
      expect(ATTACHMENT_PATTERNS.disposition.test(content)).toBe(true)
    })

    it('should detect multipart/mixed', () => {
      const content = 'Content-Type: multipart/mixed; boundary="boundary"'
      expect(ATTACHMENT_PATTERNS.multipartMixed.test(content)).toBe(true)
    })

    it('should detect filename= in headers', () => {
      const content = 'Content-Type: application/pdf; filename="report.pdf"'
      expect(ATTACHMENT_PATTERNS.filename.test(content)).toBe(true)
    })

    it('should not detect attachment in plain text email', () => {
      // Joined with '\n' so the fixture does not depend on source indentation.
      const content = [
        'From: sender@example.com',
        'Content-Type: text/plain',
        'This is plain text.'
      ].join('\n')
      expect(hasAttachment(content)).toBe(false)
    })
  })

  describe('date to timestamp conversion', () => {
    it('should parse RFC 2822 date format', () => {
      const dateStr = 'Mon, 1 Jan 2024 12:00:00 -0800'
      const timestamp = toTimestamp(dateStr)
      expect(timestamp).toBeGreaterThan(0)
      // 12:00 at UTC-8 is 20:00 UTC on the same day.
      expect(timestamp).toBe(new Date('2024-01-01T20:00:00.000Z').getTime())
    })

    it('should handle invalid date gracefully', () => {
      const dateStr = 'invalid date string'
      expect(toTimestamp(dateStr)).toBe(0)
    })
  })

  describe('mailbox extraction from path', () => {
    it('should extract mailbox name from path', () => {
      const filePath = '/Users/test/Library/Mail/V10/Account/INBOX.mbox/123.emlx'
      expect(extractMailbox(filePath)).toBe('INBOX')
    })

    it('should handle Sent Messages mailbox', () => {
      const filePath = '/Users/test/Library/Mail/V10/Account/Sent Messages.mbox/456.emlx'
      expect(extractMailbox(filePath)).toBe('Sent Messages')
    })
  })

  describe('isSent detection', () => {
    it('should detect sent emails from mailbox name', () => {
      expect(isSentMailbox('Sent Messages')).toBe(true)
    })

    it('should not mark inbox emails as sent', () => {
      expect(isSentMailbox('INBOX')).toBe(false)
    })
  })
})
/**
 * Message field-mapping checks: sender resolution, group-chat detection and
 * Mac Absolute Time <-> Unix time conversion.
 *
 * Each rule lives in one suite-level helper so that every test exercises the
 * same expression instead of a per-test copy.
 */
describe('Message Field Mapping', () => {
  /**
   * @param {{ is_from_me: number, handle_id: (string|null) }} message
   * @returns {string|null} 'Me' when `is_from_me` is exactly 1, otherwise the
   *   message's `handle_id`.
   */
  const resolveSender = (message) =>
    message.is_from_me === 1 ? 'Me' : message.handle_id

  /**
   * @param {{ participantCount: (number|string), chatName: string }} message
   * @returns {boolean} True when there are more than two participants or the
   *   chat has a non-empty name. Always a strict boolean.
   */
  const isGroupChat = (message) => {
    // Explicit radix; a value that does not parse counts as 0 participants.
    const participants = Number.parseInt(message.participantCount, 10) || 0
    // Boolean() prevents an empty chatName ('') from leaking out of the `&&`
    // as the result, which would break strict `toBe(false)` comparisons.
    const hasChatName = Boolean(message.chatName && message.chatName.length > 0)
    return participants > 2 || hasChatName
  }

  /**
   * @param {number} macSeconds - Seconds in Mac Absolute Time.
   * @returns {number} Seconds in Unix time (input plus MAC_ABSOLUTE_EPOCH).
   */
  const macToUnix = (macSeconds) => macSeconds + MAC_ABSOLUTE_EPOCH

  /**
   * @param {number} unixSeconds - Seconds in Unix time.
   * @returns {number} Seconds in Mac Absolute Time (input minus MAC_ABSOLUTE_EPOCH).
   */
  const unixToMac = (unixSeconds) => unixSeconds - MAC_ABSOLUTE_EPOCH

  describe('sender handle extraction', () => {
    it('should identify "Me" for sent messages', () => {
      const message = { is_from_me: 1, handle_id: null }
      expect(resolveSender(message)).toBe('Me')
    })

    it('should use handle_id for received messages', () => {
      const message = { is_from_me: 0, handle_id: '+15551234567' }
      expect(resolveSender(message)).toBe('+15551234567')
    })
  })

  describe('group chat detection', () => {
    it('should detect group chat when participantCount > 2', () => {
      const message = { participantCount: 5, chatName: '' }
      expect(isGroupChat(message)).toBe(true)
    })

    it('should detect group chat when chatName exists', () => {
      const message = { participantCount: 2, chatName: 'Family Group' }
      expect(isGroupChat(message)).toBe(true)
    })

    it('should not detect 1-on-1 as group chat', () => {
      const message = { participantCount: 2, chatName: '' }
      expect(isGroupChat(message)).toBe(false)
    })
  })

  describe('Mac Absolute Time conversion', () => {
    it('should convert to Unix timestamp correctly', () => {
      // Mac Absolute Time: seconds since 2001-01-01 00:00:00 UTC
      // Unix timestamp: seconds since 1970-01-01 00:00:00 UTC
      // Difference: 978307200 seconds
      const macTime = 0 // 2001-01-01 00:00:00 UTC
      expect(macToUnix(macTime)).toBe(978307200) // 2001-01-01 in Unix time
    })

    it('should handle recent timestamps', () => {
      // Round trip: Unix -> Mac -> Unix must give back the starting value.
      const now = Math.floor(Date.now() / 1000)
      const convertedBack = macToUnix(unixToMac(now))
      expect(convertedBack).toBe(now)
    })
  })
})
/**
 * Calendar field-mapping checks: title mapping, all-day detection, attendee
 * status handling and Mac Absolute Time formatting.
 */
describe('Calendar Field Mapping', () => {
  describe('event title from summary', () => {
    it('should map summary to title', () => {
      // The title is simply the event's `summary` field.
      const { summary: title } = { summary: 'Team Meeting' }
      expect(title).toBe('Team Meeting')
    })

    it('should skip events without summary', () => {
      const candidates = ['Meeting A', null, '', 'Meeting B'].map((summary) => ({ summary }))
      // null and '' are both falsy, so only the two named meetings remain.
      const kept = candidates.filter((candidate) => Boolean(candidate.summary))
      expect(kept).toHaveLength(2)
    })
  })

  describe('all-day event detection', () => {
    // An event counts as all-day only when its `all_day` flag is exactly 1.
    const flagsAllDay = (row) => row.all_day === 1

    it('should identify all-day events', () => {
      expect(flagsAllDay({ all_day: 1 })).toBe(true)
    })

    it('should identify timed events', () => {
      expect(flagsAllDay({ all_day: 0 })).toBe(false)
    })
  })

  describe('attendees extraction', () => {
    it('should include attendee status', () => {
      const [first, second, third] = [
        { name: 'John', status: 'accepted' },
        { name: 'Jane', status: 'tentative' },
        { name: 'Bob', status: 'declined' }
      ]
      expect(first.status).toBe('accepted')
      expect(second.status).toBe('tentative')
      expect(third.status).toBe('declined')
    })

    it('should map status codes to strings', () => {
      // Lookup table from numeric status code to its label.
      const labelByCode = {
        0: 'unknown',
        1: 'accepted',
        2: 'declined',
        3: 'tentative',
        4: 'pending',
        7: 'needs-action'
      }
      expect(labelByCode[1]).toBe('accepted')
      expect(labelByCode[2]).toBe('declined')
      expect(labelByCode[3]).toBe('tentative')
    })

    it('should serialize attendees to JSON', () => {
      const roster = [{ name: 'John', status: 'accepted' }]
      expect(JSON.stringify(roster)).toBe('[{"name":"John","status":"accepted"}]')
    })
  })

  describe('time conversions', () => {
    it('should format Mac Absolute Time to readable date', () => {
      // Sample Mac Absolute Time value.
      // NOTE(review): this is 2025-01-01T00:00:00Z if MAC_ABSOLUTE_EPOCH is 978307200 - confirm.
      const sampleMacSeconds = 757382400

      // Shift to Unix seconds, scale to milliseconds, then render.
      const toReadable = (seconds) => {
        const unixSeconds = seconds + MAC_ABSOLUTE_EPOCH
        const asDate = new Date(unixSeconds * 1000)
        return asDate.toLocaleString()
      }

      // Only the type and non-emptiness of the rendered text are checked,
      // not its exact content.
      const rendered = toReadable(sampleMacSeconds)
      expect(typeof rendered).toBe('string')
      expect(rendered.length).toBeGreaterThan(0)
    })
  })
})
/**
 * Search-text checks: the concatenated search text for each record type is
 * cut to a fixed maximum length (1000 for email, 500 for messages and
 * calendar events).
 */
describe('Search Text Building', () => {
  // Joins the given lines with newlines and keeps at most `limit` characters.
  const buildSearchText = (lines, limit) => lines.join('\n').slice(0, limit)

  it('should truncate email search text to 1000 chars', () => {
    const oversizedBody = 'x'.repeat(2000)
    const searchText = buildSearchText(
      ['From: sender@example.com', 'To: recipient@example.com', 'Subject: Test', oversizedBody],
      1000
    )
    expect(searchText).toHaveLength(1000)
  })

  it('should truncate message search text to 500 chars', () => {
    const oversizedText = 'x'.repeat(1000)
    const searchText = buildSearchText(['From: Me', `Message: ${oversizedText}`], 500)
    expect(searchText).toHaveLength(500)
  })

  it('should truncate calendar search text to 500 chars', () => {
    const oversizedNotes = 'x'.repeat(1000)
    const searchText = buildSearchText(
      ['Event: Meeting', 'Calendar: Work', 'Location: Office', `Notes: ${oversizedNotes}`],
      500
    )
    expect(searchText).toHaveLength(500)
  })
})