Skip to main content
Glama
parsing.test.js (11.5 kB)
/**
 * Unit tests for data parsing functions in indexer.js
 * Tests email parsing, date conversion, and extraction utilities
 *
 * The helpers exercised here are local copies of the patterns used by the
 * production modules (the parsing functions themselves are internal), so each
 * helper below documents the exact behavior it pins.
 */

import { describe, it, expect, vi, beforeEach } from 'vitest'
import fs from 'fs'
import path from 'path'

// Mock dependencies
vi.mock('fs', () => ({
  default: {
    existsSync: vi.fn(),
    readFileSync: vi.fn(),
    writeFileSync: vi.fn(),
    mkdirSync: vi.fn()
  },
  existsSync: vi.fn(),
  readFileSync: vi.fn(),
  writeFileSync: vi.fn(),
  mkdirSync: vi.fn()
}))

vi.mock('@lancedb/lancedb', () => ({
  connect: vi.fn()
}))

vi.mock('@xenova/transformers', () => ({
  pipeline: vi.fn()
}))

vi.mock('../../lib/shell.js', () => ({
  safeSqlite3Json: vi.fn(),
  safeOsascript: vi.fn()
}))

// Import module after mocking (need to test exported functions)
// Since most parsing functions are internal, we'll test via the module's behavior
// For now, we'll test the utility patterns directly

describe('Email Parsing Patterns', () => {
  /**
   * Extracts a lowercased address from a single recipient string.
   *
   * @param {string|null|undefined} str - e.g. `Name <a@b.com>` or a bare address.
   * @returns {string} The text between the first `<...>` pair, lowercased; otherwise
   *   the trimmed, lowercased input; `""` for falsy input.
   */
  const extractEmail = (str) => {
    if (!str) return ""
    const match = str.match(/<([^>]+)>/)
    if (match) return match[1].toLowerCase()
    // Bare addresses and non-address text take the same path: trim + lowercase.
    return str.trim().toLowerCase()
  }

  /**
   * Extracts every address from a comma-separated recipient list.
   *
   * @param {string|null|undefined} str - Comma-separated recipients.
   * @returns {string[]} Lowercased addresses, keeping only parts that contain "@".
   */
  const extractEmails = (str) => {
    if (!str) return []
    // Splitting on "," also splits quoted display names such as "Smith, Bob";
    // the fragment without an "@" is discarded by the filter below.
    return str
      .split(",")
      .map((part) => extractEmail(part.trim()))
      .filter((email) => email.includes("@"))
  }

  describe('extractEmail pattern', () => {
    // Testing the regex pattern used in extractEmail

    it('should extract email from "Name <email>" format', () => {
      expect(extractEmail('John Doe <john@example.com>')).toBe('john@example.com')
    })

    it('should extract email from complex name format', () => {
      expect(extractEmail('"Doe, John" <john.doe@company.com>')).toBe('john.doe@company.com')
    })

    it('should handle bare email addresses', () => {
      expect(extractEmail('john@example.com')).toBe('john@example.com')
    })

    it('should lowercase emails', () => {
      expect(extractEmail('JOHN@EXAMPLE.COM')).toBe('john@example.com')
    })

    it('should handle empty/null input', () => {
      expect(extractEmail(null)).toBe('')
      expect(extractEmail('')).toBe('')
      expect(extractEmail(undefined)).toBe('')
    })

    it('should trim whitespace', () => {
      expect(extractEmail('  john@example.com  ')).toBe('john@example.com')
    })
  })

  describe('extractEmails pattern (multiple recipients)', () => {
    it('should extract multiple comma-separated emails', () => {
      const input = 'John <john@a.com>, Jane <jane@b.com>, Bob <bob@c.com>'
      const result = extractEmails(input)
      expect(result).toEqual(['john@a.com', 'jane@b.com', 'bob@c.com'])
    })

    it('should handle mixed formats', () => {
      const input = 'John <john@a.com>, jane@b.com, "Smith, Bob" <bob@c.com>'
      const result = extractEmails(input)
      expect(result).toContain('john@a.com')
      expect(result).toContain('jane@b.com')
      expect(result).toContain('bob@c.com')
    })

    it('should filter out non-email strings', () => {
      const input = 'John <john@a.com>, Not An Email, jane@b.com'
      const result = extractEmails(input)
      expect(result).toEqual(['john@a.com', 'jane@b.com'])
    })

    it('should handle empty input', () => {
      expect(extractEmails(null)).toEqual([])
      expect(extractEmails('')).toEqual([])
    })
  })

  describe('parseDateTime pattern', () => {
    /**
     * Converts a date string to epoch milliseconds.
     *
     * @param {string|null|undefined} dateStr - Date text to parse.
     * @returns {number} Epoch milliseconds, or 0 when the input is falsy,
     *   unparseable, or parsing throws.
     */
    const parseDateTime = (dateStr) => {
      if (!dateStr) return 0
      try {
        let d = new Date(dateStr)
        if (!Number.isNaN(d.getTime())) return d.getTime()

        // Handle AppleScript format ("Friday, January 10, 2025 at 9:00:00 AM"):
        // drop the weekday and the word "at", then re-parse the remainder.
        const appleMatch = dateStr.match(/(\w+), (\w+ \d+, \d+) at (\d+:\d+:\d+ [AP]M)/i)
        if (appleMatch) {
          d = new Date(`${appleMatch[2]} ${appleMatch[3]}`)
          if (!Number.isNaN(d.getTime())) return d.getTime()
        }
        return 0
      } catch {
        // Best-effort parser: any failure is reported as "no date" (0).
        return 0
      }
    }

    it('should parse ISO date strings', () => {
      const result = parseDateTime('2024-01-15T10:30:00Z')
      expect(result).toBeGreaterThan(0)
    })

    it('should parse RFC 2822 email date format', () => {
      const result = parseDateTime('Mon, 15 Jan 2024 10:30:00 -0800')
      expect(result).toBeGreaterThan(0)
    })

    it('should parse AppleScript date format', () => {
      const result = parseDateTime('Friday, January 10, 2025 at 9:00:00 AM')
      expect(result).toBeGreaterThan(0)
    })

    it('should return 0 for invalid dates', () => {
      expect(parseDateTime('not a date')).toBe(0)
      expect(parseDateTime('')).toBe(0)
      expect(parseDateTime(null)).toBe(0)
    })
  })

  describe('extractMailbox pattern', () => {
    /**
     * Returns the path segment that precedes ".mbox" in a file path.
     *
     * @param {string} filePath - Path to a message file.
     * @returns {string} The mailbox name, or "Unknown" when no ".mbox" segment exists.
     */
    const extractMailbox = (filePath) => {
      const match = filePath.match(/([^/]+)\.mbox/)
      return match ? match[1] : "Unknown"
    }

    it('should extract mailbox name from path', () => {
      expect(extractMailbox('/Users/test/Library/Mail/V10/INBOX.mbox/message.emlx')).toBe('INBOX')
    })

    it('should extract Sent mailbox', () => {
      expect(extractMailbox('/Users/test/Library/Mail/V10/Sent Messages.mbox/123.emlx')).toBe('Sent Messages')
    })

    it('should return Unknown for invalid paths', () => {
      expect(extractMailbox('/Users/test/no-mailbox/file.txt')).toBe('Unknown')
    })
  })
})

describe('Mac Absolute Time Conversion', () => {
  // Mac Absolute Time epoch: Jan 1, 2001 00:00:00 UTC
  const MAC_ABSOLUTE_EPOCH = 978307200

  /**
   * Converts Mac Absolute Time to a Unix timestamp in milliseconds.
   *
   * @param {number} macTime - Seconds since Jan 1, 2001.
   * @returns {number} Milliseconds since the Unix epoch.
   */
  const macAbsoluteToUnix = (macTime) => {
    // macTime is in seconds since Jan 1, 2001
    return (macTime + MAC_ABSOLUTE_EPOCH) * 1000 // Convert to milliseconds
  }

  it('should convert Mac Absolute Time to Unix timestamp', () => {
    // Mac Absolute Time for a known date
    // Jan 1, 2024 00:00:00 UTC = 1704067200 Unix timestamp
    // 1704067200 - 978307200 = 725760000 Mac Absolute Time
    const macTime = 725760000
    const result = macAbsoluteToUnix(macTime)
    const date = new Date(result)
    expect(date.getUTCFullYear()).toBe(2024)
    expect(date.getUTCMonth()).toBe(0) // January
    expect(date.getUTCDate()).toBe(1)
  })

  it('should handle the epoch correctly', () => {
    // Mac Absolute Time 0 = Jan 1, 2001 00:00:00 UTC
    const result = macAbsoluteToUnix(0)
    const date = new Date(result)
    expect(date.getUTCFullYear()).toBe(2001)
    expect(date.getUTCMonth()).toBe(0)
    expect(date.getUTCDate()).toBe(1)
  })
})

describe('EMLX Parsing Patterns', () => {
  describe('Header extraction', () => {
    // Built with join('\n') so every header starts at column 0 regardless of
    // how this file is indented; the `^Header:` regexes below depend on that.
    const sampleEmlx = [
      'From: John Doe <john@example.com>',
      'To: Jane Smith <jane@example.com>',
      'Subject: Test Subject Line',
      'Date: Mon, 15 Jan 2024 10:30:00 -0800',
      'Message-ID: <unique123@example.com>',
      'Content-Type: text/plain; charset="utf-8"',
      '',
      'This is the body of the email.',
      'It has multiple lines.',
      ''
    ].join('\n')

    it('should extract From header', () => {
      const match = sampleEmlx.match(/^From:\s*(.+)$/m)
      expect(match).not.toBeNull()
      expect(match[1]).toBe('John Doe <john@example.com>')
    })

    it('should extract To header', () => {
      const match = sampleEmlx.match(/^To:\s*(.+)$/m)
      expect(match).not.toBeNull()
      expect(match[1]).toBe('Jane Smith <jane@example.com>')
    })

    it('should extract Subject header', () => {
      const match = sampleEmlx.match(/^Subject:\s*(.+)$/m)
      expect(match).not.toBeNull()
      expect(match[1]).toBe('Test Subject Line')
    })

    it('should extract Date header', () => {
      const match = sampleEmlx.match(/^Date:\s*(.+)$/m)
      expect(match).not.toBeNull()
      expect(match[1]).toBe('Mon, 15 Jan 2024 10:30:00 -0800')
    })

    it('should extract Message-ID header', () => {
      const match = sampleEmlx.match(/^Message-ID:\s*(.+)$/im)
      expect(match).not.toBeNull()
      expect(match[1]).toBe('<unique123@example.com>')
    })
  })

  describe('Attachment detection', () => {
    // None of these regexes use the /g flag, so sharing them across tests is
    // safe (no lastIndex state is carried between .test() calls).
    const DISPOSITION_ATTACHMENT = /Content-Disposition:\s*attachment/i
    const MULTIPART_MIXED = /multipart\/mixed/i
    const FILENAME_PARAM = /filename=/i

    /** True when any of the three attachment markers appears in `content`. */
    const hasAttachment = (content) =>
      DISPOSITION_ATTACHMENT.test(content) ||
      MULTIPART_MIXED.test(content) ||
      FILENAME_PARAM.test(content)

    it('should detect Content-Disposition attachment', () => {
      const content = 'Content-Disposition: attachment; filename="doc.pdf"'
      expect(DISPOSITION_ATTACHMENT.test(content)).toBe(true)
    })

    it('should detect multipart/mixed', () => {
      const content = 'Content-Type: multipart/mixed; boundary="----=_Part_123"'
      expect(MULTIPART_MIXED.test(content)).toBe(true)
    })

    it('should detect filename parameter', () => {
      const content = 'Content-Type: application/pdf; filename="report.pdf"'
      expect(FILENAME_PARAM.test(content)).toBe(true)
    })

    it('should not detect plain text as attachment', () => {
      const content = 'Content-Type: text/plain; charset="utf-8"\n\nJust text'
      expect(hasAttachment(content)).toBe(false)
    })
  })

  describe('Sent mail detection', () => {
    /** True when the mailbox name contains "sent", case-insensitively. */
    const isSent = (mailbox) => mailbox.toLowerCase().includes('sent')

    it('should detect Sent mailbox', () => {
      expect(isSent('Sent')).toBe(true)
      expect(isSent('Sent Messages')).toBe(true)
      expect(isSent('SENT')).toBe(true)
    })

    it('should not flag inbox as sent', () => {
      expect(isSent('INBOX')).toBe(false)
      expect(isSent('Archive')).toBe(false)
    })
  })

  describe('Body extraction', () => {
    /**
     * Returns a bounded slice of the text after the first blank line.
     *
     * @param {string} content - Raw message text (headers, blank line, body).
     * @returns {string} Body text starting 2 characters after the boundary match
     *   and ending at most 2000 characters after it; '' when no boundary is
     *   found or the boundary is at index 0.
     */
    const extractBody = (content) => {
      const headerEnd = content.search(/\r?\n\r?\n/)
      if (headerEnd > 0) {
        // NOTE(review): the slice runs from headerEnd + 2 to headerEnd + 2000,
        // so it holds at most 1998 characters, and for a "\r\n\r\n" boundary the
        // body starts with the second "\r\n". Kept as-is to mirror the pattern
        // under test - confirm this is intended in the indexer.
        return content.substring(headerEnd + 2, Math.min(headerEnd + 2000, content.length))
      }
      return ''
    }

    it('should find header/body boundary', () => {
      const content = 'Header: value\n\nBody starts here'
      const body = extractBody(content)
      expect(body).toContain('Body starts here')
    })

    it('should limit body length', () => {
      const header = 'Header: value\n\n'
      const longBody = 'x'.repeat(5000)
      const content = header + longBody
      const body = extractBody(content)
      // Guard against a vacuous pass: an empty body would also be <= 2000.
      expect(body.length).toBeGreaterThan(0)
      expect(body.length).toBeLessThanOrEqual(2000)
    })
  })
})

describe('Phone Number Normalization', () => {
  // Testing the normalizePhone pattern from contacts.js

  /**
   * Reduces a phone number to its digits, keeping a leading "+" if present.
   *
   * @param {string|null|undefined} phone - Phone number in any formatting.
   * @returns {string} Digits only; an 11-digit number starting with "1" loses
   *   that leading "1"; the result is prefixed with "+" when the input started
   *   with "+". Returns "" for falsy input.
   */
  const normalizePhone = (phone) => {
    if (!phone) return ""
    const hasPlus = phone.startsWith("+")
    const digits = phone.replace(/\D/g, "")
    const normalized = digits.length === 11 && digits.startsWith("1") ? digits.slice(1) : digits
    return hasPlus ? `+${normalized}` : normalized
  }

  it('should strip non-digit characters', () => {
    expect(normalizePhone('(555) 123-4567')).toBe('5551234567')
  })

  it('should preserve leading + for international', () => {
    // NOTE(review): the "1" is stripped but the "+" is kept, yielding
    // "+5551234567". This pins the current pattern - confirm it is the
    // intended canonical form in contacts.js.
    expect(normalizePhone('+1 555 123 4567')).toBe('+5551234567')
  })

  it('should normalize US 11-digit to 10-digit', () => {
    expect(normalizePhone('1-800-555-0100')).toBe('8005550100')
  })

  it('should handle empty input', () => {
    expect(normalizePhone(null)).toBe('')
    expect(normalizePhone('')).toBe('')
  })

  it('should handle international numbers', () => {
    expect(normalizePhone('+44 20 7946 0958')).toBe('+442079460958')
  })
})

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.