Skip to main content
Glama
email-edge-cases.test.js (11.1 kB)
/**
 * Edge case tests for email indexing
 * Tests partial IMAP, HTML stripping, flagged emails, and header parsing
 *
 * These tests exercise the regular expressions and string operations inline;
 * they do not call into the indexer itself.
 */
import { describe, it, expect, beforeEach, vi } from 'vitest'
import { createEmbeddingMock, EMBEDDING_DIM } from '../helpers/indexing-mocks.js'
import { generateEmailContent } from '../helpers/test-data-generators.js'

/**
 * Convert a simple glob (only `*` is treated as a wildcard) into an anchored RegExp.
 * Regex metacharacters such as `.` are escaped so they match literally, and the
 * pattern is anchored so it must match the whole file name.
 *
 * @param {string} glob - Glob pattern, e.g. `*.emlx`
 * @returns {RegExp} Anchored regular expression equivalent to the glob
 */
const globToRegExp = (glob) => {
  const escaped = glob
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*')
  return new RegExp(`^${escaped}$`)
}

describe('Email Edge Cases', () => {
  let mockEmbedder

  beforeEach(() => {
    vi.clearAllMocks()
    const mock = createEmbeddingMock()
    mockEmbedder = mock.mockEmbedder
  })

  describe('.partial.emlx files', () => {
    it('should recognize partial IMAP file extension', () => {
      const partialPath = '/Users/test/Library/Mail/V10/Account/INBOX.mbox/123.partial.emlx'
      const normalPath = '/Users/test/Library/Mail/V10/Account/INBOX.mbox/123.emlx'

      expect(partialPath.endsWith('.partial.emlx')).toBe(true)
      expect(normalPath.endsWith('.partial.emlx')).toBe(false)
      expect(normalPath.endsWith('.emlx')).toBe(true)
    })

    it('should index both .emlx and .partial.emlx patterns', () => {
      const patterns = ['*.emlx', '*.partial.emlx']
      const testFiles = [
        '123.emlx',
        '456.partial.emlx',
        '789.emlx',
        // Negative cases: must not match once the glob is escaped and anchored
        '123.emlx.bak',
        '123xemlx',
        'notes.txt'
      ]

      const regexes = patterns.map(globToRegExp)
      const matchingFiles = testFiles.filter(f => regexes.some(re => re.test(f)))

      expect(matchingFiles).toEqual(['123.emlx', '456.partial.emlx', '789.emlx'])
    })
  })

  describe('isFlagged detection', () => {
    it('should detect X-Flagged header', () => {
      const content = `From: sender@example.com
To: recipient@example.com
Subject: Important
X-Flagged: Yes

Body text`

      const isFlagged = /X-Flagged:\s*Yes/i.test(content)
      expect(isFlagged).toBe(true)
    })

    it('should handle missing X-Flagged header', () => {
      const content = `From: sender@example.com
To: recipient@example.com
Subject: Normal email

Body text`

      const isFlagged = /X-Flagged:\s*Yes/i.test(content)
      expect(isFlagged).toBe(false)
    })

    it('should be case-insensitive', () => {
      const variants = [
        'X-Flagged: yes',
        'X-FLAGGED: YES',
        'x-flagged: Yes',
        'X-Flagged:Yes' // No space
      ]

      for (const header of variants) {
        const content = `From: test@test.com\n${header}\n\nBody`
        const isFlagged = /X-Flagged:\s*Yes/i.test(content)
        expect(isFlagged).toBe(true)
      }
    })
  })

  describe('messageId extraction', () => {
    it('should extract Message-ID header', () => {
      const content = `From: sender@example.com
Message-ID: <abc123@example.com>
Subject: Test

Body`

      const match = content.match(/Message-ID:\s*<([^>]+)>/i)
      const messageId = match ? match[1] : null

      expect(messageId).toBe('abc123@example.com')
    })

    it('should handle complex Message-ID formats', () => {
      const messageIds = [
        '<CADq1234.abc@mail.gmail.com>',
        '<20240101120000.123456@example.com>',
        '<unique-id-with-dashes@host.domain.tld>'
      ]

      for (const id of messageIds) {
        const content = `Message-ID: ${id}\n\nBody`
        const match = content.match(/Message-ID:\s*<([^>]+)>/i)
        expect(match).not.toBeNull()
      }
    })

    it('should handle missing Message-ID gracefully', () => {
      const content = `From: sender@example.com
Subject: No message ID

Body`

      const match = content.match(/Message-ID:\s*<([^>]+)>/i)
      expect(match).toBeNull()
    })
  })

  describe('HTML body stripping', () => {
    it('should strip basic HTML tags', () => {
      const html = '<html><body><p>Hello <b>World</b>!</p></body></html>'
      // Safe regex-based stripping (no ReDoS vulnerability)
      const stripped = html.replace(/<[^>]*>/g, '')
      expect(stripped).toBe('Hello World!')
    })

    it('should handle nested tags', () => {
      const html = '<div><span><strong>Text</strong></span></div>'
      const stripped = html.replace(/<[^>]*>/g, '')
      expect(stripped).toBe('Text')
    })

    it('should preserve text between tags', () => {
      const html = '<p>First</p><p>Second</p><p>Third</p>'
      const stripped = html.replace(/<[^>]*>/g, '')
      expect(stripped).toBe('FirstSecondThird')
    })

    it('should handle self-closing tags', () => {
      const html = 'Line1<br/>Line2<hr/>Line3'
      const stripped = html.replace(/<[^>]*>/g, '')
      expect(stripped).toBe('Line1Line2Line3')
    })

    it('should handle attributes with quotes', () => {
      const html = '<a href="https://example.com">Link</a>'
      const stripped = html.replace(/<[^>]*>/g, '')
      expect(stripped).toBe('Link')
    })

    it('should not be vulnerable to ReDoS', () => {
      // Malicious input that could cause ReDoS with naive regex
      const malicious = '<' + 'a'.repeat(1000) + '>'
      const start = performance.now()
      malicious.replace(/<[^>]*>/g, '')
      const duration = performance.now() - start

      // Should complete in < 100ms even for malicious input
      expect(duration).toBeLessThan(100)
    })
  })

  describe('body truncation', () => {
    it('should truncate body to 500 chars', () => {
      const longBody = 'x'.repeat(1000)
      const truncated = longBody.substring(0, 500)
      expect(truncated.length).toBe(500)
    })

    it('should not truncate short bodies', () => {
      const shortBody = 'Short email body'
      const maxLength = 500
      const result = shortBody.length > maxLength
        ? shortBody.substring(0, maxLength)
        : shortBody
      expect(result).toBe(shortBody)
    })

    it('should handle exactly 500 chars', () => {
      const exactBody = 'x'.repeat(500)
      const truncated = exactBody.substring(0, 500)
      expect(truncated.length).toBe(500)
    })
  })

  describe('RFC 2822 date variants', () => {
    it('should parse standard RFC 2822 format', () => {
      const dateStr = 'Mon, 1 Jan 2024 12:00:00 -0800'
      const date = new Date(dateStr)
      expect(date.getTime()).toBeGreaterThan(0)
    })

    it('should parse without day of week', () => {
      const dateStr = '1 Jan 2024 12:00:00 -0800'
      const date = new Date(dateStr)
      expect(date.getTime()).toBeGreaterThan(0)
    })

    it('should parse with timezone name', () => {
      const dateStr = 'Mon, 1 Jan 2024 12:00:00 PST'
      // May be invalid in some JS engines, but should not throw
      expect(() => new Date(dateStr)).not.toThrow()
    })

    it('should parse ISO 8601 format', () => {
      const dateStr = '2024-01-01T12:00:00-08:00'
      const date = new Date(dateStr)
      expect(date.getTime()).toBeGreaterThan(0)
    })

    it('should handle invalid dates gracefully', () => {
      const invalidDates = [
        'not a date',
        '',
        '32 Jan 2024',
        'Feb 30 2024'
      ]

      for (const dateStr of invalidDates) {
        // Should either be NaN or a weird date, but not throw
        expect(() => new Date(dateStr)).not.toThrow()
      }
    })
  })

  describe('multi-part MIME handling', () => {
    it('should detect multipart/mixed', () => {
      const content = `Content-Type: multipart/mixed; boundary="----=_Part_123"

------=_Part_123
Content-Type: text/plain

Body text
------=_Part_123
Content-Type: application/pdf
Content-Disposition: attachment; filename="doc.pdf"

[binary data]
------=_Part_123--`

      const isMultipart = /multipart\/mixed/i.test(content)
      expect(isMultipart).toBe(true)
    })

    it('should detect multipart/alternative', () => {
      const content = `Content-Type: multipart/alternative; boundary="alt_boundary"

--alt_boundary
Content-Type: text/plain

Plain text version
--alt_boundary
Content-Type: text/html

<html>HTML version</html>
--alt_boundary--`

      const isMultipart = /multipart\/(mixed|alternative)/i.test(content)
      expect(isMultipart).toBe(true)
    })

    it('should extract boundary parameter', () => {
      const content = 'Content-Type: multipart/mixed; boundary="----=_Part_123_456"'
      const match = content.match(/boundary="([^"]+)"/i)
      const boundary = match ? match[1] : null

      expect(boundary).toBe('----=_Part_123_456')
    })
  })

  describe('empty or missing headers', () => {
    it('should handle missing Subject header', () => {
      const content = `From: sender@example.com
To: recipient@example.com

Body without subject`

      const match = content.match(/^Subject:\s*(.*)$/im)
      const subject = match ? match[1] : '(No Subject)'

      expect(subject).toBe('(No Subject)')
    })

    it('should handle empty Subject', () => {
      const content = `From: sender@example.com
Subject:
To: recipient@example.com

Body`

      // Use [ \t]* instead of \s* to avoid matching newlines
      const match = content.match(/^Subject:[ \t]*(.*)$/im)
      const subject = match ? match[1].trim() || '(No Subject)' : '(No Subject)'

      expect(subject).toBe('(No Subject)')
    })

    it('should handle missing From header', () => {
      const content = `To: recipient@example.com
Subject: Test

Body`

      const match = content.match(/^From:\s*(.*)$/im)
      const from = match ? match[1] : 'Unknown Sender'

      expect(from).toBe('Unknown Sender')
    })
  })

  describe('encoded headers (MIME)', () => {
    it('should detect =?UTF-8?Q? encoded subject', () => {
      const encoded = '=?UTF-8?Q?Hello_=C3=A9=C3=A8?='
      const isEncoded = /=\?[^?]+\?[QBqb]\?[^?]+\?=/.test(encoded)
      expect(isEncoded).toBe(true)
    })

    it('should detect =?UTF-8?B? base64 encoded', () => {
      const encoded = '=?UTF-8?B?SGVsbG8gV29ybGQ=?='
      const isEncoded = /=\?[^?]+\?[QBqb]\?[^?]+\?=/.test(encoded)
      expect(isEncoded).toBe(true)
    })

    it('should handle multiple encoded words', () => {
      const encoded = '=?UTF-8?Q?Part1?= =?UTF-8?Q?Part2?='
      const parts = encoded.match(/=\?[^?]+\?[QBqb]\?[^?]+\?=/g)
      // toHaveLength fails with a clear assertion (not a TypeError) if match() returns null
      expect(parts).toHaveLength(2)
    })

    it('should handle non-encoded subjects', () => {
      const plain = 'Just a plain subject'
      const isEncoded = /=\?[^?]+\?[QBqb]\?[^?]+\?=/.test(plain)
      expect(isEncoded).toBe(false)
    })
  })

  describe('search text generation', () => {
    it('should combine relevant fields for search', () => {
      const email = {
        from: 'John Doe <john@example.com>',
        to: 'Jane Smith <jane@example.com>',
        subject: 'Meeting Tomorrow',
        body: 'Let\'s discuss the project'
      }

      const searchText = `${email.subject} ${email.from} ${email.to} ${email.body}`
        .substring(0, 1000)

      expect(searchText).toContain('Meeting Tomorrow')
      expect(searchText).toContain('John Doe')
      expect(searchText).toContain('project')
    })

    it('should truncate search text to 1000 chars', () => {
      const longSubject = 'x'.repeat(500)
      const longBody = 'y'.repeat(1000)

      const searchText = `${longSubject} ${longBody}`.substring(0, 1000)

      expect(searchText.length).toBe(1000)
    })
  })
})

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.