Skip to main content
Glama
real-data-edge-cases.test.js (9.27 kB)
/**
 * Real Data Edge Case Tests
 *
 * Tests edge cases using actual data from the system:
 * - Unicode/emoji handling
 * - Special characters
 * - Large content
 * - Empty/null fields
 */

import { describe, it, expect, beforeAll } from 'vitest'
import fs from 'fs'
import {
  checkDataSources,
  buildProductionIndex,
  isProductionIndexReady,
  searchProductionIndex,
  embed,
  sampleEmails,
  sampleMessages,
  sampleCalendarEvents,
  PRODUCTION_INDEX_DIR
} from '../helpers/real-data.js'

const sources = checkDataSources()

// Length every embedding vector is asserted to have in this file.
const EMBEDDING_DIMENSIONS = 384
// Per-test timeouts: single call vs. tests that loop over several inputs.
const TEST_TIMEOUT_MS = 30000
const BATCH_TEST_TIMEOUT_MS = 60000

// The index-backed suite is skipped when no data source at all is available,
// or when the production index itself is missing.
const hasNoDataSource = !sources.mail && !sources.messages && !sources.calendar
const skipIndexSuite = hasNoDataSource || !sources.productionIndex

/**
 * Asserts that a value looks like a valid embedding: the expected length and
 * only finite numbers.
 *
 * @param {ArrayLike<number>} vector - Value returned by `embed`.
 */
function expectValidEmbedding(vector) {
  expect(vector).toHaveLength(EMBEDDING_DIMENSIONS)
  expect(vector.every(v => Number.isFinite(v))).toBe(true)
}

/**
 * Runs a search and asserts the result is an array.
 *
 * @param {string} query - Search text.
 * @param {string} table - Table name passed to `searchProductionIndex`.
 * @param {number} [limit=5] - Maximum number of results requested.
 * @returns {Promise<Array>} The search results, for further assertions.
 */
async function expectSearchReturnsArray(query, table, limit = 5) {
  const results = await searchProductionIndex(query, table, limit)
  expect(Array.isArray(results)).toBe(true)
  return results
}

/**
 * Runs a search that is allowed to either resolve or reject, and reports which
 * one happened WITHOUT asserting anything.
 *
 * Keeping the assertions outside the try/catch matters: if `expect` runs inside
 * the `try`, a failed assertion is caught by the `catch` and the test passes
 * anyway.
 *
 * @param {string} query - Search text.
 * @param {string} table - Table name passed to `searchProductionIndex`.
 * @param {number} [limit=5] - Maximum number of results requested.
 * @returns {Promise<{ok: true, results: *} | {ok: false, error: *}>}
 */
async function attemptSearch(query, table, limit = 5) {
  try {
    return { ok: true, results: await searchProductionIndex(query, table, limit) }
  } catch (error) {
    return { ok: false, error }
  }
}

describe.skipIf(skipIndexSuite)('Real Data Edge Cases', () => {
  beforeAll(async () => {
    const ready = await isProductionIndexReady()
    if (!ready) {
      throw new Error('Production index not found. Run "npm run rebuild-index" first.')
    }
  }, TEST_TIMEOUT_MS)

  describe('Unicode Handling', () => {
    it('should handle emoji in search queries', async () => {
      // Search with emoji - should not throw
      await expectSearchReturnsArray('🎉 celebration party', 'emails')
    }, TEST_TIMEOUT_MS)

    it('should handle CJK characters in search', async () => {
      // Search with Chinese characters
      await expectSearchReturnsArray('会议 meeting 会议', 'emails')
    }, TEST_TIMEOUT_MS)

    it('should handle mixed scripts', async () => {
      await expectSearchReturnsArray('Hello 你好 مرحبا שלום', 'messages')
    }, TEST_TIMEOUT_MS)

    it('should generate valid embeddings for unicode text', async () => {
      const unicodeTexts = [
        '会议安排 - Meeting Schedule',
        'Réunion avec l\'équipe française',
        'Встреча с командой',
        '🎂 Birthday party 🎉'
      ]

      for (const text of unicodeTexts) {
        expectValidEmbedding(await embed(text))
      }
    }, BATCH_TEST_TIMEOUT_MS)
  })

  describe('Special Characters', () => {
    it('should handle special characters in search', async () => {
      const specialQueries = [
        'email@test.com',
        'Re: Fw: Important',
        'C:\\Users\\test',
        '50% off sale!',
        'Q&A session',
        '<script>alert</script>'
      ]

      for (const query of specialQueries) {
        await expectSearchReturnsArray(query, 'emails')
      }
    }, BATCH_TEST_TIMEOUT_MS)

    it('should handle SQL-like characters safely', async () => {
      // These should not cause SQL injection or errors
      const queries = [
        "O'Brien meeting",
        'SELECT * FROM users',
        "'; DROP TABLE --",
        'test = value'
      ]

      for (const query of queries) {
        await expectSearchReturnsArray(query, 'messages')
      }
    }, BATCH_TEST_TIMEOUT_MS)
  })

  describe('Empty and Edge Cases', () => {
    it('should handle empty string search', async () => {
      // Empty search should return results (all docs) or handle gracefully
      const outcome = await attemptSearch('', 'emails')
      if (outcome.ok) {
        expect(Array.isArray(outcome.results)).toBe(true)
      } else {
        // Empty search might throw - that's acceptable
        expect(outcome.error.message).toBeDefined()
      }
    }, TEST_TIMEOUT_MS)

    it('should handle whitespace-only search', async () => {
      const outcome = await attemptSearch(' ', 'emails')
      if (outcome.ok) {
        expect(Array.isArray(outcome.results)).toBe(true)
      } else {
        expect(outcome.error.message).toBeDefined()
      }
    }, TEST_TIMEOUT_MS)

    it('should handle very long search query', async () => {
      const longQuery = 'meeting '.repeat(100)
      await expectSearchReturnsArray(longQuery, 'emails')
    }, TEST_TIMEOUT_MS)

    it('should handle single character search', async () => {
      await expectSearchReturnsArray('a', 'emails')
    }, TEST_TIMEOUT_MS)
  })

  describe('Search Result Limits', () => {
    it('should respect limit parameter', async () => {
      const limit = 3
      const results = await searchProductionIndex('the', 'emails', limit)
      expect(results.length).toBeLessThanOrEqual(limit)
    }, TEST_TIMEOUT_MS)

    it('should handle limit of 1', async () => {
      const results = await searchProductionIndex('email', 'emails', 1)
      expect(results.length).toBeLessThanOrEqual(1)
    }, TEST_TIMEOUT_MS)

    it('should handle large limit', async () => {
      await expectSearchReturnsArray('meeting', 'emails', 1000)
    }, TEST_TIMEOUT_MS)
  })

  describe('Cross-Table Search', () => {
    it.skipIf(!sources.mail)('should search emails table', async () => {
      const results = await expectSearchReturnsArray('important', 'emails')
      for (const r of results) {
        expect(r).toHaveProperty('filePath')
      }
    }, TEST_TIMEOUT_MS)

    it.skipIf(!sources.messages)('should search messages table', async () => {
      const results = await expectSearchReturnsArray('hello', 'messages')
      for (const r of results) {
        expect(r).toHaveProperty('text')
      }
    }, TEST_TIMEOUT_MS)

    it.skipIf(!sources.calendar)('should search calendar table', async () => {
      const results = await expectSearchReturnsArray('meeting', 'calendar')
      for (const r of results) {
        expect(r).toHaveProperty('title')
      }
    }, TEST_TIMEOUT_MS)
  })

  describe('Error Handling', () => {
    // Either outcome (resolve or reject) is accepted, but each path must
    // assert something so the test cannot pass vacuously.
    it('should handle non-existent table gracefully', async () => {
      const outcome = await attemptSearch('test', 'nonexistent')
      if (outcome.ok) {
        expect(Array.isArray(outcome.results)).toBe(true)
      } else {
        expect(outcome.error.message).toContain('not found')
      }
    }, TEST_TIMEOUT_MS)

    it('should handle invalid table name', async () => {
      const outcome = await attemptSearch('test', '')
      if (outcome.ok) {
        expect(Array.isArray(outcome.results)).toBe(true)
      } else {
        expect(outcome.error).toBeDefined()
      }
    }, TEST_TIMEOUT_MS)
  })
})

describe('Embedding Edge Cases', () => {
  it('should handle empty string embedding', async () => {
    expectValidEmbedding(await embed(''))
  }, TEST_TIMEOUT_MS)

  it('should handle very long text embedding', async () => {
    const longText = 'This is a very long email about various topics. '.repeat(200)
    expectValidEmbedding(await embed(longText))
  }, TEST_TIMEOUT_MS)

  it('should handle numbers-only text', async () => {
    expectValidEmbedding(await embed('123456789012345678901234567890'))
  }, TEST_TIMEOUT_MS)

  it('should handle URL-like text', async () => {
    expectValidEmbedding(
      await embed('https://www.example.com/path/to/page?query=value&other=123')
    )
  }, TEST_TIMEOUT_MS)

  it('should handle email address text', async () => {
    expectValidEmbedding(await embed('john.doe@company.example.com'))
  }, TEST_TIMEOUT_MS)

  it('should handle code-like text', async () => {
    const code = `
      function hello() {
        console.log("Hello, World!");
        return { status: 200, message: "OK" };
      }
    `
    expectValidEmbedding(await embed(code))
  }, TEST_TIMEOUT_MS)
})

describe('Data Sampling', () => {
  it.skipIf(!sources.mail)('should sample real emails', async () => {
    const emails = await sampleEmails(10)
    expect(Array.isArray(emails)).toBe(true)
    // May have fewer if not enough emails exist
    expect(emails.length).toBeLessThanOrEqual(10)

    for (const emailPath of emails) {
      expect(emailPath).toMatch(/\.emlx$/)
      expect(fs.existsSync(emailPath)).toBe(true)
    }
  }, TEST_TIMEOUT_MS)

  it.skipIf(!sources.messages)('should sample real messages', async () => {
    const messages = await sampleMessages(10)
    expect(Array.isArray(messages)).toBe(true)

    for (const msg of messages) {
      expect(msg).toHaveProperty('id')
      expect(msg).toHaveProperty('text')
    }
  }, TEST_TIMEOUT_MS)

  it.skipIf(!sources.calendar)('should sample real calendar events', async () => {
    const events = await sampleCalendarEvents(10)
    expect(Array.isArray(events)).toBe(true)

    for (const event of events) {
      expect(event).toHaveProperty('id')
      expect(event).toHaveProperty('title')
    }
  }, TEST_TIMEOUT_MS)
})

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.