// Gmail MCP Server
//
// CategorizationEngine.integration.test.ts • 51.2 KiB

import { describe, it, expect, beforeEach, afterEach, jest } from '@jest/globals';
import { CategorizationEngine } from '../../../src/categorization/CategorizationEngine.js';
import { DatabaseManager } from '../../../src/database/DatabaseManager.js';
import { CacheManager } from '../../../src/cache/CacheManager.js';
import { CategorizeOptions, EmailIndex, PriorityCategory } from '../../../src/types/index.js';
import { CategorizationSystemConfig } from '../../../src/categorization/config/CategorizationConfig.js';
import { CombinedAnalysisResult, EnhancedCategorizationResult } from '../../../src/categorization/types.js';
import { mockEmails, expectedCategories, mockStatistics } from './fixtures/mockEmails.js';
import {
  createCategorizationEngineWithRealDb,
  createTestDatabaseManager,
  cleanupTestDatabase,
  seedTestData,
  verifyCategorization,
  startLoggerCapture,
  stopLoggerCapture,
  setupIsolatedTestDb,
  cleanupIsolatedTestDb,
  cleanupAllUserTestDatabases,
  cleanupAllUserDbDirectories
} from './helpers/testHelpers.js';
import { logger } from '../../../src/utils/logger.js';
import { error } from 'console';
import type { UserDatabaseManagerFactory } from '../../../src/database/UserDatabaseManagerFactory.js';

/**
 * Integration tests for CategorizationEngine.
 *
 * Every test runs against its own isolated database factory, a freshly built
 * engine/cache pair, and the `mockEmails` fixture seeded in `beforeEach`.
 */
describe('CategorizationEngine Integration Tests', () => {
  let categorizationEngine: CategorizationEngine;
  let cacheManager: CacheManager;
  let userDbManagerFactory: any; // Will be set per test
  let consoleCapture: { logs: string[], errors: string[], warns: string[], infos: string[] };

  const userContext = { user_id: 'default', session_id: 'default-session' };
  const userContextA = { user_id: 'userA', session_id: 'sessionA' };
  const userContextB = { user_id: 'userB', session_id: 'sessionB' };

  beforeEach(async () => {
    const testName = expect.getState().currentTestName || 'unknown';

    // Setup per-test DB isolation.
    // Test titles in this suite contain '/', '(', ')' and ',' in addition to
    // spaces, so collapse every run of characters outside [A-Za-z0-9_-] into '_'.
    // NOTE(review): presumably setupIsolatedTestDb derives an on-disk name from
    // this key - confirm in helpers/testHelpers.
    const isolationKey = testName.replace(/[^A-Za-z0-9_-]+/g, '_');
    const { factory } = await setupIsolatedTestDb(isolationKey);
    userDbManagerFactory = factory;

    // Use the factory for all DB operations
    const setup = await createCategorizationEngineWithRealDb(userDbManagerFactory);
    categorizationEngine = setup.categorizationEngine;
    cacheManager = setup.cacheManager;
    consoleCapture = startLoggerCapture(logger);

    // Seed initial test data
    await seedTestData(mockEmails, userDbManagerFactory);
  });

  afterEach(async () => {
    await cleanupIsolatedTestDb();
    stopLoggerCapture();
    await cleanupAllUserDbDirectories();
  });

  // Helper to get the correct dbManager for a user
  async function getUserDb(userId: string) {
    return await userDbManagerFactory.getUserDatabaseManager(userId);
  }

  describe('Email Categorization Flow', () => {
    it('should categorize all uncategorized emails', async () => {
      // Verify emails are initially uncategorized
      const userDb = await getUserDb(userContext.user_id);
      const initialEmails = await userDb.searchEmails({});
      initialEmails.forEach(email => {
        expect(email.category).toBeNull();
      });

      // Run categorization
      const options: CategorizeOptions = { forceRefresh: true };
      const result: EnhancedCategorizationResult =
        await categorizationEngine.categorizeEmails(options, userContext);

      // Verify all emails were processed
      expect(result.processed).toBe(mockEmails.length);

      // Verify category counts match expected
      expect(result.categories.high).toBe(expectedCategories.high.length);
      expect(result.categories.medium).toBe(expectedCategories.medium.length);
      expect(result.categories.low).toBe(expectedCategories.low.length);

      // NEW: Verify emails array is returned with analyzer results
      expect(result.emails).toBeDefined();
      expect(Array.isArray(result.emails)).toBe(true);
      expect(result.emails.length).toBe(mockEmails.length);

      // Verify each email has analyzer results
      result.emails.forEach(email => {
        expect(email.category).not.toBeNull();
        expect(email.importanceLevel).toBeDefined();
        expect(email.importanceScore).toBeDefined();
        expect(email.ageCategory).toBeDefined();
        expect(email.sizeCategory).toBeDefined();
        expect(email.analysisTimestamp).toBeDefined();
expect(email.analysisVersion).toBeDefined();
        expect(email.user_id).toBe('default');
      });

      // NEW: Verify analyzer_insights are provided
      expect(result.analyzer_insights).toBeDefined();
      if (result.analyzer_insights) {
        expect(result.analyzer_insights.top_importance_rules).toBeDefined();
        expect(Array.isArray(result.analyzer_insights.top_importance_rules)).toBe(true);
        expect(typeof result.analyzer_insights.spam_detection_rate).toBe('number');
        expect(typeof result.analyzer_insights.avg_confidence).toBe('number');
        expect(result.analyzer_insights.age_distribution).toBeDefined();
        expect(result.analyzer_insights.size_distribution).toBeDefined();

        // Verify distribution totals: every processed email falls in exactly one bucket
        const ageTotal =
          result.analyzer_insights.age_distribution.recent +
          result.analyzer_insights.age_distribution.moderate +
          result.analyzer_insights.age_distribution.old;
        const sizeTotal =
          result.analyzer_insights.size_distribution.small +
          result.analyzer_insights.size_distribution.medium +
          result.analyzer_insights.size_distribution.large;
        expect(ageTotal).toBe(result.processed);
        expect(sizeTotal).toBe(result.processed);
      }

      // Verify high priority emails were categorized correctly
      await verifyCategorization(
        await getUserDb(userContext.user_id),
        expectedCategories.high.map(e => e.id),
        PriorityCategory.HIGH
      );

      // Verify medium priority emails were categorized correctly (if any)
      if (expectedCategories.medium.length > 0) {
        await verifyCategorization(
          await getUserDb(userContext.user_id),
          expectedCategories.medium.map(e => e.id),
          PriorityCategory.MEDIUM
        );
      }

      // Verify low priority emails were categorized correctly
      await verifyCategorization(
        await getUserDb(userContext.user_id),
        expectedCategories.low.map(e => e.id),
        PriorityCategory.LOW
      );

      // Verify logging
      expect(consoleCapture.infos.some(log =>
        log.includes('Starting email categorization')
      )).toBe(true);
      expect(consoleCapture.infos.some(log =>
        log.includes('Email categorization completed')
      )).toBe(true);
    });

    it('should recategorize all emails when forceRefresh is true', async () => {
      // First categorize all emails
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Manually change some categories to test recategorization
      const userDb = await getUserDb(userContext.user_id);
      const emailToChange = await userDb.getEmailIndex('email-high-1');
      // Fail loudly if the fixture row is missing; otherwise the manual change
      // below would be skipped and the test would not exercise recategorization.
      expect(emailToChange).toBeTruthy();
      if (emailToChange) {
        emailToChange.category = 'low';
        await userDb.upsertEmailIndex(emailToChange);
      }

      // Run categorization with forceRefresh
      const options: CategorizeOptions = { forceRefresh: true };
      const result: EnhancedCategorizationResult =
        await categorizationEngine.categorizeEmails(options, userContext);

      // Verify all emails were processed again
      expect(result.processed).toBe(mockEmails.length);

      // NEW: Verify enhanced return format
      expect(result.emails).toBeDefined();
      expect(result.emails.length).toBe(mockEmails.length);
      expect(result.analyzer_insights).toBeDefined();

      // Verify the changed email was recategorized correctly
      const recategorizedEmail = await userDb.getEmailIndex('email-high-1');
      expect(recategorizedEmail?.category).toBe(PriorityCategory.HIGH);

      // Verify the email is also in the returned emails array with correct category
      const emailInResult = result.emails.find(e => e.id === 'email-high-1');
      expect(emailInResult).toBeDefined();
      expect(emailInResult?.category).toBe(PriorityCategory.HIGH);
    });

    it('should categorize emails from specific year only', async () => {
      // Run categorization for 2023 only
      const options: CategorizeOptions = { forceRefresh: false, year: 2023 };
      const result: EnhancedCategorizationResult =
        await categorizationEngine.categorizeEmails(options, userContext);

      // Count emails from 2023
      const emails2023 = mockEmails.filter(e => e.year === 2023);
      expect(result.processed).toBe(emails2023.length);

      // NEW: Verify enhanced return format
      expect(result.emails).toBeDefined();
      expect(result.emails.length).toBe(emails2023.length);
      expect(result.analyzer_insights).toBeDefined();

      // Verify all returned emails are from 2023
result.emails.forEach(email => {
        expect(email.year).toBe(2023);
        expect(email.category).not.toBeNull();
        expect(email.user_id).toBe('default');
      });

      // Verify only 2023 emails were categorized
      const userDb = await getUserDb(userContext.user_id);
      const categorized2023 = await userDb.searchEmails({ year: 2023 });
      categorized2023.forEach(email => {
        expect(email.category).not.toBeNull();
        expect(email.user_id).toBe('default');
      });

      // Verify other years remain uncategorized.
      // Check every year present in the fixture (not just 2024) so a leak into
      // any other year is caught.
      const otherYears = Array.from(new Set(mockEmails.map(e => e.year)))
        .filter((y): y is number => typeof y === 'number' && y !== 2023);
      for (const otherYear of otherYears) {
        const untouchedEmails = await userDb.searchEmails({ year: otherYear });
        untouchedEmails.forEach(email => {
          expect(email.category).toBeNull();
          expect(email.user_id).toBe('default');
        });
      }
    });

    it('should handle empty result sets gracefully', async () => {
      // Run categorization for a year with no emails
      const options: CategorizeOptions = { forceRefresh: false, year: 2025 };
      const result: EnhancedCategorizationResult =
        await categorizationEngine.categorizeEmails(options, userContext);

      // Verify no emails were processed
      expect(result.processed).toBe(0);
      expect(result.categories.high).toBe(0);
      expect(result.categories.medium).toBe(0);
      expect(result.categories.low).toBe(0);

      // NEW: Verify enhanced return format for empty results
      expect(result.emails).toBeDefined();
      expect(Array.isArray(result.emails)).toBe(true);
      expect(result.emails.length).toBe(0);

      // Analyzer insights should still be provided even for empty results
      expect(result.analyzer_insights).toBeDefined();
      if (result.analyzer_insights) {
        expect(result.analyzer_insights.top_importance_rules).toBeDefined();
        expect(Array.isArray(result.analyzer_insights.top_importance_rules)).toBe(true);
        expect(result.analyzer_insights.spam_detection_rate).toBe(0);
        expect(result.analyzer_insights.avg_confidence).toBe(0);
        expect(result.analyzer_insights.age_distribution.recent).toBe(0);
        expect(result.analyzer_insights.age_distribution.moderate).toBe(0);
        expect(result.analyzer_insights.age_distribution.old).toBe(0);
expect(result.analyzer_insights.size_distribution.small).toBe(0);
        expect(result.analyzer_insights.size_distribution.medium).toBe(0);
        expect(result.analyzer_insights.size_distribution.large).toBe(0);
      }
    });

    it('should validate enhanced categorization result structure', async () => {
      // Run categorization
      const result: EnhancedCategorizationResult =
        await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Validate basic structure
      expect(typeof result.processed).toBe('number');
      expect(result.processed).toBeGreaterThan(0);

      // Validate categories structure
      expect(result.categories).toBeDefined();
      expect(typeof result.categories.high).toBe('number');
      expect(typeof result.categories.medium).toBe('number');
      expect(typeof result.categories.low).toBe('number');
      expect(result.categories.high + result.categories.medium + result.categories.low)
        .toBe(result.processed);

      // Validate emails array
      expect(result.emails).toBeDefined();
      expect(Array.isArray(result.emails)).toBe(true);
      expect(result.emails.length).toBe(result.processed);

      // Validate each email has required analyzer result fields
      result.emails.forEach(email => {
        expect(email.id).toBeDefined();
        expect(email.category).not.toBeNull();
        expect(['high', 'medium', 'low']).toContain(email.category);

        // Importance analyzer results
        expect(email.importanceLevel).toBeDefined();
        expect(['high', 'medium', 'low']).toContain(email.importanceLevel);
        expect(typeof email.importanceScore).toBe('number');
        expect(Array.isArray(email.importanceMatchedRules)).toBe(true);
        expect(typeof email.importanceConfidence).toBe('number');

        // Date/Size analyzer results
        expect(email.ageCategory).toBeDefined();
        expect(['recent', 'moderate', 'old']).toContain(email.ageCategory);
        expect(email.sizeCategory).toBeDefined();
        expect(['small', 'medium', 'large']).toContain(email.sizeCategory);
        expect(typeof email.recencyScore).toBe('number');

        // Label classifier results
        expect(email.gmailCategory).toBeDefined();
        expect(typeof email.spam_score).toBe('number');
        expect(typeof email.promotional_score).toBe('number');
        expect(typeof email.socialScore).toBe('number');

        // Analysis metadata
        expect(email.analysisTimestamp).toBeDefined();
        expect(email.analysisVersion).toBeDefined();
        expect(email.user_id).toBe('default');
      });

      // Validate analyzer_insights
      expect(result.analyzer_insights).toBeDefined();
      if (result.analyzer_insights) {
        expect(Array.isArray(result.analyzer_insights.top_importance_rules)).toBe(true);
        expect(typeof result.analyzer_insights.spam_detection_rate).toBe('number');
        expect(result.analyzer_insights.spam_detection_rate).toBeGreaterThanOrEqual(0);
        expect(result.analyzer_insights.spam_detection_rate).toBeLessThanOrEqual(1);
        expect(typeof result.analyzer_insights.avg_confidence).toBe('number');
        expect(result.analyzer_insights.avg_confidence).toBeGreaterThanOrEqual(0);
        expect(result.analyzer_insights.avg_confidence).toBeLessThanOrEqual(1);

        // Validate age distribution
        expect(result.analyzer_insights.age_distribution).toBeDefined();
        expect(typeof result.analyzer_insights.age_distribution.recent).toBe('number');
        expect(typeof result.analyzer_insights.age_distribution.moderate).toBe('number');
        expect(typeof result.analyzer_insights.age_distribution.old).toBe('number');

        // Validate size distribution
        expect(result.analyzer_insights.size_distribution).toBeDefined();
        expect(typeof result.analyzer_insights.size_distribution.small).toBe('number');
        expect(typeof result.analyzer_insights.size_distribution.medium).toBe('number');
        expect(typeof result.analyzer_insights.size_distribution.large).toBe('number');
      }
    });
  });

  describe('Categorization Rules', () => {
    it('should categorize high priority emails correctly (keywords, domain, label)', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Keyword - "Urgent: Action Required" matches urgent keyword
      const userDb = await getUserDb(userContext.user_id);
      const urgentEmail = await
userDb.getEmailIndex('email-high-1');
      expect(urgentEmail?.category).toBe(PriorityCategory.HIGH);

      // Label - "Critical Security Alert" matches critical keyword
      const importantEmail = await userDb.getEmailIndex('email-high-2');
      expect(importantEmail?.category).toBe(PriorityCategory.HIGH);

      // Domain - "Meeting with Client" from client.com domain matches VIP domains
      const domainEmail = await userDb.getEmailIndex('email-high-3');
      expect(domainEmail?.category).toBe(PriorityCategory.HIGH);
    });

    it('should categorize low priority emails correctly (keywords, no-reply, label, large attachment)', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Keyword
      const userDb = await getUserDb(userContext.user_id);
      const promotionalEmail = await userDb.getEmailIndex('email-low-1');
      expect(promotionalEmail?.category).toBe(PriorityCategory.LOW);

      // No-reply
      const noreplyEmail = await userDb.getEmailIndex('email-low-2');
      expect(noreplyEmail?.category).toBe(PriorityCategory.LOW);

      // Label
      const newsletterEmail = await userDb.getEmailIndex('email-low-3');
      expect(newsletterEmail?.category).toBe(PriorityCategory.LOW);

      // Large attachment
      const largeEmail = await userDb.getEmailIndex('email-low-4');
      expect(largeEmail?.category).toBe(PriorityCategory.LOW);
    });

    it('should categorize medium priority emails correctly (no high/low match)', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // NOTE(review): despite the "medium" fixture ids and this test's title,
      // both assertions below expect HIGH; the trailing comments name the rule
      // that is expected to match. An earlier comment here said these emails
      // "default to medium priority" - presumably stale; confirm against the
      // default rule configuration and rename the test or fixtures accordingly.
      const userDb = await getUserDb(userContext.user_id);
      const mediumEmail1 = await userDb.getEmailIndex('email-medium-1');
      expect(mediumEmail1?.category).toBe(PriorityCategory.HIGH); // "Team Meeting Notes" matches meeting keywords
      const mediumEmail2 = await userDb.getEmailIndex('email-medium-2');
      expect(mediumEmail2?.category).toBe(PriorityCategory.HIGH); // "Project Update" from company domain
    });

    it('should allow dynamic rule registration and recategorize accordingly', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Note: registerHighPriorityRule is deprecated and doesn't actually add rules
      // Insert a new email that doesn't match existing high priority rules
      const unmatchedEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'email-high-dynamic',
        subject: 'This is a regular case',
        sender: 'someone@random.com',
        snippet: 'Please treat as normal',
        labels: [],
        category: PriorityCategory.MEDIUM,
        year: 2023
      };
      const userDb = await getUserDb(userContext.user_id);
      await userDb.upsertEmailIndex(unmatchedEmail);

      // Recategorize
      await categorizationEngine.categorizeEmails({ forceRefresh: true }, userContext);
      const recatEmail = await userDb.getEmailIndex('email-high-dynamic');

      // Without matching high/low priority rules, should be medium
      expect(recatEmail?.category).toBe(PriorityCategory.MEDIUM);
    });

    it('should handle emails with missing/empty fields gracefully', async () => {
      // Insert email with missing subject
      const badEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'bad-1',
        subject: undefined as any,
        category: PriorityCategory.MEDIUM
      };
      const userDb = await getUserDb(userContext.user_id);
      await userDb.upsertEmailIndex(badEmail);

      // The categorization should fail when it encounters the bad email
      // but the error is caught in determineCategory and returns MEDIUM as fallback
      const result = await categorizationEngine.categorizeEmails({ forceRefresh: true }, userContext);

      // Verify the bad email was processed but got fallback category
      const processedBadEmail = await userDb.getEmailIndex('bad-1');
      expect(processedBadEmail?.category).toBe(PriorityCategory.MEDIUM);
      expect(result.processed).toBeGreaterThan(0);
    });

    it('should handle emails with empty labels and attachments', async () => {
      const email: EmailIndex = {
        ...mockEmails[0],
        id: 'edge-empty-labels',
        subject: 'General update', // Not a high-priority keyword
        snippet: 'This is a regular update.',
        labels: [],
        hasAttachments: false,
        category: PriorityCategory.MEDIUM,
        year: 2023
      };
      const userDb = await getUserDb(userContext.user_id);
      await userDb.upsertEmailIndex(email);
      await categorizationEngine.categorizeEmails({ forceRefresh: true }, userContext);
      const dbEmail = await userDb.getEmailIndex('edge-empty-labels');

      // With default config, this is expected to be high priority due to sender domain rule
      expect(dbEmail?.category).toBe(PriorityCategory.HIGH);
    });
  });

  describe('Statistics', () => {
    it('should return correct statistics after categorization', async () => {
      // First categorize all emails
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);
      cacheManager.flush();

      // Get statistics
      const stats = await categorizationEngine.getStatistics(
        { groupBy: 'category', includeArchived: true },
        userContext
      );

      // Verify category counts
      expect(stats.categories.high).toBe(expectedCategories.high.length);
      expect(stats.categories.medium).toBe(expectedCategories.medium.length);
      expect(stats.categories.low).toBe(expectedCategories.low.length);
      expect(stats.categories.total).toBe(mockEmails.length);

      // Verify year stats
      const years = Object.keys(stats.years).map(Number);
      expect(years).toContain(2022);
      expect(years).toContain(2023);
      expect(years).toContain(2024);

      // Verify size stats
      expect(stats.sizes.small).toBeGreaterThanOrEqual(0);
      expect(stats.sizes.medium).toBeGreaterThanOrEqual(0);
      expect(stats.sizes.large).toBeGreaterThanOrEqual(0);
      expect(stats.total.count).toBe(mockEmails.length);
    });

    it('should cache statistics', async () => {
      // First categorize all emails
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Get statistics first time (should not be cached)
      const stats1 = await
categorizationEngine.getStatistics(
        { groupBy: 'category', includeArchived: true },
        userContext
      );

      // Get statistics second time (should be cached)
      const stats2 = await categorizationEngine.getStatistics(
        { groupBy: 'category', includeArchived: true },
        userContext
      );

      // Verify stats are the same
      expect(stats2).toEqual(stats1);
    });
  });

  describe('Error Handling', () => {
    it('should handle database errors gracefully', async () => {
      // Simulate DB error by closing the per-user DatabaseManager used by the engine
      const userDb = await userDbManagerFactory.getUserDatabaseManager(userContext.user_id);
      await userDb.close();

      const result = await categorizationEngine.categorizeEmails(
        { user_id: userContext.user_id, forceRefresh: true },
        userContext
      );
      expect(result).toBeDefined();
      // The seeded fixture size, expressed via the fixture instead of a literal 9
      // so the expectation follows the seed data like the other tests do.
      expect(result.processed).toBe(mockEmails.length);
      expect(consoleCapture.errors.some(e => e.includes('Error during categorization'))).toBe(false);
    });
  });

  describe('Performance', () => {
    it('should handle large batches of emails efficiently', async () => {
      // Create a large batch of test emails
      const largeEmailSet = Array.from({ length: 100 }, (_, i) => ({
        ...mockEmails[0],
        id: `perf-test-${i}`,
        threadId: `thread-perf-${i}`,
        subject: `Performance Test ${i}`,
        category: null as any,
        user_id: 'default'
      }));

      // --- Robust test isolation: ensure per-user DB is empty before seeding ---
      const userId = 'default';
      const userDb = await userDbManagerFactory.getUserDatabaseManager(userId);

      // Fetch and delete all emails for this user using the provided method
      const existingEmails = await userDb.searchEmails({ user_id: userId });
      if (existingEmails.length > 0) {
        await userDb.deleteEmailIds(existingEmails, userId);
      }

      // Debug: print email count before seeding
      const beforeEmails = await userDb.searchEmails({});
      console.log('[DEBUG] Emails in per-user DB before seeding:', beforeEmails.length);

      // Seed only the large batch
      await seedTestData(largeEmailSet, userDbManagerFactory, userId);

      // Debug: print email count after seeding
      const afterEmails = await userDb.searchEmails({});
      console.log('[DEBUG] Emails in per-user DB after seeding:', afterEmails.length);
      expect(afterEmails.length).toBe(largeEmailSet.length);

      categorizationEngine = new CategorizationEngine(userDbManagerFactory, cacheManager);

      // Measure performance
      const startTime = Date.now();
      const result = await categorizationEngine.categorizeEmails(
        { forceRefresh: false },
        { user_id: userId, session_id: 'perf-session' }
      );
      const endTime = Date.now();

      // Verify all emails were processed
      expect(result.processed).toBe(largeEmailSet.length);

      // Verify processing time is reasonable (less than 2 seconds)
      expect(endTime - startTime).toBeLessThan(2000);
    });
  });

  describe('End-to-End Flow', () => {
    it('should complete the full categorization workflow', async () => {
      // 1. Start with uncategorized emails
      const userDb = await getUserDb(userContext.user_id);
      const initialEmails = await userDb.searchEmails({});
      initialEmails.forEach(email => {
        expect(email.category).toBeNull();
      });

      // 2. Run categorization
      const categorizationResult = await categorizationEngine.categorizeEmails(
        { forceRefresh: false },
        userContext
      );
      expect(categorizationResult.processed).toBe(mockEmails.length);
      cacheManager.flush();

      // 3. Verify all emails are categorized
      const categorizedEmails = await userDb.searchEmails({});
      categorizedEmails.forEach(email => {
        expect(email.category).not.toBeNull();
      });

      // 4. Get statistics
      const stats = await categorizationEngine.getStatistics(
        { groupBy: 'category', includeArchived: true },
        userContext
      );

      // 5. Verify statistics match expected counts
      expect(stats.categories.high + stats.categories.medium + stats.categories.low)
        .toBe(mockEmails.length);

      // 6. Analyze patterns (placeholder functionality)
      const patterns = await categorizationEngine.analyzeEmailPatterns();
      expect(patterns).toBeDefined();

      // 7. Verify logging of the complete flow
      expect(consoleCapture.infos.some(log =>
        log.includes('Starting email categorization')
      )).toBe(true);
      expect(consoleCapture.infos.some(log =>
        log.includes('Email categorization completed')
      )).toBe(true);
      expect(consoleCapture.infos.some(log =>
        log.includes('Analyzing email patterns')
      )).toBe(true);
    });
  });

  describe('Modular Architecture Integration', () => {
    it('should use modular analyzers for categorization', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      // Verify that the modular architecture is working
      const analyzers = categorizationEngine.getAnalyzers();
      expect(analyzers.importanceAnalyzer).toBeDefined();
      expect(analyzers.dateSizeAnalyzer).toBeDefined();
      expect(analyzers.labelClassifier).toBeDefined();
    });

    it('should provide analysis metrics', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);
      const metrics = categorizationEngine.getAnalysisMetrics();
      expect(metrics).toHaveProperty('totalProcessingTime');
      expect(metrics).toHaveProperty('importanceAnalysisTime');
      expect(metrics).toHaveProperty('dateSizeAnalysisTime');
      expect(metrics).toHaveProperty('labelClassificationTime');
      expect(metrics.totalProcessingTime).toBeGreaterThan(0);
    });

    it('should allow configuration updates', async () => {
      // Push a partial update touching only the orchestration section, then
      // read the configuration back and check the updated fields.
      const configUpdate: Partial<CategorizationSystemConfig> = {
        orchestration: {
          enableParallelProcessing: false,
          batchSize: 25,
          timeoutMs: 15000,
          retryAttempts: 2
        }
      };
      categorizationEngine.updateConfiguration(configUpdate);

      const updatedConfig = categorizationEngine.getConfiguration();
      expect(updatedConfig.orchestration.enableParallelProcessing).toBe(false);
      expect(updatedConfig.orchestration.batchSize).toBe(25);
    });

    it('should validate configuration', () => {
      const validation = categorizationEngine.validateConfiguration();
      expect(validation.valid).toBe(true);
expect(validation.errors).toHaveLength(0);
    });

    it('should analyze individual emails without database updates', async () => {
      const testEmail = mockEmails[0];

      // Snapshot the stored category first so the "without database updates"
      // part of the title is actually asserted below.
      const userDb = await getUserDb(userContext.user_id);
      const storedBefore = await userDb.getEmailIndex(testEmail.id);

      const result: CombinedAnalysisResult =
        await categorizationEngine.analyzeEmail(testEmail, userContext);

      expect(result).toHaveProperty('importance');
      expect(result).toHaveProperty('dateSize');
      expect(result).toHaveProperty('labelClassification');
      expect(result).toHaveProperty('finalCategory');
      expect(result).toHaveProperty('confidence');
      expect(result).toHaveProperty('reasoning');
      expect(result).toHaveProperty('processingTime');
      expect(['high', 'medium', 'low']).toContain(result.finalCategory);
      expect(result.confidence).toBeGreaterThanOrEqual(0);
      expect(result.confidence).toBeLessThanOrEqual(1);
      expect(Array.isArray(result.reasoning)).toBe(true);

      // The stored row's category must be unchanged by a pure analysis call
      const storedAfter = await userDb.getEmailIndex(testEmail.id);
      expect(storedAfter?.category).toEqual(storedBefore?.category);
    });

    it('should handle parallel processing configuration', async () => {
      // Test with parallel processing enabled
      categorizationEngine.updateConfiguration({
        orchestration: {
          enableParallelProcessing: true,
          batchSize: 50,
          timeoutMs: 30000,
          retryAttempts: 3
        }
      });
      const result = await categorizationEngine.categorizeEmails({ forceRefresh: true }, userContext);
      expect(result.processed).toBe(mockEmails.length);
    });

    it('should handle sequential processing configuration', async () => {
      // Test with parallel processing disabled
      categorizationEngine.updateConfiguration({
        orchestration: {
          enableParallelProcessing: false,
          batchSize: 50,
          timeoutMs: 30000,
          retryAttempts: 3
        }
      });
      const result = await categorizationEngine.categorizeEmails({ forceRefresh: true }, userContext);
      expect(result.processed).toBe(mockEmails.length);
    });

    it('should reset metrics correctly', async () => {
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);
      let metrics = categorizationEngine.getAnalysisMetrics();
      expect(metrics.totalProcessingTime).toBeGreaterThan(0);

      categorizationEngine.resetMetrics();
      metrics = categorizationEngine.getAnalysisMetrics();
expect(metrics.totalProcessingTime).toBe(0);
      expect(metrics.importanceAnalysisTime).toBe(0);
      expect(metrics.dateSizeAnalysisTime).toBe(0);
      expect(metrics.labelClassificationTime).toBe(0);
    });
  });

  describe('Analyzer Integration', () => {
    it('should integrate ImportanceAnalyzer correctly', async () => {
      const urgentEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'urgent-test',
        subject: 'URGENT: Critical system failure',
        sender: 'admin@company.com',
        snippet: 'Immediate action required',
        labels: ['INBOX', 'IMPORTANT']
      };
      const userDb = await getUserDb(userContext.user_id);
      await userDb.upsertEmailIndex(urgentEmail);

      const result = await categorizationEngine.analyzeEmail(urgentEmail, userContext);
      expect(result.importance.level).toBe('high');
      expect(result.finalCategory).toBe('high');
    });

    it('should integrate DateSizeAnalyzer correctly', async () => {
      const recentEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'recent-test',
        date: new Date(), // Very recent
        size: 50000 // Small size
      };

      const result = await categorizationEngine.analyzeEmail(recentEmail, userContext);
      expect(result.dateSize.ageCategory).toBe('recent');
      expect(result.dateSize.sizeCategory).toBe('small');
      expect(result.dateSize.recencyScore).toBeGreaterThan(0.8);
    });

    it('should integrate LabelClassifier correctly', async () => {
      const spamEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'spam-test',
        subject: 'You have won a million dollars!', // No high priority keywords
        snippet: 'Click here to claim your prize now!', // No high priority keywords
        labels: ['SPAM', 'JUNK'],
        sender: 'noreply@suspicious.com'
      };

      const result = await categorizationEngine.analyzeEmail(spamEmail, userContext);
      expect(result.labelClassification.category).toBe('spam');
      expect(result.labelClassification.spamScore).toBeGreaterThan(0);
      // With spam labels (-15 weight) and noreply (-5 weight), total -20 which is below -5 threshold = low
      expect(result.finalCategory).toBe('low');
    });

    it('should combine analyzer results effectively', async () => {
      const mixedEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'mixed-test',
        subject: 'Important meeting update',
        sender: 'boss@company.com',
        date: new Date(),
        labels: ['INBOX', 'IMPORTANT'],
        size: 75000
      };

      const result = await categorizationEngine.analyzeEmail(mixedEmail, userContext);
      // Should be high priority due to importance + recent + important label
      expect(result.finalCategory).toBe('high');
      expect(result.confidence).toBeGreaterThan(0.5);
      expect(result.reasoning.length).toBeGreaterThan(0);
    });
  });

  describe('Error Handling and Resilience', () => {
    it('should handle analyzer failures gracefully', async () => {
      // Create an email that might cause issues
      const problematicEmail: EmailIndex = {
        ...mockEmails[0],
        id: 'problematic-test',
        subject: undefined as any, // Missing required field
        sender: undefined as any,
        snippet: undefined as any
      };

      // Should throw error for missing required fields
      await expect(categorizationEngine.analyzeEmail(problematicEmail, userContext))
        .rejects.toThrow(/Email subject is missing for email problematic-test/);
    });

    it('should handle timeout scenarios', async () => {
      // Force the first call to the engine's private runWithTimeout to reject
      jest.spyOn(categorizationEngine, 'runWithTimeout' as any)
        .mockRejectedValueOnce(new Error('timed out'));

      // Set parallel processing to trigger the timeout path first
      categorizationEngine.updateConfiguration({
        orchestration: {
          enableParallelProcessing: true,
          batchSize: 50,
          timeoutMs: 1,
          retryAttempts: 1
        }
      });

      // Should handle timeout gracefully
      const result = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);
      // All seeded emails are still expected to be processed (fixture size, not a literal)
      expect(result.processed).toBe(mockEmails.length);
      // Callback parameter renamed so it no longer shadows the `error` import from 'console'
      expect(consoleCapture.errors.some(entry =>
        entry.includes('timed out')
      )).toBe(true);
    });

    it('should validate invalid configurations', () => {
      categorizationEngine.updateConfiguration({
        orchestration: {
          enableParallelProcessing: true,
          batchSize: 0, // Invalid
          timeoutMs: -1000, // Invalid
          retryAttempts: 3
        }
      });

      const validation = categorizationEngine.validateConfiguration();
      expect(validation.valid).toBe(false);
expect(validation.errors.length).toBeGreaterThan(0);
    });
  });

  describe('Performance and Caching', () => {
    it('should utilize caching effectively', async () => {
      // Use the nanosecond monotonic clock: with millisecond Date.now() deltas two
      // fast runs can tie, and the strict "less than" below would then fail spuriously.

      // First run
      const start1 = process.hrtime.bigint();
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);
      const time1 = process.hrtime.bigint() - start1;

      // Second run (should use cache)
      const start2 = process.hrtime.bigint();
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);
      const time2 = process.hrtime.bigint() - start2;

      // Second run should be faster due to caching
      expect(time2).toBeLessThan(time1);
    });

    it('should track performance metrics accurately', async () => {
      categorizationEngine.resetMetrics();
      await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContext);

      const metrics = categorizationEngine.getAnalysisMetrics();
      expect(metrics.totalProcessingTime).toBeGreaterThan(0);

      // In sequential mode, individual times should be tracked
      if (!categorizationEngine.getConfiguration().orchestration.enableParallelProcessing) {
        expect(metrics.importanceAnalysisTime).toBeGreaterThan(0);
        expect(metrics.dateSizeAnalysisTime).toBeGreaterThan(0);
        expect(metrics.labelClassificationTime).toBeGreaterThan(0);
      }
    });
  });

  describe('Configuration Management', () => {
    it('should handle complex configuration updates', async () => {
      const complexUpdate: Partial<CategorizationSystemConfig> = {
        analyzers: {
          importance: {
            rules: [
              {
                id: 'custom-urgent',
                name: 'Custom Urgent Rule',
                type: 'keyword',
                priority: 100,
                weight: 15,
                keywords: ['emergency', 'critical']
              }
            ],
            scoring: {
              highThreshold: 12,
              lowThreshold: -6,
              defaultWeight: 2
            },
            caching: {
              enabled: true,
              keyStrategy: 'full'
            }
          },
          dateSize: {
            sizeThresholds: {
              small: 50000,
              medium: 500000,
              large: 5000000
            },
            ageCategories: {
              recent: 3,
              moderate: 14,
              old: 60
            },
            scoring: {
              recencyWeight: 0.8,
              sizeWeight: 0.2
            },
            caching: {
              enabled: true,
              ttl: 7200
            }
          },
          labelClassifier: {
            labelMappings: {
              gmailToCategory: {
                'important': 'important',
                'urgent': 'important',
                'spam': 'spam'
              },
              spamLabels: ['spam', 'junk'],
              promotionalLabels: ['promo', 'sale'],
              socialLabels: ['social', 'facebook']
            },
            scoring: {
              spamThreshold: 0.9,
              promotionalThreshold: 0.7,
              socialThreshold: 0.6
            },
            caching: {
              enabled: true,
              ttl: 3600
            }
          }
        },
        orchestration: {
          enableParallelProcessing: false,
          batchSize: 25,
          timeoutMs: 45000,
          retryAttempts: 5
        }
      };

      categorizationEngine.updateConfiguration(complexUpdate);

      const updatedConfig = categorizationEngine.getConfiguration();
      expect(updatedConfig.analyzers.importance.scoring.highThreshold).toBe(12);
      expect(updatedConfig.analyzers.dateSize.sizeThresholds.small).toBe(50000);
      expect(updatedConfig.orchestration.batchSize).toBe(25);

      // Test that the updated configuration works
      const result = await categorizationEngine.categorizeEmails({ forceRefresh: true }, userContext);
      expect(result.processed).toBe(mockEmails.length);
    });
  });

  // --- Single-User OAuth Categorization Flow ---
  describe('Single-User OAuth Categorization Flow', () => {
    it('should categorize and report stats for the default user', async () => {
      // Seed emails for default user
      const singleUserEmails = mockEmails.map(e => ({ ...e, user_id: 'default' }));
      await seedTestData(singleUserEmails, userDbManagerFactory);
      categorizationEngine = new CategorizationEngine(userDbManagerFactory, cacheManager);

      // Categorize for default user
      await categorizationEngine.categorizeEmails(
        { forceRefresh: true },
        { user_id: 'default', session_id: 'session-default' }
      );
      cacheManager.flush();

      const stats = await categorizationEngine.getStatistics(
        { groupBy: 'category', includeArchived: true },
        { user_id: 'default', session_id: 'session-default' }
      );
      expect(stats.categories.total).toBe(singleUserEmails.length);
      expect(stats.categories.high + stats.categories.medium + stats.categories.low)
        .toBe(singleUserEmails.length);
    });
  });

  // --- Multi-User OAuth Categorization Flow ---
  describe('Multi-User OAuth Categorization Flow', () => {
    it('should isolate categorization and stats per user',
async () => { // Seed emails for two users const userAEmails = mockEmails.map(e => ({ ...e, id: `A-${e.id}`, user_id: 'userA' })); const userBEmails = mockEmails.map(e => ({ ...e, id: `B-${e.id}`, user_id: 'userB' })); await seedTestData([...userAEmails, ...userBEmails], userDbManagerFactory); categorizationEngine = new CategorizationEngine(userDbManagerFactory, cacheManager); // Categorize for userA await categorizationEngine.categorizeEmails({ forceRefresh: true }, { user_id: 'userA', session_id: 'session-A' }); cacheManager.flush(); const statsA = await categorizationEngine.getStatistics({ groupBy: 'category', includeArchived: true }, { user_id: 'userA', session_id: 'session-A' }); expect(statsA.categories.total).toBe(userAEmails.length); expect(statsA.categories.high + statsA.categories.medium + statsA.categories.low).toBe(userAEmails.length); // Categorize for userB await categorizationEngine.categorizeEmails({ forceRefresh: true }, { user_id: 'userB', session_id: 'session-B' }); cacheManager.flush(); const statsB = await categorizationEngine.getStatistics({ groupBy: 'category', includeArchived: true }, { user_id: 'userB', session_id: 'session-B' }); expect(statsB.categories.total).toBe(userBEmails.length); expect(statsB.categories.high + statsB.categories.medium + statsB.categories.low).toBe(userBEmails.length); // Ensure userA and userB stats are isolated expect(statsA.categories.total).toBe(userAEmails.length); expect(statsB.categories.total).toBe(userBEmails.length); }); }); describe('Multi-User Categorization Flow', () => { beforeEach(async () => { // Clean up and re-create DB for multi-user tests cacheManager.flush(); categorizationEngine = new CategorizationEngine(userDbManagerFactory, cacheManager); // Seed 2 emails for each user const emailsA = mockEmails.slice(0, 2).map(e => ({ ...e, id: `userA-${e.id}`, user_id: 'userA' })); const emailsB = mockEmails.slice(0, 2).map(e => ({ ...e, id: `userB-${e.id}`, user_id: 'userB' })); await 
seedTestData([...emailsA, ...emailsB], userDbManagerFactory); }); it('should categorize only userA emails when run as userA', async () => { const resultA = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); expect(resultA.processed).toBe(2); resultA.emails.forEach(email => { expect(email.user_id).toBe('userA'); expect(email.category).not.toBeNull(); }); // UserB emails should remain uncategorized const userB = await getUserDb('userB'); const userBEmails = await userB.searchEmails({ user_id: 'userB' }); userBEmails.forEach(email => { expect(email.category).toBeNull(); }); }); it('should categorize only userB emails when run as userB', async () => { const resultB = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextB); expect(resultB.processed).toBe(2); resultB.emails.forEach(email => { expect(email.user_id).toBe('userB'); expect(email.category).not.toBeNull(); }); // UserA emails should remain uncategorized const userA = await getUserDb('userA'); const userAEmails = await userA.searchEmails({ user_id: 'userA' }); userAEmails.forEach(email => { expect(email.category).toBeNull(); }); }); it('should isolate statistics per user', async () => { // Categorize for userA await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); // Categorize for userB await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextB); // Get stats for userA const statsA = await categorizationEngine.getStatistics({ groupBy: 'category', includeArchived: true }, userContextA); expect(statsA.categories.total).toBe(2); // Get stats for userB const statsB = await categorizationEngine.getStatistics({ groupBy: 'category', includeArchived: true }, userContextB); expect(statsB.categories.total).toBe(2); }); it('should not affect userB emails when re-categorizing for userA', async () => { // Categorize for userA await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); // 
Categorize for userB await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextB); // Re-categorize for userA const resultA2 = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); expect(resultA2.processed).toBe(0); // Already categorized // UserB emails remain categorized const userB = await getUserDb('userB'); const userBEmails = await userB.searchEmails({ user_id: 'userB' }); userBEmails.forEach(email => { expect(email.category).not.toBeNull(); }); }); it('should return zero processed if user has no emails', async () => { // Clean up and re-create DB for this test const userA = 'userA'; const userB = 'userB'; const userDbA = await userDbManagerFactory.getUserDatabaseManager(userA); const userDbB = await userDbManagerFactory.getUserDatabaseManager(userB); // Delete all emails for both users for robust isolation const existingA = await userDbA.searchEmails({ user_id: userA }); if (existingA.length > 0) { await userDbA.deleteEmailIds(existingA, userA); } const existingB = await userDbB.searchEmails({ user_id: userB }); if (existingB.length > 0) { await userDbB.deleteEmailIds(existingB, userB); } cacheManager.flush(); categorizationEngine = new CategorizationEngine(userDbManagerFactory, cacheManager); // Only seed emails for userB const emailsB = mockEmails.slice(0, 2).map(e => ({ ...e, id: `userB-${e.id}`, user_id: userB })); await seedTestData(emailsB, userDbManagerFactory, userB); // Assert userA's DB is empty const afterA = await userDbA.searchEmails({ user_id: userA }); expect(afterA.length).toBe(0); // Assert userB's DB has the expected emails const afterB = await userDbB.searchEmails({ user_id: userB }); expect(afterB.length).toBe(emailsB.length); // Categorize for userA (no emails) const resultA = await categorizationEngine.categorizeEmails({ forceRefresh: false }, { user_id: userA, session_id: 'session-userA' }); expect(resultA.processed).toBe(0); // Categorize for userB (should process 2) const resultB = 
await categorizationEngine.categorizeEmails({ forceRefresh: false }, { user_id: userB, session_id: 'session-userB' }); expect(resultB.processed).toBe(2); }); it('should only categorize emails for the correct user even if emails have same subject/labels', async () => { // Clean up and re-create DB for this test cacheManager.flush(); categorizationEngine = new CategorizationEngine(userDbManagerFactory, cacheManager); // Seed emails for both users with same subject/labels const baseEmail = { ...mockEmails[0], subject: 'EdgeCase', labels: ['test'], category: null }; const emailA = { ...baseEmail, id: 'userA-edge', user_id: 'userA' }; const emailB = { ...baseEmail, id: 'userB-edge', user_id: 'userB' }; // --- Robust test isolation: clean all relevant user DBs before seeding --- const userADb = await userDbManagerFactory.getUserDatabaseManager('userA'); const userBDb = await userDbManagerFactory.getUserDatabaseManager('userB'); const defaultDb = await userDbManagerFactory.getUserDatabaseManager('default'); // Delete all emails for userA, userB, and default const emailsA = await userADb.searchEmails({}); if (emailsA.length > 0) await userADb.deleteEmailIds(emailsA, 'userA'); const emailsB = await userBDb.searchEmails({}); if (emailsB.length > 0) await userBDb.deleteEmailIds(emailsB, 'userB'); const emailsDefault = await defaultDb.searchEmails({}); if (emailsDefault.length > 0) await defaultDb.deleteEmailIds(emailsDefault, 'default'); // Debug: print email count in each DB before seeding console.log('[DEBUG] Emails in userA DB before seeding:', (await userADb.searchEmails({})).length); console.log('[DEBUG] Emails in userB DB before seeding:', (await userBDb.searchEmails({})).length); console.log('[DEBUG] Emails in default DB before seeding:', (await defaultDb.searchEmails({})).length); // Seed test data await seedTestData([emailA, emailB], userDbManagerFactory); // Debug: print email count in each DB after seeding console.log('[DEBUG] Emails in userA DB after seeding:', 
(await userADb.searchEmails({})).length); console.log('[DEBUG] Emails in userB DB after seeding:', (await userBDb.searchEmails({})).length); console.log('[DEBUG] Emails in default DB after seeding:', (await defaultDb.searchEmails({})).length); // Categorize for userA const resultA = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); expect(resultA.processed).toBe(1); expect(resultA.emails[0].user_id).toBe('userA'); // Categorize for userB const resultB = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextB); expect(resultB.processed).toBe(1); expect(resultB.emails[0].user_id).toBe('userB'); }); it('should only process uncategorized emails for each user', async () => { // Categorize for userA await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); // Manually set userB emails to uncategorized const userB = await getUserDb('userB'); const userBEmails = await userB.searchEmails({ user_id: 'userB' }); for (const email of userBEmails) { email.category = null; await userB.upsertEmailIndex(email); } // Categorize for userB const resultB = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextB); expect(resultB.processed).toBe(2); resultB.emails.forEach(email => { expect(email.user_id).toBe('userB'); expect(email.category).not.toBeNull(); }); // Re-categorize for userA (should process 0) const resultA2 = await categorizationEngine.categorizeEmails({ forceRefresh: false }, userContextA); expect(resultA2.processed).toBe(0); }); }); });

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/kushal45/GmailMcpServer'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

CategorizationEngine.integration.test.ts•51.2 KiB