/**
* Hierarchical Email Categorization System
* Provides sophisticated multi-level categorization with automatic subcategory detection
*/
export class HierarchicalCategorizer {
  /**
   * Build the static three-level hierarchy:
   * main category -> subcategory -> { keywords, domains, subfolders }.
   *
   * - `keywords` are matched as substrings of the lower-cased subject + preview.
   * - `domains` are matched against the sender host (see senderMatchesDomain).
   * - `subfolders` are the leaf folders; the first one is the default.
   */
  constructor() {
    this.categoryHierarchy = {
      'Financial': {
        subcategories: {
          'Banking': {
            keywords: ['bank', 'checking', 'savings', 'statement', 'balance', 'account'],
            domains: ['chase.com', 'bankofamerica.com', 'wellsfargo.com', 'citibank.com'],
            subfolders: ['Chase', 'Bank of America', 'Wells Fargo', 'Citi', 'Credit Unions', 'Statements', 'Alerts']
          },
          'Receipts': {
            keywords: ['receipt', 'purchase', 'transaction', 'payment', 'confirmation', 'order'],
            domains: ['paypal.com', 'stripe.com', 'square.com'],
            subfolders: ['PayPal', 'Credit Cards', 'Digital Payments', 'Purchase Confirmations']
          },
          'Taxes': {
            keywords: ['tax', 'irs', 'w2', '1099', 'refund', 'turbotax'],
            domains: ['irs.gov', 'turbotax.com', 'hrblock.com'],
            subfolders: ['IRS', 'Tax Software', 'W2 Forms', '1099 Forms', 'Refunds']
          },
          'Bills': {
            keywords: ['bill', 'due', 'payment', 'utility', 'electric', 'gas', 'water', 'internet'],
            domains: ['comcast.com', 'verizon.com', 'att.com', 'pge.com'],
            subfolders: ['Electric', 'Gas', 'Water', 'Internet', 'Phone', 'Cable']
          }
        }
      },
      'Commerce': {
        subcategories: {
          'Orders': {
            keywords: ['order', 'purchase', 'bought', 'confirmation', 'shipped', 'delivered'],
            domains: ['amazon.com', 'ebay.com', 'walmart.com', 'target.com', 'bestbuy.com'],
            subfolders: ['Amazon', 'eBay', 'Walmart', 'Target', 'Best Buy', 'Other Stores', 'Shipping Updates']
          },
          'Travel': {
            keywords: ['travel', 'flight', 'hotel', 'booking', 'reservation', 'itinerary'],
            domains: ['expedia.com', 'booking.com', 'airbnb.com', 'uber.com', 'lyft.com'],
            subfolders: ['Flights', 'Hotels', 'Airbnb', 'Car Rentals', 'Rideshare']
          },
          'Subscriptions': {
            keywords: ['subscription', 'service', 'software', 'app', 'streaming', 'renewal'],
            domains: ['netflix.com', 'spotify.com', 'adobe.com', 'microsoft.com'],
            subfolders: ['Netflix', 'Spotify', 'Software', 'Apps', 'Renewals']
          }
        }
      },
      'Personal': {
        subcategories: {
          'Family': {
            keywords: ['family', 'mom', 'dad', 'sister', 'brother', 'cousin'],
            domains: ['gmail.com', 'yahoo.com', 'outlook.com'],
            subfolders: ['Parents', 'Siblings', 'Extended Family', 'Family Events']
          },
          'Friends': {
            keywords: ['friend', 'buddy', 'pal'],
            domains: ['gmail.com', 'yahoo.com', 'outlook.com'],
            subfolders: ['Close Friends', 'Social Plans', 'Group Events']
          },
          'Health': {
            keywords: ['doctor', 'appointment', 'medical', 'health', 'prescription'],
            domains: ['kaiser.org', 'bluecross.com', 'mychart.org'],
            subfolders: ['Doctors', 'Appointments', 'Prescriptions', 'Lab Results']
          }
        }
      },
      'Education': {
        subcategories: {
          'Courses': {
            keywords: ['course', 'class', 'lesson', 'homework', 'assignment'],
            domains: ['coursera.org', 'edx.org', 'udemy.com'],
            subfolders: ['Coursera', 'Udemy', 'edX', 'Other Platforms']
          },
          'Schools': {
            keywords: ['university', 'college', 'school', 'academic', 'student'],
            domains: ['.edu'],
            subfolders: ['Merrimack College', 'Other Universities', 'Admissions']
          }
        }
      },
      'Technical': {
        subcategories: {
          'Development': {
            keywords: ['code', 'programming', 'github', 'gitlab', 'commit', 'pull request'],
            domains: ['github.com', 'gitlab.com', 'bitbucket.com'],
            subfolders: ['GitHub', 'GitLab', 'Code Reviews', 'Deployments']
          },
          'Security': {
            keywords: ['security', 'alert', 'mfa', '2fa', 'password', 'login'],
            domains: ['1password.com', 'okta.com'],
            subfolders: ['MFA Codes', 'Login Alerts', 'Security Warnings']
          },
          'Software': {
            keywords: ['software', 'license', 'subscription', 'renewal'],
            domains: ['microsoft.com', 'adobe.com', 'google.com'],
            subfolders: ['Microsoft', 'Adobe', 'Google', 'Renewals']
          }
        }
      },
      'Information': {
        subcategories: {
          'News': {
            keywords: ['news', 'daily', 'morning', 'briefing', 'breaking', 'headline'],
            domains: ['wsj.com', 'nytimes.com', 'cnn.com', 'bbc.com', 'csmonitor.com'],
            subfolders: ['Wall Street Journal', 'NY Times', 'CNN', 'BBC', 'Other Sources']
          },
          'Newsletters': {
            keywords: ['newsletter', 'digest', 'weekly', 'update', 'roundup', 'summary'],
            domains: ['substack.com', 'medium.com'],
            subfolders: ['Tech Newsletters', 'Finance Newsletters', 'Other Newsletters']
          }
        }
      },
      'Spam': {
        subcategories: {
          'Phishing': {
            keywords: ['verify account', 'suspend account', 'click here now', 'urgent action'],
            domains: ['secure-update', 'account-verify', 'security-alert'],
            subfolders: ['Account Phishing', 'Suspicious Links', 'Fake Domains', 'Domain Spoofing']
          },
          'Spam': {
            keywords: ['make money fast', 'work from home', 'free trial', 'buy now'],
            domains: [],
            subfolders: ['Marketing Spam', 'Promotional Spam', 'Suspicious Format', 'General']
          }
        }
      }
    };
  }

  /**
   * Categorize an email into `main/subcategory/folder`, after first screening
   * it for spam/phishing and computing whether it needs action.
   *
   * Order of evaluation:
   *  1. Spam/phishing screen; a hit returns a `Spam/...` result immediately.
   *  2. Action/importance analysis.
   *  3. Every main category is scored and the highest-scoring subcategory
   *     across the whole hierarchy wins (ties keep the earlier category).
   *  4. If nothing scores, the `Archive/General/Uncategorized` fallback is used.
   *
   * @param {Object} email - Email object; reads `from[0].email`, `subject`, `preview`.
   *   Missing fields are treated as empty strings.
   * @returns {Object} Result with `mainCategory`, `subcategory`, `specificFolder`,
   *   `fullPath`, `confidence`, `actionRequired`, `importance`, `keepInInbox`,
   *   `starred`; spam results additionally carry `securityThreat` and `spamReason`.
   */
  categorizeEmail(email) {
    // Keep the original casing around: the capital-letter spam heuristic needs it.
    const rawSubject = String(email?.subject ?? '');
    const rawPreview = String(email?.preview ?? '');
    const from = email?.from?.[0]?.email?.toLowerCase() || '';
    const subject = rawSubject.toLowerCase();
    const preview = rawPreview.toLowerCase();
    const combinedText = `${subject} ${preview}`;

    // FIRST: spam/phishing screen (top priority). analyzeSpamPhishing
    // lower-cases the text itself, so it is safe to hand it the raw strings.
    const securityAnalysis = this.analyzeSpamPhishing(rawSubject, rawPreview, from, email);
    if (securityAnalysis.isSpam || securityAnalysis.isPhishing) {
      const spamSubcategory = securityAnalysis.isPhishing ? 'Phishing' : 'Spam';
      return {
        mainCategory: 'Spam',
        subcategory: spamSubcategory,
        specificFolder: securityAnalysis.category,
        fullPath: `Spam/${spamSubcategory}/${securityAnalysis.category}`,
        confidence: securityAnalysis.confidence,
        actionRequired: false,
        importance: 'spam',
        keepInInbox: false,
        starred: false,
        securityThreat: securityAnalysis.isPhishing,
        spamReason: securityAnalysis.reason
      };
    }

    // SECOND: action requirements and importance for legitimate emails.
    const actionAnalysis = this.analyzeActionRequired(subject, preview, from);

    // THIRD: pick the best-scoring subcategory across ALL main categories.
    // Returning on the first category with any hit would let a single weak
    // keyword in an early category beat a sender-domain match in a later one.
    let bestCategory = null;
    let bestResult = null;
    for (const [mainCategory, categoryData] of Object.entries(this.categoryHierarchy)) {
      const candidate = this.findSubcategory(categoryData.subcategories, from, combinedText);
      if (candidate && (bestResult === null || candidate.score > bestResult.score)) {
        bestCategory = mainCategory;
        bestResult = candidate;
      }
    }

    if (bestResult) {
      const specificFolder = this.determineSpecificFolder(bestResult, from, combinedText);
      return {
        mainCategory: bestCategory,
        subcategory: bestResult.name,
        specificFolder,
        fullPath: `${bestCategory}/${bestResult.name}/${specificFolder}`,
        confidence: bestResult.confidence,
        actionRequired: actionAnalysis.actionRequired,
        importance: actionAnalysis.importance,
        keepInInbox: actionAnalysis.actionRequired,
        starred: actionAnalysis.importance === 'high'
      };
    }

    // Fallback to Archive if no match found.
    return {
      mainCategory: 'Archive',
      subcategory: 'General',
      specificFolder: 'Uncategorized',
      fullPath: 'Archive/General/Uncategorized',
      confidence: 0.1,
      actionRequired: false,
      importance: 'low',
      keepInInbox: false,
      starred: false
    };
  }

  /**
   * Analyze an email for spam and phishing indicators.
   *
   * Checks run in priority order and stop at the first hit:
   * phishing keywords (0.9) -> phishing patterns (0.85) -> suspicious sender
   * fragments (0.95) -> look-alike sender domains (0.8) -> spam keywords (0.7)
   * -> spam patterns (0.6) -> excessive capitals (0.5) -> excessive
   * exclamation marks (0.4).
   *
   * @param {string} subject - Subject in its ORIGINAL casing (needed for the capitals check).
   * @param {string} preview - Preview/body snippet.
   * @param {string} from - Lower-cased sender address.
   * @param {Object} [email] - Full email object; currently unused, kept for interface compatibility.
   * @returns {{isSpam: boolean, isPhishing: boolean, confidence: number, reason: string, category: string}}
   */
  analyzeSpamPhishing(subject = '', preview = '', from = '', email) {
    const text = `${subject} ${preview}`.toLowerCase();
    let isSpam = false;
    let isPhishing = false;
    let confidence = 0;
    let reason = '';
    let category = 'General';

    // PHISHING INDICATORS (High Priority)
    const phishingKeywords = [
      'verify account', 'suspend account', 'locked account', 'security breach',
      'click here now', 'urgent action', 'confirm identity', 'update payment',
      'your account will be', 'limited access', 'unusual activity',
      'verify credit card', 'tax refund', 'irs notice', 'government refund',
      'congratulations you won', 'claim your prize', 'inheritance',
      'prince', 'lottery', 'millions of dollars', 'beneficiary'
    ];
    const phishingPatterns = [
      /act now or/i,
      /click (here|this link) (to|and)/i,
      /confirm.{0,20}(account|identity|payment)/i,
      /suspend.{0,20}account/i,
      /your.{0,10}account.{0,10}(will be|has been).{0,20}(suspended|closed|limited)/i,
      /verify.{0,20}(immediately|now|today)/i,
      /congratulations.{0,20}(winner|won)/i
    ];
    // Fragments (not full domains) that are suspicious anywhere in the sender.
    const phishingDomains = [
      'secure-update', 'account-verify', 'security-alert',
      'paypal-update', 'amazon-security', 'apple-verification',
      'microsoft-security', 'google-verify', 'bank-secure'
    ];

    // SPAM INDICATORS
    const spamKeywords = [
      'make money fast', 'work from home', 'get rich quick',
      'lose weight fast', 'miracle cure', 'free trial',
      'limited time offer', 'buy now', 'discount', 'sale',
      'viagra', 'cialis', 'pharmacy', 'prescription',
      'refinance', 'mortgage rates', 'credit repair',
      'bitcoin', 'cryptocurrency', 'investment opportunity'
    ];
    const spamPatterns = [
      /\$\d+.{0,20}(per|a) (day|week|month|hour)/i,
      /make \$\d+/i,
      /free.{0,20}(money|cash|gift|trial)/i,
      /lose \d+.{0,20}pounds/i,
      /\d+% (off|discount)/i,
      /no.{0,10}(obligation|commitment|risk)/i
    ];

    // Check for phishing keywords.
    for (const keyword of phishingKeywords) {
      if (text.includes(keyword)) {
        isPhishing = true;
        confidence = 0.9;
        reason = `Phishing keyword: "${keyword}"`;
        category = 'Account Phishing';
        break;
      }
    }

    // Check for phishing phrasing patterns.
    if (!isPhishing) {
      for (const pattern of phishingPatterns) {
        if (pattern.test(text)) {
          isPhishing = true;
          confidence = 0.85;
          reason = `Phishing pattern detected`;
          category = 'Suspicious Links';
          break;
        }
      }
    }

    // Check sender address for known suspicious fragments.
    if (!isPhishing) {
      for (const suspiciousDomain of phishingDomains) {
        if (from.includes(suspiciousDomain)) {
          isPhishing = true;
          confidence = 0.95;
          reason = `Suspicious domain: ${suspiciousDomain}`;
          category = 'Fake Domains';
          break;
        }
      }
    }

    // Check for misspelled / decorated versions of well-known domains.
    const legitimateDomains = [
      'paypal.com', 'amazon.com', 'apple.com', 'microsoft.com',
      'google.com', 'facebook.com', 'chase.com', 'bankofamerica.com'
    ];
    if (!isPhishing) {
      for (const legitDomain of legitimateDomains) {
        const domainVariants = this.generateDomainVariants(legitDomain);
        for (const variant of domainVariants) {
          if (from.includes(variant) && !from.includes(legitDomain)) {
            isPhishing = true;
            confidence = 0.8;
            reason = `Domain spoofing: ${variant} mimicking ${legitDomain}`;
            category = 'Domain Spoofing';
            break;
          }
        }
        if (isPhishing) break;
      }
    }

    // Check for spam only if the message was not flagged as phishing.
    if (!isPhishing) {
      for (const keyword of spamKeywords) {
        if (text.includes(keyword)) {
          isSpam = true;
          confidence = 0.7;
          reason = `Spam keyword: "${keyword}"`;
          category = 'Marketing Spam';
          break;
        }
      }
      if (!isSpam) {
        for (const pattern of spamPatterns) {
          if (pattern.test(text)) {
            isSpam = true;
            confidence = 0.6;
            reason = `Spam pattern detected`;
            category = 'Promotional Spam';
            break;
          }
        }
      }
    }

    // Formatting heuristics: shouting subjects and exclamation-mark floods.
    if (!isSpam && !isPhishing) {
      const capsCount = (subject.match(/[A-Z]/g) || []).length;
      // Guard the division: an empty subject would otherwise yield NaN (0/0).
      const capsRatio = subject.length > 0 ? capsCount / subject.length : 0;
      const exclamationCount = (text.match(/!/g) || []).length;
      if (capsRatio > 0.7 && subject.length > 10) {
        isSpam = true;
        confidence = 0.5;
        reason = 'Excessive capital letters';
        category = 'Suspicious Format';
      } else if (exclamationCount > 3) {
        isSpam = true;
        confidence = 0.4;
        reason = 'Excessive exclamation marks';
        category = 'Suspicious Format';
      }
    }

    return {
      isSpam,
      isPhishing,
      confidence,
      reason,
      category
    };
  }

  /**
   * Generate common look-alike (`.com`) variants of a domain: single
   * letter-to-digit swaps of the first matching letter, plus well-known
   * "security"-style prefixes/suffixes.
   *
   * Substitutions that do not change the name (the letter is absent) are
   * skipped so the legitimate name itself is never reported as a variant.
   *
   * @param {string} domain - Legitimate domain, e.g. `paypal.com`.
   * @returns {string[]} Unique variants in generation order.
   */
  generateDomainVariants(domain) {
    const baseName = domain.split('.')[0];
    const variants = new Set();

    // Common character substitutions (first occurrence only).
    const substitutions = [['a', '4'], ['o', '0'], ['e', '3'], ['l', '1']];
    for (const [letter, digit] of substitutions) {
      const swapped = baseName.replace(letter, digit);
      if (swapped !== baseName) {
        variants.add(`${swapped}.com`);
      }
    }

    // Common domain additions.
    variants.add(`${baseName}-security.com`);
    variants.add(`${baseName}-verify.com`);
    variants.add(`${baseName}-update.com`);
    variants.add(`secure-${baseName}.com`);

    return [...variants];
  }

  /**
   * Test whether a sender address belongs to a configured domain entry.
   *
   * The host is the part after the last `@` (the whole string if there is no
   * `@`). Matching rules:
   *  - entry starting with `.` (e.g. `.edu`): host must end with it;
   *  - entry containing a dot (e.g. `paypal.com`): host must equal it or be a
   *    subdomain of it, so `paypal.com.evil.ru` and `batt.com` do not match;
   *  - entry without a dot (a fragment such as `secure-update`): substring of the host.
   *
   * @param {string} from - Lower-cased sender address.
   * @param {string} domain - Domain entry from the hierarchy.
   * @returns {boolean} True when the sender host matches the entry.
   */
  senderMatchesDomain(from, domain) {
    const host = from.slice(from.lastIndexOf('@') + 1);
    if (!domain.includes('.')) {
      return host.includes(domain);
    }
    if (domain.startsWith('.')) {
      return host.endsWith(domain);
    }
    return host === domain || host.endsWith(`.${domain}`);
  }

  /**
   * Decide whether an email requires action and how important it is.
   *
   * High-importance keywords win over medium ones; action phrasing patterns
   * are a last resort and yield `medium`. Mail from a personal mailbox
   * provider is assumed to want a reply unless it looks like a
   * newsletter/update or comes from an automated (no-reply style) sender.
   *
   * @param {string} subject - Email subject.
   * @param {string} preview - Email preview text.
   * @param {string} from - Lower-cased sender address.
   * @returns {{actionRequired: boolean, importance: 'low'|'medium'|'high'}}
   */
  analyzeActionRequired(subject = '', preview = '', from = '') {
    const text = `${subject} ${preview}`.toLowerCase();

    // High importance action keywords
    const highImportanceKeywords = [
      'urgent', 'immediate', 'asap', 'deadline', 'expires', 'overdue',
      'action required', 'response needed', 'please confirm', 'verify',
      'suspended', 'locked', 'fraud', 'security alert', 'breach'
    ];
    // Medium importance action keywords
    const mediumImportanceKeywords = [
      'reminder', 'due', 'payment', 'appointment', 'meeting', 'rsvp',
      'review', 'approval', 'confirm', 'update required', 'please'
    ];
    // Action-oriented patterns
    const actionPatterns = [
      /please (reply|respond|confirm|review|update)/,
      /action (required|needed)/,
      /respond by/,
      /deadline/,
      /due date/,
      /expires/
    ];

    let actionRequired = false;
    let importance = 'low';

    if (highImportanceKeywords.some((keyword) => text.includes(keyword))) {
      actionRequired = true;
      importance = 'high';
    } else if (mediumImportanceKeywords.some((keyword) => text.includes(keyword))) {
      actionRequired = true;
      importance = 'medium';
    } else if (actionPatterns.some((pattern) => pattern.test(text))) {
      actionRequired = true;
      importance = 'medium';
    }

    // Automated senders only need action when the text explicitly says so,
    // so they must not be promoted by the personal-domain heuristic below.
    const automatedMarkers = ['noreply', 'no-reply', 'donotreply', 'automated'];
    const isAutomated = automatedMarkers.some((marker) => from.includes(marker));

    // Personal domain emails often need responses.
    const personalDomains = ['gmail.com', 'yahoo.com', 'outlook.com', 'icloud.com'];
    const isPersonal = personalDomains.some((domain) => this.senderMatchesDomain(from, domain));
    if (
      !actionRequired &&
      !isAutomated &&
      isPersonal &&
      !text.includes('newsletter') &&
      !text.includes('update')
    ) {
      actionRequired = true;
      importance = 'medium';
    }

    return {
      actionRequired,
      importance
    };
  }

  /**
   * Find the best matching subcategory within one main category.
   *
   * Scoring: +10 for each configured domain the sender matches, +1 for each
   * keyword contained in the text. The first subcategory with the highest
   * positive score wins.
   *
   * @param {Object} subcategories - Map of subcategory name -> { keywords, domains, subfolders }.
   * @param {string} from - Lower-cased sender address.
   * @param {string} combinedText - Lower-cased subject + preview.
   * @returns {{name: string, data: Object, score: number, confidence: number}|null}
   *   Best match, or null when nothing scored. `confidence` is `score / 10` capped at 1.
   */
  findSubcategory(subcategories, from, combinedText) {
    let bestMatch = null;
    let bestScore = 0;

    for (const [subcategoryName, subcategoryData] of Object.entries(subcategories)) {
      let score = 0;

      // Sender domain matches carry a high weight.
      for (const domain of subcategoryData.domains) {
        if (this.senderMatchesDomain(from, domain)) {
          score += 10;
        }
      }

      // Each keyword hit adds a little.
      for (const keyword of subcategoryData.keywords) {
        if (combinedText.includes(keyword)) {
          score += 1;
        }
      }

      if (score > bestScore) {
        bestScore = score;
        bestMatch = {
          name: subcategoryName,
          data: subcategoryData,
          score,
          confidence: Math.min(score / 10, 1.0)
        };
      }
    }

    return bestMatch;
  }

  /**
   * Determine the specific leaf folder within a matched subcategory.
   *
   * Subfolders are tried in their configured order. A folder matches when the
   * text contains one of its keywords from the table below, or, for folders
   * without a table entry, the lower-cased folder name itself.
   *
   * @param {{data: {subfolders: string[]}}} subcategoryResult - Result of findSubcategory.
   * @param {string} from - Sender address (currently not used for folder selection).
   * @param {string} combinedText - Lower-cased subject + preview.
   * @returns {string} Matched folder, else the first subfolder, else `'General'`.
   */
  determineSpecificFolder(subcategoryResult, from, combinedText) {
    const subfolders = subcategoryResult.data.subfolders;

    // Simple keyword matching for specific folders.
    const folderKeywords = {
      'Statements': ['statement', 'monthly', 'quarterly'],
      'Alerts': ['alert', 'notification', 'warning'],
      'Confirmations': ['confirmation', 'confirmed', 'receipt'],
      'Tracking': ['tracking', 'shipped', 'delivery'],
      'Orders': ['order', 'purchase', 'bought'],
      'Receipts': ['receipt', 'transaction'],
      'Bills': ['bill', 'due', 'payment'],
      'Claims': ['claim', 'incident'],
      'Appointments': ['appointment', 'scheduled', 'visit']
    };

    for (const folder of subfolders) {
      const keywords = folderKeywords[folder] || [folder.toLowerCase()];
      for (const keyword of keywords) {
        if (combinedText.includes(keyword)) {
          return folder;
        }
      }
    }

    // Default to first subfolder if no specific match.
    return subfolders[0] || 'General';
  }

  /**
   * List every `main/subcategory/folder` path defined in the hierarchy.
   * The `Archive/General/Uncategorized` fallback used by categorizeEmail is
   * not part of the hierarchy and is therefore not included.
   *
   * @returns {string[]} Paths in hierarchy order.
   */
  getAllCategoryPaths() {
    const paths = [];
    for (const [mainCategory, categoryData] of Object.entries(this.categoryHierarchy)) {
      for (const [subcategory, subcategoryData] of Object.entries(categoryData.subcategories)) {
        for (const folder of subcategoryData.subfolders) {
          paths.push(`${mainCategory}/${subcategory}/${folder}`);
        }
      }
    }
    return paths;
  }

  /**
   * Describe the mailbox tree needed to hold every hierarchy path.
   * Parents are always emitted before their children.
   *
   * @returns {Array<{name: string, type: 'main'|'subcategory'|'folder', parent: string|null}>}
   *   One entry per main category, subcategory and leaf folder; `name` is the
   *   full slash-separated path and `parent` is the parent's full path.
   */
  getRequiredMailboxes() {
    const mailboxes = [];
    for (const [mainCategory, categoryData] of Object.entries(this.categoryHierarchy)) {
      // Add main category
      mailboxes.push({
        name: mainCategory,
        type: 'main',
        parent: null
      });
      for (const [subcategory, subcategoryData] of Object.entries(categoryData.subcategories)) {
        // Add subcategory
        mailboxes.push({
          name: `${mainCategory}/${subcategory}`,
          type: 'subcategory',
          parent: mainCategory
        });
        // Add specific folders
        for (const folder of subcategoryData.subfolders) {
          mailboxes.push({
            name: `${mainCategory}/${subcategory}/${folder}`,
            type: 'folder',
            parent: `${mainCategory}/${subcategory}`
          });
        }
      }
    }
    return mailboxes;
  }
}
export default HierarchicalCategorizer;