Skip to main content
Glama
simple-categorizer.js (18.4 kB)
/**
 * Simple Email Categorization System
 *
 * Scores an email against a fixed table of hierarchical labels (15 entries:
 * 14 "Parent/Child" folders plus a top-level "Spam") using sender-domain and
 * keyword matching, and returns one or more labels together with
 * inbox/star/spam hints.
 */
export class SimpleCategorizer {
  constructor() {
    // Hierarchical folder structure with proper parent/child relationships.
    // `keywords` are matched as lowercase substrings of subject/preview;
    // `domains` are matched as substrings of the lowercase sender address.
    this.simpleLabels = {
      'Financial/Banking': {
        keywords: ['bank', 'checking', 'savings', 'statement', 'balance', 'account', 'deposit', 'withdraw', 'fraud', 'alert', 'unusual activity', 'suspicious activity'],
        domains: ['chase.com', 'bankofamerica.com', 'wellsfargo.com', 'citibank.com', 'usbank.com', 'pnc.com', 'capitalone.com']
      },
      'Financial/Bills': {
        keywords: ['bill', 'due', 'payment', 'utility', 'electric', 'gas', 'water', 'internet', 'phone', 'cable'],
        domains: ['comcast.com', 'verizon.com', 'att.com', 'pge.com', 'xfinity.com']
      },
      'Financial/Receipts': {
        keywords: ['receipt', 'purchase', 'transaction', 'payment', 'confirmation', 'order', 'paid', 'apple receipt', 'transfer completed', 'trade confirmation'],
        domains: ['paypal.com', 'stripe.com', 'square.com', 'venmo.com', 'apple.com', 'flourish.com', 'fidelity.com', 'monarchmoney.com']
      },
      'Commerce/Orders': {
        keywords: ['order', 'purchase', 'bought', 'confirmation', 'shipped', 'delivered', 'tracking'],
        domains: ['amazon.com', 'ebay.com', 'walmart.com', 'target.com', 'bestbuy.com', 'costco.com']
      },
      'Commerce/Subscriptions': {
        keywords: ['subscription', 'service', 'software', 'app', 'streaming', 'renewal', 'membership'],
        domains: ['netflix.com', 'spotify.com', 'adobe.com', 'microsoft.com', 'google.com', 'apple.com']
      },
      'Personal/Health': {
        keywords: ['doctor', 'appointment', 'medical', 'health', 'prescription', 'pharmacy', 'lab', 'test'],
        domains: ['kaiser.org', 'bluecross.com', 'mychart.org', 'anthem.com', 'aetna.com', 'humana.com']
      },
      'Personal/Family': {
        keywords: ['family', 'mom', 'dad', 'sister', 'brother', 'cousin', 'aunt', 'uncle', 'dinner', 'hi honey'],
        domains: ['gmail.com', 'yahoo.com', 'outlook.com', 'icloud.com', 'hotmail.com']
      },
      'Personal/Travel': {
        keywords: ['flight', 'hotel', 'booking', 'reservation', 'trip', 'travel', 'vacation', 'itinerary'],
        domains: ['airbnb.com', 'southwest.com', 'amtrak.com', 'booking.com', 'expedia.com', 'hotels.com']
      },
      'Professional/GitHub': {
        keywords: ['github', 'git', 'commit', 'pull request', 'merge', 'repository', 'code', 'programming'],
        domains: ['github.com', 'gitlab.com', 'bitbucket.com']
      },
      'Professional/Services': {
        keywords: ['service', 'support', 'ticket', 'help', 'technical', 'billing', 'account'],
        domains: ['leavemealone.com', 'slack.com', 'anthropic.com', 'openai.com']
      },
      'Professional/Security': {
        keywords: ['security', 'alert', 'mfa', '2fa', 'password', 'login', 'authentication', 'verification'],
        domains: ['1password.com', 'okta.com', 'auth0.com']
      },
      'Information/News-WSJ': {
        keywords: ['wall street journal', 'wsj', 'morning', 'briefing', 'markets', 'finance'],
        domains: ['wsj.com']
      },
      'Information/News-Other': {
        keywords: ['news', 'daily', 'breaking', 'headline', 'update', 'report'],
        domains: ['nytimes.com', 'cnn.com', 'bbc.com', 'csmonitor.com', 'washingtonpost.com', 'reuters.com']
      },
      'Information/Newsletters': {
        keywords: ['newsletter', 'digest', 'weekly', 'monthly', 'update', 'unsubscribe', 'offers', 'deals', 'refinance', 'scholarship', 'promotion', 'sale', 'discount', 'coupon', 'special offer', 'limited time', 'exclusive', 'members only'],
        domains: ['sololearn.com', 'grove.co', 'substackcdn.com', 'mailchimp.com', 'constantcontact.com', 'offers.com', 'groupon.com', 'retailmenot.com', 'honey.com']
      },
      'Spam': {
        keywords: ['verify account', 'suspend account', 'click here now', 'urgent action', 'confirm identity', 'update payment', 'verify your account', 'account will be suspended', 'suspended!!!', 'make money fast', 'work from home', 'get rich quick', 'miracle cure', 'lose weight fast', 'pharmacy online', 'viagra', 'cialis', 'harvard scientists', 'harvard researchers', 'harvard doctors', 'stanford scientists', 'stanford researchers', 'bedtime ritual', 'mega hard-ons', 'penis pump', 'erection quality', 'dementia test', 'blood sugar ritual', 'prostate shrinking', 'neuropathy pain', 'big pharma nervous', 'doctors shocked', 'breakthrough discovery', 'medical breakthrough', 'health breakthrough', 'home depot reward', 'kohls reward', 'sams club reward', 'gift card deal', 'congratulations youve won', 'milf hookup', 'sexual enhancement', 'penis enlargement', 'adult dating', 'auto insurance rate reduced', 'student loan refinancing', 'final notice', 'lower your rate by 50%', 'pre-approved refinancing', 'skin care', 'anti-aging', 'weight loss', 'erectile dysfunction', 'ed treatment'],
        domains: ['secure-update', 'account-verify', 'security-alert', 'paypal-update', 'amazon-security', 'suspicious-domain.com', 'okudzetoablakwa.com', 'cladweb.com', 'vprhousing.com', 'bottleofale.com', 'thetechbross.com', 'unleashpetsupplies.com', 'petreliyski.com', 'supercrewdeals.com', 'satisfyinglights.com', 'digitalgizmodeals.com', 'accuratetechnoservices.com', 'snacstyle.com', 'unitedpersonalfitness.com', 'worldcargofrieght.com']
      }
    };
  }

  /**
   * Categorize an email against the label table, with multi-label support.
   *
   * A missing `from`, `subject`, or `preview` is treated as an empty string,
   * so a sparse (or absent) email object falls through to 'Archive' instead
   * of throwing.
   *
   * @param {Object} email - Email object with `from` (array of `{ email }`), `subject`, `preview`
   * @returns {Object} - `{ categories, primaryCategory, confidence, actionRequired,
   *   keepInInbox, starred, isSpam, matchDetails }`
   */
  categorizeEmail(email) {
    const from = email?.from?.[0]?.email?.toLowerCase() || '';
    const subject = email?.subject?.toLowerCase() || '';
    const preview = email?.preview?.toLowerCase() || '';
    const combinedText = `${subject} ${preview}`;

    const confidenceThreshold = 0.1; // Minimum confidence to include a label

    // Score every label and keep those that clear the minimum threshold
    const matches = [];
    for (const [labelName, labelData] of Object.entries(this.simpleLabels)) {
      const matchResult = this.calculateLabelMatch(labelName, labelData, from, combinedText, subject, preview);
      if (matchResult.confidence >= confidenceThreshold) {
        matches.push(matchResult);
      }
    }

    // Highest confidence first
    matches.sort((a, b) => b.confidence - a.confidence);

    const actionRequired = this.checkActionRequired(subject, preview, from);

    // Nothing matched: file under Archive with a token confidence
    if (matches.length === 0) {
      return {
        categories: ['Archive'],
        primaryCategory: 'Archive',
        confidence: 0.05, // Very low confidence for unmatched
        actionRequired: actionRequired,
        keepInInbox: actionRequired,
        starred: false,
        isSpam: false,
        matchDetails: []
      };
    }

    // Spam/phishing takes priority over every other label, but only when
    // its confidence is reasonably high.
    const spamMatches = matches.filter(m => m.label === 'Spam' && m.confidence > 0.15);
    if (spamMatches.length > 0) {
      const topSpam = spamMatches[0];
      return {
        categories: [topSpam.label],
        primaryCategory: topSpam.label,
        confidence: topSpam.confidence,
        actionRequired: false,
        keepInInbox: false,
        starred: false,
        isSpam: true,
        matchDetails: spamMatches
      };
    }

    // Keep only strong matches; if there are none, fall back to the single best one
    const strongMatches = matches.filter(m => m.confidence >= 0.25);
    const finalMatches = strongMatches.length > 0 ? strongMatches : [matches[0]];

    // Add labels implied by overlap rules, then dedupe and re-sort
    const overlappingMatches = this.findOverlappingCategories(finalMatches, combinedText, from);
    const uniqueMatches = this.deduplicateMatches([...finalMatches, ...overlappingMatches]);
    uniqueMatches.sort((a, b) => b.confidence - a.confidence);

    const isHighPriority = actionRequired && this.isHighPriority(subject, preview);

    return {
      categories: uniqueMatches.map(m => m.label),
      primaryCategory: uniqueMatches[0].label,
      confidence: uniqueMatches[0].confidence,
      actionRequired: actionRequired,
      keepInInbox: actionRequired,
      starred: isHighPriority,
      isSpam: false,
      matchDetails: uniqueMatches
    };
  }

  /**
   * Check if email requires action.
   *
   * True when the text contains an action keyword, or when the sender address
   * contains a personal mail domain and nothing marks the mail as automated.
   *
   * @param {string} subject - Email subject
   * @param {string} preview - Email preview
   * @param {string} from - Sender email (lowercase)
   * @returns {boolean}
   */
  checkActionRequired(subject, preview, from) {
    const text = (subject + ' ' + preview).toLowerCase();

    const actionKeywords = [
      'urgent', 'immediate', 'asap', 'deadline', 'expires', 'overdue',
      'action required', 'response needed', 'please confirm', 'please reply',
      'reminder', 'due', 'appointment', 'meeting', 'rsvp'
    ];
    if (actionKeywords.some(keyword => text.includes(keyword))) {
      return true;
    }

    // Personal emails (non-automated) usually need responses
    const personalDomains = ['gmail.com', 'yahoo.com', 'outlook.com', 'icloud.com'];
    if (personalDomains.some(domain => from.includes(domain))) {
      const automatedKeywords = ['newsletter', 'unsubscribe', 'automated', 'no-reply', 'noreply'];
      const isAutomated = automatedKeywords.some(keyword => text.includes(keyword) || from.includes(keyword));
      return !isAutomated;
    }

    return false;
  }

  /**
   * Check if email is high priority (any high-priority keyword in subject or preview).
   *
   * @param {string} subject - Email subject
   * @param {string} preview - Email preview
   * @returns {boolean}
   */
  isHighPriority(subject, preview) {
    const text = (subject + ' ' + preview).toLowerCase();
    const highPriorityKeywords = [
      'urgent', 'immediate', 'asap', 'deadline', 'expires today',
      'security alert', 'fraud', 'suspended', 'locked'
    ];
    return highPriorityKeywords.some(keyword => text.includes(keyword));
  }

  /**
   * Calculate confidence score for a specific label match.
   *
   * score    = domain points + keyword points + context bonus
   * maxScore = 50 per listed domain + 10 per listed keyword + 20 (bonus cap), floored at 100
   *
   * NOTE(review): because maxScore grows with the size of the label's lists,
   * labels with long lists (Spam: 20 domains and 54 keywords, maxScore 1560)
   * need many simultaneous hits to reach the thresholds used by
   * categorizeEmail. Behaviour intentionally left unchanged here; confirm
   * whether the normalisation is intended.
   *
   * @param {string} labelName - Label name
   * @param {Object} labelData - Label configuration (`keywords`, `domains`)
   * @param {string} from - Sender email (lowercase)
   * @param {string} combinedText - Subject + ' ' + preview text (lowercase)
   * @param {string} subject - Email subject (lowercase)
   * @param {string} preview - Email preview (lowercase)
   * @returns {Object} - Match result with confidence in [0, 1]
   */
  calculateLabelMatch(labelName, labelData, from, combinedText, subject, preview) {
    let score = 0;
    let maxScore = 0;
    const matchTypes = [];
    const keywordMatches = [];
    const domainMatches = [];

    // Domain matching (high weight)
    let domainScore = 0;
    for (const domain of labelData.domains) {
      maxScore += 50; // Each possible domain match adds to max
      if (!from.includes(domain)) {
        continue;
      }
      // Full credit when the sender is the domain itself, or the domain is
      // preceded by '@' (address host) or '.' (subdomain); any other
      // substring hit only earns partial credit.
      const isFullMatch = from === domain
        || from.includes('@' + domain)
        || from.includes('.' + domain);
      if (isFullMatch) {
        domainScore += 50;
        domainMatches.push(domain);
      } else {
        domainScore += 25;
        domainMatches.push(domain + ' (partial)');
      }
    }
    score += domainScore;
    if (domainScore > 0) matchTypes.push('domain');

    // Keyword matching with weighted importance
    let keywordScore = 0;
    for (const keyword of labelData.keywords) {
      maxScore += 10; // Each possible keyword adds to max
      if (subject.includes(keyword)) {
        // Subject hit (higher weight)
        keywordScore += 10;
        keywordMatches.push(keyword + ' (subject)');
      } else if (preview.includes(keyword)) {
        // Preview hit (medium weight)
        keywordScore += 7;
        keywordMatches.push(keyword + ' (preview)');
      } else if (combinedText.includes(keyword)) {
        // Only reachable when a multi-word keyword straddles the
        // subject/preview boundary (lower weight)
        keywordScore += 3;
        keywordMatches.push(keyword + ' (text)');
      }
    }
    score += keywordScore;
    if (keywordScore > 0) matchTypes.push('keyword');

    // Context-based bonus scoring
    let contextBonus = 0;

    // Sender reputation bonus. Compare against the sender's host rather than
    // doing a substring search, so look-alike hosts such as "notgithub.com"
    // do not earn the trusted bonus.
    const trustedDomains = ['github.com', 'paypal.com', 'amazon.com', 'google.com', 'microsoft.com'];
    const senderHost = from.slice(from.lastIndexOf('@') + 1);
    const isTrustedSender = trustedDomains.some(
      domain => senderHost === domain || senderHost.endsWith('.' + domain)
    );
    if (isTrustedSender) {
      contextBonus += 10;
      matchTypes.push('trusted-sender');
    }

    // Multiple keyword presence bonus
    if (keywordMatches.length > 2) {
      contextBonus += 5;
      matchTypes.push('multiple-keywords');
    }

    // Subject line relevance bonus
    if (subject.length > 0 && keywordMatches.some(k => k.includes('(subject)'))) {
      contextBonus += 5;
      matchTypes.push('subject-relevance');
    }

    score += contextBonus;
    maxScore += 20; // Max possible context bonus

    // Ensure we have a reasonable max score
    maxScore = Math.max(maxScore, 100);

    // Calculate confidence (0.0 to 1.0)
    const confidence = Math.min(score / maxScore, 1.0);

    return {
      label: labelName,
      confidence: confidence,
      score: score,
      maxScore: maxScore,
      matchTypes: matchTypes,
      keywordMatches: keywordMatches,
      domainMatches: domainMatches,
      details: {
        domainScore,
        keywordScore,
        contextBonus,
        totalScore: score
      }
    };
  }

  /**
   * Find overlapping categories that should be applied together.
   *
   * For each current match that has an overlap rule, adds the rule's label
   * when a trigger word is present in the text, the match is confident
   * enough, and the label is not already among the current matches.
   *
   * @param {Array} matches - Current matches
   * @param {string} combinedText - Email text (lowercase)
   * @param {string} from - Sender email (currently unused)
   * @returns {Array} - Additional overlapping matches
   */
  findOverlappingCategories(matches, combinedText, from) {
    const overlapping = [];
    const currentLabels = matches.map(m => m.label);

    // Overlap rules keyed by the label that triggers them
    const overlapRules = {
      // GitHub + Security alerts
      'Professional/GitHub': {
        triggers: ['security', 'alert', 'vulnerability', 'breach', 'suspicious', 'sign-in', 'login'],
        addLabel: 'Professional/Security',
        minConfidence: 0.2
      },
      // Financial + Security (banking alerts)
      'Financial/Banking': {
        triggers: ['fraud', 'suspicious', 'alert', 'security', 'locked', 'blocked', 'unusual activity', 'suspicious activity'],
        addLabel: 'Professional/Security',
        minConfidence: 0.2
      },
      // Shopping + Financial (receipts for purchases)
      'Commerce/Orders': {
        triggers: ['payment', 'receipt', 'transaction', 'charged', 'billed', 'confirmation'],
        addLabel: 'Financial/Receipts',
        minConfidence: 0.2
      },
      // Work + Personal (family business)
      'Personal/Family': {
        triggers: ['business', 'work', 'contract', 'invoice'],
        addLabel: 'Financial/Bills',
        minConfidence: 0.5
      },
      // Professional Services + Security
      'Professional/Services': {
        triggers: ['security', 'alert', 'password', 'mfa', '2fa'],
        addLabel: 'Professional/Security',
        minConfidence: 0.3
      },
      // Travel + Financial (bookings/payments)
      'Personal/Travel': {
        triggers: ['payment', 'receipt', 'confirmation', 'charged'],
        addLabel: 'Financial/Receipts',
        minConfidence: 0.3
      }
    };

    for (const match of matches) {
      const rule = overlapRules[match.label];
      if (!rule || currentLabels.includes(rule.addLabel)) {
        continue;
      }

      const matchedTriggers = rule.triggers.filter(trigger => combinedText.includes(trigger));
      if (matchedTriggers.length === 0 || match.confidence < rule.minConfidence) {
        continue;
      }

      // Derived label gets 80% of the source confidence, capped at 0.9
      const overlapConfidence = Math.min(match.confidence * 0.8, 0.9);
      overlapping.push({
        label: rule.addLabel,
        confidence: overlapConfidence,
        matchTypes: ['overlap-rule'],
        keywordMatches: matchedTriggers,
        domainMatches: [],
        details: {
          derivedFrom: match.label,
          rule: 'overlap-detection'
        }
      });
    }

    return overlapping;
  }

  /**
   * Remove duplicate matches, keeping the highest-confidence entry per label.
   *
   * @param {Array} matches - Array of matches
   * @returns {Array} - Deduplicated matches (first-seen label order)
   */
  deduplicateMatches(matches) {
    const seen = new Map();
    for (const match of matches) {
      if (!seen.has(match.label) || seen.get(match.label).confidence < match.confidence) {
        seen.set(match.label, match);
      }
    }
    return Array.from(seen.values());
  }

  /**
   * Get the label names (keys of the label table) for creation.
   *
   * @returns {string[]}
   */
  getSimpleLabels() {
    return Object.keys(this.simpleLabels);
  }

  /**
   * Create hierarchical folders using FastMail client.
   *
   * Labels are created one at a time. A failure whose message mentions
   * 'already exists' or 'duplicate' is counted as skipped; any other failure
   * (including a rejection that is not an Error object) is counted as failed
   * and does not stop the remaining labels from being processed.
   *
   * @param {Object} client - Object exposing `createHierarchicalMailbox(name)`
   * @returns {Promise<{created: number, skipped: number, failed: number}>}
   */
  async createSimpleLabels(client) {
    const labels = this.getSimpleLabels();
    const results = { created: 0, skipped: 0, failed: 0 };

    console.log(`📁 Creating ${labels.length} hierarchical folders...`);

    for (const labelName of labels) {
      try {
        const result = await client.createHierarchicalMailbox(labelName);
        console.log(`✅ Created hierarchical folder: ${labelName} (ID: ${result.id})`);
        results.created++;
      } catch (error) {
        // A rejection is not guaranteed to be an Error; normalise it to text
        // so the classification below cannot itself throw.
        const message = typeof error?.message === 'string' ? error.message : String(error);
        if (message.includes('already exists') || message.includes('duplicate')) {
          console.log(`⏭️ Skipped: ${labelName} (already exists)`);
          results.skipped++;
        } else {
          console.log(`❌ Failed: ${labelName} - ${message}`);
          results.failed++;
        }
      }
    }

    return results;
  }
}

export default SimpleCategorizer;

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/gr3enarr0w/fastmail-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.