// indexer.js
import fs from "fs";
import path from "path";
import { exec } from "child_process";
import { promisify } from "util";
import * as lancedb from "@lancedb/lancedb";
import { pipeline } from "@xenova/transformers";
import {
  validateEmailPath,
  validateMailboxName,
  escapeAppleScript,
  validateLimit,
  validateLanceDBId,
  escapeSQL,
  stripHtmlTags
} from "./lib/validators.js";
import { safeSqlite3Json, safeOsascript } from "./lib/shell.js";

// Re-export contact functions for use by other modules
export {
  loadContacts,
  resolveEmail,
  resolvePhone,
  resolveByName,
  lookupContact,
  searchContacts,
  getContactIdentifiers,
  formatContact,
  getContactStats
} from "./contacts.js";

const execAsync = promisify(exec);

// Support env var overrides for testing with separate index
export const INDEX_DIR = process.env.APPLE_TOOLS_INDEX_DIR ||
  path.join(process.env.HOME, ".apple-tools-mcp", "vector-index");
const META_FILE = process.env.APPLE_TOOLS_META_FILE ||
  path.join(process.env.HOME, ".apple-tools-mcp", "index-meta.json");

// Support filtering by date for testing (default: null = no filter)
const DAYS_BACK = process.env.APPLE_TOOLS_INDEX_DAYS_BACK
  ? parseInt(process.env.APPLE_TOOLS_INDEX_DAYS_BACK, 10)
  : null;

const MAIL_DIR = path.join(process.env.HOME, "Library", "Mail");

// Load/save index metadata (timestamps, etc.)
function loadIndexMeta() {
  try {
    if (fs.existsSync(META_FILE)) {
      return JSON.parse(fs.readFileSync(META_FILE, "utf-8"));
    }
  } catch {}
  return {};
}

function saveIndexMeta(meta) {
  const dir = path.dirname(META_FILE);
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
  fs.writeFileSync(META_FILE, JSON.stringify(meta, null, 2));
}

const MESSAGES_DB = path.join(process.env.HOME, "Library", "Messages", "chat.db");
const CALENDAR_DB = path.join(process.env.HOME, "Library", "Group Containers", "group.com.apple.calendar", "Calendar.sqlitedb");

const BATCH_SIZE = 32; // Optimized for batch embedding throughput
const BATCH_DELAY_MS = 100; // Throttle to prevent thermal crashes

// Mac Absolute Time epoch: Jan 1, 2001 00:00:00 UTC
const MAC_ABSOLUTE_EPOCH = 978307200;

let embeddingPipeline = null;
let db = null;
let tables = {};

async function getEmbedder() {
  if (!embeddingPipeline) {
    console.error("Loading embedding model (first time may take a minute)...");
    embeddingPipeline = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
    console.error("Embedding model loaded.");
  }
  return embeddingPipeline;
}

export async function embed(text) {
  const embedder = await getEmbedder();
  const result = await embedder(text, { pooling: "mean", normalize: true });
  return Array.from(result.data);
}

// True batch embedding - single forward pass for multiple texts
const EMBEDDING_DIM = 384; // all-MiniLM-L6-v2 dimension

// Timeout wrapper for promises
function withTimeout(promise, timeoutMs, operation = "Operation") {
  return Promise.race([
    promise,
    new Promise((_, reject) =>
      setTimeout(() => reject(new Error(`${operation} timed out after ${timeoutMs}ms`)), timeoutMs)
    )
  ]);
}

export async function embedBatch(texts) {
  if (texts.length === 0) return [];
  const embedder = await getEmbedder();

  // Add timeout protection: 30 seconds per batch
  // This prevents the process from hanging if embedding gets stuck
  const EMBEDDING_TIMEOUT_MS = 30000;

  try {
    // Process all texts in a single forward pass
    const result = await withTimeout(
      embedder(texts, { pooling: "mean", normalize: true }),
      EMBEDDING_TIMEOUT_MS,
      "Batch embedding"
    );

    // result.data is a flat Float32Array of shape [batch_size * embedding_dim]
    const embeddings = [];
    for (let i = 0; i < texts.length; i++) {
      const start = i * EMBEDDING_DIM;
      const end = start + EMBEDDING_DIM;
      embeddings.push(Array.from(result.data.slice(start, end)));
    }
    return embeddings;
  } catch (error) {
    if (error.message.includes("timed out")) {
      console.error(`⚠️ Embedding batch timed out (${texts.length} texts). Retrying with smaller batches...`);
      // Fall back to processing one at a time with timeouts
      const embeddings = [];
      for (const text of texts) {
        try {
          const emb = await withTimeout(embed(text), 10000, "Single embedding");
          embeddings.push(emb);
        } catch (e) {
          console.error(`Failed to embed text: ${e.message}`);
          // Return zero vector as fallback
          embeddings.push(new Array(EMBEDDING_DIM).fill(0));
        }
      }
      return embeddings;
    }
    throw error;
  }
}
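// Because vectors come back normalized (normalize: true above), cosine
// similarity between two embeddings reduces to a plain dot product. An
// illustrative (not executed) usage sketch:
//
//   const [a, b] = await embedBatch(["lunch with Alice", "dinner plans"]);
//   a.length;                                             // -> 384 (EMBEDDING_DIM)
//   const cosine = a.reduce((s, v, i) => s + v * b[i], 0); // similarity score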
// ============ UTILITY FUNCTIONS ============

// Extract email address from "Name <email>" format
function extractEmail(str) {
  if (!str) return "";
  const match = str.match(/<([^>]+)>/);
  if (match) return match[1].toLowerCase();
  // If no angle brackets, check if it looks like an email
  if (str.includes("@")) return str.trim().toLowerCase();
  return ""; // Return empty string if no email found
}

// Extract all email addresses from a string (handles multiple recipients)
function extractEmails(str) {
  if (!str) return [];
  const emails = [];
  // Split by comma and process each
  for (const part of str.split(",")) {
    const email = extractEmail(part.trim());
    if (email && email.includes("@")) {
      emails.push(email);
    }
  }
  return emails;
}

// Parse date string to timestamp
function parseDateTime(dateStr) {
  if (!dateStr) return 0;
  try {
    // Try direct parsing
    let d = new Date(dateStr);
    if (!isNaN(d.getTime())) return d.getTime();
    // Handle AppleScript format: "Friday, January 10, 2025 at 9:00:00 AM"
    const appleMatch = dateStr.match(/(\w+), (\w+ \d+, \d+) at (\d+:\d+:\d+ [AP]M)/i);
    if (appleMatch) {
      d = new Date(`${appleMatch[2]} ${appleMatch[3]}`);
      if (!isNaN(d.getTime())) return d.getTime();
    }
    return 0;
  } catch {
    return 0;
  }
}

// ============ EMAIL INDEXING ============

// Extract mailbox name from file path (e.g., "INBOX.mbox" -> "INBOX")
function extractMailbox(filePath) {
  const match = filePath.match(/([^/]+)\.mbox/);
  return match ? match[1] : "Unknown";
}

// Mailbox priority for deduplication - lower number = higher priority
// When the same email exists in multiple folders (IMAP behavior), we prefer
// to index the copy from higher-priority folders so mailbox searches work correctly
const MAILBOX_PRIORITY = {
  // Highest priority - user's primary mailboxes
  "INBOX": 1,
  "Sent": 2, "Sent Messages": 2, "Sent Mail": 2,
  "Drafts": 3,
  "Flagged": 4,
  // Medium priority - organizational folders
  "Archive": 5, "Archives": 5,
  // Low priority - catch-all folders
  "All Mail": 90, "[Gmail]": 90,
  // Lowest priority - folders users rarely search intentionally
  "Junk": 95, "Spam": 95, "Junk E-mail": 95,
  "Trash": 99, "Deleted Messages": 99, "Deleted Items": 99, "Bin": 99
};

// Get priority for a mailbox (lower = higher priority)
// Unknown/custom folders get priority 50 (between Archive and All Mail)
function getMailboxPriority(filePath) {
  const mailbox = extractMailbox(filePath);
  return MAILBOX_PRIORITY[mailbox] ?? 50;
}
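// Illustrative effect of the priority sort used during indexing (paths are
// hypothetical): the INBOX copy of a duplicated IMAP message is processed
// before the Trash copy, so messageId deduplication keeps the INBOX one.
//
//   const paths = [
//     "/Users/me/Library/Mail/V10/.../Trash.mbox/.../1.emlx",  // priority 99
//     "/Users/me/Library/Mail/V10/.../INBOX.mbox/.../2.emlx"   // priority 1
//   ];
//   paths.sort((a, b) => getMailboxPriority(a) - getMailboxPriority(b));
//   // -> INBOX copy first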
function parseEmlx(filePath) {
  try {
    const rawContent = fs.readFileSync(filePath, "utf-8");
    const isPartial = filePath.endsWith(".partial.emlx");

    // Handle Apple Mail envelope format: first line is byte count, followed by newline
    // Strip the preamble to get the actual RFC822 email content
    let content = rawContent;
    const lines = rawContent.split("\n");
    if (lines[0] && /^\d+\s*$/.test(lines[0])) {
      // First line is a number (byte count) with optional whitespace, skip it
      content = lines.slice(1).join("\n");
    }

    // Extract headers
    const fromMatch = content.match(/^From:\s*(.+)$/m);
    const subjectMatch = content.match(/^Subject:\s*(.+)$/m);
    const dateMatch = content.match(/^Date:\s*(.+)$/m);
    const toMatch = content.match(/^To:\s*(.+)$/m);
    const ccMatch = content.match(/^Cc:\s*(.+)$/m);
    const messageIdMatch = content.match(/^Message-ID:\s*(.+)$/im);
    const flaggedMatch = content.match(/^X-Flagged:\s*(.+)$/im) || content.match(/flags.*flagged/i);

    // Check for attachments
    const hasAttachment = /Content-Disposition:\s*attachment/i.test(content) ||
      /multipart\/mixed/i.test(content) ||
      /filename=/i.test(content);

    // Extract body
    const headerEnd = content.search(/\r?\n\r?\n/);
    let body = "";
    if (headerEnd > 0) {
      body = content.substring(headerEnd + 2, Math.min(headerEnd + 2000, content.length));
      // Use safe HTML stripping to prevent ReDoS
      body = stripHtmlTags(body);
    }

    const fromRaw = fromMatch?.[1]?.trim() || "";
    const toRaw = toMatch?.[1]?.trim() || "";
    const ccRaw = ccMatch?.[1]?.trim() || "";
    const subject = subjectMatch?.[1]?.trim() || "";
    const date = dateMatch?.[1]?.trim() || "";

    // Extract normalized email addresses
    const fromEmail = extractEmail(fromRaw);
    const toEmails = extractEmails(toRaw);
    const ccEmails = extractEmails(ccRaw);
    const allRecipients = [...toEmails, ...ccEmails];

    // Parse date to timestamp
    const dateTimestamp = parseDateTime(date);

    // Determine mailbox and if sent
    const mailbox = extractMailbox(filePath);
    const isSent = mailbox.toLowerCase().includes("sent");
    const isFlagged = !!flaggedMatch;

    const searchText = `From: ${fromRaw}\nTo: ${toRaw}\nSubject: ${subject}\n${body}`.substring(0, 1000);
    const messageId = messageIdMatch?.[1]?.trim() || "";

    // For partial emails, allow indexing even with minimal data
    // Partial emails may lack complete headers but should still be indexed
    const hasMinimalContent = subject.length > 0 || fromRaw.length > 0 || searchText.length > 10;
    if (!hasMinimalContent && isPartial) {
      console.error(`[PARTIAL] Skipping ${filePath}: insufficient content (subject: ${subject.length}, from: ${fromRaw.length}, searchText: ${searchText.length})`);
      return null;
    }

    return {
      from: fromRaw,
      fromEmail,
      to: toRaw,
      toEmails: allRecipients.join(","),
      subject,
      date,
      dateTimestamp,
      hasAttachment,
      mailbox,
      isSent,
      isFlagged,
      messageId,
      body: body.substring(0, 500),
      searchText,
      filePath
    };
  } catch (e) {
    const fileName = filePath.split("/").pop();
    const isPartial = filePath.endsWith(".partial.emlx");
    console.error(`[PARSE_ERROR] ${isPartial ? "PARTIAL" : "COMPLETE"} ${fileName}: ${e.message}`);
    return null;
  }
}
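// For reference, a sketch of the .emlx envelope parseEmlx consumes (contents
// hypothetical): a byte-count line, then the raw RFC 822 message. Real files
// also append an XML plist of message flags after the body, which the header
// regexes above simply ignore.
//
//   2847
//   From: Alice Example <alice@example.com>
//   To: bob@example.com
//   Subject: Quarterly report
//   Date: Fri, 10 Jan 2025 09:00:00 -0800
//   Message-ID: <abc123@example.com>
//
//   Body text follows the blank line...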
"PARTIAL" : "COMPLETE"} ${fileName}: ${e.message}`); return null; } } // Full scan - used for first run and rebuild_index // Includes both .emlx and .partial.emlx files (partial = not fully downloaded via IMAP) async function findAllEmlxFiles() { try { // Find both .emlx and .partial.emlx files const cmd = `find "${MAIL_DIR}" \\( -name "*.emlx" -o -name "*.partial.emlx" \\) 2>/dev/null`; const { stdout } = await execAsync(cmd, { encoding: "utf-8", maxBuffer: 50 * 1024 * 1024, timeout: 120000 }); return stdout.trim().split("\n").filter(f => f); } catch (e) { console.error("Error finding emlx files:", e.message); return []; } } // Fast incremental scan - uses find with -mtime filter (more reliable than mdfind/Spotlight) async function findNewEmlxFiles(sinceTimestamp) { if (!sinceTimestamp) { // First run - fall back to full scan console.error("No previous index timestamp, doing full scan..."); return findAllEmlxFiles(); } try { // Convert timestamp to days ago for -mtime filter // -mtime -N means modified in the last N days const daysAgo = Math.ceil((Date.now() - sinceTimestamp) / (24 * 60 * 60 * 1000)); // Use find with -mtime instead of mdfind for reliability // find is more reliable than Spotlight which can have stale/incomplete indexes const cmd = `find "${MAIL_DIR}" \\( -name "*.emlx" -o -name "*.partial.emlx" \\) -mtime -${daysAgo} 2>/dev/null`; const { stdout } = await execAsync(cmd, { encoding: "utf-8", maxBuffer: 50 * 1024 * 1024, timeout: 120000 }); const files = stdout.trim().split("\n").filter(f => f); console.error(`find found ${files.length} new/modified emails in last ${daysAgo} days`); return files; } catch (e) { console.error("find failed, falling back to full scan:", e.message); return findAllEmlxFiles(); } } // ============ MESSAGES INDEXING ============ /** * Extract text from NSAttributedString BLOB (attributedBody field) * macOS stores message text in attributedBody as NSKeyedArchiver format * The text is embedded as UTF-8 after the NSString class marker */ /** * Validate extracted text to ensure it's not garbage */ function validateExtractedText(text) { if (!text || typeof text !== 'string') return false; text = text.trim(); if (text.length < 1 || text.length > 10000) return false; // Must be mostly printable characters const printableRatio = (text.match(/[\x20-\x7E\u00A0-\uFFFF]/g) || []).length / text.length; return printableRatio >= 0.8; } /** * Strategy 1: Current NSString+'+' pattern (backward compatibility) */ function extractStrategy1_CurrentPattern(buf) { const nsStringMarker = Buffer.from('NSString'); let markerIndex = buf.indexOf(nsStringMarker); if (markerIndex === -1) return null; const searchStart = markerIndex + nsStringMarker.length; const plusIndex = buf.indexOf(0x2B, searchStart); // '+' character if (plusIndex === -1 || plusIndex >= buf.length - 2) return null; const lengthByte = buf[plusIndex + 1]; const textStart = plusIndex + 2; if (lengthByte === 0 || textStart >= buf.length) return null; let textLength = lengthByte; let actualTextStart = textStart; if (lengthByte & 0x80) { actualTextStart = textStart; textLength = Math.min(500, buf.length - actualTextStart); } const textEnd = Math.min(actualTextStart + textLength, buf.length); let text = buf.slice(actualTextStart, textEnd).toString('utf-8'); const cleanEnd = text.search(/[\x00-\x08\x0B\x0C\x0E-\x1F]|(\x84\x84)/); if (cleanEnd > 0) { text = text.substring(0, cleanEnd); } return text.trim(); } /** * Strategy 2: Direct UTF-8 scanning - find longest printable sequence */ function 
/**
 * Strategy 2: Direct UTF-8 scanning - find longest printable sequence
 */
function extractStrategy2_DirectUTF8Scan(buf) {
  const fullText = buf.toString('utf-8');
  // Find sequences of printable characters at least 20 chars long
  const regex = /[\x20-\x7E\u00A0-\uFFFF]{20,}/g;
  const matches = fullText.match(regex);
  if (!matches || matches.length === 0) return null;
  // Return longest match
  return matches.reduce((a, b) => a.length > b.length ? a : b).trim();
}

/**
 * Strategy 3: NSKeyedArchiver $objects parser - handles modern macOS format
 */
function extractStrategy3_NSKeyedArchiver(buf) {
  const objectsMarker = Buffer.from('$objects');
  const objectsIndex = buf.indexOf(objectsMarker);
  if (objectsIndex === -1) return null;

  const searchStart = objectsIndex + objectsMarker.length;
  const nsStringMarker = Buffer.from('NSString');
  const extractedTexts = [];
  let currentPos = searchStart;

  while (currentPos < buf.length - 100) {
    const nsIdx = buf.indexOf(nsStringMarker, currentPos);
    if (nsIdx === -1) break;

    // Try to extract text after this NSString marker
    const textStart = nsIdx + nsStringMarker.length + 10; // Skip class definition
    const potentialText = buf.slice(textStart, Math.min(textStart + 500, buf.length))
      .toString('utf-8');

    const cleanEnd = potentialText.search(/[\x00-\x08\x0B\x0C\x0E-\x1F]/);
    if (cleanEnd > 5) {
      const extracted = potentialText.substring(0, cleanEnd).trim();
      if (extracted.length > 10) {
        extractedTexts.push(extracted);
      }
    }
    currentPos = nsIdx + 20;
  }

  if (extractedTexts.length === 0) return null;
  // Return longest extracted text
  return extractedTexts.reduce((a, b) => a.length > b.length ? a : b);
}

/**
 * Strategy 4: Regex-based pattern matching for common message structures
 */
function extractStrategy4_RegexBased(buf) {
  const text = buf.toString('utf-8', 0, Math.min(buf.length, 2000));
  // Common patterns in messages
  const patterns = [
    /(?:wrote|said):\s*\n?\s*(.{20,})/i, // Reply pattern
    /"(.{20,})"/,                        // Quoted text
    /\n\s*([A-Z].{20,})\s*\n/,           // Paragraph pattern
  ];
  for (const pattern of patterns) {
    const match = text.match(pattern);
    if (match && match[1]) {
      const extracted = match[1].trim();
      // Clean up control characters
      const cleanEnd = extracted.search(/[\x00-\x08\x0B\x0C\x0E-\x1F]/);
      if (cleanEnd > 20) {
        return extracted.substring(0, cleanEnd).trim();
      } else if (cleanEnd === -1 && extracted.length > 20) {
        return extracted;
      }
    }
  }
  return null;
}
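// Quick sanity check for the scanning strategies (synthetic buffer, not a
// real NSKeyedArchiver payload): strategy 2 recovers the printable run and
// ignores the surrounding control bytes.
//
//   const buf = Buffer.concat([
//     Buffer.from([0x00, 0x01]),
//     Buffer.from("This is the actual message text we want back"),
//     Buffer.from([0x00])
//   ]);
//   extractStrategy2_DirectUTF8Scan(buf);
//   // -> "This is the actual message text we want back"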
/**
 * Multi-strategy attributedBody text extraction
 * Tries multiple approaches to extract text from binary NSAttributedString format
 */
function extractTextFromAttributedBody(buffer) {
  if (!buffer || buffer.length === 0) return null;
  try {
    const buf = Buffer.isBuffer(buffer) ? buffer : Buffer.from(buffer);
    // Try multiple strategies in order
    const strategies = [
      extractStrategy1_CurrentPattern,  // Keep for backward compatibility
      extractStrategy2_DirectUTF8Scan,  // Fast, works on most formats
      extractStrategy3_NSKeyedArchiver, // Modern macOS NSAttributedString
      extractStrategy4_RegexBased       // Pattern-based extraction
    ];
    for (const strategy of strategies) {
      try {
        const text = strategy(buf);
        if (text && validateExtractedText(text)) {
          return text;
        }
      } catch (e) {
        // Try next strategy
        continue;
      }
    }
    return null;
  } catch (e) {
    return null;
  }
}

function getMessages(sinceTimestamp = null) {
  try {
    // Enhanced query to get chat info, group chat detection, and attachments
    // Include attributedBody for messages where text is NULL (newer macOS format)
    // sinceTimestamp is Unix ms - convert to Mac Absolute Time nanoseconds
    let dateFilter = '';
    // NOTE: Index ALL messages without any date filtering
    // Only emails use the DAYS_BACK filter - messages and calendar are comprehensive
    if (sinceTimestamp) {
      const macAbsoluteNs = (sinceTimestamp / 1000 - MAC_ABSOLUTE_EPOCH) * 1000000000;
      dateFilter = `AND m.date >= ${macAbsoluteNs}`;
    }
    const query = `
      SELECT
        m.ROWID as id,
        datetime(m.date/1000000000 + 978307200, 'unixepoch', 'localtime') as date,
        m.date/1000000000 + 978307200 as dateTimestamp,
        CASE WHEN m.is_from_me = 1 THEN 'Me' ELSE coalesce(h.id, 'Unknown') END as sender,
        m.text,
        CASE WHEN m.text IS NULL OR m.text = '' THEN hex(m.attributedBody) ELSE NULL END as attributedBodyHex,
        c.ROWID as chatId,
        c.chat_identifier as chatIdentifier,
        c.display_name as chatName,
        (SELECT COUNT(*) FROM chat_handle_join WHERE chat_id = c.ROWID) as participantCount,
        (SELECT COUNT(*) FROM message_attachment_join WHERE message_id = m.ROWID) as attachmentCount
      FROM message m
      LEFT JOIN handle h ON m.handle_id = h.ROWID
      LEFT JOIN chat_message_join cmj ON m.ROWID = cmj.message_id
      LEFT JOIN chat c ON cmj.chat_id = c.ROWID
      WHERE ((m.text IS NOT NULL AND m.text <> '') OR m.attributedBody IS NOT NULL) ${dateFilter}
      ORDER BY m.date DESC
    `;
    const results = safeSqlite3Json(MESSAGES_DB, query, { timeout: 60000 });

    // Post-process: extract text from attributedBody where text is NULL
    let extractedCount = 0;
    for (const msg of results) {
      if ((!msg.text || msg.text === '') && msg.attributedBodyHex) {
        try {
          const buffer = Buffer.from(msg.attributedBodyHex, 'hex');
          const extracted = extractTextFromAttributedBody(buffer);
          if (extracted) {
            msg.text = extracted;
            extractedCount++;
          }
        } catch (e) {
          // Skip this message if extraction fails
        }
      }
      // Clean up - don't keep the hex blob in memory
      delete msg.attributedBodyHex;
    }
    if (extractedCount > 0) {
      console.error(`Extracted text from attributedBody for ${extractedCount} messages`);
    }
    // Filter out messages that still have no text
    return results.filter(msg => msg.text && msg.text.trim() !== '');
  } catch (e) {
    console.error("Error reading messages:", e.message);
    return [];
  }
}

// ============ CALENDAR INDEXING ============

// Convert Mac Absolute Time to Unix timestamp (ms)
function macAbsoluteToUnixMs(macTime) {
  if (!macTime) return 0;
  return (macTime + MAC_ABSOLUTE_EPOCH) * 1000;
}

// Convert Unix timestamp (ms) to Mac Absolute Time
function unixMsToMacAbsolute(unixMs) {
  return (unixMs / 1000) - MAC_ABSOLUTE_EPOCH;
}

// Format date from Mac Absolute Time
function formatMacAbsoluteDate(macTime) {
  if (!macTime) return "";
  const date = new Date(macAbsoluteToUnixMs(macTime));
  return date.toLocaleString();
}
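// Worked example of the epoch math: Mac Absolute Time counts seconds from
// 2001-01-01 00:00:00 UTC, so 2025-01-10 09:00:00 UTC (Unix 1736499600 s) is
// 1736499600 - 978307200 = 758192400 in Mac Absolute Time.
//
//   macAbsoluteToUnixMs(758192400);     // -> 1736499600000
//   unixMsToMacAbsolute(1736499600000); // -> 758192400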
// Map participant status codes to human-readable strings
function getParticipantStatus(status) {
  const statusMap = { 0: "unknown", 1: "accepted", 2: "declined", 3: "tentative", 4: "pending", 7: "needs-action" };
  return statusMap[status] || "unknown";
}

function getCalendarEvents() {
  try {
    // NOTE: We index ALL calendar events, not filtered by date
    // Calendar events don't have a "file modification time" like emails do,
    // so we can't use the file-scan + DAYS_BACK approach.
    // Calendar indexing is always comprehensive - filtering by date would lose historical context.
    const now = Date.now();
    const pastDate = unixMsToMacAbsolute(now - 10 * 365 * 24 * 60 * 60 * 1000);   // 10 years back
    const futureDate = unixMsToMacAbsolute(now + 10 * 365 * 24 * 60 * 60 * 1000); // 10 years ahead

    // Query OccurrenceCache for recurring events and their calculated occurrences
    // This includes both recurring and non-recurring events
    // GROUP BY to avoid duplicate entries for recurring events
    const query = `
      SELECT
        ci.ROWID as id,
        ci.summary,
        MIN(COALESCE(oc.occurrence_end_date - (ci.end_date - ci.start_date), ci.start_date)) as start_date,
        MAX(COALESCE(oc.occurrence_end_date, ci.end_date)) as end_date,
        ci.all_day,
        ci.description,
        c.title as calendar_name,
        l.title as location,
        COUNT(DISTINCT oc.day) as occurrenceCount
      FROM OccurrenceCache oc
      INNER JOIN CalendarItem ci ON oc.event_id = ci.ROWID
      LEFT JOIN Calendar c ON ci.calendar_id = c.ROWID
      LEFT JOIN Location l ON ci.location_id = l.ROWID
      WHERE oc.day IS NOT NULL AND oc.day >= ${pastDate} AND oc.day <= ${futureDate}
      GROUP BY ci.ROWID
      ORDER BY MIN(oc.day) ASC
    `;
    const rows = safeSqlite3Json(CALENDAR_DB, query, { timeout: 30000 });

    // Get attendees for events that have them (separate query for efficiency)
    const attendeesQuery = `
      SELECT p.owner_id, COALESCE(i.display_name, p.email, 'Unknown') as name, p.status
      FROM Participant p
      LEFT JOIN Identity i ON p.identity_id = i.ROWID
      WHERE p.entity_type = 0
    `;
    let attendeesMap = new Map();
    try {
      const attendeesRows = safeSqlite3Json(CALENDAR_DB, attendeesQuery, { timeout: 10000 });
      // Group attendees by owner_id (event id)
      for (const att of attendeesRows) {
        if (!attendeesMap.has(att.owner_id)) {
          attendeesMap.set(att.owner_id, []);
        }
        attendeesMap.get(att.owner_id).push({ name: att.name, status: getParticipantStatus(att.status) });
      }
    } catch (e) {
      console.error("Warning: Could not fetch attendees:", e.message);
    }

    const events = [];
    for (const row of rows) {
      if (!row.summary) continue;
      const startTimestamp = macAbsoluteToUnixMs(row.start_date);
      const attendees = attendeesMap.get(row.id) || [];
      events.push({
        dbId: row.id, // Stable database ID for deduplication
        title: row.summary,
        start: formatMacAbsoluteDate(row.start_date),
        end: formatMacAbsoluteDate(row.end_date),
        calendar: row.calendar_name || "Unknown",
        location: row.location || "",
        notes: row.description || "",
        isAllDay: row.all_day === 1,
        startTimestamp,
        attendees: JSON.stringify(attendees),
        attendeeCount: attendees.length
      });
    }
    console.error(`Calendar: Retrieved ${events.length} events via SQLite (~${Math.round(Date.now() - now)}ms)`);
    return events;
  } catch (e) {
    console.error("Error reading calendar:", e.message);
    return [];
  }
}
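// Shape of one record produced by getCalendarEvents (values hypothetical);
// note that attendees is stored as a JSON string so it survives LanceDB's
// flat column schema.
//
//   {
//     dbId: 42,
//     title: "Team standup",
//     start: "1/10/2025, 9:00:00 AM",
//     end: "1/10/2025, 9:15:00 AM",
//     calendar: "Work",
//     location: "",
//     notes: "",
//     isAllDay: false,
//     startTimestamp: 1736499600000,
//     attendees: '[{"name":"Alice","status":"accepted"}]',
//     attendeeCount: 1
//   }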
db.openTable("messages"); } if (tableNames.includes("calendar")) { tables.calendar = await db.openTable("calendar"); } return { db, tables }; } export async function clearEmailsTable() { await initDB(); if (tables.emails) { await db.dropTable("emails"); tables.emails = null; console.error("Emails table dropped. Re-run indexing to rebuild."); return { cleared: true }; } return { cleared: false, message: "No emails table exists" }; } export async function clearMessagesTable() { await initDB(); if (tables.messages) { await db.dropTable("messages"); tables.messages = null; console.error("Messages table dropped. Re-run indexing to rebuild."); return { cleared: true }; } return { cleared: false, message: "No messages table exists" }; } export async function clearCalendarTable() { await initDB(); if (tables.calendar) { await db.dropTable("calendar"); tables.calendar = null; console.error("Calendar table dropped. Re-run indexing to rebuild."); return { cleared: true }; } return { cleared: false, message: "No calendar table exists" }; } export async function rebuildIndex(sources = ["emails", "messages", "calendar"], progressCallback = null) { const results = { cleared: {}, indexed: {}, errors: [] }; // Clear requested sources for (const source of sources) { try { if (source === "emails") { const clearResult = await clearEmailsTable(); results.cleared.emails = clearResult.cleared; } else if (source === "messages") { const clearResult = await clearMessagesTable(); results.cleared.messages = clearResult.cleared; } else if (source === "calendar") { const clearResult = await clearCalendarTable(); results.cleared.calendar = clearResult.cleared; } } catch (e) { results.errors.push({ source, phase: "clear", error: e.message }); } } // Reset module-level cache after dropping tables // This ensures that initDB() will re-initialize and pick up the newly created tables db = null; tables = {}; // Re-index requested sources for (const source of sources) { try { if (source === "emails") { if (progressCallback) progressCallback("Indexing emails..."); // Force full scan for rebuild (forceFullScan = true) const indexResult = await indexEmails(progressCallback, true); results.indexed.emails = indexResult; } else if (source === "messages") { if (progressCallback) progressCallback("Indexing messages..."); // Force full scan for rebuild (forceFullScan = true) const indexResult = await indexMessages(true); results.indexed.messages = indexResult; } else if (source === "calendar") { if (progressCallback) progressCallback("Indexing calendar..."); const indexResult = await indexCalendar(); results.indexed.calendar = indexResult; } } catch (e) { results.errors.push({ source, phase: "index", error: e.message }); } } return results; } async function getIndexedIds(tableName, idField) { await initDB(); if (!tables[tableName]) return new Set(); try { const results = await tables[tableName].query().select([idField]).toArray(); return new Set(results.map(r => r[idField])); } catch (e) { // THROW instead of returning empty Set to prevent duplicate entries console.error(`Error getting indexed IDs from ${tableName}: ${e.message}`); throw new Error(`Failed to get indexed IDs from ${tableName}: ${e.message}`); } } /** * Get indexed IDs with retry logic to prevent race conditions * @param {string} tableName - Table name * @param {string} idField - ID field name * @param {number} maxRetries - Maximum retry attempts (default: 3) * @returns {Promise<Set>} Set of indexed IDs */ async function getIndexedIdsWithRetry(tableName, idField, 
/**
 * Get indexed IDs with retry logic to prevent race conditions
 * @param {string} tableName - Table name
 * @param {string} idField - ID field name
 * @param {number} maxRetries - Maximum retry attempts (default: 3)
 * @returns {Promise<Set>} Set of indexed IDs
 */
async function getIndexedIdsWithRetry(tableName, idField, maxRetries = 3) {
  let lastError;
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await getIndexedIds(tableName, idField);
    } catch (e) {
      lastError = e;
      if (attempt < maxRetries) {
        const backoff = Math.min(1000 * Math.pow(2, attempt - 1), 5000); // Exponential backoff, max 5s
        console.error(`Retry ${attempt}/${maxRetries} after ${backoff}ms...`);
        await new Promise(resolve => setTimeout(resolve, backoff));
      }
    }
  }
  throw lastError;
}

// ============ INDEX ALL CONTENT ============

export async function indexEmails(progressCallback = null, forceFullScan = false) {
  await initDB();

  // Load last index timestamp for incremental scanning
  const meta = loadIndexMeta();
  // Use a 1-day buffer to catch any edge cases (files modified during previous scan, etc.)
  const ONE_DAY_MS = 24 * 60 * 60 * 1000;

  // Determine the timestamp for the incremental file scan
  let lastEmailIndexTime;
  if (DAYS_BACK) {
    // DAYS_BACK is set - use it for the find filter (e.g., for testing or rebuild with constraints)
    lastEmailIndexTime = Date.now() - (DAYS_BACK * 24 * 60 * 60 * 1000);
    console.error(`Using DAYS_BACK=${DAYS_BACK} for the find filter`);
  } else if (forceFullScan) {
    // Force full scan only when no DAYS_BACK constraint
    lastEmailIndexTime = null;
    console.error("Force full scan requested - finding all emails");
  } else if (meta.lastEmailIndexTime) {
    lastEmailIndexTime = meta.lastEmailIndexTime - ONE_DAY_MS;
  } else {
    lastEmailIndexTime = null;
  }

  // Save the current time BEFORE we start - any emails arriving during indexing
  // will be picked up on the next incremental scan
  const indexStartTime = Date.now();

  // Use fast incremental scan if we have a previous timestamp
  const startTime = Date.now();
  const newFiles = await findNewEmlxFiles(lastEmailIndexTime);
  console.error(`Found ${newFiles.length} new/modified email files (${Date.now() - startTime}ms)`);

  const indexedPaths = await getIndexedIdsWithRetry("emails", "filePath");
  const indexedMessageIds = await getIndexedIdsWithRetry("emails", "messageId");
  console.error(`Already indexed: ${indexedPaths.size} emails`);

  // Filter out already-indexed files
  let toIndex = newFiles.filter(f => !indexedPaths.has(f));
  console.error(`Need to index: ${toIndex.length} emails`);

  // Sort by mailbox priority so higher-priority folders are indexed first
  // This ensures INBOX copies are kept over Junk/Trash copies during deduplication
  toIndex.sort((a, b) => getMailboxPriority(a) - getMailboxPriority(b));

  if (toIndex.length === 0) {
    // Save timestamp for next incremental scan
    saveIndexMeta({ ...meta, lastEmailIndexTime: indexStartTime });
    return { indexed: indexedPaths.size, added: 0 };
  }

  let processed = 0;
  let skippedCount = { parseNull: 0, shortSearchText: 0, duplicateMessageId: 0 };

  for (let i = 0; i < toIndex.length; i += BATCH_SIZE) {
    const batch = toIndex.slice(i, i + BATCH_SIZE);

    // Parse all emails in batch first
    const parsedEmails = [];
    const searchTexts = [];
    for (const filePath of batch) {
      const parsed = parseEmlx(filePath);
      if (!parsed) {
        skippedCount.parseNull++;
        const fileName = filePath.split("/").pop();
        console.error(`[SKIPPED] Parse failed: ${fileName}`);
        continue;
      }
      if (parsed.searchText.length <= 20) {
        skippedCount.shortSearchText++;
        const fileName = filePath.split("/").pop();
        console.error(`[SKIPPED] Short searchText (${parsed.searchText.length} chars): ${fileName} - Subject: "${parsed.subject}"`);
        continue;
      }
      // Skip duplicate messageIds - same email in multiple folders (IMAP behavior)
      if (parsed.messageId && indexedMessageIds.has(parsed.messageId)) {
        skippedCount.duplicateMessageId++;
        continue; // Skip this duplicate
      }
      // NOTE: Don't filter by email header date - the find scan already filters by file modification time
      // The DAYS_BACK filter is applied at the file-scan level (finding recently modified files),
      // not at the email content level (email header dates can be old for recently modified messages)
      parsedEmails.push({ filePath, parsed });
      searchTexts.push(parsed.searchText);
    }

    // Generate all embeddings in a single batch call
    let vectors = [];
    if (searchTexts.length > 0) {
      try {
        vectors = await embedBatch(searchTexts);
      } catch (e) {
        console.error("Batch embedding error:", e.message);
        continue;
      }
    }

    // Build records with vectors
    const records = parsedEmails.map((item, idx) => ({
      filePath: item.filePath,
      from: item.parsed.from,
      fromEmail: item.parsed.fromEmail,
      to: item.parsed.to,
      toEmails: item.parsed.toEmails,
      subject: item.parsed.subject,
      date: item.parsed.date,
      dateTimestamp: item.parsed.dateTimestamp,
      hasAttachment: item.parsed.hasAttachment,
      mailbox: item.parsed.mailbox,
      isSent: item.parsed.isSent,
      isFlagged: item.parsed.isFlagged,
      messageId: item.parsed.messageId,
      body: item.parsed.body,
      vector: vectors[idx]
    })).filter(r => r.vector); // Only records with vectors

    if (records.length > 0) {
      // Defensive duplicate check: remove any duplicates within the batch itself
      const uniqueRecords = [];
      const seenPaths = new Set();
      for (const record of records) {
        if (!seenPaths.has(record.filePath)) {
          seenPaths.add(record.filePath);
          uniqueRecords.push(record);
        }
      }
      if (uniqueRecords.length !== records.length) {
        const duplicatePaths = records
          .map(r => r.filePath)
          .filter((path, idx, arr) => arr.indexOf(path) !== idx);
        console.error(`WARNING: Removed ${records.length - uniqueRecords.length} duplicate(s) from batch. Paths: ${duplicatePaths.slice(0, 5).join(', ')}`);
        console.error(`This suggests findNewEmlxFiles() is returning duplicate paths. Investigate the find command.`);
      }

      if (uniqueRecords.length > 0) {
        if (!tables.emails) {
          tables.emails = await db.createTable("emails", uniqueRecords, { mode: "overwrite" });
        } else {
          // Double-check: verify these IDs truly aren't in the index
          const currentIndexed = await getIndexedIdsWithRetry("emails", "filePath");
          const trulyNew = uniqueRecords.filter(r => !currentIndexed.has(r.filePath));
          if (trulyNew.length < uniqueRecords.length) {
            console.error(`WARNING: ${uniqueRecords.length - trulyNew.length} records were already indexed. Filtering them out.`);
          }
          if (trulyNew.length > 0) {
            const beforeCount = await tables.emails.countRows();
            await tables.emails.add(trulyNew);
            const afterCount = await tables.emails.countRows();
            const actualAdded = afterCount - beforeCount;
            if (actualAdded !== trulyNew.length) {
              console.error(`WARNING: Expected to add ${trulyNew.length} records but index grew by ${actualAdded}. Possible duplicate issue.`);
            }
            // FIX: Update the messageId tracking Set with newly added records
            // This ensures subsequent batches can detect messageIds from previous batches
            for (const record of trulyNew) {
              if (record.messageId) {
                indexedMessageIds.add(record.messageId);
              }
            }
          }
        }
      }
    }

    processed += batch.length;
    console.error(`Indexed ${processed}/${toIndex.length} emails...`);

    // Report progress after each batch
    if (progressCallback) {
      progressCallback(`emails-batch-${processed}/${toIndex.length}`);
    }

    // Throttle to prevent thermal crashes
    if (i + BATCH_SIZE < toIndex.length) {
      await new Promise(r => setTimeout(r, BATCH_DELAY_MS));
    }
  }

  // Save timestamp for next incremental scan
  saveIndexMeta({ ...meta, lastEmailIndexTime: indexStartTime });

  // Log skip summary
  const totalSkipped = skippedCount.parseNull + skippedCount.shortSearchText + skippedCount.duplicateMessageId;
  console.error(`\nEmail indexing summary:`);
  console.error(`  Files to index: ${toIndex.length}`);
  console.error(`  Successfully indexed: ${processed}`);
  console.error(`  Skipped - parse errors: ${skippedCount.parseNull}`);
  console.error(`  Skipped - short searchText: ${skippedCount.shortSearchText}`);
  console.error(`  Skipped - duplicate messageId: ${skippedCount.duplicateMessageId}`);
  console.error(`  Total skipped: ${totalSkipped}`);
  console.error(`  Discrepancy: ${toIndex.length - processed - totalSkipped}\n`);

  // Return actual indexed count from database, not stale cache
  const finalIndexedPaths = await getIndexedIdsWithRetry("emails", "filePath");
  return { indexed: finalIndexedPaths.size, added: processed };
}
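// The metadata file driving incremental scans is a tiny JSON document; after
// a successful run it looks roughly like this (timestamps hypothetical, Unix ms):
//
//   { "lastEmailIndexTime": 1736499600000, "lastMessageIndexTime": 1736499500000 }
//
// The next email run rescans from lastEmailIndexTime minus ONE_DAY_MS, trading
// a little duplicate parsing for never missing a file written mid-scan; the
// filePath and messageId checks above discard anything rescanned.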
export async function indexMessages(forceFullScan = false) {
  await initDB();

  // Load last index timestamp for incremental scanning
  const meta = loadIndexMeta();
  // Use a 1-hour buffer to catch any edge cases
  const ONE_HOUR_MS = 60 * 60 * 1000;
  const lastMessageIndexTime = forceFullScan
    ? null
    : (meta.lastMessageIndexTime ? meta.lastMessageIndexTime - ONE_HOUR_MS : null);

  // Save the current time BEFORE we start
  const indexStartTime = Date.now();

  // Use incremental scan if we have a previous timestamp
  const messages = getMessages(lastMessageIndexTime);
  console.error(`Found ${messages.length} messages${lastMessageIndexTime ? ' (incremental)' : ' (full scan)'}`);

  const indexed = await getIndexedIdsWithRetry("messages", "id");
  console.error(`Already indexed: ${indexed.size} messages`);

  const toIndex = messages.filter(m => !indexed.has(String(m.id)));
  console.error(`Need to index: ${toIndex.length} messages`);

  if (toIndex.length === 0) {
    // Save timestamp for next incremental scan
    saveIndexMeta({ ...meta, lastMessageIndexTime: indexStartTime });
    return { indexed: indexed.size, added: 0 };
  }

  let processed = 0;
  for (let i = 0; i < toIndex.length; i += BATCH_SIZE) {
    const batch = toIndex.slice(i, i + BATCH_SIZE);

    // Prepare all messages and search texts
    const preparedMsgs = [];
    const searchTexts = [];
    for (const msg of batch) {
      const searchText = `From: ${msg.sender}\nMessage: ${msg.text}`.substring(0, 500);
      if (searchText.length > 10) {
        preparedMsgs.push(msg);
        searchTexts.push(searchText);
      }
    }

    // Generate all embeddings in a single batch call
    let vectors = [];
    if (searchTexts.length > 0) {
      try {
        vectors = await embedBatch(searchTexts);
      } catch (e) {
        console.error("Batch embedding error:", e.message);
        continue;
      }
    }

    // Build records with vectors
    const records = preparedMsgs.map((msg, idx) => {
      // Explicitly cast to boolean to avoid LanceDB schema inference issues
      const isGroupChat = Boolean((parseInt(msg.participantCount) || 0) > 2 || (msg.chatName && msg.chatName.length > 0));
      const hasAttachment = Boolean((parseInt(msg.attachmentCount) || 0) > 0);
      return {
        id: String(msg.id),
        date: msg.date,
        dateTimestamp: msg.dateTimestamp || 0,
        sender: msg.sender,
        text: msg.text?.substring(0, 500) || "",
        chatId: String(msg.chatId || ""),
        chatIdentifier: msg.chatIdentifier || "",
        chatName: msg.chatName || "",
        isGroupChat,
        hasAttachment,
        vector: vectors[idx]
      };
    }).filter(r => r.vector); // Only records with vectors

    if (records.length > 0) {
      // Defensive duplicate check: remove any duplicates within the batch itself
      const uniqueRecords = [];
      const seenIds = new Set();
      for (const record of records) {
        if (!seenIds.has(record.id)) {
          seenIds.add(record.id);
          uniqueRecords.push(record);
        }
      }
      if (uniqueRecords.length !== records.length) {
        const duplicateIds = records
          .map(r => r.id)
          .filter((id, idx, arr) => arr.indexOf(id) !== idx);
        console.error(`WARNING: Removed ${records.length - uniqueRecords.length} duplicate(s) from batch. IDs: ${duplicateIds.join(', ')}`);
        console.error(`This suggests getMessages() is returning duplicate rows. Investigate SQL query.`);
      }

      if (uniqueRecords.length > 0) {
        if (!tables.messages) {
          tables.messages = await db.createTable("messages", uniqueRecords, { mode: "overwrite" });
        } else {
          // Double-check: verify these IDs truly aren't in the index
          const currentIndexed = await getIndexedIdsWithRetry("messages", "id");
          const trulyNew = uniqueRecords.filter(r => !currentIndexed.has(r.id));
          if (trulyNew.length < uniqueRecords.length) {
            console.error(`WARNING: ${uniqueRecords.length - trulyNew.length} records were already indexed. Filtering them out.`);
          }
          if (trulyNew.length > 0) {
            const beforeCount = await tables.messages.countRows();
            await tables.messages.add(trulyNew);
            const afterCount = await tables.messages.countRows();
            const actualAdded = afterCount - beforeCount;
            if (actualAdded !== trulyNew.length) {
              console.error(`WARNING: Expected to add ${trulyNew.length} records but index grew by ${actualAdded}. Possible duplicate issue.`);
            }
          }
        }
      }
    }

    processed += batch.length;
    console.error(`Indexed ${processed}/${toIndex.length} messages...`);

    // Throttle to prevent thermal crashes
    if (i + BATCH_SIZE < toIndex.length) {
      await new Promise(r => setTimeout(r, BATCH_DELAY_MS));
    }
  }

  // Save timestamp for next incremental scan
  saveIndexMeta({ ...meta, lastMessageIndexTime: indexStartTime });
  return { indexed: indexed.size, added: processed };
}
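// Note on timestamp units, since they differ by table: message rows carry
// dateTimestamp in Unix *seconds* (computed in SQL as m.date/1e9 + 978307200),
// while email rows use Unix *milliseconds* from Date.parse. Illustration:
//
//   const macNs = 758192400 * 1e9;               // chat.db stores ns since 2001
//   const unixSeconds = macNs / 1e9 + 978307200; // -> 1736499600
//   new Date(unixSeconds * 1000).toISOString();  // -> "2025-01-10T09:00:00.000Z"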
export async function indexCalendar() {
  await initDB();
  const events = getCalendarEvents();
  console.error(`Found ${events.length} calendar events`);

  // Get already indexed event IDs for incremental indexing
  const indexed = await getIndexedIdsWithRetry("calendar", "id");
  console.error(`Already indexed: ${indexed.size} calendar events`);

  // Build set of current event IDs for stale detection
  // Use dbId only - must match the ID format used when storing events (line 1139)
  const currentIds = new Set(events.map(evt => `${evt.dbId}`));

  // Find and remove stale entries (indexed but no longer in calendar)
  const staleIds = [...indexed].filter(id => !currentIds.has(id));
  if (staleIds.length > 0 && tables.calendar) {
    console.error(`Removing ${staleIds.length} stale calendar entries...`);
    // Validate all IDs first
    const validIds = [];
    for (const staleId of staleIds) {
      const validatedId = validateLanceDBId(staleId);
      if (!validatedId) {
        console.error(`Skipping invalid stale ID: ${staleId.substring(0, 50)}...`);
        continue;
      }
      validIds.push(escapeSQL(validatedId));
    }
    // Batch delete in chunks of 100 to avoid query size limits
    const BATCH_DELETE_SIZE = 100;
    for (let i = 0; i < validIds.length; i += BATCH_DELETE_SIZE) {
      const batch = validIds.slice(i, i + BATCH_DELETE_SIZE);
      if (batch.length > 0) {
        try {
          // Use OR conditions for batch delete (LanceDB compatible)
          const conditions = batch.map(id => `id = '${id}'`).join(' OR ');
          await tables.calendar.delete(conditions);
        } catch (e) {
          console.error(`Failed to delete batch: ${e.message}`);
        }
      }
    }
  }

  // Prepare only NEW events and search texts
  const validEvents = [];
  const searchTexts = [];
  for (const evt of events) {
    const eventId = `${evt.dbId}`; // Remove timestamp from ID to avoid duplicates
    // Skip if already indexed
    if (indexed.has(eventId)) continue;
    const searchText = `Event: ${evt.title}\nCalendar: ${evt.calendar}\nLocation: ${evt.location}\nNotes: ${evt.notes}`.substring(0, 500);
    if (searchText.length > 10) {
      validEvents.push({ ...evt, id: eventId });
      searchTexts.push(searchText);
    }
  }

  if (validEvents.length === 0) {
    console.error("No new calendar events to index");
    return { indexed: indexed.size, added: 0, removed: staleIds.length };
  }

  console.error(`Indexing ${validEvents.length} new calendar events...`);

  // Generate embeddings in batches
  const allVectors = [];
  for (let i = 0; i < searchTexts.length; i += BATCH_SIZE) {
    const batchTexts = searchTexts.slice(i, i + BATCH_SIZE);
    try {
      const batchVectors = await embedBatch(batchTexts);
      allVectors.push(...batchVectors);
    } catch (e) {
      console.error("Batch embedding error:", e.message);
    }
    console.error(`Embedded ${Math.min(i + BATCH_SIZE, searchTexts.length)}/${validEvents.length} new calendar events...`);
  }

  // Build records with vectors
  const records = validEvents.map((evt, idx) => ({
    id: evt.id,
    title: evt.title,
    start: evt.start,
    end: evt.end,
    startTimestamp: evt.startTimestamp,
    calendar: evt.calendar,
    location: evt.location,
    notes: evt.notes?.substring(0, 200) || "",
    isAllDay: evt.isAllDay,
    attendees: evt.attendees || "[]",
    attendeeCount: evt.attendeeCount || 0,
    vector: allVectors[idx]
  })).filter(r => r.vector); // Only include records with valid vectors

  if (records.length > 0) {
    // Defensive duplicate check: remove any duplicates within the batch itself
    const uniqueRecords = [];
    const seenIds = new Set();
    for (const record of records) {
      if (!seenIds.has(record.id)) {
        seenIds.add(record.id);
        uniqueRecords.push(record);
      }
    }
    if (uniqueRecords.length !== records.length) {
      console.error(`Removed ${records.length - uniqueRecords.length} duplicate(s) from batch`);
    }
    if (uniqueRecords.length > 0) {
      if (tables.calendar) {
        // Add to existing table
        await tables.calendar.add(uniqueRecords);
      } else {
        // Create new table
        tables.calendar = await db.createTable("calendar", uniqueRecords);
      }
      console.error(`Indexed ${uniqueRecords.length} new calendar events`);
    }
  }

  return { indexed: indexed.size, added: records.length, removed: staleIds.length };
}

export async function indexAll(progressCallback = null) {
  console.error("Starting full index...");

  if (progressCallback) progressCallback('emails-start');
  const emailResult = await indexEmails(progressCallback);
  if (progressCallback) progressCallback('emails-complete');

  if (progressCallback) progressCallback('messages-start');
  const messageResult = await indexMessages();
  if (progressCallback) progressCallback('messages-complete');

  if (progressCallback) progressCallback('calendar-start');
  const calendarResult = await indexCalendar();
  if (progressCallback) progressCallback('calendar-complete');

  console.error("Full index complete.");
  return { emails: emailResult, messages: messageResult, calendar: calendarResult };
}

export async function isIndexReady(type = "emails") {
  await initDB();
  return tables[type] !== null && tables[type] !== undefined;
}
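// Typical call sequence for a full build (illustrative):
//
//   await initDB();
//   const summary = await indexAll(stage => console.error(`[progress] ${stage}`));
//   // -> { emails: { indexed, added }, messages: { indexed, added },
//   //      calendar: { indexed, added, removed } }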
// ============ DIRECT QUERIES (for recent items) ============

export async function getRecentEmails(limit = 10, daysBack = 7) {
  await initDB();
  if (!tables.emails) return [];
  const cutoff = Date.now() - (daysBack * 24 * 60 * 60 * 1000);
  try {
    // Fetch all and filter in JavaScript (LanceDB where clause with quoted columns is unreliable)
    const results = await tables.emails.query()
      .select(["filePath", "from", "fromEmail", "to", "subject", "date", "dateTimestamp", "hasAttachment", "mailbox", "isSent", "isFlagged", "messageId", "body"])
      .toArray();
    // Filter by date, sort by timestamp descending, and limit
    return results
      .filter(r => r.dateTimestamp >= cutoff)
      .sort((a, b) => b.dateTimestamp - a.dateTimestamp)
      .slice(0, limit);
  } catch (e) {
    // Fallback with minimal columns for older indexes
    console.error("Using fallback query:", e.message);
    try {
      const results = await tables.emails.query()
        .select(["filePath", "from", "fromEmail", "to", "subject", "date", "dateTimestamp", "hasAttachment", "body"])
        .toArray();
      return results
        .filter(r => r.dateTimestamp >= cutoff)
        .sort((a, b) => b.dateTimestamp - a.dateTimestamp)
        .slice(0, limit);
    } catch (e2) {
      console.error("Error getting recent emails:", e2.message);
      return [];
    }
  }
}

export async function getEmailsByDateRange(startTs, endTs) {
  await initDB();
  if (!tables.emails) return [];
  try {
    // LanceDB where clause with quoted column names doesn't work reliably
    // So we fetch all and filter in JavaScript
    const results = await tables.emails.query()
      .select(["filePath", "from", "fromEmail", "to", "subject", "date", "dateTimestamp", "hasAttachment", "mailbox", "isSent", "isFlagged", "messageId", "body"])
      .toArray();
    // Filter by date range and sort by timestamp descending
    return results
      .filter(r => r.dateTimestamp >= startTs && r.dateTimestamp < endTs)
      .sort((a, b) => b.dateTimestamp - a.dateTimestamp);
  } catch (e) {
    // Fallback with minimal columns for older indexes
    console.error("Using fallback query for date range:", e.message);
    try {
      const results = await tables.emails.query()
        .select(["filePath", "from", "fromEmail", "to", "subject", "date", "dateTimestamp", "hasAttachment", "body"])
        .toArray();
      return results
        .filter(r => r.dateTimestamp >= startTs && r.dateTimestamp < endTs)
        .sort((a, b) => b.dateTimestamp - a.dateTimestamp);
    } catch (e2) {
      console.error("Error getting emails by date range:", e2.message);
      return [];
    }
  }
}

export async function getRecentMessages(limit = 10, daysBack = 1) {
  await initDB();
  if (!tables.messages) return { messages: [], hasMore: false };
  try {
    const cutoff = Date.now() / 1000 - (daysBack * 24 * 60 * 60); // Messages use Unix timestamp in seconds
    const results = await tables.messages.query()
      .select(["id", "date", "dateTimestamp", "sender", "text", "chatId", "isGroupChat"])
      .toArray();
    const filtered = results
      .filter(r => r.dateTimestamp >= cutoff)
      .sort((a, b) => b.dateTimestamp - a.dateTimestamp);
    const hasMore = filtered.length > limit;
    const messages = filtered.slice(0, limit);
    return { messages, hasMore };
  } catch (e) {
    console.error("Error getting recent messages:", e.message);
    return { messages: [], hasMore: false };
  }
}

export async function getConversation(contact, limit = 50) {
  await initDB();
  if (!tables.messages) return [];
  try {
    const contactLower = contact.toLowerCase();
    const results = await tables.messages.query()
      .select(["id", "date", "dateTimestamp", "sender", "text", "chatId", "chatIdentifier"])
      .toArray();
    // Find messages where sender or chatIdentifier contains the contact
    const filtered = results.filter(r => {
      const sender = (r.sender || "").toLowerCase();
      const chatId = (r.chatIdentifier || "").toLowerCase();
      return sender.includes(contactLower) || chatId.includes(contactLower);
    });
    // Sort chronologically (oldest first for conversation view)
    return filtered
      .sort((a, b) => a.dateTimestamp - b.dateTimestamp)
      .slice(-limit); // Take last N messages
  } catch (e) {
    console.error("Error getting conversation:", e.message);
    return [];
  }
}

export async function getCalendarByDate(startTimestamp, endTimestamp) {
  await initDB();
  if (!tables.calendar) return [];
  try {
    const results = await tables.calendar.query()
      .select(["id", "title", "start", "end", "startTimestamp", "calendar", "location", "notes", "isAllDay"])
      .toArray();
    console.error(`[Calendar Query] Searching between ${new Date(startTimestamp).toISOString()} and ${new Date(endTimestamp).toISOString()}`);
    console.error(`[Calendar Query] Total events in index: ${results.length}`);
    const filtered = results.filter(r => r.startTimestamp >= startTimestamp && r.startTimestamp < endTimestamp);
    console.error(`[Calendar Query] Matched events: ${filtered.length}`);
    if (filtered.length === 0 && results.length > 0) {
      // Show events near the boundary for debugging
      const nearby = results.filter(r =>
        r.startTimestamp >= startTimestamp - 86400000 && r.startTimestamp <= endTimestamp + 86400000
      );
      console.error(`[Calendar Query] Events within ±1 day: ${nearby.length}`);
      nearby.forEach(e => {
        console.error(`  - "${e.title}" at ${new Date(e.startTimestamp).toISOString()} (all-day: ${e.isAllDay})`);
      });
    }
    return filtered.sort((a, b) => a.startTimestamp - b.startTimestamp);
  } catch (e) {
    console.error("Error getting calendar by date:", e.message);
    return [];
  }
}
export async function getAllCalendarEvents() {
  await initDB();
  if (!tables.calendar) return [];
  try {
    const results = await tables.calendar.query()
      .select(["id", "title", "start", "end", "startTimestamp", "calendar", "location", "isAllDay"])
      .toArray();
    return results.sort((a, b) => a.startTimestamp - b.startTimestamp);
  } catch (e) {
    console.error("Error getting all calendar events:", e.message);
    return [];
  }
}

// ============ NEW TOOLS - PHASE 1 ============

// Mailboxes to exclude by default
const EXCLUDED_MAILBOXES = ['junk', 'trash', 'deleted messages', 'spam'];

// mail_senders: List most frequent email senders
export async function getFrequentSenders(limit = 30, daysBack = 0, includeJunk = false) {
  await initDB();
  if (!tables.emails) return [];
  try {
    let emails = await tables.emails.query().select(["fromEmail", "from", "dateTimestamp", "mailbox"]).toArray();
    if (daysBack > 0) {
      const cutoff = Date.now() - daysBack * 24 * 60 * 60 * 1000;
      emails = emails.filter(e => e.dateTimestamp >= cutoff);
    }
    // Exclude junk/trash by default
    if (!includeJunk) {
      emails = emails.filter(e =>
        !EXCLUDED_MAILBOXES.some(mb => (e.mailbox || "").toLowerCase().includes(mb))
      );
    }
    const senderCounts = {};
    emails.forEach(e => {
      const key = e.fromEmail || e.from || "Unknown";
      senderCounts[key] = (senderCounts[key] || 0) + 1;
    });
    return Object.entries(senderCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit)
      .map(([email, count]) => ({ email, messageCount: count }));
  } catch (e) {
    console.error("Error getting frequent senders:", e.message);
    return [];
  }
}

// messages_contacts: List all contacts you've messaged
export function getMessageContacts(limit = 50) {
  try {
    // Validate limit to prevent SQL issues
    const safeLimit = validateLimit(limit, 50, 500);
    const query = `
      SELECT
        h.id as contact,
        COUNT(m.ROWID) as messageCount,
        datetime(MAX(m.date)/1000000000 + 978307200, 'unixepoch', 'localtime') as lastMessageDate
      FROM message m
      LEFT JOIN handle h ON m.handle_id = h.ROWID
      WHERE h.id IS NOT NULL
      GROUP BY h.id
      ORDER BY MAX(m.date) DESC
      LIMIT ${safeLimit}
    `;
    return safeSqlite3Json(MESSAGES_DB, query, { timeout: 30000 });
  } catch (e) {
    console.error("Error getting message contacts:", e.message);
    return [];
  }
}

// calendar_upcoming: Get next N upcoming events
export function getUpcomingEvents(limit = 10) {
  try {
    // Validate limit to prevent SQL issues
    const safeLimit = validateLimit(limit, 10, 100);
    const nowMac = Math.floor(Date.now() / 1000) - 978307200;
    const fetchLimit = safeLimit + 1; // Fetch one extra to detect if more exist
    // Query OccurrenceCache to include recurring event occurrences
    const query = `
      SELECT
        ci.summary as title,
        datetime(COALESCE(oc.occurrence_end_date - (ci.end_date - ci.start_date), ci.start_date) + 978307200, 'unixepoch', 'localtime') as start,
        datetime(COALESCE(oc.occurrence_end_date, ci.end_date) + 978307200, 'unixepoch', 'localtime') as end,
        ci.all_day as isAllDay,
        c.title as calendar,
        l.title as location,
        oc.day as sort_day
      FROM OccurrenceCache oc
      INNER JOIN CalendarItem ci ON oc.event_id = ci.ROWID
      LEFT JOIN Calendar c ON ci.calendar_id = c.ROWID
      LEFT JOIN Location l ON ci.location_id = l.ROWID
      WHERE oc.day >= ${nowMac} AND ci.summary IS NOT NULL AND ci.summary <> ''
      ORDER BY sort_day ASC
      LIMIT ${fetchLimit}
    `;
    const events = safeSqlite3Json(CALENDAR_DB, query, { timeout: 10000 });
    const hasMore = events.length > safeLimit;
    const limitedEvents = events.slice(0, safeLimit);
    return { events: limitedEvents, showing: limitedEvents.length, hasMore };
  } catch (e) {
    console.error("Error getting upcoming events:", e.message);
    return { events: [], showing: 0, hasMore: false };
  }
}
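// The fetch-one-extra pattern above derives hasMore without a second COUNT
// query; illustrative:
//
//   const { events, showing, hasMore } = getUpcomingEvents(10);
//   // events.length <= 10; hasMore === true means an 11th row existed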
// ============ NEW TOOLS - PHASE 2 ============

// mail_unread_count: Get count of unread emails via AppleScript
export function getUnreadCount(mailbox = null) {
  try {
    let script;
    if (mailbox) {
      // Validate mailbox name to prevent AppleScript injection
      const validatedMailbox = validateMailboxName(mailbox);
      if (!validatedMailbox) {
        return {
          unreadCount: 0,
          mailbox,
          error: "Invalid mailbox name. Only alphanumeric characters, spaces, hyphens, underscores, and periods are allowed."
        };
      }
      // Escape for AppleScript double-quoted string (belt and suspenders)
      const escapedMailbox = escapeAppleScript(validatedMailbox);
      script = `tell application "Mail"
        set unreadCount to 0
        repeat with acc in accounts
          try
            set mb to mailbox "${escapedMailbox}" of acc
            set unreadCount to unreadCount + (count of (messages of mb whose read status is false))
          end try
        end repeat
        return unreadCount
      end tell`;
    } else {
      script = `tell application "Mail"
        return unread count of inbox
      end tell`;
    }
    const result = safeOsascript(script, { timeout: 15000 });
    return { unreadCount: parseInt(result.trim()) || 0, mailbox: mailbox || "INBOX" };
  } catch (e) {
    console.error("Error getting unread count:", e.message);
    return { unreadCount: 0, mailbox: mailbox || "INBOX", error: e.message };
  }
}
"Next Week" : `Week of ${weekStart}`, dateRange: `${weekStart} - ${weekEnd}` }; } catch (e) { console.error("Error getting week events:", e.message); return { events: [], weekLabel: "Unknown", dateRange: "", error: e.message }; } } // ============ NEW TOOLS - PHASE 3 ============ // mail_thread: Get email thread by searching for related emails // Uses subject-based matching since Message-ID isn't indexed export async function getEmailThread(filePath, limit = 20) { await initDB(); if (!tables.emails) return { error: "Email index not ready", emails: [] }; try { // Validate file path to prevent path traversal attacks let validatedPath; try { validatedPath = validateEmailPath(filePath, MAIL_DIR); } catch (e) { return { error: `Invalid file path: ${e.message}`, emails: [] }; } // Verify file exists if (!fs.existsSync(validatedPath)) { return { error: "Email file not found", emails: [] }; } // Read the email to get subject const content = fs.readFileSync(validatedPath, "utf-8"); const subjectMatch = content.match(/^Subject:\s*(.+)$/m); if (!subjectMatch) { return { error: "Could not extract subject from email", emails: [] }; } // Clean subject - remove Re:, Fwd:, etc. let subject = subjectMatch[1].trim(); const baseSubject = subject.replace(/^(Re|Fwd|Fw):\s*/gi, "").trim(); if (baseSubject.length < 5) { return { error: "Subject too short to find thread", emails: [] }; } // Search for emails with similar subjects const allEmails = await tables.emails.query() .select(["filePath", "from", "to", "subject", "date", "dateTimestamp"]) .toArray(); // Filter to emails with matching base subject const threadEmails = allEmails .filter(e => { const eBaseSubject = (e.subject || "").replace(/^(Re|Fwd|Fw):\s*/gi, "").trim(); return eBaseSubject.toLowerCase() === baseSubject.toLowerCase(); }) .sort((a, b) => a.dateTimestamp - b.dateTimestamp) .slice(0, limit); return { emails: threadEmails, baseSubject, threadCount: threadEmails.length }; } catch (e) { console.error("Error getting email thread:", e.message); return { error: e.message, emails: [] }; } } // calendar_recurring: Get recurring events (shows events with recurrence rules and their upcoming occurrences) export function getRecurringEvents(limit = 20) { try { // Validate limit to prevent SQL issues const safeLimit = validateLimit(limit, 20, 100); const nowMac = Math.floor(Date.now() / 1000) - 978307200; const fetchLimit = safeLimit + 1; // Fetch one extra to detect if more exist // Query events that have recurrence rules defined in the Recurrence table // Show the next upcoming occurrence of each recurring event const query = ` SELECT DISTINCT ci.summary as title, datetime(COALESCE(MIN(oc.occurrence_end_date) - (ci.end_date - ci.start_date), MIN(oc.day)) + 978307200, 'unixepoch', 'localtime') as start, c.title as calendar, ci.all_day as isAllDay, COUNT(DISTINCT oc.day) as occurrenceCount FROM Recurrence r INNER JOIN CalendarItem ci ON r.owner_id = ci.ROWID INNER JOIN OccurrenceCache oc ON oc.event_id = ci.ROWID LEFT JOIN Calendar c ON ci.calendar_id = c.ROWID WHERE oc.day >= ${nowMac} AND ci.summary IS NOT NULL AND ci.summary <> '' GROUP BY ci.ROWID, ci.summary, c.title, ci.all_day ORDER BY occurrenceCount DESC, MIN(oc.day) ASC LIMIT ${fetchLimit} `; const events = safeSqlite3Json(CALENDAR_DB, query, { timeout: 30000 }); const hasMore = events.length > safeLimit; const limitedEvents = events.slice(0, safeLimit); return { events: limitedEvents, showing: limitedEvents.length, hasMore }; } catch (e) { console.error("Error getting recurring events:", 
e.message); return { events: [], showing: 0, hasMore: false }; } }
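// Illustrative thread lookup (path hypothetical and elided): subject-based
// matching groups "Re:"/"Fwd:" variants of the same base subject.
//
//   const thread = await getEmailThread(
//     `${process.env.HOME}/Library/Mail/V10/.../INBOX.mbox/.../123.emlx`
//   );
//   // -> { emails: [...], baseSubject: "Quarterly report", threadCount: 3 }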
