Skip to main content
Glama
real-data.js8.61 kB
/**
 * Real Data Helper Module for Indexing Tests
 *
 * NEW ARCHITECTURE: Real Data Only, No Rebuilding
 *
 * This module provides utilities for:
 * - Checking availability of real data sources (Mail, Messages, Calendar)
 * - Accessing the system index at ~/.apple-tools-mcp/vector-index/
 * - Real embedding pipeline access (Xenova/all-MiniLM-L6-v2 only)
 *
 * IMPORTANT: Tests assume the system index already exists.
 * Build it separately with: npm run build-index
 * Tests NEVER rebuild the index - they only validate it.
 */

import fs from 'fs'
import os from 'os'
import path from 'path'
import { execFileSync, execSync } from 'child_process'

// Fall back to the OS-reported home directory so that loading this module
// does not throw inside path.join() when the HOME variable is unset.
const HOME = process.env.HOME || os.homedir()

// Data directories
const DATA_DIR = path.join(HOME, '.apple-tools-mcp')
const PRODUCTION_INDEX_DIR = path.join(DATA_DIR, 'vector-index')

// Real data source paths on macOS
export const MAIL_DIR = path.join(HOME, 'Library/Mail')
export const MESSAGES_DB = path.join(HOME, 'Library/Messages/chat.db')
export const CALENDAR_DB = path.join(HOME, 'Library/Group Containers/group.com.apple.calendar/Calendar.sqlitedb')
export const CONTACTS_DB = path.join(HOME, 'Library/Application Support/AddressBook/AddressBook-v22.abcddb')

// Export paths for tests
export { PRODUCTION_INDEX_DIR, DATA_DIR }

// Shared constants for the sampling helpers below
const MS_PER_DAY = 24 * 60 * 60 * 1000
const COCOA_EPOCH_MS = Date.UTC(2001, 0, 1) // 2001-01-01T00:00:00Z
const EXEC_OPTIONS = { encoding: 'utf-8', timeout: 30000 }

/**
 * Coerce a caller-supplied count to a non-negative integer.
 * The result is interpolated into shell/SQL text, so anything that is not a
 * finite, non-negative number is replaced by the fallback.
 * @param {*} value - Candidate value (number or numeric string)
 * @param {number} fallback - Value used when the candidate is unusable
 * @returns {number} Truncated non-negative integer, or the fallback
 */
function toNonNegativeInt(value, fallback) {
  const n = Math.trunc(Number(value))
  return Number.isFinite(n) && n >= 0 ? n : fallback
}

/**
 * Compute the cutoff "N days ago" as seconds since 2001-01-01 UTC.
 * @param {number} daysBack - Number of days to look back
 * @param {number} nowMs - Reference time in Unix milliseconds (default: now)
 * @returns {number} Cutoff in seconds relative to the 2001 epoch
 */
function cocoaCutoffSeconds(daysBack, nowMs = Date.now()) {
  return (nowMs - daysBack * MS_PER_DAY - COCOA_EPOCH_MS) / 1000
}

/**
 * Run a query through the sqlite3 CLI in JSON mode and parse the rows.
 * Arguments are passed as an argv array (no shell), so neither the database
 * path nor the query text is subject to shell quoting or expansion.
 * @param {string} dbPath - Path of the SQLite database file
 * @param {string} query - SQL text to execute
 * @returns {Object[]} Parsed rows (empty array when sqlite3 prints nothing)
 */
function runSqliteJson(dbPath, query) {
  const output = execFileSync('sqlite3', ['-json', dbPath, query], EXEC_OPTIONS)
  return JSON.parse(output || '[]')
}

/**
 * Check if real data sources exist on this system
 * Tests can use this to skip if data isn't available
 * @returns {{mail: boolean, messages: boolean, calendar: boolean, contacts: boolean, productionIndex: boolean}}
 */
export function checkDataSources() {
  return {
    mail: fs.existsSync(MAIL_DIR),
    messages: fs.existsSync(MESSAGES_DB),
    calendar: fs.existsSync(CALENDAR_DB),
    contacts: fs.existsSync(CONTACTS_DB),
    productionIndex: fs.existsSync(PRODUCTION_INDEX_DIR)
  }
}

/**
 * Get real embedding pipeline (Xenova/all-MiniLM-L6-v2)
 * Pipeline is cached for reuse across tests
 *
 * The in-flight promise is cached (not just the resolved pipeline) so that
 * concurrent first calls share one model load. A failed load clears the cache
 * so a later call can retry.
 * @returns {Promise<Function>} The feature-extraction pipeline
 */
let embeddingPipeline = null

export async function getEmbedder() {
  if (!embeddingPipeline) {
    const loading = (async () => {
      const { pipeline } = await import('@xenova/transformers')
      return pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
    })()
    // Callers still observe the rejection via the returned promise; this
    // handler only resets the cache.
    loading.catch(() => {
      if (embeddingPipeline === loading) {
        embeddingPipeline = null
      }
    })
    embeddingPipeline = loading
  }
  return embeddingPipeline
}

/**
 * Generate real embedding for text
 * @param {string} text - Text to embed
 * @returns {Promise<Float32Array>} 384-dim embedding vector
 */
export async function embed(text) {
  const embedder = await getEmbedder()
  const output = await embedder(text, { pooling: 'mean', normalize: true })
  return output.data
}

/**
 * Generate embeddings for multiple texts
 * Texts are embedded one at a time, in order, via embed().
 * @param {string[]} texts - Array of texts
 * @returns {Promise<Float32Array[]>} Array of 384-dim vectors
 */
export async function embedBatch(texts) {
  const results = []
  for (const text of texts) {
    results.push(await embed(text))
  }
  return results
}

/**
 * Sample real emails from Mail directory
 * @param {number} limit - Max emails to return (invalid values fall back to 50)
 * @param {number} daysBack - Only include emails from last N days (default: 30)
 * @returns {Promise<string[]>} Array of .emlx file paths; empty when the Mail
 *   directory is missing or the lookup fails
 */
export async function sampleEmails(limit = 50, daysBack = 30) {
  if (!fs.existsSync(MAIL_DIR)) {
    return []
  }

  // limit ends up in a shell command line, so force it to a plain integer
  const maxResults = toNonNegativeInt(limit, 50)
  if (maxResults === 0) {
    return []
  }

  try {
    // Calculate cutoff date (N days ago)
    const cutoffDate = new Date()
    cutoffDate.setDate(cutoffDate.getDate() - daysBack)

    // Use mdfind to find emails modified in the last N days.
    // `head` stays in the pipeline so the captured output is bounded.
    const output = execSync(
      `mdfind -onlyin "${MAIL_DIR}" "kMDItemContentType == 'com.apple.mail.emlx' && kMDItemFSContentChangeDate >= \\$time.iso(${cutoffDate.toISOString()})" | head -${maxResults}`,
      EXEC_OPTIONS
    )
    return output.trim().split('\n').filter(Boolean)
  } catch (e) {
    console.warn('Failed to sample emails:', e.message)
    return []
  }
}

/**
 * Sample real messages from Messages database
 * @param {number} limit - Max messages to return (invalid values fall back to 50)
 * @param {number} daysBack - Only include messages from last N days (default: 30)
 * @returns {Promise<Object[]>} Array of message objects; empty when the
 *   database is missing or the query fails
 */
export async function sampleMessages(limit = 50, daysBack = 30) {
  if (!fs.existsSync(MESSAGES_DB)) {
    return []
  }

  try {
    const maxResults = toNonNegativeInt(limit, 50)
    // Cutoff expressed in seconds since 2001-01-01
    const cutoffTimestamp = cocoaCutoffSeconds(daysBack)

    // NOTE(review): recent macOS releases are reported to store message.date
    // in nanoseconds since 2001-01-01 rather than seconds - confirm against
    // the target OS. The CASE expression accepts either unit; values stored
    // in seconds are far below the threshold and pass through unchanged.
    const query = `
      SELECT
        m.ROWID as id,
        m.text,
        m.date,
        m.is_from_me,
        h.id as sender
      FROM message m
      LEFT JOIN handle h ON m.handle_id = h.ROWID
      WHERE m.text IS NOT NULL
        AND m.text != ''
        AND (CASE WHEN m.date > 1000000000000 THEN m.date / 1000000000 ELSE m.date END) >= ${cutoffTimestamp}
      ORDER BY m.date DESC
      LIMIT ${maxResults}
    `

    return runSqliteJson(MESSAGES_DB, query)
  } catch (e) {
    console.warn('Failed to sample messages:', e.message)
    return []
  }
}

/**
 * Sample real calendar events
 * @param {number} limit - Max events to return (invalid values fall back to 50)
 * @param {number} daysBack - Only include events starting within the last N days or later (default: 30)
 * @returns {Promise<Object[]>} Array of event objects; empty when the
 *   database is missing or the query fails
 */
export async function sampleCalendarEvents(limit = 50, daysBack = 30) {
  if (!fs.existsSync(CALENDAR_DB)) {
    return []
  }

  try {
    const maxResults = toNonNegativeInt(limit, 50)
    // Calendar uses Cocoa Core Data timestamp (seconds since 2001-01-01)
    const cutoffTimestamp = cocoaCutoffSeconds(daysBack)

    const query = `
      SELECT
        ci.ROWID as id,
        ci.summary as title,
        ci.start_date,
        ci.end_date,
        c.title as calendar_name
      FROM CalendarItem ci
      JOIN Calendar c ON ci.calendar_id = c.ROWID
      WHERE ci.summary IS NOT NULL
        AND ci.start_date >= ${cutoffTimestamp}
      ORDER BY ci.start_date DESC
      LIMIT ${maxResults}
    `

    return runSqliteJson(CALENDAR_DB, query)
  } catch (e) {
    console.warn('Failed to sample calendar events:', e.message)
    return []
  }
}

/**
 * NOTE: buildProductionIndex() has been removed.
 * The system index at ~/.apple-tools-mcp/vector-index/ is built and maintained
 * by the main application, not by tests. Tests assume it exists and only validate it.
 *
 * To rebuild the system index, users run: npm run build-index
 * Tests should NEVER rebuild the index.
 */

/**
 * Get production index table counts
 *
 * Returns zero counts without loading LanceDB when the index directory does
 * not exist. Any failure while loading LanceDB or reading the index is logged
 * and reported through an extra `error` field instead of being thrown.
 * @returns {Promise<Object>} Counts for each table
 */
export async function getProductionIndexStats() {
  const stats = { emails: 0, messages: 0, calendar: 0 }

  if (!fs.existsSync(PRODUCTION_INDEX_DIR)) {
    return stats
  }

  try {
    // Imported lazily, after the existence check and inside the try, so a
    // missing index or a broken LanceDB install never throws from here.
    const lancedb = await import('@lancedb/lancedb')
    const db = await lancedb.connect(PRODUCTION_INDEX_DIR)
    const tables = await db.tableNames()

    for (const tableName of tables) {
      // Only count the tables this module knows about
      if (Object.hasOwn(stats, tableName)) {
        const table = await db.openTable(tableName)
        stats[tableName] = await table.countRows()
      }
    }

    return stats
  } catch (e) {
    console.warn('Failed to get production index stats:', e.message)
    return { emails: 0, messages: 0, calendar: 0, error: e.message }
  }
}

/**
 * Check if production index is ready (at least one table has data)
 * @returns {Promise<boolean>}
 */
export async function isProductionIndexReady() {
  const stats = await getProductionIndexStats()
  return stats.emails > 0 || stats.messages > 0 || stats.calendar > 0
}

/**
 * Search production index
 * @param {string} query - Search query
 * @param {string} table - Table to search (emails, messages, calendar)
 * @param {number} limit - Max results
 * @returns {Promise<Object[]>} Search results
 * @throws {Error} When the requested table is not present in the index
 */
export async function searchProductionIndex(query, table = 'emails', limit = 10) {
  const lancedb = await import('@lancedb/lancedb')
  const db = await lancedb.connect(PRODUCTION_INDEX_DIR)

  const tables = await db.tableNames()
  if (!tables.includes(table)) {
    throw new Error(`Table ${table} not found in production index`)
  }

  const tbl = await db.openTable(table)
  const queryVector = await embed(query)

  // The embedding is converted to a plain array before being handed to search()
  const results = await tbl.search(Array.from(queryVector))
    .limit(limit)
    .toArray()

  return results
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.