/**
* Advanced Features Tests - Test complex search features
*
* Tests for:
* - Query expansion and synonyms
* - Negation filtering
* - Pronoun resolution
* - Result quality assessment
* - RRF (Reciprocal Rank Fusion) merging
*/
import { describe, it, expect, beforeAll } from 'vitest'
import fs from 'fs'
import os from 'os'
import path from 'path'
import { connect } from '@lancedb/lancedb'
import { pipeline } from '@xenova/transformers'
import {
loadContacts,
resolveByName
} from '../../contacts.js'
// ============================================================================
// Re-implement search logic functions for testing (not exported from search.js)
// ============================================================================
/**
 * Search term -> synonyms, most preferred first.
 * expandQuery() only ever substitutes the first synonym of the first matching term.
 */
const synonymMap = {
  'meeting': ['call', 'sync', 'standup', 'discussion'],
  'budget': ['financial', 'costs', 'expense', 'spending'],
  'project': ['initiative', 'task', 'work', 'assignment'],
  'deadline': ['due date', 'due', 'timeline', 'delivery'],
  'review': ['feedback', 'evaluation', 'assessment', 'check'],
  'invoice': ['bill', 'payment', 'receipt', 'charge'],
  'schedule': ['calendar', 'appointment', 'booking'],
  'update': ['status', 'progress', 'news'],
}

/**
 * Produce up to three variants of a search query.
 *
 * The variants are, in order:
 *   1. the query exactly as given,
 *   2. the query with temporal / filler phrases removed (when that changes it
 *      and leaves more than 3 characters),
 *   3. the query with the first word that has a synonym replaced by its
 *      preferred synonym.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} One to three variants; the original query is always first.
 */
function expandQuery(query) {
  const expansions = [query]

  // Collapse whitespace after stripping so removed phrases do not leave gaps
  // such as "meeting   project" in the simplified variant.
  const simplified = query
    .replace(/\b(recently|last \w+|this \w+|next \w+|about|regarding)\b/gi, '')
    .replace(/\s+/g, ' ')
    .trim()
  if (simplified && simplified !== query && simplified.length > 3) {
    expansions.push(simplified)
  }

  const words = query.toLowerCase().split(/\s+/)
  for (const word of words) {
    // Own-property check: a plain `synonymMap[word]` lookup also finds inherited
    // members (e.g. "constructor"), which produced the bogus variant "undefined".
    if (!Object.prototype.hasOwnProperty.call(synonymMap, word)) continue
    const [synonym] = synonymMap[word]
    if (synonym === undefined) continue

    const expanded = query.replace(new RegExp(`\\b${word}\\b`, 'i'), synonym)
    if (!expansions.includes(expanded)) {
      expansions.push(expanded)
    }
    // Only the first word with a synonym is expanded.
    break
  }

  return expansions.slice(0, 3)
}
/**
 * Split a query into its positive part and the terms the user wants excluded.
 *
 * Recognised negation forms (keywords are matched case-insensitively):
 *   `NOT term`, `-term`, `without term`, `excluding term`.
 *
 * The patterns are applied in that order, each one to whatever text the
 * previous patterns left behind.
 *
 * @param {string} query - Raw user query.
 * @returns {{ query: string, negations: string[] }} The query with negation
 *   clauses removed and whitespace normalised, plus the unique lower-cased
 *   negated terms in order of discovery.
 */
function parseNegation(query) {
  const negationPatterns = [
    /\bNOT\s+(\w+)/gi,
    // The minus must start a token. A bare /-(\w+)/ also fired inside
    // hyphenated words and dates ("follow-up", "2024-01-15").
    /(?<!\S)-(\w+)/g,
    /\bwithout\s+(\w+)/gi,
    /\bexcluding\s+(\w+)/gi,
  ]

  const negations = new Set()
  let cleanQuery = query
  for (const pattern of negationPatterns) {
    cleanQuery = cleanQuery.replace(pattern, (_clause, term) => {
      negations.add(term.toLowerCase())
      return ''
    })
  }

  return { query: cleanQuery.replace(/\s+/g, ' ').trim(), negations: [...negations] }
}
/** Words that carry no search meaning and are never returned as keywords. */
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
  'may', 'might', 'must', 'shall', 'can', 'to', 'of', 'in', 'for', 'on',
  'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during', 'before', 'after',
  'above', 'below', 'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there',
  'when', 'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other',
  'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than',
  'too', 'very', 'just', 'and', 'but', 'if', 'or', 'because', 'until', 'while',
  'about', 'against', 'any', 'both', 'find', 'get', 'me', 'my', 'i', 'you',
  'your', 'he', 'she', 'it', 'we', 'they', 'what', 'which', 'who', 'this',
  'that', 'these', 'those',
])

/**
 * Extract the distinct, meaningful words of a query.
 *
 * The query is lower-cased, punctuation is replaced by spaces, and words that
 * are two characters or shorter or listed in STOP_WORDS are dropped.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Unique keywords in order of first appearance.
 */
function extractKeywords(query) {
  const words = query
    .toLowerCase()
    // Unicode-aware: the ASCII-only /[^\w\s]/ stripped accented letters and
    // turned "résumé" into the fragments "r" and "sum".
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter((word) => word.length > 2 && !STOP_WORDS.has(word))
  return [...new Set(words)]
}
/**
 * Fraction of keywords that occur (as substrings) in a piece of text.
 *
 * The text is lower-cased before comparison; keywords are compared as given.
 *
 * @param {string[]} keywords - Keywords to look for.
 * @param {string} text - Text to search in.
 * @returns {number} Value between 0 and 1; 0 when there are no keywords or no text.
 */
function keywordMatchScore(keywords, text) {
  const nothingToScore = !keywords.length || !text
  if (nothingToScore) return 0

  const haystack = text.toLowerCase()
  const hitCount = keywords.reduce(
    (count, keyword) => (haystack.includes(keyword) ? count + 1 : count),
    0
  )
  return hitCount / keywords.length
}
/**
 * Classify a ranked result list by how trustworthy it looks.
 *
 * @param {Array<{ _distance?: number }>} results - Results ordered best-first.
 * @returns {'empty' | 'low_confidence' | 'sparse' | 'good'}
 *   'empty' when there are no results, 'low_confidence' when the best result's
 *   distance exceeds 0.7, 'sparse' when fewer than three results remain,
 *   otherwise 'good'.
 */
function assessResultQuality(results) {
  if (!results || results.length === 0) return 'empty'

  // `??` rather than `||`: a distance of exactly 0 is the best possible score
  // and must not be replaced by the pessimistic default of 1.
  const topDistance = results[0]._distance ?? 1
  if (topDistance > 0.7) return 'low_confidence'
  if (results.length < 3) return 'sparse'
  return 'good'
}
/**
 * Loosen a query by dropping temporal phrases such as "last week" or "today".
 *
 * @param {string} query - Raw user query.
 * @returns {string} The query without temporal phrases and with whitespace
 *   normalised, or the original query when fewer than 3 characters would remain.
 */
function broadenQuery(query) {
  const withoutTimePhrases = query.replace(
    /\b(yesterday|today|last week|this week|last month|this month|recently)\b/gi,
    ''
  )
  const normalised = withoutTimePhrases.replace(/\s+/g, ' ').trim()
  return normalised.length < 3 ? query : normalised
}
/**
 * Merge several ranked result lists with Reciprocal Rank Fusion.
 *
 * Every occurrence of an item contributes `1 / (k + rank + 1)` (rank is
 * 0-based) to that item's score; items are returned by descending total score.
 * When an item appears in several lists, the object from its first occurrence
 * is the one returned.
 *
 * @param {Array<Array<object>>} resultSets - Ranked lists, best result first.
 * @param {string} keyField - Property that identifies an item across lists.
 * @param {number} [k=60] - Smoothing constant added to every rank.
 * @returns {object[]} De-duplicated items, highest fused score first.
 */
function reciprocalRankFusion(resultSets, keyField, k = 60) {
  const scores = new Map()
  const items = new Map()

  for (const results of resultSets) {
    for (const [rank, item] of results.entries()) {
      const key = item[keyField]
      // Skip only items that genuinely lack a key. The old `!key` test also
      // discarded valid falsy identifiers such as the numeric id 0.
      if (key === undefined || key === null || key === '') continue

      scores.set(key, (scores.get(key) ?? 0) + 1 / (k + rank + 1))
      if (!items.has(key)) items.set(key, item)
    }
  }

  return [...scores]
    .sort(([, scoreA], [, scoreB]) => scoreB - scoreA)
    .map(([key]) => items.get(key))
}
/**
 * Remove results that mention any of the negated terms.
 *
 * A result is dropped when the lower-cased concatenation of its `text`,
 * `subject` and `searchText` fields contains a negated term as a substring.
 *
 * @param {Array<{ text?: string, subject?: string, searchText?: string }>} results
 * @param {string[]} [negations] - Terms to exclude; matched case-insensitively.
 * @returns {Array<object>} The surviving results. The input array itself is
 *   returned when there is nothing to filter on.
 */
function applyNegationFilter(results, negations) {
  if (!negations || negations.length === 0) return results

  // The haystack is lower-cased, so the needles must be too. Blank terms are
  // dropped because every string "includes" the empty string, which would
  // otherwise discard every result.
  const terms = negations
    .filter((neg) => typeof neg === 'string')
    .map((neg) => neg.trim().toLowerCase())
    .filter((neg) => neg !== '')
  if (terms.length === 0) return results

  return results.filter((r) => {
    const text = `${r.text || ''} ${r.subject || ''} ${r.searchText || ''}`.toLowerCase()
    return !terms.some((term) => text.includes(term))
  })
}
/**
 * Words that may follow "from" (or a first name) without being part of a
 * person's name: date words, articles, prepositions and conjunctions.
 */
const NON_PERSON_WORDS = new Set([
  'yesterday', 'today', 'tomorrow', 'last', 'this', 'next', 'recently',
  'the', 'a', 'an', 'my', 'our', 'your',
  'about', 'regarding', 'with', 'without', 'and', 'or', 'not',
  'on', 'in', 'at', 'for', 'to', 'of', 'by', 'from',
  'before', 'after', 'during', 'since', 'until',
])

/**
 * Pull structured filters out of a natural-language query.
 *
 * - `person`: the one- or two-word name following "from" (for example
 *   "from John" or "from john smith"). Matching is case-insensitive, so words
 *   listed in NON_PERSON_WORDS are rejected explicitly. Otherwise
 *   "from yesterday" would yield the person "yesterday", and
 *   "from John about budget" would yield "John about".
 * - `dateRange`: 'yesterday', 'last week' or 'last month', whichever is found
 *   first in that order of precedence.
 *
 * @param {string} query - Raw user query.
 * @returns {{ person?: string, dateRange?: string }} Only detected filters are set.
 */
function extractFilters(query) {
  const filters = {}

  // The optional second word is captured inside a lookahead so it is not
  // consumed: in "from yesterday from Alice" the second "from" must remain
  // available as the start of the next candidate.
  const fromPattern = /\bfrom\s+([A-Z][a-z]+)(?=(?:\s+([A-Z][a-z]+))?)/gi
  for (const [, first, second] of query.matchAll(fromPattern)) {
    if (NON_PERSON_WORDS.has(first.toLowerCase())) continue
    const hasSurname = second !== undefined && !NON_PERSON_WORDS.has(second.toLowerCase())
    filters.person = hasSurname ? `${first} ${second}` : first
    break
  }

  if (/\byesterday\b/i.test(query)) filters.dateRange = 'yesterday'
  else if (/\blast\s+week\b/i.test(query)) filters.dateRange = 'last week'
  else if (/\blast\s+month\b/i.test(query)) filters.dateRange = 'last month'

  return filters
}
// Real paths: the integration tests read the vector index stored under the
// current user's home directory.
// os.homedir() is used instead of process.env.HOME so that an unset HOME does
// not make path.join() throw while the file is being loaded, which would take
// the pure unit tests above down with it.
const DATA_DIR = path.join(os.homedir(), '.apple-tools-mcp')
const DB_PATH = path.join(DATA_DIR, 'vector-index')
// Decides whether the "Real Data" suite runs or is skipped.
const indexExists = fs.existsSync(DB_PATH)
// Lazily initialised shared state: `db` is set in the integration suite's
// beforeAll, `embedder` on the first getEmbedding() call.
let db = null
let embedder = null
/**
 * Embed a piece of text with the 'Xenova/all-MiniLM-L6-v2' feature-extraction
 * pipeline, requesting mean pooling and normalisation.
 *
 * The pipeline is created on first use and kept in the module-level
 * `embedder` variable for later calls.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The pipeline output's `data` copied into a plain array.
 */
async function getEmbedding(text) {
  const model =
    embedder ||
    (embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2'))
  const { data } = await model(text, { pooling: 'mean', normalize: true })
  return Array.from(data)
}
/**
 * Run a vector search for `query` against one table of the shared database.
 *
 * This is a best-effort helper for tests: it never throws. It resolves to an
 * empty array when no database is connected, when the table does not exist,
 * or when the lookup fails. Failures are logged so they are not mistaken for
 * a search that legitimately found nothing.
 *
 * @param {string} tableName - Table to search.
 * @param {string} query - Text to embed and search for.
 * @param {number} [limit=10] - Maximum number of rows to return.
 * @returns {Promise<object[]>} Matching rows, or [] as described above.
 */
async function searchTable(tableName, query, limit = 10) {
  if (!db) return []
  try {
    const tables = await db.tableNames()
    if (!tables.includes(tableName)) return []
    const table = await db.openTable(tableName)
    const embedding = await getEmbedding(query)
    return await table.search(embedding).limit(limit).toArray()
  } catch (err) {
    // Keep the best-effort contract, but surface the cause instead of
    // swallowing it silently.
    console.warn(`searchTable("${tableName}") failed; returning no results:`, err)
    return []
  }
}
// ============================================================================
// QUERY EXPANSION TESTS
// ============================================================================
// Checks the concrete variants expandQuery() produces. The earlier assertions
// (`length > 0`, "contains invoice") held for any input because the original
// query is always the first variant.
describe('Query Expansion', () => {
  it('should expand meeting-related terms', () => {
    const variants = expandQuery('meeting about budget')
    console.log(' → Variants:', variants)
    expect(variants.length).toBeLessThanOrEqual(3)
    expect(variants).toContain('meeting about budget')
    // The original query always comes first
    expect(variants[0]).toBe('meeting about budget')
    // "meeting" is replaced by its preferred synonym
    expect(variants).toContain('call about budget')
  })

  it('should expand invoice-related terms', () => {
    const variants = expandQuery('invoice for project')
    console.log(' → Variants:', variants)
    expect(variants).toContain('invoice for project')
    // Only the first word with a synonym is expanded, using its first synonym
    expect(variants).toContain('bill for project')
  })

  it('should handle queries with time modifiers', () => {
    const query = 'meeting last week about project'
    const variants = expandQuery(query)
    console.log(' → Variants:', variants)
    expect(variants[0]).toBe(query)
    // Should create a simplified version without time modifiers
    const simplified = variants.find((variant) => !/last week|about/i.test(variant))
    expect(simplified).toBeDefined()
    expect(simplified).toContain('meeting')
    expect(simplified).toContain('project')
  })

  it('should not expand simple queries excessively', () => {
    const variants = expandQuery('hello')
    expect(variants).toEqual(['hello'])
  })

  it('should deduplicate expansions', () => {
    const variants = expandQuery('meeting meeting meeting')
    const uniqueVariants = new Set(variants)
    expect(uniqueVariants.size).toBe(variants.length)
  })
})
// ============================================================================
// NEGATION PARSING TESTS
// ============================================================================
// Each case feeds one negation syntax to parseNegation() and checks both the
// remaining query and the collected negated terms.
describe('Negation Parsing', () => {
  it('should parse NOT keyword', () => {
    const { query, negations } = parseNegation('emails NOT spam')
    expect(query).toContain('emails')
    expect(negations).toContain('spam')
  })

  it('should parse minus prefix', () => {
    const { query, negations } = parseNegation('project -cancelled')
    expect(query).toContain('project')
    expect(negations).toContain('cancelled')
  })

  it('should parse "without" keyword', () => {
    const { query, negations } = parseNegation('meeting without john')
    expect(query).toContain('meeting')
    expect(negations).toContain('john')
  })

  it('should parse "excluding" keyword', () => {
    const { query, negations } = parseNegation('emails excluding newsletters')
    expect(query).toContain('emails')
    expect(negations).toContain('newsletters')
  })

  it('should handle multiple negations', () => {
    const { negations } = parseNegation('meeting NOT monday NOT tuesday -cancelled')
    expect(negations.length).toBeGreaterThanOrEqual(2)
  })

  it('should handle no negations', () => {
    const { query, negations } = parseNegation('simple query')
    expect(query).toContain('simple')
    expect(negations.length).toBe(0)
  })
})
// ============================================================================
// KEYWORD EXTRACTION TESTS
// ============================================================================
// extractKeywords() should keep meaningful words and drop stop words, short
// words and casing.
describe('Keyword Extraction', () => {
  it('should extract significant words', () => {
    const extracted = extractKeywords('Find emails about the project deadline')
    console.log(' → Keywords:', extracted)
    for (const expected of ['emails', 'project', 'deadline']) {
      expect(extracted).toContain(expected)
    }
  })

  it('should filter out stop words', () => {
    const extracted = extractKeywords('the meeting with a team about an issue')
    for (const stopWord of ['the', 'with', 'a', 'an']) {
      expect(extracted).not.toContain(stopWord)
    }
  })

  it('should lowercase keywords', () => {
    const extracted = extractKeywords('URGENT Meeting with CEO')
    for (const expected of ['urgent', 'meeting', 'ceo']) {
      expect(extracted).toContain(expected)
    }
  })

  it('should handle empty input', () => {
    expect(Array.isArray(extractKeywords(''))).toBe(true)
  })

  it('should filter short words', () => {
    const extracted = extractKeywords('I am a test of at to')
    // Words of one or two characters must never be returned
    const shortWords = extracted.filter((word) => word.length <= 2)
    expect(shortWords.length).toBe(0)
  })
})
// ============================================================================
// KEYWORD MATCHING TESTS
// ============================================================================
// keywordMatchScore() returns the share of keywords found in the text.
describe('Keyword Matching', () => {
  const sampleText = 'This is about a meeting'

  it('should return 1.0 for exact match', () => {
    expect(keywordMatchScore(['meeting'], sampleText)).toBe(1.0)
  })

  it('should return 0 for no match', () => {
    expect(keywordMatchScore(['xyznonexistent'], 'This is regular text')).toBe(0)
  })

  it('should average scores for multiple keywords', () => {
    // One of the two keywords is present
    expect(keywordMatchScore(['meeting', 'xyznonexistent'], sampleText)).toBe(0.5)
  })

  it('should handle empty keywords', () => {
    expect(keywordMatchScore([], 'Some text')).toBe(0)
  })

  it('should handle empty text', () => {
    expect(keywordMatchScore(['meeting'], '')).toBe(0)
  })
})
// ============================================================================
// RESULT QUALITY ASSESSMENT TESTS
// ============================================================================
// assessResultQuality() labels a result list from its top distance and size.
describe('Result Quality Assessment', () => {
  // Build a result list from bare distance values, best match first.
  const withDistances = (...distances) => distances.map((_distance) => ({ _distance }))

  it('should return "empty" for no results', () => {
    expect(assessResultQuality([])).toBe('empty')
  })

  it('should return "low_confidence" for poor scores', () => {
    const poorMatches = withDistances(0.9, 0.95)
    expect(assessResultQuality(poorMatches)).toBe('low_confidence')
  })

  it('should return "good" for high quality results', () => {
    const closeMatches = withDistances(0.1, 0.15, 0.2, 0.25, 0.3)
    expect(assessResultQuality(closeMatches)).toBe('good')
  })

  it('should return "sparse" for few results with moderate score', () => {
    const singleMatch = withDistances(0.3)
    expect(assessResultQuality(singleMatch)).toBe('sparse')
  })
})
// ============================================================================
// QUERY BROADENING TESTS
// ============================================================================
// broadenQuery() strips temporal phrases only; it does not touch prepositions
// or names.
describe('Query Broadening', () => {
  it('should remove time constraints', () => {
    const broadened = broadenQuery('meeting last week about project')
    expect(broadened).not.toContain('last week')
    expect(broadened).toContain('meeting')
    expect(broadened).toContain('project')
  })

  // Replaces a test named "should remove prepositions" that asserted only
  // `expect(true).toBe(true)`; broadenQuery() has no such behaviour.
  it('should leave queries without time constraints unchanged', () => {
    const query = 'emails from john about budget'
    expect(broadenQuery(query)).toBe(query)
  })

  it('should return original if result too short', () => {
    // Stripping "today" leaves nothing, so the original must be returned
    const original = 'today'
    const broadened = broadenQuery(original)
    expect(broadened).toBe(original)
    expect(broadened.length).toBeGreaterThan(0)
  })
})
// ============================================================================
// RRF (RECIPROCAL RANK FUSION) TESTS
// ============================================================================
// reciprocalRankFusion() merges ranked lists, de-duplicating by key and
// favouring items that appear in several lists.
describe('Reciprocal Rank Fusion', () => {
  // Build a ranked list of `{ id }` items from bare ids.
  const ranked = (...ids) => ids.map((id) => ({ id }))

  it('should merge results from multiple sets', () => {
    const merged = reciprocalRankFusion([ranked('a', 'b', 'c'), ranked('b', 'd', 'a')], 'id')
    expect(merged.length).toBe(4) // a, b, c, d (deduplicated)
  })

  it('should boost items appearing in multiple sets', () => {
    const merged = reciprocalRankFusion([ranked('a', 'b'), ranked('a', 'c')], 'id')
    // 'a' appears in both, should be ranked higher
    const [top] = merged
    expect(top.id).toBe('a')
  })

  it('should handle empty result sets', () => {
    const merged = reciprocalRankFusion([ranked(), ranked('a')], 'id')
    expect(merged.length).toBe(1)
  })

  it('should handle all empty sets', () => {
    expect(reciprocalRankFusion([[], [], []], 'id')).toEqual([])
  })
})
// ============================================================================
// NEGATION FILTER TESTS
// ============================================================================
// applyNegationFilter() drops results whose text fields mention a negated term.
describe('Negation Filtering', () => {
  it('should filter out results containing negated terms', () => {
    const budget = { text: 'Meeting about budget', subject: 'Budget meeting' }
    const cancelled = { text: 'Meeting cancelled', subject: 'Cancelled meeting' }
    const update = { text: 'Project update', subject: 'Update' }

    const kept = applyNegationFilter([budget, cancelled, update], ['cancelled'])

    expect(kept.length).toBe(2)
    const mentionsCancelled = kept.some((result) => result.text.includes('cancelled'))
    expect(mentionsCancelled).toBe(false)
  })

  it('should check multiple text fields', () => {
    // The negated word appears only in the subject of the first result
    const candidates = [
      { text: 'Normal text', subject: 'Contains spam word' },
      { text: 'Clean text', subject: 'Clean subject' }
    ]
    const kept = applyNegationFilter(candidates, ['spam'])
    expect(kept.length).toBe(1)
  })

  it('should handle no negations', () => {
    const candidates = [{ text: 'Test' }]
    expect(applyNegationFilter(candidates, [])).toEqual(candidates)
  })
})
// ============================================================================
// FILTER EXTRACTION TESTS
// ============================================================================
// extractFilters() should recognise a sender after "from" and a relative date.
describe('Filter Extraction from Natural Language', () => {
  describe('Person extraction', () => {
    it('should extract "from John" pattern', () => {
      const { person } = extractFilters('emails from John about budget')
      // The captured name must at least include the first name after "from"
      expect(person).toBeDefined()
      expect(person.toLowerCase()).toContain('john')
    })

    it('should extract "from John Smith" pattern', () => {
      const { person } = extractFilters('messages from John Smith')
      // Should capture at least the first name
      expect(person).toBeDefined()
      expect(person.toLowerCase()).toContain('john')
    })
  })

  describe('Date extraction', () => {
    it('should extract "yesterday"', () => {
      const { dateRange } = extractFilters('emails from yesterday')
      expect(dateRange).toBe('yesterday')
    })

    it('should extract "last week"', () => {
      const { dateRange } = extractFilters('messages from last week')
      expect(dateRange).toBe('last week')
    })

    it('should extract "last month"', () => {
      const { dateRange } = extractFilters('calendar events from last month')
      expect(dateRange).toBe('last month')
    })
  })
})
// ============================================================================
// INTEGRATION WITH REAL DATA
// ============================================================================
// Integration suite: runs the helpers above against the on-disk vector index.
// The whole suite is skipped when DB_PATH does not exist on this machine.
describe.skipIf(!indexExists)('Advanced Features with Real Data', () => {
// Open the shared `db` connection used by searchTable() and load contacts.
beforeAll(async () => {
if (indexExists) {
db = await connect(DB_PATH)
}
loadContacts()
})
// Searches the 'emails' table with the first two query variants. Only checks
// that both searches return arrays, not that the result sets actually differ.
it('should find different results for different query expansions', async () => {
const variants = expandQuery('meeting about budget')
if (variants.length > 1) {
const results1 = await searchTable('emails', variants[0], 10)
const results2 = await searchTable('emails', variants[1] || variants[0], 10)
console.log(` → Variant 1 "${variants[0]}": ${results1.length} results`)
console.log(` → Variant 2 "${variants[1] || 'same'}": ${results2.length} results`)
expect(Array.isArray(results1)).toBe(true)
expect(Array.isArray(results2)).toBe(true)
}
})
// Filtering can only remove rows, so the filtered list is never longer.
// Nothing is asserted when the search returns no rows.
it('should apply negation filtering to real results', async () => {
const results = await searchTable('emails', 'meeting update', 20)
if (results.length > 0) {
const filtered = applyNegationFilter(results, ['cancelled', 'postponed'])
console.log(` → Before negation: ${results.length}, After: ${filtered.length}`)
expect(filtered.length).toBeLessThanOrEqual(results.length)
}
})
// Merges two searches keyed on each row's `filePath`; de-duplication means
// the merged list cannot exceed the combined input size.
it('should merge results using RRF', async () => {
const query1Results = await searchTable('emails', 'meeting', 10)
const query2Results = await searchTable('emails', 'call', 10)
const merged = reciprocalRankFusion([query1Results, query2Results], 'filePath')
console.log(` → Set 1: ${query1Results.length}, Set 2: ${query2Results.length}, Merged: ${merged.length}`)
expect(merged.length).toBeLessThanOrEqual(query1Results.length + query2Results.length)
})
// Only the nonsense query's quality is asserted; the quality of the
// plausible query is logged for inspection but not checked.
it('should assess quality of real search results', async () => {
const goodQuery = await searchTable('emails', 'invoice payment receipt', 10)
const badQuery = await searchTable('emails', 'xyznonexistentquery99999', 10)
const goodQuality = assessResultQuality(goodQuery)
const badQuality = assessResultQuality(badQuery)
console.log(` → Good query quality: ${goodQuality}`)
console.log(` → Bad query quality: ${badQuality}`)
// Vector search returns nearest neighbors even for nonsense queries
// Bad queries should have low confidence (high distance scores)
expect(['empty', 'low_confidence', 'sparse']).toContain(badQuality)
})
})
// ============================================================================
// CONTACT-BASED SEARCH TESTS
// ============================================================================
// Smoke tests for contact lookup: whatever the input, resolveByName() is
// expected to return an array.
describe('Contact-Based Search Features', () => {
  beforeAll(() => {
    loadContacts()
  })

  it('should resolve contact names for person filters', () => {
    const matches = resolveByName('john')
    console.log(` → Found ${matches.length} contacts named "john"`)
    expect(Array.isArray(matches)).toBe(true)
  })

  it('should handle partial name matching', () => {
    const matches = resolveByName('jo')
    expect(Array.isArray(matches)).toBe(true)
  })

  it('should handle name with special characters', () => {
    const matches = resolveByName("O'Brien")
    expect(Array.isArray(matches)).toBe(true)
  })
})