Skip to main content
Glama
advanced-features.test.js (21.8 kB)
/**
 * Advanced Features Tests - Test complex search features
 *
 * Tests for:
 * - Query expansion and synonyms
 * - Negation filtering
 * - Pronoun resolution
 * - Result quality assessment
 * - RRF (Reciprocal Rank Fusion) merging
 */
import { describe, it, expect, beforeAll } from 'vitest'
import fs from 'fs'
import os from 'os'
import path from 'path'
import { connect } from '@lancedb/lancedb'
import { pipeline } from '@xenova/transformers'
import { loadContacts, resolveByName } from '../../contacts.js'

// ============================================================================
// Re-implement search logic functions for testing (not exported from search.js)
// ============================================================================

// Synonyms used by expandQuery; only the FIRST synonym of a word is ever used.
const synonymMap = {
  'meeting': ['call', 'sync', 'standup', 'discussion'],
  'budget': ['financial', 'costs', 'expense', 'spending'],
  'project': ['initiative', 'task', 'work', 'assignment'],
  'deadline': ['due date', 'due', 'timeline', 'delivery'],
  'review': ['feedback', 'evaluation', 'assessment', 'check'],
  'invoice': ['bill', 'payment', 'receipt', 'charge'],
  'schedule': ['calendar', 'appointment', 'booking'],
  'update': ['status', 'progress', 'news'],
}

/**
 * Build up to three variants of a query: the original, a version with
 * time/filler phrases removed, and a version where the first word that has a
 * synonym is replaced by its first synonym.
 *
 * @param {string} query - Raw search query.
 * @returns {string[]} At most three variants; the original query is always first.
 */
function expandQuery(query) {
  const expansions = [query]

  // Note: inner whitespace left behind by the removal is not collapsed.
  const simplified = query.replace(/\b(recently|last \w+|this \w+|next \w+|about|regarding)\b/gi, '').trim()
  if (simplified && simplified !== query && simplified.length > 3) {
    expansions.push(simplified)
  }

  const words = query.toLowerCase().split(/\s+/)
  for (const word of words) {
    // Own-property check: a plain `synonymMap[word]` lookup also resolves
    // inherited members (e.g. 'constructor' -> Object, whose length is 1),
    // which used to add the literal string 'undefined' as an expansion.
    // NOTE(review): if search.js uses the same unguarded lookup it needs the same fix.
    if (!Object.prototype.hasOwnProperty.call(synonymMap, word)) continue
    const synonyms = synonymMap[word]
    if (synonyms.length > 0) {
      const expanded = query.replace(new RegExp(`\\b${word}\\b`, 'i'), synonyms[0])
      if (!expansions.includes(expanded)) {
        expansions.push(expanded)
      }
      // Only the first word with a synonym is expanded.
      break
    }
  }

  return expansions.slice(0, 3)
}

/**
 * Split a query into its positive part and a list of negated terms.
 * Recognised forms: `NOT term`, `-term`, `without term`, `excluding term`.
 *
 * NOTE: the `-term` pattern has no leading boundary, so it also matches inside
 * hyphenated words ('follow-up' yields the negation 'up').
 *
 * @param {string} query - Raw search query.
 * @returns {{query: string, negations: string[]}} Cleaned query (whitespace
 *   collapsed) and the de-duplicated, lower-cased negated terms.
 */
function parseNegation(query) {
  const negations = []
  let cleanQuery = query

  const notMatches = query.match(/\bNOT\s+(\w+)/gi) || []
  for (const match of notMatches) {
    const term = match.replace(/^NOT\s+/i, '')
    negations.push(term.toLowerCase())
    cleanQuery = cleanQuery.replace(match, '')
  }

  const minusMatches = query.match(/-(\w+)/g) || []
  for (const match of minusMatches) {
    const term = match.replace(/^-/, '')
    negations.push(term.toLowerCase())
    cleanQuery = cleanQuery.replace(match, '')
  }

  const withoutMatches = query.match(/\bwithout\s+(\w+)/gi) || []
  for (const match of withoutMatches) {
    const term = match.replace(/^without\s+/i, '')
    negations.push(term.toLowerCase())
    cleanQuery = cleanQuery.replace(match, '')
  }

  const excludingMatches = query.match(/\bexcluding\s+(\w+)/gi) || []
  for (const match of excludingMatches) {
    const term = match.replace(/^excluding\s+/i, '')
    negations.push(term.toLowerCase())
    cleanQuery = cleanQuery.replace(match, '')
  }

  return {
    query: cleanQuery.replace(/\s+/g, ' ').trim(),
    negations: [...new Set(negations)]
  }
}

// Words ignored by extractKeywords.
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
  'should', 'may', 'might', 'must', 'shall', 'can', 'to', 'of', 'in', 'for',
  'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during',
  'before', 'after', 'above', 'below', 'between', 'under', 'again',
  'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
  'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
  'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'and',
  'but', 'if', 'or', 'because', 'until', 'while', 'about', 'against', 'any',
  'both', 'find', 'get', 'me', 'my', 'i', 'you', 'your', 'he', 'she', 'it',
  'we', 'they', 'what', 'which', 'who', 'this', 'that', 'these', 'those'
])

/**
 * Extract unique, lower-cased keywords from a query: punctuation is replaced
 * by spaces, then words of length <= 2 and stop words are dropped.
 *
 * @param {string} query - Raw search query.
 * @returns {string[]} Unique keywords in first-seen order.
 */
function extractKeywords(query) {
  const words = query
    .toLowerCase()
    .replace(/[^\w\s]/g, ' ')
    .split(/\s+/)
    .filter(w => w.length > 2 && !STOP_WORDS.has(w))
  return [...new Set(words)]
}

/**
 * Fraction of keywords that occur in the text as a (case-insensitive on the
 * text side) substring.
 *
 * @param {string[]} keywords - Keywords to look for; compared as given.
 * @param {string} text - Text to search in.
 * @returns {number} Value in [0, 1]; 0 when keywords or text are empty.
 */
function keywordMatchScore(keywords, text) {
  if (!keywords.length || !text) return 0
  const lowerText = text.toLowerCase()
  let matches = 0
  for (const kw of keywords) {
    if (lowerText.includes(kw)) matches++
  }
  return matches / keywords.length
}

/**
 * Classify a result list by the distance of its top hit and its size.
 *
 * @param {Array<{_distance?: number}>} results - Results, best hit first.
 * @returns {'empty'|'low_confidence'|'sparse'|'good'} 'empty' for no results,
 *   'low_confidence' when the top distance exceeds 0.7, 'sparse' for fewer
 *   than three results, otherwise 'good'.
 */
function assessResultQuality(results) {
  if (!results || results.length === 0) return 'empty'
  // `??` instead of `||`: a distance of exactly 0 is the best possible match
  // and must not fall back to the "missing distance" default of 1.
  // NOTE(review): if search.js uses `|| 1` here it needs the same fix.
  const topScore = results[0]._distance ?? 1
  if (topScore > 0.7) return 'low_confidence'
  if (results.length < 3) return 'sparse'
  return 'good'
}

/**
 * Remove a fixed set of time phrases from a query and collapse whitespace.
 *
 * @param {string} query - Raw search query.
 * @returns {string} The broadened query, or the original query when fewer
 *   than three characters would remain.
 */
function broadenQuery(query) {
  const broadened = query
    .replace(/\b(yesterday|today|last week|this week|last month|this month|recently)\b/gi, '')
    .replace(/\s+/g, ' ')
    .trim()
  if (broadened.length < 3) return query
  return broadened
}

/**
 * Merge several ranked result lists with Reciprocal Rank Fusion: every
 * occurrence of an item contributes 1 / (k + rank + 1), rank being 0-based.
 *
 * @param {Array<Array<object>>} resultSets - Ranked lists, best hit first.
 * @param {string} keyField - Property that identifies an item across lists.
 * @param {number} [k=60] - Smoothing constant of the RRF formula.
 * @returns {object[]} Unique items (first-seen instance of each key) sorted by
 *   descending fused score. Items whose key is falsy are skipped.
 */
function reciprocalRankFusion(resultSets, keyField, k = 60) {
  const scores = new Map()
  const items = new Map()

  for (const results of resultSets) {
    for (let rank = 0; rank < results.length; rank++) {
      const item = results[rank]
      const key = item[keyField]
      if (!key) continue
      const rrfScore = 1 / (k + rank + 1)
      scores.set(key, (scores.get(key) || 0) + rrfScore)
      if (!items.has(key)) items.set(key, item)
    }
  }

  return Array.from(scores.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([key]) => items.get(key))
}

/**
 * Drop results whose `text`, `subject` or `searchText` contains any negated
 * term (substring match against the lower-cased fields).
 *
 * @param {Array<{text?: string, subject?: string, searchText?: string}>} results
 * @param {string[]} negations - Terms to exclude; compared as given.
 * @returns {object[]} The input array itself when there are no negations,
 *   otherwise a filtered copy.
 */
function applyNegationFilter(results, negations) {
  if (!negations || negations.length === 0) return results
  return results.filter(r => {
    const text = ((r.text || '') + ' ' + (r.subject || '') + ' ' + (r.searchText || '')).toLowerCase()
    return !negations.some(neg => text.includes(neg))
  })
}

/**
 * Pull simple person/date filters out of a natural-language query.
 *
 * NOTE: the person pattern carries the `i` flag, so `[A-Z]` also matches
 * lower-case letters; 'emails from yesterday' therefore sets
 * `person` to 'yesterday' in addition to `dateRange`.
 *
 * @param {string} query - Raw search query.
 * @returns {{person?: string, dateRange?: string}} Detected filters.
 */
function extractFilters(query) {
  const filters = {}

  const personMatch = query.match(/\bfrom\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)/i)
  if (personMatch) filters.person = personMatch[1]

  if (/\byesterday\b/i.test(query)) filters.dateRange = 'yesterday'
  else if (/\blast\s+week\b/i.test(query)) filters.dateRange = 'last week'
  else if (/\blast\s+month\b/i.test(query)) filters.dateRange = 'last month'

  return filters
}

// Real paths
// os.homedir() instead of process.env.HOME: path.join() throws on an undefined
// segment, which would fail the whole file at import time when HOME is unset.
const DATA_DIR = path.join(os.homedir(), '.apple-tools-mcp')
const DB_PATH = path.join(DATA_DIR, 'vector-index')
const indexExists = fs.existsSync(DB_PATH)

let db = null
let embedder = null

/**
 * Embed a text with the lazily created 'Xenova/all-MiniLM-L6-v2' pipeline
 * (mean pooling, normalized).
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Embedding as a plain array.
 */
async function getEmbedding(text) {
  if (!embedder) {
    embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2')
  }
  const output = await embedder(text, { pooling: 'mean', normalize: true })
  return Array.from(output.data)
}

/**
 * Vector-search one table of the connected database.
 *
 * @param {string} tableName - Table to search.
 * @param {string} query - Query text; embedded via getEmbedding.
 * @param {number} [limit=10] - Maximum number of rows.
 * @returns {Promise<object[]>} Matching rows; an empty array when there is no
 *   connection, the table does not exist, or the search fails.
 */
async function searchTable(tableName, query, limit = 10) {
  if (!db) return []
  try {
    const tables = await db.tableNames()
    if (!tables.includes(tableName)) return []
    const table = await db.openTable(tableName)
    const embedding = await getEmbedding(query)
    return await table.search(embedding).limit(limit).toArray()
  } catch (e) {
    // Best effort: keep returning [] so the suite does not crash, but surface
    // the failure instead of hiding it behind an "empty result".
    console.warn(`  → searchTable("${tableName}") failed:`, e)
    return []
  }
}

// ============================================================================
// QUERY EXPANSION TESTS
// ============================================================================

describe('Query Expansion', () => {
  it('should expand meeting-related terms', () => {
    const variants = expandQuery('meeting about budget')
    console.log(' → Variants:', variants)
    expect(variants.length).toBeGreaterThan(0)
    expect(variants.length).toBeLessThanOrEqual(3)
    expect(variants).toContain('meeting about budget')
  })

  it('should expand invoice-related terms', () => {
    const variants = expandQuery('invoice for project')
    console.log(' → Variants:', variants)
    expect(variants.length).toBeGreaterThan(0)
    // Should include synonyms like bill, payment, etc.
    const allVariants = variants.join(' ').toLowerCase()
    const hasExpansion = allVariants.includes('bill') || allVariants.includes('receipt') || allVariants.includes('invoice')
    expect(hasExpansion).toBe(true)
  })

  it('should handle queries with time modifiers', () => {
    const query = 'meeting last week about project'
    const variants = expandQuery(query)
    console.log(' → Variants:', variants)
    expect(variants.length).toBeGreaterThan(0)
    // Should create a simplified version without time modifiers
    expect(variants.some(v => v !== query && !v.includes('last week'))).toBe(true)
  })

  it('should not expand simple queries excessively', () => {
    const variants = expandQuery('hello')
    expect(variants.length).toBeLessThanOrEqual(3)
  })

  it('should deduplicate expansions', () => {
    const variants = expandQuery('meeting meeting meeting')
    const uniqueVariants = new Set(variants)
    expect(uniqueVariants.size).toBe(variants.length)
  })

  it('should ignore words that only match inherited object properties', () => {
    // 'constructor' exists on every plain object but is not a synonym entry.
    expect(expandQuery('constructor')).toEqual(['constructor'])
  })
})

// ============================================================================
// NEGATION PARSING TESTS
// ============================================================================

describe('Negation Parsing', () => {
  it('should parse NOT keyword', () => {
    const result = parseNegation('emails NOT spam')
    expect(result.query).toContain('emails')
    expect(result.negations).toContain('spam')
  })

  it('should parse minus prefix', () => {
    const result = parseNegation('project -cancelled')
    expect(result.query).toContain('project')
    expect(result.negations).toContain('cancelled')
  })

  it('should parse "without" keyword', () => {
    const result = parseNegation('meeting without john')
    expect(result.query).toContain('meeting')
    expect(result.negations).toContain('john')
  })

  it('should parse "excluding" keyword', () => {
    const result = parseNegation('emails excluding newsletters')
    expect(result.query).toContain('emails')
    expect(result.negations).toContain('newsletters')
  })

  it('should handle multiple negations', () => {
    const result = parseNegation('meeting NOT monday NOT tuesday -cancelled')
    expect(result.negations.length).toBeGreaterThanOrEqual(2)
  })

  it('should handle no negations', () => {
    const result = parseNegation('simple query')
    expect(result.query).toContain('simple')
    expect(result.negations.length).toBe(0)
  })
})

// ============================================================================
// KEYWORD EXTRACTION TESTS
// ============================================================================

describe('Keyword Extraction', () => {
  it('should extract significant words', () => {
    const keywords = extractKeywords('Find emails about the project deadline')
    console.log(' → Keywords:', keywords)
    expect(keywords).toContain('emails')
    expect(keywords).toContain('project')
    expect(keywords).toContain('deadline')
  })

  it('should filter out stop words', () => {
    const keywords = extractKeywords('the meeting with a team about an issue')
    expect(keywords).not.toContain('the')
    expect(keywords).not.toContain('with')
    expect(keywords).not.toContain('a')
    expect(keywords).not.toContain('an')
  })

  it('should lowercase keywords', () => {
    const keywords = extractKeywords('URGENT Meeting with CEO')
    expect(keywords).toContain('urgent')
    expect(keywords).toContain('meeting')
    expect(keywords).toContain('ceo')
  })

  it('should handle empty input', () => {
    const keywords = extractKeywords('')
    expect(Array.isArray(keywords)).toBe(true)
  })

  it('should filter short words', () => {
    const keywords = extractKeywords('I am a test of at to')
    // Short words should be filtered
    expect(keywords.filter(k => k.length <= 2).length).toBe(0)
  })
})

// ============================================================================
// KEYWORD MATCHING TESTS
// ============================================================================

describe('Keyword Matching', () => {
  it('should return 1.0 for exact match', () => {
    const score = keywordMatchScore(['meeting'], 'This is about a meeting')
    expect(score).toBe(1.0)
  })

  it('should return 0 for no match', () => {
    const score = keywordMatchScore(['xyznonexistent'], 'This is regular text')
    expect(score).toBe(0)
  })

  it('should average scores for multiple keywords', () => {
    const score = keywordMatchScore(['meeting', 'xyznonexistent'], 'This is about a meeting')
    expect(score).toBe(0.5)
  })

  it('should handle empty keywords', () => {
    const score = keywordMatchScore([], 'Some text')
    expect(score).toBe(0)
  })

  it('should handle empty text', () => {
    const score = keywordMatchScore(['meeting'], '')
    expect(score).toBe(0)
  })
})

// ============================================================================
// RESULT QUALITY ASSESSMENT TESTS
// ============================================================================

describe('Result Quality Assessment', () => {
  it('should return "empty" for no results', () => {
    const quality = assessResultQuality([])
    expect(quality).toBe('empty')
  })

  it('should return "low_confidence" for poor scores', () => {
    const results = [
      { _distance: 0.9 },
      { _distance: 0.95 }
    ]
    const quality = assessResultQuality(results)
    expect(quality).toBe('low_confidence')
  })

  it('should return "good" for high quality results', () => {
    const results = [
      { _distance: 0.1 },
      { _distance: 0.15 },
      { _distance: 0.2 },
      { _distance: 0.25 },
      { _distance: 0.3 }
    ]
    const quality = assessResultQuality(results)
    expect(quality).toBe('good')
  })

  it('should return "sparse" for few results with moderate score', () => {
    const results = [
      { _distance: 0.3 }
    ]
    const quality = assessResultQuality(results)
    expect(quality).toBe('sparse')
  })

  it('should treat a zero distance as a confident match', () => {
    const results = [
      { _distance: 0 },
      { _distance: 0.1 },
      { _distance: 0.2 }
    ]
    expect(assessResultQuality(results)).toBe('good')
  })

  it('should treat a missing distance as low confidence', () => {
    expect(assessResultQuality([{}])).toBe('low_confidence')
  })
})

// ============================================================================
// QUERY BROADENING TESTS
// ============================================================================

describe('Query Broadening', () => {
  it('should remove time constraints', () => {
    const broadened = broadenQuery('meeting last week about project')
    expect(broadened).not.toContain('last week')
    expect(broadened).toContain('meeting')
    expect(broadened).toContain('project')
  })

  it('should leave queries without time constraints unchanged', () => {
    // broadenQuery only strips time phrases; prepositions are kept.
    const broadened = broadenQuery('emails from john about budget')
    expect(broadened).toBe('emails from john about budget')
  })

  it('should return original if result too short', () => {
    const original = 'a b'
    const broadened = broadenQuery(original)
    // Should return something usable
    expect(broadened.length).toBeGreaterThan(0)
    // Stripping the only word would leave nothing, so the original comes back
    expect(broadenQuery('today')).toBe('today')
  })
})

// ============================================================================
// RRF (RECIPROCAL RANK FUSION) TESTS
// ============================================================================

describe('Reciprocal Rank Fusion', () => {
  it('should merge results from multiple sets', () => {
    const set1 = [{ id: 'a' }, { id: 'b' }, { id: 'c' }]
    const set2 = [{ id: 'b' }, { id: 'd' }, { id: 'a' }]
    const merged = reciprocalRankFusion([set1, set2], 'id')
    expect(merged.length).toBe(4) // a, b, c, d (deduplicated)
  })

  it('should boost items appearing in multiple sets', () => {
    const set1 = [{ id: 'a' }, { id: 'b' }]
    const set2 = [{ id: 'a' }, { id: 'c' }]
    const merged = reciprocalRankFusion([set1, set2], 'id')
    // 'a' appears in both, should be ranked higher
    expect(merged[0].id).toBe('a')
  })

  it('should handle empty result sets', () => {
    const set1 = []
    const set2 = [{ id: 'a' }]
    const merged = reciprocalRankFusion([set1, set2], 'id')
    expect(merged.length).toBe(1)
  })

  it('should handle all empty sets', () => {
    const merged = reciprocalRankFusion([[], [], []], 'id')
    expect(merged).toEqual([])
  })
})

// ============================================================================
// NEGATION FILTER TESTS
// ============================================================================

describe('Negation Filtering', () => {
  it('should filter out results containing negated terms', () => {
    const results = [
      { text: 'Meeting about budget', subject: 'Budget meeting' },
      { text: 'Meeting cancelled', subject: 'Cancelled meeting' },
      { text: 'Project update', subject: 'Update' }
    ]
    const filtered = applyNegationFilter(results, ['cancelled'])
    expect(filtered.length).toBe(2)
    expect(filtered.some(r => r.text.includes('cancelled'))).toBe(false)
  })

  it('should check multiple text fields', () => {
    const results = [
      { text: 'Normal text', subject: 'Contains spam word' },
      { text: 'Clean text', subject: 'Clean subject' }
    ]
    const filtered = applyNegationFilter(results, ['spam'])
    expect(filtered.length).toBe(1)
  })

  it('should handle no negations', () => {
    const results = [{ text: 'Test' }]
    const filtered = applyNegationFilter(results, [])
    expect(filtered).toEqual(results)
  })
})

// ============================================================================
// FILTER EXTRACTION TESTS
// ============================================================================

describe('Filter Extraction from Natural Language', () => {
  describe('Person extraction', () => {
    it('should extract "from John" pattern', () => {
      const filters = extractFilters('emails from John about budget')
      // The regex matches the first name after "from"
      expect(filters.person).toBeDefined()
      expect(filters.person.toLowerCase()).toContain('john')
    })

    it('should extract "from John Smith" pattern', () => {
      const filters = extractFilters('messages from John Smith')
      // Should capture at least the first name
      expect(filters.person).toBeDefined()
      expect(filters.person.toLowerCase()).toContain('john')
    })
  })

  describe('Date extraction', () => {
    it('should extract "yesterday"', () => {
      const filters = extractFilters('emails from yesterday')
      expect(filters.dateRange).toBe('yesterday')
    })

    it('should extract "last week"', () => {
      const filters = extractFilters('messages from last week')
      expect(filters.dateRange).toBe('last week')
    })

    it('should extract "last month"', () => {
      const filters = extractFilters('calendar events from last month')
      expect(filters.dateRange).toBe('last month')
    })
  })
})

// ============================================================================
// INTEGRATION WITH REAL DATA
// ============================================================================

describe.skipIf(!indexExists)('Advanced Features with Real Data', () => {
  beforeAll(async () => {
    if (indexExists) {
      db = await connect(DB_PATH)
    }
    loadContacts()
  })

  it('should find different results for different query expansions', async () => {
    const variants = expandQuery('meeting about budget')
    if (variants.length > 1) {
      const results1 = await searchTable('emails', variants[0], 10)
      const results2 = await searchTable('emails', variants[1] || variants[0], 10)
      console.log(` → Variant 1 "${variants[0]}": ${results1.length} results`)
      console.log(` → Variant 2 "${variants[1] || 'same'}": ${results2.length} results`)
      expect(Array.isArray(results1)).toBe(true)
      expect(Array.isArray(results2)).toBe(true)
    }
  })

  it('should apply negation filtering to real results', async () => {
    const results = await searchTable('emails', 'meeting update', 20)
    if (results.length > 0) {
      const filtered = applyNegationFilter(results, ['cancelled', 'postponed'])
      console.log(` → Before negation: ${results.length}, After: ${filtered.length}`)
      expect(filtered.length).toBeLessThanOrEqual(results.length)
    }
  })

  it('should merge results using RRF', async () => {
    const query1Results = await searchTable('emails', 'meeting', 10)
    const query2Results = await searchTable('emails', 'call', 10)
    const merged = reciprocalRankFusion([query1Results, query2Results], 'filePath')
    console.log(` → Set 1: ${query1Results.length}, Set 2: ${query2Results.length}, Merged: ${merged.length}`)
    expect(merged.length).toBeLessThanOrEqual(query1Results.length + query2Results.length)
  })

  it('should assess quality of real search results', async () => {
    const goodQuery = await searchTable('emails', 'invoice payment receipt', 10)
    const badQuery = await searchTable('emails', 'xyznonexistentquery99999', 10)
    const goodQuality = assessResultQuality(goodQuery)
    const badQuality = assessResultQuality(badQuery)
    console.log(` → Good query quality: ${goodQuality}`)
    console.log(` → Bad query quality: ${badQuality}`)
    // Vector search returns nearest neighbors even for nonsense queries
    // Bad queries should have low confidence (high distance scores)
    expect(['empty', 'low_confidence', 'sparse']).toContain(badQuality)
  })
})

// ============================================================================
// CONTACT-BASED SEARCH TESTS
// ============================================================================

describe('Contact-Based Search Features', () => {
  beforeAll(() => {
    loadContacts()
  })

  it('should resolve contact names for person filters', () => {
    const contacts = resolveByName('john')
    console.log(` → Found ${contacts.length} contacts named "john"`)
    expect(Array.isArray(contacts)).toBe(true)
  })

  it('should handle partial name matching', () => {
    const contacts = resolveByName('jo')
    expect(Array.isArray(contacts)).toBe(true)
  })

  it('should handle name with special characters', () => {
    const contacts = resolveByName("O'Brien")
    expect(Array.isArray(contacts)).toBe(true)
  })
})

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.