Skip to main content
Glama
search-logic.test.js (20.4 kB)
/**
 * Unit tests for search logic in search.js
 * Tests query expansion, negation parsing, keyword extraction, hybrid scoring, etc.
 *
 * Every helper under test is defined locally inside its `describe` block
 * rather than imported, so each suite is self-contained.
 */

import { describe, it, expect, vi, beforeEach } from 'vitest'

// ============ QUERY EXPANSION ============

describe('Query Expansion', () => {
  // Trigger word -> candidate synonyms. Only the first synonym of the first
  // trigger word that actually occurs as a whole word is used.
  const synonymMap = {
    'meeting': ['call', 'sync', 'standup', 'discussion'],
    'budget': ['financial', 'costs', 'expense', 'spending'],
    'project': ['initiative', 'task', 'work', 'assignment'],
    'deadline': ['due date', 'due', 'timeline', 'delivery'],
    'review': ['feedback', 'evaluation', 'assessment', 'check'],
    'invoice': ['bill', 'payment', 'receipt', 'charge'],
    'schedule': ['calendar', 'appointment', 'booking'],
    'update': ['status', 'progress', 'news'],
    'help': ['assist', 'support', 'question'],
    'issue': ['problem', 'bug', 'error', 'concern']
  }

  /**
   * Reimplementation of expandQuery for testing.
   *
   * @param {string} query - The user's search query.
   * @returns {string[]} Up to 3 unique variants: the original query, a version
   *   with time/topic modifiers stripped, and a version with one synonym swapped in.
   */
  const expandQuery = (query) => {
    const expansions = [query]

    // Simplified (remove time modifiers)
    const simplified = query
      .replace(/\b(recently|last \w+|this \w+|next \w+|about|regarding)\b/gi, '')
      .trim()
    if (simplified && simplified !== query && simplified.length > 3) {
      expansions.push(simplified)
    }

    // Synonym replacement. Detection and replacement use the same word-boundary
    // regex, so a mere substring hit (e.g. "meetings") does not stop the search
    // before a later trigger word gets a chance to match.
    for (const [word, syns] of Object.entries(synonymMap)) {
      const replaced = query.replace(new RegExp(`\\b${word}\\b`, 'gi'), syns[0])
      if (replaced !== query) {
        expansions.push(replaced)
        break
      }
    }

    return [...new Set(expansions)].slice(0, 3)
  }

  it('should include original query', () => {
    const result = expandQuery('team meeting')
    expect(result).toContain('team meeting')
  })

  it('should create simplified version without time modifiers', () => {
    const result = expandQuery('meeting last week')
    expect(result.some(q => !q.includes('last week'))).toBe(true)
  })

  it('should expand synonyms for meeting', () => {
    const result = expandQuery('team meeting')
    expect(result.some(q => q.includes('call'))).toBe(true)
  })

  it('should expand synonyms for budget', () => {
    const result = expandQuery('budget report')
    expect(result.some(q => q.includes('financial'))).toBe(true)
  })

  it('should expand synonyms for invoice', () => {
    const result = expandQuery('invoice from vendor')
    expect(result.some(q => q.includes('bill'))).toBe(true)
  })

  it('should keep looking when a trigger word only matches as a substring', () => {
    const result = expandQuery('meetings budget')
    expect(result).toContain('meetings financial')
  })

  it('should limit to 3 variants', () => {
    const result = expandQuery('meeting about budget recently')
    expect(result.length).toBeLessThanOrEqual(3)
  })

  it('should deduplicate expansions', () => {
    const result = expandQuery('simple query')
    const unique = [...new Set(result)]
    expect(result.length).toBe(unique.length)
  })
})

// ============ NEGATION PARSING ============

describe('Negation Parsing', () => {
  /**
   * Splits a query into the terms the user wants excluded and the remaining text.
   *
   * Recognised forms: `NOT word`, ` -word`, `without word`, `excluding word`.
   *
   * @param {string} query - Raw search query.
   * @returns {{ cleanQuery: string, negations: string[] }} The query with the
   *   negation phrases removed and whitespace collapsed, plus the unique,
   *   lowercased negated terms.
   */
  const parseNegation = (query) => {
    const negationPatterns = [
      /\bNOT\s+(\w+)/gi,
      /\s-(\w+)/g,
      /\bwithout\s+(\w+)/gi,
      /\bexcluding?\s+(\w+)/gi
    ]

    const negations = []
    let cleanQuery = query
    for (const pattern of negationPatterns) {
      // Terms are always collected from the original query, while removal is
      // applied cumulatively to cleanQuery.
      for (const match of query.matchAll(pattern)) {
        negations.push(match[1].toLowerCase())
      }
      cleanQuery = cleanQuery.replace(pattern, ' ')
    }

    return {
      cleanQuery: cleanQuery.replace(/\s+/g, ' ').trim(),
      negations: [...new Set(negations)]
    }
  }

  it('should parse NOT keyword', () => {
    const result = parseNegation('meeting NOT weekly')
    expect(result.negations).toContain('weekly')
    expect(result.cleanQuery).toBe('meeting')
  })

  it('should parse minus sign prefix', () => {
    const result = parseNegation('email -spam')
    expect(result.negations).toContain('spam')
  })

  it('should parse without keyword', () => {
    const result = parseNegation('documents without attachments')
    expect(result.negations).toContain('attachments')
  })

  it('should parse excluding keyword', () => {
    const result = parseNegation('messages excluding newsletters')
    expect(result.negations).toContain('newsletters')
  })

  it('should handle multiple negations', () => {
    const result = parseNegation('meeting NOT weekly -standup without zoom')
    expect(result.negations).toContain('weekly')
    expect(result.negations).toContain('standup')
    expect(result.negations).toContain('zoom')
  })

  it('should deduplicate negations', () => {
    const result = parseNegation('meeting NOT spam -spam')
    expect(result.negations.filter(n => n === 'spam').length).toBe(1)
  })

  it('should clean up query whitespace', () => {
    // Runs of spaces are what this test is about, so the input must contain them.
    const result = parseNegation('  meeting   NOT   weekly  ')
    expect(result.negations).toContain('weekly')
    expect(result.cleanQuery).toBe('meeting')
  })
})

// ============ KEYWORD EXTRACTION ============

describe('Keyword Extraction', () => {
  const stopWords = new Set(['the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'any', 'both', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'it', 'its', 'my', 'your', 'his', 'her', 'our', 'their', 'me', 'him', 'them', 'us', 'i', 'you', 'we'])

  /**
   * Lowercases the query, turns punctuation into spaces, and keeps the words
   * that are longer than two characters and not in `stopWords`.
   *
   * @param {string} query - Raw search query.
   * @returns {string[]} Significant keywords, in order of appearance.
   */
  const extractKeywords = (query) => {
    return query.toLowerCase()
      .replace(/[^\w\s]/g, ' ')
      .split(/\s+/)
      .filter(w => w.length > 2 && !stopWords.has(w))
  }

  it('should extract significant words', () => {
    const result = extractKeywords('meeting with John about budget')
    expect(result).toContain('meeting')
    expect(result).toContain('john')
    expect(result).toContain('budget')
  })

  it('should filter out stop words', () => {
    const result = extractKeywords('the meeting with a team')
    expect(result).not.toContain('the')
    expect(result).not.toContain('with')
    expect(result).not.toContain('a')
  })

  it('should filter out short words', () => {
    const result = extractKeywords('go to the meeting')
    expect(result).not.toContain('go')
    expect(result).not.toContain('to')
  })

  it('should lowercase keywords', () => {
    const result = extractKeywords('Meeting with JOHN')
    expect(result).toContain('meeting')
    expect(result).toContain('john')
  })

  it('should handle punctuation', () => {
    const result = extractKeywords('email, phone, and messages!')
    expect(result).toContain('email')
    expect(result).toContain('phone')
    expect(result).toContain('messages')
  })
})

// ============ KEYWORD MATCHING ============

describe('Keyword Matching', () => {
  // Escapes regex metacharacters so a keyword is always matched literally.
  const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  /**
   * Scores how well `text` covers `keywords`.
   *
   * Each keyword contributes 1.0 for a whole-word match, 0.5 for a substring
   * match, and 0 otherwise; the result is the average over all keywords.
   *
   * @param {string|null|undefined} text - Text to search in.
   * @param {string[]|null|undefined} keywords - Keywords to look for.
   * @returns {number} Score in the range 0..1; 0 for empty or missing input.
   */
  const keywordMatch = (text, keywords) => {
    if (!text || !keywords || keywords.length === 0) return 0

    const textLower = text.toLowerCase()
    let matches = 0

    for (const kw of keywords) {
      const kwLower = kw.toLowerCase()
      // Keywords are user input: escape them so "c++" does not throw and
      // "1.5" does not match "125".
      const wordBoundary = new RegExp(`\\b${escapeRegExp(kwLower)}\\b`, 'i')
      if (wordBoundary.test(text)) {
        matches += 1.0 // Exact word match
      } else if (textLower.includes(kwLower)) {
        matches += 0.5 // Partial match
      }
    }

    return matches / keywords.length
  }

  it('should return 1.0 for exact single keyword match', () => {
    const score = keywordMatch('Team meeting tomorrow', ['meeting'])
    expect(score).toBe(1.0)
  })

  it('should return 0.5 for partial match', () => {
    // "Meetings" contains "meeting" as a substring but not as a whole word.
    const score = keywordMatch('Meetings are scheduled', ['meeting'])
    expect(score).toBe(0.5)
  })

  it('should return 0 for no match', () => {
    const score = keywordMatch('Team discussion tomorrow', ['meeting'])
    expect(score).toBe(0)
  })

  it('should average scores for multiple keywords', () => {
    const score = keywordMatch('Team meeting about budget', ['meeting', 'budget'])
    expect(score).toBe(1.0) // Both exact matches
  })

  it('should handle mixed matches', () => {
    const score = keywordMatch('Team meeting', ['meeting', 'budget'])
    expect(score).toBe(0.5) // 1 match out of 2
  })

  it('should return 0 for empty inputs', () => {
    expect(keywordMatch('', ['meeting'])).toBe(0)
    expect(keywordMatch('text', [])).toBe(0)
    expect(keywordMatch(null, ['meeting'])).toBe(0)
  })

  it('should treat regex metacharacters in keywords literally', () => {
    // "+" has no word boundary after it, so this is a substring match only.
    expect(keywordMatch('c++ tips', ['c++'])).toBe(0.5)
    expect(keywordMatch('version 1.5 released', ['1.5'])).toBe(1.0)
    // "." must not act as a wildcard.
    expect(keywordMatch('version 125 released', ['1.5'])).toBe(0)
  })
})

// ============ FILTER EXTRACTION FROM QUERY ============

describe('Filter Extraction from Natural Language', () => {
  // Phrase -> number of days to look back. The first phrase found in the query wins.
  const datePatterns = {
    'yesterday': 1,
    'last week': 7,
    'this week': 7,
    'last month': 30,
    'this month': 30,
    'last few days': 3,
    'past week': 7,
    'past month': 30,
    'recent': 7,
    'recently': 7,
    'today': 1
  }

  const startsUppercase = /^[A-Z]/

  /**
   * Pulls structured filters out of a natural-language query.
   *
   * @param {string} query - Raw search query.
   * @returns {{ person?: string, daysBack?: number }} `person` is the first
   *   capitalised name found by the person patterns; `daysBack` comes from a
   *   known date phrase, overridden by an explicit "last N days".
   */
  const extractFiltersFromQuery = (query) => {
    const filters = {}
    const q = query.toLowerCase()

    // Extract person names (only the first 500 characters are scanned)
    const safeQuery = query.length > 500 ? query.substring(0, 500) : query
    const personPatterns = [
      /(?:from|with|to)\s+([A-Z][a-z]{1,20}(?:\s[A-Z][a-z]{1,20})?)/,
      /([A-Z][a-z]{1,20}(?:\s[A-Z][a-z]{1,20})?)\s+(?:said|sent|wrote|messaged|texted|emailed)/,
      /(?:emails?|messages?|texts?|calls?)\s+(?:from|to|with)\s+([A-Z][a-z]{1,20})/i
    ]
    for (const pattern of personPatterns) {
      const match = safeQuery.match(pattern)
      // The `i` flag on the last pattern lets [A-Z] match lowercase letters, so
      // require a real capital to keep words like "yesterday" from being
      // reported as a person.
      if (match && match[1] && match[1].length > 2 && startsUppercase.test(match[1])) {
        filters.person = match[1]
        break
      }
    }

    // Extract date ranges
    for (const [phrase, days] of Object.entries(datePatterns)) {
      if (q.includes(phrase)) {
        filters.daysBack = days
        break
      }
    }

    // Extract "last N days" pattern (takes precedence over the phrases above)
    const lastNDays = q.match(/last\s+(\d+)\s+days?/i)
    if (lastNDays) {
      filters.daysBack = Number.parseInt(lastNDays[1], 10)
    }

    return filters
  }

  describe('Person extraction', () => {
    it('should extract "from John" pattern', () => {
      const filters = extractFiltersFromQuery('emails from John')
      expect(filters.person).toBe('John')
    })

    it('should extract "from John Smith" pattern', () => {
      const filters = extractFiltersFromQuery('messages from John Smith')
      expect(filters.person).toBe('John Smith')
    })

    it('should extract "John said" pattern', () => {
      const filters = extractFiltersFromQuery('what John said about the project')
      expect(filters.person).toBe('John')
    })

    it('should extract "emails from Sarah" pattern', () => {
      const filters = extractFiltersFromQuery('emails from Sarah')
      expect(filters.person).toBe('Sarah')
    })

    it('should not report lowercase time words as a person', () => {
      expect(extractFiltersFromQuery('emails from yesterday').person).toBeUndefined()
      expect(extractFiltersFromQuery('messages from last week').person).toBeUndefined()
      expect(extractFiltersFromQuery('emails from last 14 days').person).toBeUndefined()
    })
  })

  describe('Date extraction', () => {
    it('should extract yesterday', () => {
      const filters = extractFiltersFromQuery('emails from yesterday')
      expect(filters.daysBack).toBe(1)
    })

    it('should extract last week', () => {
      const filters = extractFiltersFromQuery('messages from last week')
      expect(filters.daysBack).toBe(7)
    })

    it('should extract last month', () => {
      const filters = extractFiltersFromQuery('calendar events last month')
      expect(filters.daysBack).toBe(30)
    })

    it('should extract "last N days" pattern', () => {
      const filters = extractFiltersFromQuery('emails from last 14 days')
      expect(filters.daysBack).toBe(14)
    })

    it('should extract recently', () => {
      const filters = extractFiltersFromQuery('recent messages')
      expect(filters.daysBack).toBe(7)
    })
  })
})

// ============ PRONOUN RESOLUTION ============

describe('Pronoun Resolution', () => {
  // Simulating context tracking
  let queryContext = { lastPerson: null, lastTimestamp: 0 }
  const CONTEXT_EXPIRY_MS = 5 * 60 * 1000

  /**
   * Replaces third-person pronouns in `query` with `context.lastPerson`.
   *
   * @param {string} query - Raw search query.
   * @param {{ lastPerson: string|null, lastTimestamp: number }} context -
   *   Most recently referenced person and when (ms since epoch) it was recorded.
   * @returns {string} The query with pronouns substituted, or the query
   *   unchanged when the context is older than CONTEXT_EXPIRY_MS or has no person.
   */
  const resolvePronouns = (query, context) => {
    if (Date.now() - context.lastTimestamp > CONTEXT_EXPIRY_MS) {
      return query
    }
    if (!context.lastPerson) {
      return query
    }
    // replace() leaves the string untouched when nothing matches, so no
    // separate (stateful, /g) test() call is needed.
    return query.replace(/\b(they|them|their|he|him|his|she|her|hers)\b/gi, context.lastPerson)
  }

  beforeEach(() => {
    queryContext = { lastPerson: 'John', lastTimestamp: Date.now() }
  })

  it('should replace "they" with last person', () => {
    const result = resolvePronouns('what did they say', queryContext)
    expect(result).toBe('what did John say')
  })

  it('should replace "their" with last person', () => {
    const result = resolvePronouns('check their email', queryContext)
    expect(result).toBe('check John email')
  })

  it('should replace "he/him" with last person', () => {
    const result = resolvePronouns('email from him', queryContext)
    expect(result).toBe('email from John')
  })

  it('should not replace if context expired', () => {
    queryContext.lastTimestamp = Date.now() - (10 * 60 * 1000) // 10 minutes ago
    const result = resolvePronouns('what did they say', queryContext)
    expect(result).toBe('what did they say')
  })

  it('should not replace if no last person', () => {
    queryContext.lastPerson = null
    const result = resolvePronouns('what did they say', queryContext)
    expect(result).toBe('what did they say')
  })
})

// ============ RESULT QUALITY ASSESSMENT ============

describe('Result Quality Assessment', () => {
  const MIN_CONFIDENCE_SCORE = 0.5

  /**
   * Classifies a result list as empty, low_confidence, sparse, or good.
   *
   * The top score is `1 - results[0]._distance`; a missing or non-numeric
   * distance counts as score 0.
   *
   * @param {Array<{ _distance?: number }>|null|undefined} results - Ranked results.
   * @returns {{ quality: string, shouldRetry: boolean, topScore?: number }}
   */
  const assessResultQuality = (results) => {
    if (!results || results.length === 0) {
      return { quality: 'empty', shouldRetry: true }
    }

    // A distance of exactly 0 is a perfect match, not a missing value, so the
    // check must be on the type rather than on truthiness.
    const topDistance = results[0]._distance
    const topScore = Number.isFinite(topDistance) ? 1 - topDistance : 0

    if (topScore < MIN_CONFIDENCE_SCORE) {
      return { quality: 'low_confidence', shouldRetry: true, topScore }
    }
    if (results.length < 3 && topScore < 0.7) {
      return { quality: 'sparse', shouldRetry: true, topScore }
    }
    return { quality: 'good', shouldRetry: false, topScore }
  }

  it('should return empty for no results', () => {
    expect(assessResultQuality([])).toEqual({ quality: 'empty', shouldRetry: true })
    expect(assessResultQuality(null)).toEqual({ quality: 'empty', shouldRetry: true })
  })

  it('should return low_confidence for poor top score', () => {
    const results = [{ _distance: 0.8 }] // score = 0.2
    const quality = assessResultQuality(results)
    expect(quality.quality).toBe('low_confidence')
    expect(quality.shouldRetry).toBe(true)
  })

  it('should return low_confidence when the top result has no distance', () => {
    const quality = assessResultQuality([{}])
    expect(quality.quality).toBe('low_confidence')
    expect(quality.topScore).toBe(0)
  })

  it('should return sparse for few results with moderate score', () => {
    const results = [{ _distance: 0.35 }, { _distance: 0.4 }] // scores 0.65, 0.6
    const quality = assessResultQuality(results)
    expect(quality.quality).toBe('sparse')
  })

  it('should return good for high-quality results', () => {
    const results = [
      { _distance: 0.1 }, // score 0.9
      { _distance: 0.2 },
      { _distance: 0.3 }
    ]
    const quality = assessResultQuality(results)
    expect(quality.quality).toBe('good')
    expect(quality.shouldRetry).toBe(false)
  })

  it('should treat a distance of 0 as a perfect score', () => {
    const quality = assessResultQuality([{ _distance: 0 }])
    expect(quality.quality).toBe('good')
    expect(quality.topScore).toBe(1)
    expect(quality.shouldRetry).toBe(false)
  })
})

// ============ QUERY BROADENING ============

describe('Query Broadening', () => {
  /**
   * Produces a looser query by stripping time constraints and connective words.
   *
   * @param {string} query - Raw search query.
   * @returns {string} The broadened query, or the original query when what is
   *   left is 3 characters or shorter.
   */
  const broadenQuery = (query) => {
    let broader = query.replace(/\b(recently|last \w+|this \w+|next \w+|yesterday|today|tomorrow)\b/gi, '')
    broader = broader.replace(/\b(about|regarding|concerning|from|to|with)\b/gi, '')
    broader = broader.replace(/\s+/g, ' ').trim()
    return broader.length > 3 ? broader : query
  }

  it('should remove time constraints', () => {
    expect(broadenQuery('meeting yesterday')).toBe('meeting')
    expect(broadenQuery('emails last week')).toBe('emails')
    expect(broadenQuery('events next month')).toBe('events')
  })

  it('should remove prepositions', () => {
    expect(broadenQuery('meeting with John')).toBe('meeting John')
    expect(broadenQuery('email from Sarah')).toBe('email Sarah')
  })

  it('should clean up whitespace', () => {
    // Runs of spaces are what this test is about, so the input must contain them.
    expect(broadenQuery('  meeting   with   John  ')).toBe('meeting John')
  })

  it('should return original if result too short', () => {
    expect(broadenQuery('from me')).toBe('from me')
  })
})

// ============ RECIPROCAL RANK FUSION ============

describe('Reciprocal Rank Fusion (RRF)', () => {
  const RRF_K = 60

  // True when the document carries a usable numeric distance (0 included).
  const hasDistance = (doc) => Number.isFinite(doc._distance)

  /**
   * Merges several ranked result lists into one list ordered by fused score.
   *
   * A document at zero-based position `rank` contributes `1 / (RRF_K + rank + 1)`;
   * contributions for the same key are summed across lists. When a key appears
   * more than once, the copy with the smallest `_distance` is the one kept.
   *
   * @param {Array<Array<object>>} resultSets - Ranked lists to merge.
   * @param {string} keyField - Property that identifies a document.
   * @returns {Array<object>} Merged documents, each with an added `_rrfScore`,
   *   sorted by `_rrfScore` descending. Documents whose key is null, undefined
   *   or an empty string are skipped.
   */
  const reciprocalRankFusion = (resultSets, keyField) => {
    const scores = new Map()

    for (const results of resultSets) {
      for (let rank = 0; rank < results.length; rank++) {
        const doc = results[rank]
        const key = doc[keyField]
        // Explicit check rather than `!key`, so a legitimate key of 0 is kept.
        if (key == null || key === '') continue

        const rrfScore = 1 / (RRF_K + rank + 1)
        const existing = scores.get(key)
        if (!existing) {
          scores.set(key, { doc, rrfScore })
          continue
        }

        existing.rrfScore += rrfScore
        // Numeric comparison instead of truthiness: a distance of 0 is the best
        // possible value and must be able to win, and must not be displaced.
        const isCloser = hasDistance(doc) &&
          (!hasDistance(existing.doc) || doc._distance < existing.doc._distance)
        if (isCloser) {
          existing.doc = doc
        }
      }
    }

    return Array.from(scores.values())
      .sort((a, b) => b.rrfScore - a.rrfScore)
      .map(({ doc, rrfScore }) => ({ ...doc, _rrfScore: rrfScore }))
  }

  it('should merge results from multiple sets', () => {
    const set1 = [{ id: 'a', _distance: 0.1 }, { id: 'b', _distance: 0.2 }]
    const set2 = [{ id: 'b', _distance: 0.15 }, { id: 'c', _distance: 0.3 }]
    const merged = reciprocalRankFusion([set1, set2], 'id')

    expect(merged.length).toBe(3)
    // 'b' appears in both sets, should have higher RRF score
    const bResult = merged.find(r => r.id === 'b')
    const aResult = merged.find(r => r.id === 'a')
    expect(bResult._rrfScore).toBeGreaterThan(aResult._rrfScore)
  })

  it('should sort by RRF score descending', () => {
    const set1 = [{ id: 'a' }, { id: 'b' }, { id: 'c' }]
    const set2 = [{ id: 'c' }, { id: 'a' }, { id: 'b' }]
    const merged = reciprocalRankFusion([set1, set2], 'id')

    // All items appear in both sets at different ranks
    // Verify sorted by _rrfScore
    for (let i = 0; i < merged.length - 1; i++) {
      expect(merged[i]._rrfScore).toBeGreaterThanOrEqual(merged[i + 1]._rrfScore)
    }
  })

  it('should handle empty result sets', () => {
    const set1 = [{ id: 'a' }]
    const set2 = []
    const merged = reciprocalRankFusion([set1, set2], 'id')
    expect(merged.length).toBe(1)
  })

  it('should skip items without key field', () => {
    const set1 = [{ id: 'a' }, { noId: 'x' }]
    const merged = reciprocalRankFusion([set1], 'id')
    expect(merged.length).toBe(1)
  })

  it('should keep items whose key is 0', () => {
    const merged = reciprocalRankFusion([[{ id: 0 }, { id: 1 }]], 'id')
    expect(merged.map(r => r.id)).toEqual([0, 1])
  })

  it('should keep the copy with the smallest distance, including 0', () => {
    const later = reciprocalRankFusion(
      [[{ id: 'a', _distance: 0.2 }], [{ id: 'a', _distance: 0 }]],
      'id'
    )
    expect(later.length).toBe(1)
    expect(later[0]._distance).toBe(0)

    const earlier = reciprocalRankFusion(
      [[{ id: 'a', _distance: 0 }], [{ id: 'a', _distance: 0.2 }]],
      'id'
    )
    expect(earlier[0]._distance).toBe(0)
  })
})

// ============ JUNK MAIL FILTERING ============

describe('Junk Mail Filtering', () => {
  const EXCLUDED_MAILBOXES = ['junk', 'trash', 'deleted messages', 'spam']

  /**
   * Drops results whose mailbox name contains one of EXCLUDED_MAILBOXES
   * (case-insensitive substring match).
   *
   * @param {Array<{ mailbox?: string }>} results - Search results.
   * @param {boolean} [includeJunk=false] - When true, nothing is filtered.
   * @param {string|null} [explicitMailbox=null] - When set, nothing is filtered.
   * @returns {Array<object>} The results to show.
   */
  const excludeJunkMail = (results, includeJunk = false, explicitMailbox = null) => {
    if (includeJunk || explicitMailbox) return results

    return results.filter(r =>
      !EXCLUDED_MAILBOXES.some(mb =>
        (r.mailbox || "").toLowerCase().includes(mb)
      )
    )
  }

  it('should filter out junk mail by default', () => {
    const results = [
      { mailbox: 'INBOX', subject: 'Important' },
      { mailbox: 'Junk', subject: 'Spam offer' },
      { mailbox: 'Archive', subject: 'Old email' }
    ]
    const filtered = excludeJunkMail(results)
    expect(filtered.length).toBe(2)
    expect(filtered.every(r => r.mailbox !== 'Junk')).toBe(true)
  })

  it('should filter out trash', () => {
    const results = [
      { mailbox: 'INBOX', subject: 'Important' },
      { mailbox: 'Trash', subject: 'Deleted' }
    ]
    const filtered = excludeJunkMail(results)
    expect(filtered.length).toBe(1)
  })

  it('should include junk when explicitly requested', () => {
    const results = [
      { mailbox: 'INBOX', subject: 'Important' },
      { mailbox: 'Junk', subject: 'Spam offer' }
    ]
    const filtered = excludeJunkMail(results, true)
    expect(filtered.length).toBe(2)
  })

  it('should not filter when explicit mailbox specified', () => {
    const results = [
      { mailbox: 'Junk', subject: 'Spam offer' }
    ]
    const filtered = excludeJunkMail(results, false, 'Junk')
    expect(filtered.length).toBe(1)
  })
})

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/sfls1397/Apple-Tools-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.