Skip to main content
Glama
searchEngine.ts (11.5 kB)
/**
 * Search engine for session notes with filtering and relevance scoring
 */

import {
  SearchFilters,
  SearchResult,
  SessionNote,
} from '../types/session.js';
import { loadNoteMetadata, readNote, getAllNoteFiles } from './storage.js';
import { logger } from '../utils/logger.js';
import { escapeRegex } from '../utils/validation.js';

/** Milliseconds in one day, used to express note age in days. */
const MS_PER_DAY = 1000 * 60 * 60 * 24;

/** Upper bound of the recency boost added by `calculateRelevance`. */
const MAX_RECENCY_BOOST = 10;

/**
 * Build the `SessionNote` view of a metadata record that is used for
 * scoring and for returning results.
 *
 * Only summary, timestamp, projectName, topic, tags and analysis are copied.
 * A missing summary becomes `''`; a missing timestamp becomes "now".
 */
function toSessionNote(metadata: Partial<SessionNote>): SessionNote {
  return {
    summary: metadata.summary || '',
    timestamp: metadata.timestamp || new Date().toISOString(),
    projectName: metadata.projectName,
    topic: metadata.topic,
    tags: metadata.tags,
    analysis: metadata.analysis,
  };
}

/**
 * Search notes with filters and return sorted results.
 * Supports both text search and similarity search (or a combination).
 *
 * @param notesDir - Directory handed to `getAllNoteFiles` to enumerate notes.
 * @param filters - Metadata filters, optional text `query`, and optional
 *   `similarTo` path of a note to compare every candidate against.
 * @returns Matching notes sorted by `relevanceScore`, highest first. Files
 *   that throw while being processed are logged and skipped.
 */
export async function searchNotes(
  notesDir: string,
  filters: SearchFilters
): Promise<SearchResult[]> {
  const results: SearchResult[] = [];

  // Load the base note for similarity comparison, if one was requested.
  // When its metadata cannot be loaded the search silently degrades to
  // plain text/filter scoring.
  let baseNote: SessionNote | null = null;
  if (filters.similarTo) {
    const baseMetadata = await loadNoteMetadata(filters.similarTo);
    if (baseMetadata) {
      baseNote = toSessionNote(baseMetadata);
    }
  }

  // Lower-case the query once instead of once per file.
  const loweredQuery = filters.query ? filters.query.toLowerCase() : undefined;

  const noteFiles = await getAllNoteFiles(notesDir);

  for (const filePath of noteFiles) {
    try {
      // The base note is never reported as similar to itself.
      if (filters.similarTo && filePath === filters.similarTo) {
        continue;
      }

      // Cheap metadata-only filtering first.
      const metadata = await loadNoteMetadata(filePath);
      if (!metadata) {
        continue;
      }
      if (!matchesMetadataFilters(metadata, filters)) {
        continue;
      }

      // Text search against the full markdown content.
      if (loweredQuery) {
        const markdownContent = await readNote(filePath);
        if (!markdownContent.toLowerCase().includes(loweredQuery)) {
          continue;
        }
      }

      // NOTE(review): this second check requires the query to ALSO appear in
      // the metadata-derived searchable text, so a note that matches only in
      // its markdown body is dropped. Kept as-is to preserve results; confirm
      // whether AND semantics are intended.
      if (!matchesFilters(metadata, filters)) {
        continue;
      }

      const sessionNote = toSessionNote(metadata);

      let relevanceScore = 0;
      let matchedTags: string[] | undefined;

      if (filters.similarTo && baseNote) {
        // Similarity mode: 60% text relevance + 40% similarity when a query
        // is present, otherwise 100% similarity.
        // NOTE(review): textScore is not bounded to 0-100 while the
        // similarity percentage is, so the 60/40 split is only nominal.
        const simResult = calculateSimilarity(baseNote, sessionNote);
        if (filters.query) {
          const textScore = calculateRelevance(metadata, filters);
          relevanceScore = textScore * 0.6 + simResult.similarityPercentage * 0.4;
        } else {
          relevanceScore = simResult.similarityPercentage;
        }
        matchedTags = simResult.matchedTags;
      } else {
        // Text/filter scoring only.
        relevanceScore = calculateRelevance(metadata, filters);
      }

      results.push({
        filePath,
        note: sessionNote,
        relevanceScore,
        matchedTags,
      });
    } catch (error) {
      // Skip corrupted files
      logger.warn(`Failed to process ${filePath}`, { error: String(error) });
      continue;
    }
  }

  // Highest relevance first.
  results.sort((a, b) => b.relevanceScore - a.relevanceScore);

  return results;
}

/**
 * Get recent notes (most recent first).
 *
 * Notes whose metadata cannot be loaded, or whose timestamp is missing or
 * unparseable, are skipped: an unparseable timestamp would feed NaN into the
 * sort comparator and make the ordering unspecified.
 *
 * @param notesDir - Directory handed to `getAllNoteFiles` to enumerate notes.
 * @param limit - Maximum number of notes to return (default 10). Negative
 *   values are treated as 0 rather than slicing from the end of the list.
 * @returns Up to `limit` notes, newest first.
 */
export async function getRecentNotes(
  notesDir: string,
  limit: number = 10
): Promise<SessionNote[]> {
  const noteFiles = await getAllNoteFiles(notesDir);
  const dated: { note: SessionNote; time: number }[] = [];

  for (const filePath of noteFiles) {
    try {
      const metadata = await loadNoteMetadata(filePath);

      if (!metadata || !metadata.timestamp) {
        continue;
      }

      const time = new Date(metadata.timestamp).getTime();
      if (Number.isNaN(time)) {
        continue;
      }

      dated.push({ note: toSessionNote(metadata), time });
    } catch (error) {
      logger.warn(`Failed to load ${filePath}`, { error: String(error) });
      continue;
    }
  }

  // Most recent first.
  dated.sort((a, b) => b.time - a.time);

  return dated.slice(0, Math.max(0, limit)).map((item) => item.note);
}

/**
 * Check if metadata matches filters (fast filter, no file content needed).
 *
 * - Project: exact match required.
 * - Pattern / complexity: a note that has the field must match it; a note
 *   that lacks the field is let through.
 * - Date range: the note needs a parseable timestamp inside
 *   [startDate, endDate]; each bound is optional.
 * - Tags: the note must carry at least one of the requested tags.
 */
function matchesMetadataFilters(
  note: Partial<SessionNote>,
  filters: SearchFilters
): boolean {
  // Project filter
  if (filters.projectName && note.projectName !== filters.projectName) {
    return false;
  }

  // Pattern filter (only applied when the note declares a pattern)
  const notePattern = note.analysis?.pattern;
  if (filters.pattern && notePattern && notePattern !== filters.pattern) {
    return false;
  }

  // Complexity filter (only applied when the note declares a complexity)
  const noteComplexity = note.analysis?.complexity;
  if (filters.complexity && noteComplexity && noteComplexity !== filters.complexity) {
    return false;
  }

  // Date range filter
  if (filters.startDate || filters.endDate) {
    if (!note.timestamp) {
      return false;
    }

    const noteDate = new Date(note.timestamp);

    // Every comparison against an invalid date is false, which would let a
    // garbage timestamp slip through the range check; reject it explicitly,
    // the same way a missing timestamp is rejected.
    if (Number.isNaN(noteDate.getTime())) {
      return false;
    }

    if (filters.startDate && noteDate < new Date(filters.startDate)) {
      return false;
    }

    if (filters.endDate && noteDate > new Date(filters.endDate)) {
      return false;
    }
  }

  // Tag filter - note must have at least one of the specified tags
  if (filters.tags && filters.tags.length > 0) {
    const noteTags = note.tags;
    if (!noteTags || noteTags.length === 0) {
      return false;
    }

    if (!filters.tags.some((filterTag) => noteTags.includes(filterTag))) {
      return false;
    }
  }

  return true;
}

/**
 * Check if a note matches all filters including the text query.
 *
 * The query is matched case-insensitively as a substring of the text
 * produced by `getSearchableText`.
 */
function matchesFilters(note: Partial<SessionNote>, filters: SearchFilters): boolean {
  if (!matchesMetadataFilters(note, filters)) {
    return false;
  }

  if (filters.query) {
    const query = filters.query.toLowerCase();
    const searchableText = getSearchableText(note).toLowerCase();

    if (!searchableText.includes(query)) {
      return false;
    }
  }

  return true;
}

/**
 * Get all searchable text from a note.
 *
 * Concatenates, space-separated: summary, project name, topic, tags, file
 * change paths/descriptions, command text/descriptions and code snippet
 * descriptions/code. Missing optional strings contribute an empty string.
 */
function getSearchableText(note: Partial<SessionNote>): string {
  const parts: string[] = [
    note.summary || '',
    note.projectName || '',
    note.topic || '',
  ];

  if (note.tags) {
    parts.push(...note.tags);
  }

  if (note.fileChanges) {
    for (const change of note.fileChanges) {
      parts.push(change.path, change.description || '');
    }
  }

  if (note.commands) {
    for (const cmd of note.commands) {
      parts.push(cmd.command, cmd.description || '');
    }
  }

  if (note.codeSnippets) {
    for (const snippet of note.codeSnippets) {
      parts.push(snippet.description || '', snippet.code);
    }
  }

  return parts.join(' ');
}

/**
 * Calculate relevance score for a note. Higher score = more relevant.
 *
 * Score components: base 10; +50 exact project match; +20 per matching tag;
 * +30 pattern match; +20 complexity match; +15 per query occurrence in the
 * searchable text, +40 if the query is in the summary, +30 if in the topic;
 * plus a recency boost between 0 and 10 that decays by 1 per 10 days of age.
 *
 * The result is always a finite number: an unparseable timestamp contributes
 * no recency boost instead of turning the score into NaN, and a timestamp in
 * the future cannot earn more than the maximum boost.
 */
function calculateRelevance(note: Partial<SessionNote>, filters: SearchFilters): number {
  // Base score for all results
  let score = 10;

  // Boost for exact project match
  if (filters.projectName && note.projectName === filters.projectName) {
    score += 50;
  }

  // Boost for tag matches
  if (filters.tags && note.tags) {
    const noteTags = note.tags;
    const matchingTags = filters.tags.filter((tag) => noteTags.includes(tag));
    score += matchingTags.length * 20;
  }

  // Boost for pattern match
  if (filters.pattern && note.analysis?.pattern === filters.pattern) {
    score += 30;
  }

  // Boost for complexity match
  if (filters.complexity && note.analysis?.complexity === filters.complexity) {
    score += 20;
  }

  // Query relevance boost
  if (filters.query) {
    const query = filters.query.toLowerCase();
    const searchableText = getSearchableText(note).toLowerCase();

    // Count occurrences (escape regex to prevent ReDoS attacks)
    const occurrences = (searchableText.match(new RegExp(escapeRegex(query), 'g')) || [])
      .length;
    score += occurrences * 15;

    // Boost if in summary (more important)
    if (note.summary && note.summary.toLowerCase().includes(query)) {
      score += 40;
    }

    // Boost if in topic
    if (note.topic && note.topic.toLowerCase().includes(query)) {
      score += 30;
    }
  }

  // Recency boost (newer = slightly higher)
  if (note.timestamp) {
    const createdAt = new Date(note.timestamp).getTime();
    // Math.max(0, NaN) is NaN, so an invalid date must be excluded up front.
    if (Number.isFinite(createdAt)) {
      const daysSinceCreation = (Date.now() - createdAt) / MS_PER_DAY;
      const recencyBoost = Math.min(
        MAX_RECENCY_BOOST,
        Math.max(0, MAX_RECENCY_BOOST - daysSinceCreation / 10)
      );
      score += recencyBoost;
    }
  }

  return score;
}

/**
 * Calculate similarity between two sessions using weighted Jaccard similarity.
 *
 * Weights: tags 40 (Jaccard coefficient of the two tag lists), pattern 30
 * (exact match, only when both sessions have one), project 30 (exact match,
 * only when both sessions have one). A factor missing on either side scores
 * 0 but still counts toward the maximum.
 *
 * @returns Rounded similarity percentage (0-100) and the tags shared by both
 *   sessions.
 */
function calculateSimilarity(
  session1: SessionNote,
  session2: SessionNote
): { similarityPercentage: number; matchedTags: string[] } {
  const tags1 = session1.tags || [];
  const tags2 = session2.tags || [];

  let totalScore = 0;
  let maxScore = 0;

  // Factor 1: Tags similarity (40% weight)
  totalScore += jaccardSimilarity(tags1, tags2) * 40;
  maxScore += 40;

  // Track matched tags for result
  const matchedTags = intersection(tags1, tags2);

  // Factor 2: Pattern similarity (30% weight)
  const pattern1 = session1.analysis?.pattern;
  const pattern2 = session2.analysis?.pattern;
  if (pattern1 && pattern2 && pattern1 === pattern2) {
    totalScore += 30;
  }
  maxScore += 30;

  // Factor 3: Project similarity (30% weight)
  if (
    session1.projectName &&
    session2.projectName &&
    session1.projectName === session2.projectName
  ) {
    totalScore += 30;
  }
  maxScore += 30;

  // Calculate final percentage
  const similarityPercentage = maxScore > 0 ? (totalScore / maxScore) * 100 : 0;

  return {
    similarityPercentage: Math.round(similarityPercentage),
    matchedTags,
  };
}

/**
 * Calculate Jaccard similarity coefficient for two sets
 * J(A,B) = |A ∩ B| / |A ∪ B|
 *
 * Inputs are arrays treated as sets: duplicate entries count once. Returns a
 * value between 0 and 1; two empty inputs yield 0.
 */
function jaccardSimilarity(set1: string[], set2: string[]): number {
  if (set1.length === 0 && set2.length === 0) {
    return 0;
  }

  const intersectionSize = intersection(set1, set2).length;
  const unionSize = union(set1, set2).length;

  return unionSize > 0 ? intersectionSize / unionSize : 0;
}

/**
 * Get intersection of two arrays (unique items, in first-seen order of arr1).
 *
 * arr1 is de-duplicated first: otherwise repeated items would be counted
 * several times and the Jaccard coefficient could exceed 1.
 */
function intersection<T>(arr1: T[], arr2: T[]): T[] {
  const lookup = new Set(arr2);
  return Array.from(new Set(arr1)).filter((item) => lookup.has(item));
}

/**
 * Get union of two arrays (unique items)
 */
function union<T>(arr1: T[], arr2: T[]): T[] {
  return Array.from(new Set([...arr1, ...arr2]));
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/VoCoufi/second-brain-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.