/**
* Search engine for session notes with filtering and relevance scoring
*/
import {
SearchFilters,
SearchResult,
SessionNote,
} from '../types/session.js';
import { loadNoteMetadata, readNote, getAllNoteFiles } from './storage.js';
import { logger } from '../utils/logger.js';
import { escapeRegex } from '../utils/validation.js';
/**
 * Search the notes stored under `notesDir` and return them ordered by score.
 *
 * A note is kept only when it passes the metadata filters and, if
 * `filters.query` is set, when the query occurs (case-insensitively) in both
 * the metadata-derived searchable text and the note's markdown content.
 *
 * Scoring:
 * - `filters.similarTo` set and loadable: the similarity percentage against
 *   that note, blended 60% text relevance / 40% similarity when a query is
 *   also given. The base note itself is never returned.
 * - otherwise: text relevance only.
 *
 * Files whose processing throws are logged and skipped.
 *
 * @param notesDir - Directory handed to `getAllNoteFiles`.
 * @param filters - Metadata filters, optional query and optional `similarTo` path.
 * @returns Matching notes, highest `relevanceScore` first.
 */
export async function searchNotes(
  notesDir: string,
  filters: SearchFilters
): Promise<SearchResult[]> {
  const results: SearchResult[] = [];

  // Loop-invariant: lower-case the query once rather than once per file.
  const loweredQuery = filters.query ? filters.query.toLowerCase() : '';

  // Load the reference note for similarity comparison, if one was requested.
  let baseNote: SessionNote | null = null;
  if (filters.similarTo) {
    const baseMetadata = await loadNoteMetadata(filters.similarTo);
    if (baseMetadata) {
      baseNote = {
        summary: baseMetadata.summary || '',
        timestamp: baseMetadata.timestamp || new Date().toISOString(),
        projectName: baseMetadata.projectName,
        topic: baseMetadata.topic,
        tags: baseMetadata.tags,
        analysis: baseMetadata.analysis,
      };
    } else {
      // Without the reference note the search degrades to plain text scoring;
      // surface that instead of doing it silently.
      logger.warn(`Failed to load base note ${filters.similarTo}`, {
        error: 'metadata unavailable; falling back to text relevance',
      });
    }
  }

  const noteFiles = await getAllNoteFiles(notesDir);

  for (const filePath of noteFiles) {
    try {
      // Never report the reference note as similar to itself.
      if (filters.similarTo && filePath === filters.similarTo) {
        continue;
      }

      const metadata = await loadNoteMetadata(filePath);
      if (!metadata) {
        continue;
      }

      // Cheap checks first: metadata filters plus the query against the
      // metadata-derived text. Doing this before touching the file avoids
      // reading markdown for notes that would be rejected anyway.
      if (!matchesFilters(metadata, filters)) {
        continue;
      }

      // NOTE(review): the query must match the metadata text (above) AND the
      // markdown body (below). If body-only matches are meant to be returned,
      // these two checks should be combined with OR — confirm intended semantics.
      if (filters.query) {
        const markdownContent = await readNote(filePath);
        if (!markdownContent.toLowerCase().includes(loweredQuery)) {
          continue;
        }
      }

      // Convert metadata to a SessionNote for scoring and for the result.
      const sessionNote: SessionNote = {
        summary: metadata.summary || '',
        timestamp: metadata.timestamp || new Date().toISOString(),
        projectName: metadata.projectName,
        topic: metadata.topic,
        tags: metadata.tags,
        analysis: metadata.analysis,
      };

      let relevanceScore = 0;
      let matchedTags: string[] | undefined;

      if (filters.similarTo && baseNote) {
        // Similarity mode: 60% text relevance + 40% similarity when a query is
        // present, otherwise the similarity percentage alone.
        const textScore = filters.query ? calculateRelevance(metadata, filters) : 0;
        const simResult = calculateSimilarity(baseNote, sessionNote);
        relevanceScore = filters.query
          ? textScore * 0.6 + simResult.similarityPercentage * 0.4
          : simResult.similarityPercentage;
        matchedTags = simResult.matchedTags;
      } else {
        // Text search only.
        relevanceScore = calculateRelevance(metadata, filters);
      }

      results.push({
        filePath,
        note: sessionNote,
        relevanceScore,
        matchedTags,
      });
    } catch (error) {
      // One unreadable or corrupted file must not abort the whole search.
      logger.warn(`Failed to process ${filePath}`, { error: String(error) });
    }
  }

  // Highest score first.
  results.sort((a, b) => b.relevanceScore - a.relevanceScore);
  return results;
}
/**
 * Return the most recent notes under `notesDir`, newest first.
 *
 * Notes whose metadata cannot be loaded, that carry no timestamp, or whose
 * timestamp cannot be parsed into a date are left out. Files whose loading
 * throws are logged and skipped.
 *
 * @param notesDir - Directory handed to `getAllNoteFiles`.
 * @param limit - Maximum number of notes to return (default 10). Negative
 *   values are treated as 0.
 * @returns Up to `limit` notes ordered by descending timestamp.
 */
export async function getRecentNotes(
  notesDir: string,
  limit: number = 10
): Promise<SessionNote[]> {
  const noteFiles = await getAllNoteFiles(notesDir);
  const datedNotes: { note: SessionNote; time: number }[] = [];

  for (const filePath of noteFiles) {
    try {
      const metadata = await loadNoteMetadata(filePath);
      if (!metadata || !metadata.timestamp) {
        continue;
      }

      // An unparseable timestamp yields NaN, which would make the sort
      // comparator return NaN and leave the ordering undefined.
      const time = new Date(metadata.timestamp).getTime();
      if (Number.isNaN(time)) {
        logger.warn(`Skipping ${filePath}: unparseable timestamp`, {
          error: String(metadata.timestamp),
        });
        continue;
      }

      datedNotes.push({
        note: {
          summary: metadata.summary || '',
          timestamp: metadata.timestamp,
          projectName: metadata.projectName,
          topic: metadata.topic,
          tags: metadata.tags,
          analysis: metadata.analysis,
        },
        time,
      });
    } catch (error) {
      logger.warn(`Failed to load ${filePath}`, { error: String(error) });
    }
  }

  // Most recent first.
  datedNotes.sort((a, b) => b.time - a.time);

  // slice(0, negative) would drop items from the end instead of returning
  // none, so clamp the limit at zero.
  return datedNotes.slice(0, Math.max(0, limit)).map((entry) => entry.note);
}
/**
 * Decide whether a note passes the metadata-only filters (no text query).
 *
 * Rules, all of which must hold:
 * - `projectName`: the note's project must equal the filter value.
 * - `pattern` / `complexity`: rejected only when the note records a different
 *   value; notes that record no value are let through.
 * - `startDate` / `endDate`: inclusive range on the note's timestamp. Notes
 *   with a missing or unparseable timestamp are rejected when either bound is
 *   set. A bound that itself cannot be parsed is ignored.
 * - `tags`: the note must carry at least one of the requested tags.
 *
 * @param note - Note (or partial metadata) to test.
 * @param filters - Active search filters; unset fields impose no constraint.
 * @returns `true` when the note satisfies every active filter.
 */
function matchesMetadataFilters(
  note: Partial<SessionNote>,
  filters: SearchFilters
): boolean {
  // Project filter (strict equality).
  if (filters.projectName && note.projectName !== filters.projectName) {
    return false;
  }

  // Pattern filter.
  // NOTE(review): notes without a recorded pattern pass this filter, unlike
  // the project and tag filters — confirm this leniency is intended.
  const notePattern = note.analysis?.pattern;
  if (filters.pattern && notePattern && notePattern !== filters.pattern) {
    return false;
  }

  // Complexity filter (same leniency as the pattern filter).
  const noteComplexity = note.analysis?.complexity;
  if (filters.complexity && noteComplexity && noteComplexity !== filters.complexity) {
    return false;
  }

  // Date range filter.
  if (filters.startDate || filters.endDate) {
    if (!note.timestamp) {
      return false;
    }
    const noteTime = new Date(note.timestamp).getTime();
    // Comparisons against NaN are always false, so an unparseable timestamp
    // would slip through both bounds; treat it like a missing timestamp.
    if (Number.isNaN(noteTime)) {
      return false;
    }
    if (filters.startDate && noteTime < new Date(filters.startDate).getTime()) {
      return false;
    }
    if (filters.endDate && noteTime > new Date(filters.endDate).getTime()) {
      return false;
    }
  }

  // Tag filter: at least one requested tag must be present on the note.
  if (filters.tags && filters.tags.length > 0) {
    const noteTags = note.tags;
    if (!noteTags || noteTags.length === 0) {
      return false;
    }
    if (!filters.tags.some((filterTag) => noteTags.includes(filterTag))) {
      return false;
    }
  }

  return true;
}
/**
 * Full filter check: the metadata filters plus, when a query is given, a
 * case-insensitive substring match of the query against the note's
 * searchable text.
 *
 * @param note - Note (or partial metadata) to test.
 * @param filters - Active search filters.
 * @returns `true` when the note satisfies the metadata filters and the query.
 */
function matchesFilters(note: Partial<SessionNote>, filters: SearchFilters): boolean {
  const passesMetadata = matchesMetadataFilters(note, filters);

  // The text query only matters once the metadata filters have been satisfied.
  if (passesMetadata && filters.query) {
    const needle = filters.query.toLowerCase();
    const haystack = getSearchableText(note).toLowerCase();
    return haystack.includes(needle);
  }

  return passesMetadata;
}
/**
 * Flatten the text-bearing fields of a note into one space-separated string.
 *
 * Field order: summary, project name, topic, tags, then for each file change
 * its path and description, for each command its command line and
 * description, and for each code snippet its description and code. Missing
 * summary/project/topic/description values contribute an empty string.
 *
 * @param note - Note (or partial metadata) to read from.
 * @returns The concatenated text, joined with single spaces.
 */
function getSearchableText(note: Partial<SessionNote>): string {
  // Joining each pair with a space and then joining the pairs with a space
  // produces the same string as joining the flat list of fields.
  const fileChangeText = (note.fileChanges || []).map((change) =>
    [change.path, change.description || ''].join(' ')
  );
  const commandText = (note.commands || []).map((cmd) =>
    [cmd.command, cmd.description || ''].join(' ')
  );
  const snippetText = (note.codeSnippets || []).map((snippet) =>
    [snippet.description || '', snippet.code].join(' ')
  );

  const fragments: string[] = [
    note.summary || '',
    note.projectName || '',
    note.topic || '',
    ...(note.tags || []),
    ...fileChangeText,
    ...commandText,
    ...snippetText,
  ];

  return fragments.join(' ');
}
/**
 * Compute a relevance score for a note against the active filters.
 * Higher means more relevant; the value is a point total with no fixed upper
 * bound.
 *
 * Points:
 * - 10 base points for every note
 * - +50 when the project equals `filters.projectName`
 * - +20 per requested tag present on the note
 * - +30 for a pattern match, +20 for a complexity match
 * - query: +15 per occurrence in the searchable text, +40 if it occurs in the
 *   summary, +30 if it occurs in the topic (all case-insensitive)
 * - recency: between 0 and 10 points, falling by 1 point per 10 days of age
 *
 * @param note - Note (or partial metadata) to score.
 * @param filters - Active search filters.
 * @returns The finite relevance score.
 */
function calculateRelevance(note: Partial<SessionNote>, filters: SearchFilters): number {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const MAX_RECENCY_BOOST = 10;

  // Base score for all results.
  let score = 10;

  // Boost for exact project match.
  if (filters.projectName && note.projectName === filters.projectName) {
    score += 50;
  }

  // Boost for each requested tag the note carries.
  const noteTags = note.tags;
  if (filters.tags && noteTags) {
    score += filters.tags.filter((tag) => noteTags.includes(tag)).length * 20;
  }

  // Boost for pattern match.
  if (filters.pattern && note.analysis?.pattern === filters.pattern) {
    score += 30;
  }

  // Boost for complexity match.
  if (filters.complexity && note.analysis?.complexity === filters.complexity) {
    score += 20;
  }

  // Query relevance boost.
  if (filters.query) {
    const query = filters.query.toLowerCase();
    const searchableText = getSearchableText(note).toLowerCase();

    // The query is user input, so it is escaped before being compiled into a
    // regular expression; it is matched literally, not as a pattern.
    const occurrences = (searchableText.match(new RegExp(escapeRegex(query), 'g')) || [])
      .length;
    score += occurrences * 15;

    // A hit in the summary is weighted highest.
    if (note.summary && note.summary.toLowerCase().includes(query)) {
      score += 40;
    }

    // A hit in the topic comes next.
    if (note.topic && note.topic.toLowerCase().includes(query)) {
      score += 30;
    }
  }

  // Recency boost (newer = slightly higher).
  if (note.timestamp) {
    const createdAt = new Date(note.timestamp).getTime();
    // An unparseable timestamp gives NaN, which would turn the whole score
    // into NaN and corrupt any sort on it; such notes simply get no boost.
    if (!Number.isNaN(createdAt)) {
      const daysSinceCreation = (Date.now() - createdAt) / MS_PER_DAY;
      // Clamp on both sides: old notes get 0, and future-dated notes cannot
      // exceed the maximum boost.
      const recencyBoost = Math.min(
        MAX_RECENCY_BOOST,
        Math.max(0, MAX_RECENCY_BOOST - daysSinceCreation / 10)
      );
      score += recencyBoost;
    }
  }

  return score;
}
/**
 * Score how alike two sessions are on a 0-100 scale.
 *
 * Out of 100 points: up to 40 for tag overlap (Jaccard coefficient of the two
 * tag lists), 30 when both sessions record the same pattern, and 30 when both
 * record the same project name. A field missing on either side earns nothing.
 *
 * @param session1 - First session.
 * @param session2 - Second session.
 * @returns The rounded similarity percentage and the tags of `session1` that
 *   also appear on `session2`.
 */
function calculateSimilarity(
  session1: SessionNote,
  session2: SessionNote
): { similarityPercentage: number; matchedTags: string[] } {
  const TAG_POINTS = 40;
  const PATTERN_POINTS = 30;
  const PROJECT_POINTS = 30;
  // Every factor always counts toward the maximum, whether or not it scored.
  const maxPoints = TAG_POINTS + PATTERN_POINTS + PROJECT_POINTS;

  const firstTags = session1.tags || [];
  const secondTags = session2.tags || [];

  // Factor 1: tag overlap.
  const tagPoints = jaccardSimilarity(firstTags, secondTags) * TAG_POINTS;
  const sharedTags = intersection(firstTags, secondTags);

  // Factor 2: identical pattern, counted only when both sessions have one.
  const firstPattern = session1.analysis?.pattern;
  const secondPattern = session2.analysis?.pattern;
  const patternPoints =
    firstPattern && secondPattern && firstPattern === secondPattern ? PATTERN_POINTS : 0;

  // Factor 3: identical project, counted only when both sessions have one.
  const projectPoints =
    session1.projectName &&
    session2.projectName &&
    session1.projectName === session2.projectName
      ? PROJECT_POINTS
      : 0;

  const earnedPoints = tagPoints + patternPoints + projectPoints;
  const percentage = (earnedPoints / maxPoints) * 100;

  return {
    similarityPercentage: Math.round(percentage),
    matchedTags: sharedTags,
  };
}
/**
 * Jaccard coefficient of two string lists: J(A,B) = |A ∩ B| / |A ∪ B|.
 *
 * Two empty lists yield 0 rather than an undefined 0/0. For lists without
 * repeated entries the result lies between 0 and 1.
 *
 * @param set1 - First list of items.
 * @param set2 - Second list of items.
 * @returns Size of the intersection divided by size of the union, or 0 when
 *   the union is empty.
 */
function jaccardSimilarity(set1: string[], set2: string[]): number {
  // The union is empty exactly when both inputs are empty, so a single guard
  // on its size covers the "nothing to compare" case.
  const combinedCount = union(set1, set2).length;
  if (combinedCount === 0) {
    return 0;
  }

  const sharedCount = intersection(set1, set2).length;
  return sharedCount / combinedCount;
}
/**
 * Get the intersection of two arrays as a list of unique items.
 *
 * Items are returned in the order of their first appearance in `arr1`. Each
 * common item appears once even when `arr1` repeats it, so the result's
 * length never exceeds the number of distinct items in either input.
 *
 * @param arr1 - Array whose order determines the result order.
 * @param arr2 - Array to test membership against.
 * @returns The distinct items present in both arrays.
 */
function intersection<T>(arr1: T[], arr2: T[]): T[] {
  const lookup = new Set(arr2);
  const alreadyTaken = new Set<T>();
  const common: T[] = [];

  for (const item of arr1) {
    // Skip repeats: counting a duplicate twice would inflate the overlap.
    if (lookup.has(item) && !alreadyTaken.has(item)) {
      alreadyTaken.add(item);
      common.push(item);
    }
  }

  return common;
}
/**
 * Get the union of two arrays with duplicates removed.
 *
 * Items keep the order of their first appearance: everything from `arr1`
 * first, followed by the items of `arr2` not seen before.
 *
 * @param arr1 - First array.
 * @param arr2 - Second array.
 * @returns The distinct items found in either array.
 */
function union<T>(arr1: T[], arr2: T[]): T[] {
  const merged = new Set<T>(arr1);
  arr2.forEach((item) => {
    merged.add(item);
  });
  return Array.from(merged);
}