obsidian_smart_search
Search a local Obsidian vault using BM25 ranking with boosted titles, tags, and headings. Retrieves relevant notes with compact snippets.
Instructions
BM25-ranked vault search with title/tag/heading boosting and compact snippets.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| vault | No | Optional configured vault name. Defaults to the server default vault. | |
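| query | Yes | Search query string. | |
| contextChars | No | Characters of context kept on each side of the first match in a snippet (integer, 20–2000). | 180 |
| limit | No | Maximum number of hits to return (integer, 1–100). | 20 |
| offset | No | Number of ranked hits to skip before the returned page (integer, ≥ 0). | 0 |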
Implementation Reference
- src/tools.ts:205-220 (registration) Registration of the 'obsidian_smart_search' tool with its schema (vault, query, contextChars, limit, offset) and handler that calls smartSearch() from intelligence.ts.
// Registers the read-only `obsidian_smart_search` tool.
//
// Input: optional `vault`, required `query`, and optional paging/snippet
// controls (`contextChars` 20–2000, default 180; `limit` 1–100, default 20;
// `offset` >= 0, default 0).
// Output: `{ total, offset, hits }`, where `hits` is the requested page of
// ranked results and `total` is the number of notes that matched the query.
tool(
  "obsidian_smart_search",
  "BM25-ranked vault search with title/tag/heading boosting and compact snippets.",
  {
    vault: vaultArg,
    query: z.string(),
    contextChars: z.number().int().min(20).max(2000).optional().default(180),
    limit: z.number().int().min(1).max(100).optional().default(20),
    offset: z.number().int().min(0).optional().default(0),
  },
  async (args) => {
    const notes = await loadNotes(vaults, args.vault);
    // Ask for every match (there can be at most one hit per note) so that
    // `total` is the real match count. Capping the search at offset + limit
    // made `total` stop at the page window, so clients could not tell
    // whether more pages existed. smartSearch builds all hits before it
    // slices, so this adds no extra ranking work.
    const all = smartSearch(notes, args.query, notes.length, args.contextChars);
    const pageEnd = args.offset + args.limit;
    return { total: all.length, offset: args.offset, hits: all.slice(args.offset, pageEnd) };
  },
  { readOnlyHint: true },
);
// - src/intelligence.ts:39-79 (handler) The smartSearch() function implementing BM25-ranked vault search with title/tag/heading boosting, tokenization, IDF scoring, and snippet extraction.
/**
 * Ranks notes against a free-text query using BM25 over each note's weighted
 * term map, then adds flat bonuses for title and tag matches.
 *
 * Scoring, per note:
 * - BM25 (k1 = 1.2, b = 0.75) summed over every query term present in the
 *   note's weighted terms. A term repeated in the query contributes once per
 *   repetition.
 * - +5 when the normalized title contains the query terms joined by spaces.
 * - +2.5 for each tag whose normalized text contains any query term.
 *
 * Notes that match no query term are omitted. Results are sorted by score
 * (descending), ties broken by path, and truncated to `limit`.
 *
 * @param notes - Notes to search.
 * @param query - Raw user query; it is tokenized before matching.
 * @param limit - Maximum number of hits to return.
 * @param contextChars - Snippet context size passed to `snippetFor`.
 * @returns Ranked hits, or an empty array when the query yields no terms.
 */
export function smartSearch(notes: NoteRecord[], query: string, limit: number, contextChars: number): SmartSearchHit[] {
  const terms = tokenize(query);
  if (terms.length === 0) return [];

  // BM25 parameters. k1 + 1 is spelled out as its own literal so the
  // floating-point result matches the original `tf * 2.2` exactly.
  const k1 = 1.2;
  const k1PlusOne = 2.2;
  const b = 0.75;

  const indexed = notes.map((note) => ({ note, doc: weightedTerms(note) }));
  const avgLen = indexed.reduce((sum, entry) => sum + entry.doc.length, 0) / Math.max(1, indexed.length);

  // Document frequency: number of notes whose term map contains each term.
  // Map keys are already unique, so each note counts at most once per term.
  const df = new Map<string, number>();
  for (const { doc } of indexed) {
    for (const term of doc.terms.keys()) df.set(term, (df.get(term) ?? 0) + 1);
  }

  // Loop-invariant: the phrase used for the title bonus.
  const phrase = terms.join(" ");

  const hits: SmartSearchHit[] = [];
  for (const { note, doc } of indexed) {
    let score = 0;
    // A Set keeps first-seen order while preventing a repeated query term
    // (e.g. "foo foo") from being listed twice in the output.
    const matched = new Set<string>();
    for (const term of terms) {
      const tf = doc.terms.get(term) ?? 0;
      if (tf <= 0) continue;
      const dfVal = Math.min(df.get(term) ?? 0, notes.length);
      const idf = Math.log(1 + (notes.length - dfVal + 0.5) / (dfVal + 0.5));
      score += idf * ((tf * k1PlusOne) / (tf + k1 * (1 - b + b * doc.length / Math.max(1, avgLen))));
      matched.add(term);
    }
    if (matched.size === 0) continue;

    if (normalizeText(note.title).includes(phrase)) score += 5;
    for (const tag of note.tags) {
      // Normalize each tag once rather than once per query term.
      const normalizedTag = normalizeText(tag);
      if (terms.some((term) => normalizedTag.includes(term))) score += 2.5;
    }

    hits.push({
      path: note.path,
      title: note.title,
      score: Number(score.toFixed(4)),
      tags: note.tags,
      snippet: snippetFor(note.content, terms, contextChars),
      matchedTerms: [...matched],
    });
  }

  return hits.sort((a, b2) => b2.score - a.score || a.path.localeCompare(b2.path)).slice(0, limit);
}
// - src/intelligence.ts:5-12 (schema) The SmartSearchHit type that defines the output shape (path, title, score, tags, snippet, matchedTerms).
/** One ranked result produced by smartSearch(). */
export type SmartSearchHit = {
  /** Path of the matching note (copied from the note record). */
  path: string;
  /** Title of the matching note (copied from the note record). */
  title: string;
  /** Relevance score, rounded to four decimal places; higher ranks first. */
  score: number;
  /** Tags of the matching note (copied from the note record). */
  tags: string[];
  /** Whitespace-collapsed excerpt of the note content built by snippetFor(). */
  snippet: string;
  /** Query terms that were found in the note's weighted term map. */
  matchedTerms: string[];
};
// - src/intelligence.ts:227-247 (helper) Tokenization (tokenize), text normalization, and snippet extraction (snippetFor) helper functions used by smartSearch.
/**
 * Splits text into normalized search terms.
 *
 * The text is normalized with `normalizeText`, split on runs of characters
 * outside the allowed class, and filtered to terms longer than two characters
 * that are not in `STOPWORDS`.
 *
 * NOTE(review): `normalizeText` strips combining marks, so the accented
 * letters in the character class should never appear in its output. The
 * pattern is kept unchanged here; confirm before simplifying it.
 */
function tokenize(text: string): string[] {
  return normalizeText(text)
    .split(/[^a-z0-9áéíóúñü]+/i)
    .map((term) => term.trim())
    .filter((term) => term.length > 2 && !STOPWORDS.has(term));
}

/**
 * Lowercases text, decomposes it with Unicode NFKD, and removes combining
 * marks in U+0300–U+036F so that accented letters compare equal to their
 * base letters.
 */
function normalizeText(text: string): string {
  return text
    .toLowerCase()
    .normalize("NFKD")
    .replace(/[\u0300-\u036f]/g, "");
}

/**
 * Maps an index in `normalizeText(content)` back to the UTF-16 offset in
 * `content` of the code point that produced it.
 *
 * Normalization can change string length: stripped combining marks shrink
 * it, and compatibility decompositions expand it. An index found in the
 * normalized text is therefore not directly usable on the original.
 */
function originalOffsetFor(content: string, normalizedIndex: number): number {
  let consumed = 0; // UTF-16 units of `content` walked so far
  let produced = 0; // UTF-16 units of normalized text those units yield
  for (const ch of content) {
    // ASCII always normalizes to exactly one unit; skip the costly call.
    const width = ch.charCodeAt(0) < 0x80 ? 1 : normalizeText(ch).length;
    if (produced + width > normalizedIndex) break;
    produced += width;
    consumed += ch.length;
  }
  return consumed;
}

/**
 * Builds a compact excerpt of `content` centred on the earliest occurrence of
 * any query term.
 *
 * Matching is done against the normalized content, and the match position is
 * then translated back to the original string so the excerpt window lands on
 * the match even when normalization changed the text length. Whitespace runs
 * are collapsed to single spaces, and "..." marks truncation at either end.
 * When no term is found the excerpt starts at the beginning of the content.
 *
 * @param content - Original note text.
 * @param terms - Normalized query terms to look for.
 * @param contextChars - Characters of context kept on each side of the match.
 * @returns The excerpt, with leading/trailing "..." where text was cut.
 */
function snippetFor(content: string, terms: string[], contextChars: number): string {
  const normalized = normalizeText(content);

  let firstMatch = -1;
  for (const term of terms) {
    const at = normalized.indexOf(term);
    if (at >= 0 && (firstMatch < 0 || at < firstMatch)) firstMatch = at;
  }

  const idx = firstMatch < 0 ? 0 : originalOffsetFor(content, firstMatch);
  const start = Math.max(0, idx - contextChars);
  const end = Math.min(content.length, idx + contextChars);
  const excerpt = content.slice(start, end).replace(/\s+/g, " ").trim();
  return `${start > 0 ? "..." : ""}${excerpt}${end < content.length ? "..." : ""}`;
}
// - src/intelligence.ts:205-212 (helper) weightedTerms() helper that builds a BM25 term map from note content, title, tags, and headings with boosting weights.
/**
 * Builds the weighted term map for one note, used as its BM25 "document".
 *
 * Each text field is fed to `addWeighted` with its boost: content 1, title 3,
 * tags 2.5, heading titles 2. Tags and heading titles are space-joined into a
 * single string first.
 * NOTE(review): `addWeighted` is defined elsewhere; presumably it tokenizes
 * the text and adds the boost per term occurrence. Confirm against its source.
 *
 * @param note - Note whose content, title, tags, and headings are indexed.
 * @returns The term map, plus `length`: the sum of all values in that map.
 */
function weightedTerms(note: NoteRecord): { terms: Map<string, number>; length: number } {
  const weights = new Map<string, number>();
  const headingText = note.headings.map((heading) => heading.title).join(" ");

  // Field text paired with its boost, applied in this order.
  const fields: [string, number][] = [
    [note.content, 1],
    [note.title, 3],
    [note.tags.join(" "), 2.5],
    [headingText, 2],
  ];
  for (const [text, boost] of fields) {
    addWeighted(weights, text, boost);
  }

  let total = 0;
  for (const weight of weights.values()) {
    total += weight;
  }
  return { terms: weights, length: total };
}