search_notes
Find relevant notes with BM25 full-text search. Optionally narrow the search to a directory and choose whether to search the note body, the frontmatter, or both.
Instructions
Full-text BM25 search. Pass { query } and optionally scope (path prefix), searchContent (default true), searchFrontmatter (default false), limit. Returns { root, results[] } sorted by relevance, each with path, score, excerpt.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
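| query | Yes | Full-text search query. Supports multiple words; results ranked by BM25. | |
| scope | No | Root-relative path prefix to restrict the search scope. Omit to search the entire directory. | |
| searchContent | No | Search within note body content. | true |
| searchFrontmatter | No | Search within frontmatter field values. | false |
| limit | No | Maximum number of results to return. Omit for default limit. | |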
Implementation Reference
- src/tools/search-tools.ts:40-75 (handler) The main tool handler for 'search_notes'. Calls SearchServiceImpl.search() with the parsed query, scope, searchContent, searchFrontmatter, and limit options. Returns the results as JSON together with the root path.
/**
 * Build the `search_notes` tool handler bound to `container`.
 *
 * The handler validates its arguments with `SearchNotesSchema`, delegates to
 * `services.search.search`, and answers with a single text item holding
 * pretty-printed JSON `{ root, results }`. Any thrown error (missing services,
 * schema validation, search failure) is logged and turned into an `isError`
 * response carrying the message and a list of suggested next steps; the
 * handler itself does not rethrow.
 */
function makeSearchNotesTool(container: ServiceContainer): ToolHandler {
  const tool: ToolHandler = {
    name: "search_notes",
    description:
      "Full-text BM25 search. Pass `{ query }` and optionally `scope` (path prefix), `searchContent` (default true), `searchFrontmatter` (default false), `limit`. Returns `{ root, results[] }` sorted by relevance, each with `path`, `score`, `excerpt`.",
    inputSchema: SearchNotesSchema,
    async handler(args): Promise<ToolResponse> {
      try {
        // Resolve services before parsing so a missing container is reported first.
        const services = requireServices(container);
        const parsed = SearchNotesSchema.parse(args);
        const { query, scope, searchContent, searchFrontmatter, limit } = parsed;
        log.info({ query, scope, searchContent, searchFrontmatter, limit }, "search_notes called");

        const results = await services.search.search(query, {
          scope,
          searchContent,
          searchFrontmatter,
          limit,
        });
        log.info({ query, resultCount: results.length }, "search_notes complete");

        const body = JSON.stringify({ root: getRoot(container), results }, null, 2);
        return { content: [{ type: "text", text: body }] };
      } catch (err) {
        log.error({ err }, "search_notes failed");

        const root = getRoot(container);
        const message = err instanceof Error ? err.message : String(err);
        const body = JSON.stringify({
          root,
          error: message,
          possibleSolutions: [
            "Try a different search query",
            "Use list_directory to browse the directory structure",
            "Check the scope path exists with list_directory",
          ],
        });
        return { content: [{ type: "text", text: body }], isError: true };
      }
    },
  };
  return tool;
}
// - src/tools/search-tools.ts:12-38 (schema) Zod input schema for 'search_notes': defines query (string, required), scope (optional path prefix), searchContent (boolean, default true), searchFrontmatter (boolean, default false), and limit (optional positive int).
/**
 * Input schema for the `search_notes` tool.
 *
 * Only `query` is required. `searchContent` and `searchFrontmatter` fall back
 * to `true` and `false` respectively when omitted; `scope` and `limit` stay
 * undefined when omitted.
 */
const SearchNotesSchema = z.object({
  // Required free-text query.
  query: z
    .string()
    .describe("Full-text search query. Supports multiple words; results ranked by BM25."),

  // Optional path prefix limiting which notes are searched.
  scope: z
    .string()
    .optional()
    .describe(
      "Root-relative path prefix to restrict the search scope. Omit to search the entire directory.",
    ),

  // Toggle for searching the note body.
  searchContent: z
    .boolean()
    .optional()
    .default(true)
    .describe("Search within note body content. Default: true."),

  // Toggle for searching frontmatter values.
  searchFrontmatter: z
    .boolean()
    .optional()
    .default(false)
    .describe("Search within frontmatter field values. Default: false."),

  // Optional cap on the number of results; must be a positive integer when given.
  limit: z
    .number()
    .int()
    .positive()
    .optional()
    .describe("Maximum number of results to return. Omit for default limit."),
});
// - src/tools/search-tools.ts:81-90 (registration) Registration of search tools (including search_notes) into the tool registry. Called from src/tools/index.ts registerTools().
/**
 * Add every search tool to `registry`, keyed by the tool's `name`.
 *
 * Currently the only search tool is the one produced by `makeSearchNotesTool`.
 * An existing entry with the same name is overwritten.
 *
 * @param registry - Tool registry to populate (mutated in place).
 * @param container - Service container handed to each tool factory.
 */
export function registerSearchTools(
  registry: Map<string, ToolHandler>,
  container: ServiceContainer,
): void {
  const searchTools = [makeSearchNotesTool(container)];
  searchTools.forEach((handler) => {
    registry.set(handler.name, handler);
  });
}
// - src/tools/search-tools.ts:1-6 (helper) Imports for search_notes tool: zod for schema validation, type imports for ToolHandler/ServiceContainer/ToolResponse, helper functions requireServices/getRoot from index.ts, and logger.
import { z } from "zod"; import type { ToolHandler, ServiceContainer, ToolResponse } from "../types.js"; import { requireServices, getRoot } from "./index.js"; import { createChildLog } from "../markscribe-log.js"; const log = createChildLog({ module: "search-tools" }); - SearchServiceImpl.search() - the actual BM25 full-text search implementation. Tokenizes query, collects notes in scope, computes BM25 scores (k1=1.2, b=0.75), extracts excerpts, and returns sorted results.
/**
 * Rank the readable notes under `scope` against `query` with BM25 and return
 * the highest-scoring ones.
 *
 * Steps: tokenize the query, collect note paths, read notes in batches of
 * `READ_CONCURRENCY`, build per-document term frequencies from the body
 * and/or frontmatter, score each document, then sort by descending score and
 * truncate.
 *
 * @param query - Free-text query; tokenized with `tokenize`.
 * @param options - Optional settings:
 *   - `scope`: passed to `collectPaths` to restrict which notes are read.
 *   - `searchContent` (default `true`): include the note body in scoring.
 *   - `searchFrontmatter` (default `false`): include frontmatter values in
 *     scoring and report `matchedFields`.
 *   - `limit`: maximum number of results; falls back to `this.maxResults`.
 * @returns Results sorted by descending score. Empty when the query yields no
 *   terms, no note could be read, or nothing scores above zero. Each result
 *   has `path`, `score`, `excerpt`, and `matchedFields` only when at least one
 *   frontmatter field matched.
 */
async search(query: string, options?: SearchOptions): Promise<SearchResult[]> {
  const scope = options?.scope;
  const searchContent = options?.searchContent ?? true;
  const searchFrontmatterOpt = options?.searchFrontmatter ?? false;
  const limit = options?.limit;

  log.info(
    { query, scope, searchContent, searchFrontmatter: searchFrontmatterOpt, limit },
    "search",
  );

  const queryTerms = tokenize(query);
  if (queryTerms.length === 0) {
    log.debug("search: empty query terms, returning []");
    return [];
  }

  // Collect all notes in scope
  const allPaths = await this.collectPaths(scope);
  log.debug({ count: allPaths.length, scope }, "search: collected paths");

  type DocData = {
    path: string;
    tokens: string[];
    tf: Map<string, number>;
    contentText: string;
    frontmatterText: string;
    frontmatter: Record<string, unknown>;
  };

  // Flatten a frontmatter value into plain text for substring matching.
  // Plain `String(value)` turns nested objects into "[object Object]" and
  // null/undefined array entries into "null"/"undefined", which produced
  // false field matches for those words and hid nested values entirely.
  // `visited` guards against cyclic structures.
  const flattenFieldValue = (value: unknown, visited: Set<object> = new Set()): string => {
    if (value == null) return "";
    if (typeof value !== "object" || value instanceof Date) return String(value);
    if (visited.has(value)) return "";
    visited.add(value);
    const parts: unknown[] = Array.isArray(value) ? value : Object.values(value);
    return parts.map((part) => flattenFieldValue(part, visited)).join(" ");
  };

  // Read all notes with bounded concurrency
  const docs: DocData[] = [];
  for (let i = 0; i < allPaths.length; i += READ_CONCURRENCY) {
    const batch = allPaths.slice(i, i + READ_CONCURRENCY);
    const batchResults = await Promise.all(
      batch.map(async (notePath) => {
        try {
          return { note: await this.file.readNote(notePath), path: notePath };
        } catch {
          // Best effort: one unreadable note must not fail the whole search.
          log.debug({ path: notePath }, "search: skipping unreadable note");
          return null;
        }
      }),
    );

    for (const result of batchResults) {
      if (!result) continue;
      const { note, path: notePath } = result;

      const contentText = note.content;
      const contentTokens = searchContent ? tokenize(contentText) : [];
      const contentTf = searchContent
        ? buildTermFreq(contentTokens)
        : new Map<string, number>();

      let frontmatterText = "";
      let frontmatterTokens: string[] = [];
      let frontmatterTf = new Map<string, number>();
      if (searchFrontmatterOpt) {
        frontmatterText = this.frontmatterToText(note.frontmatter);
        frontmatterTokens = tokenize(frontmatterText);
        frontmatterTf = buildTermFreq(frontmatterTokens);
      }

      const combinedTokens = [...contentTokens, ...frontmatterTokens];
      const combinedTf = this.mergeTf(contentTf, frontmatterTf);

      docs.push({
        path: notePath,
        tokens: combinedTokens,
        tf: combinedTf,
        contentText,
        frontmatterText,
        frontmatter: note.frontmatter,
      });
    }
  }

  if (docs.length === 0) {
    return [];
  }

  // Compute document frequency across the corpus. Only query terms are ever
  // looked up during scoring, so only those are counted.
  const queryTermSet = new Set(queryTerms);
  const df = new Map<string, number>();
  for (const doc of docs) {
    const seen = new Set<string>();
    for (const token of doc.tokens) {
      if (!queryTermSet.has(token) || seen.has(token)) continue;
      seen.add(token);
      df.set(token, (df.get(token) ?? 0) + 1);
    }
  }

  const N = docs.length;
  const avgDocLen = docs.reduce((sum, d) => sum + d.tokens.length, 0) / N;

  const results: SearchResult[] = [];

  for (const doc of docs) {
    const docLen = doc.tokens.length;
    let score = 0;

    for (const term of queryTerms) {
      const tf = doc.tf.get(term) ?? 0;
      if (tf === 0) continue;

      const dfVal = df.get(term) ?? 0;
      // The "+ 1" inside the log keeps idf positive even for very common terms.
      const idf = Math.log((N - dfVal + 0.5) / (dfVal + 0.5) + 1);
      const termScore =
        (idf * (tf * (BM25_K1 + 1))) /
        (tf + BM25_K1 * (1 - BM25_B + BM25_B * (docLen / avgDocLen)));
      score += termScore;
    }

    if (score <= 0) continue;

    // Identify matched frontmatter fields (case-insensitive substring match).
    const matchedFields: string[] = [];
    if (searchFrontmatterOpt) {
      for (const [field, fieldValue] of Object.entries(doc.frontmatter)) {
        const fieldText = flattenFieldValue(fieldValue).toLowerCase();
        if (queryTerms.some((term) => fieldText.includes(term))) {
          matchedFields.push(field);
        }
      }
    }

    // Prefer the body for the excerpt; use frontmatter text only when the body
    // was not searched, and fall back to the body if that text is empty.
    const excerptSource = searchContent ? doc.contentText : doc.frontmatterText;
    const excerpt = extractExcerpt(
      excerptSource || doc.contentText,
      queryTerms,
      this.excerptChars,
    );

    const result: SearchResult = {
      path: doc.path,
      score,
      excerpt,
    };
    if (matchedFields.length > 0) {
      result.matchedFields = matchedFields;
    }

    results.push(result);
  }

  // Sort by score descending
  results.sort((a, b) => b.score - a.score);

  const effectiveLimit = limit ?? this.maxResults;
  const limited = results.slice(0, effectiveLimit);

  log.info({ query, resultCount: limited.length }, "search complete");
  return limited;
}