semantic_identifier_search
Search code identifiers like functions and variables using natural language queries to find definitions and usage patterns across your codebase.
Instructions
Searches by semantic intent at the identifier level (functions, methods, classes, variables) and returns definition lines together with ranked call sites. It uses embeddings over symbol signatures and source context to produce line-numbered definition/call chains.
Input Schema
Table / JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Natural language intent to match identifiers and usages. | |
| top_k | No | How many identifiers to return. Default: 5. | |
| top_calls_per_identifier | No | How many ranked call sites per identifier. Default: 10. | |
| include_kinds | No | Optional kinds filter, e.g. ["function", "method", "variable"]. | |
| semantic_weight | No | Weight for semantic similarity score. Default: 0.78. | |
| keyword_weight | No | Weight for keyword overlap score. Default: 0.22. | |
Implementation Reference
- The main handler function for the `semantic_identifier_search` tool. It performs identifier indexing, calculates semantic and keyword scores, and ranks call sites.
/**
 * Main handler for the `semantic_identifier_search` tool.
 *
 * Builds the identifier index for `options.rootDir`, scores every indexed
 * identifier against the query by blending embedding cosine similarity with
 * keyword coverage, and renders the best matches (each with its ranked call
 * sites) as a plain-text report.
 *
 * @param options - Search root, query text, and optional tuning knobs
 *   (result counts, kind filter, score weights).
 * @returns The formatted report, or a short explanatory message when the
 *   index contains no identifiers or the kind filter excludes all of them.
 */
export async function semanticIdentifierSearch(options: SemanticIdentifierSearchOptions): Promise<string> {
  // Math.floor(NaN) is NaN and Math.max(1, NaN) stays NaN, which would make
  // slice(0, NaN) return nothing. Fall back to the default in that case.
  // Infinity is left alone so "return everything" keeps working.
  const toCount = (value: number | undefined, fallback: number): number => {
    const floored = Math.floor(value ?? fallback);
    return Number.isNaN(floored) ? fallback : Math.max(1, floored);
  };
  // Converts a 0..1 score to a percentage with one decimal place.
  const toPercent = (value: number): number => Math.round(value * 1000) / 10;

  const topK = toCount(options.topK, 5);
  const topCalls = toCount(options.topCallsPerIdentifier, 10);
  const semanticWeight = normalizeWeight(options.semanticWeight, 0.78);
  const keywordWeight = normalizeWeight(options.keywordWeight, 0.22);
  const includeKinds = normalizeKinds(options.includeKinds);

  const index = await buildIdentifierIndex(options.rootDir);
  if (index.docs.length === 0) {
    return "No supported identifiers found for semantic identifier search.";
  }

  const [queryVec] = await fetchEmbedding(options.query);
  const queryTerms = new Set(splitTerms(options.query));
  // Loop-invariant: the weights do not change per document.
  const totalWeight = semanticWeight + keywordWeight;

  const scored: RankedIdentifier[] = [];
  for (let i = 0; i < index.docs.length; i++) {
    const doc = index.docs[i];
    if (includeKinds && !includeKinds.has(doc.kind.toLowerCase())) continue;

    // Negative cosine similarity is floored at 0.
    const semanticScore = Math.max(cosine(queryVec, index.vectors[i]), 0);
    const keywordScore = getKeywordCoverage(
      queryTerms,
      `${doc.name} ${doc.signature} ${doc.path} ${doc.header}`,
    );
    // Weighted average of both signals; with no positive total weight the
    // semantic score is used on its own.
    const score =
      totalWeight > 0
        ? clamp01((semanticWeight * semanticScore + keywordWeight * keywordScore) / totalWeight)
        : semanticScore;
    scored.push({ doc, semanticScore, keywordScore, score });
  }

  if (scored.length === 0) {
    return "No identifiers matched the requested kind filters.";
  }

  const top = scored.sort((a, b) => b.score - a.score).slice(0, topK);
  const cache = await loadEmbeddingCache(options.rootDir, IDENTIFIER_CACHE_FILE);

  const lines: string[] = [
    `Top ${top.length} identifier matches for: "${options.query}"`,
    "",
  ];

  for (let i = 0; i < top.length; i++) {
    const item = top[i];
    const range = formatLineRange(item.doc.line, item.doc.endLine);
    // Rank and identifier belong on one line, like the call-site entries below.
    lines.push(`${i + 1}. ${item.doc.kind} ${item.doc.name} - ${item.doc.path} (${range})`);
    lines.push(` Score: ${toPercent(item.score)}% | Semantic: ${toPercent(item.semanticScore)}% | Keyword: ${toPercent(item.keywordScore)}%`);
    lines.push(` Signature: ${item.doc.signature}`);
    if (item.doc.parentName) lines.push(` Parent: ${item.doc.parentName}`);

    // Call sites are ranked one identifier at a time, each call receiving the
    // same cache object.
    const calls = await rankCallSites(
      options.rootDir,
      cache,
      queryTerms,
      queryVec,
      item.doc,
      index.fileLines,
      topCalls,
    );

    if (calls.sites.length === 0) {
      lines.push(" Calls: none found");
      lines.push("");
      continue;
    }

    lines.push(` Calls (${calls.sites.length}/${calls.total}):`);
    for (let j = 0; j < calls.sites.length; j++) {
      const site = calls.sites[j];
      lines.push(` ${j + 1}. ${site.file}:L${site.line} (${toPercent(site.score)}%) ${site.context}`);
    }
    lines.push("");
  }

  // Write the cache back once all call sites have been ranked.
  await saveEmbeddingCache(options.rootDir, cache, IDENTIFIER_CACHE_FILE);
  return lines.join("\n");
}
- Type definition for the input options of the `semantic_identifier_search` tool.
/**
 * Input options accepted by the `semantic_identifier_search` handler.
 */
export interface SemanticIdentifierSearchOptions {
  /** Root directory whose identifiers are indexed and searched. */
  rootDir: string;

  /** Natural-language intent to match against identifiers and their usages. */
  query: string;

  /** How many identifiers to return. Defaults to 5 when omitted. */
  topK?: number;

  /** How many ranked call sites to list per identifier. Defaults to 10 when omitted. */
  topCallsPerIdentifier?: number;

  /** Optional kinds filter, e.g. `["function", "method", "variable"]`. */
  includeKinds?: string[];

  /** Weight for the semantic similarity score. Defaults to 0.78 when omitted. */
  semanticWeight?: number;

  /** Weight for the keyword overlap score. Defaults to 0.22 when omitted. */
  keywordWeight?: number;
}