searchTranscripts
Search YouTube video transcripts to find specific content with timestamped results. Filter by collection or video ID to locate relevant segments quickly.
Instructions
Searches imported transcript-text collections — focusing on the active collection by default — and returns ranked, timestamped chunks.
Input Schema
Parameters (JSON Schema)
| Name | Required | Description | Default |
|---|---|---|---|
| query | Yes | Search text to match against transcript chunks. | |
| collectionId | No | Restrict the search to a single collection. | |
| maxResults | No | Maximum number of results to return, clamped to 1–50. | 10 |
| minScore | No | Minimum relevance score for a chunk to be included, clamped to 0–1. | 0.2 |
| videoIdFilter | No | Only search chunks from these video IDs. | |
| useActiveCollection | No | Restrict the search to the active collection (default behavior). | |
Implementation Reference
- src/lib/knowledge-base.ts:451-521 (handler) — This is the actual handler function that executes the transcript search logic. It searches collections, ranks results, and formats the output.
async search(input: SearchTranscriptsInput): Promise<SearchTranscriptsOutput> { const startedAt = Date.now(); const maxResults = Math.max(1, Math.min(input.maxResults ?? 10, 50)); const minScore = Math.max(0, Math.min(input.minScore ?? 0.2, 1)); const scope = this.resolveCollectionScope(input); const targetCollections = scope.searchedCollectionIds; const videoFilter = input.videoIdFilter ? new Set(input.videoIdFilter) : undefined; const results: SearchTranscriptsOutput["results"] = []; let totalChunksSearched = 0; let embeddingModelLabel = DEFAULT_LOCAL_EMBEDDING_MODEL; let semanticFallback = false; for (const collectionId of targetCollections) { const model = this.loadModel(collectionId); if (!model || model.chunkCount === 0) { continue; } const rows = this.loadSearchRows(collectionId, videoFilter); if (rows.length === 0) { continue; } totalChunksSearched += rows.length; embeddingModelLabel = humanizeAlgorithm(model.algorithm); const rankedResult = await rankCollection(rows, model, input.query); const ranked = rankedResult.rows; semanticFallback ||= rankedResult.semanticFallback; const byVideo = groupChunkContexts(rows); for (const row of ranked) { if (row.score < minScore) { continue; } const context = byVideo.get(row.videoId); const previous = context?.get(row.ordinal - 1); const next = context?.get(row.ordinal + 1); results.push({ collectionId, videoId: row.videoId, videoTitle: row.videoTitle, channelTitle: row.channelTitle, chunkText: row.text, tStartSec: row.tStartSec, tEndSec: row.tEndSec, timestampUrl: buildTimestampUrl(row.videoId, row.tStartSec), score: round(row.score, 4), lexicalScore: round(row.lexicalScore, 4), semanticScore: row.semanticScore !== undefined ? 
round(row.semanticScore, 4) : undefined, context: { prevChunkText: previous?.text, nextChunkText: next?.text, }, }); } } const deduped = results .sort((a, b) => b.score - a.score || a.videoTitle.localeCompare(b.videoTitle)) .slice(0, maxResults); return { query: input.query, results: deduped, searchMeta: { totalChunksSearched, embeddingModel: semanticFallback ? `${embeddingModelLabel} (lexical fallback for this query)` : embeddingModelLabel, searchLatencyMs: Date.now() - startedAt, scope, }, provenance: localProvenance(), }; } - src/server/mcp-server.ts:976-984 (registration)The registration in the server's executeTool switch statement. It passes the arguments to the `service.searchTranscripts` method.
case "searchTranscripts": return service.searchTranscripts({ query: readString(args, "query"), collectionId: optionalString(args, "collectionId"), maxResults: optionalNumber(args, "maxResults"), minScore: optionalNumber(args, "minScore"), videoIdFilter: optionalStringArray(args, "videoIdFilter"), useActiveCollection: optionalBoolean(args, "useActiveCollection"), });