start_batch
Automatically transcribe all untranscribed audio/video files in a folder. Processes shortest files first, validates outputs, and returns a batch ID for progress tracking.
Instructions
Start an automated sequential batch transcription of all untranscribed files in a folder. Scans for files without a matching .txt, sorts by duration (shortest first), and processes them one at a time as background jobs. Each file is validated after completion — empty or suspiciously short outputs are flagged. Returns a batch ID to use with check_batch_progress.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| folder_path | Yes | Absolute Windows path to the folder. | |
| language | No | Language code. Defaults to en. | en |
| threads | No | CPU threads to use. Defaults to 2 of the 2 available system threads. | 2 |
Implementation Reference
- src/index.ts:1644-1717 (handler) — The start_batch handler function in CallToolRequestSchema that scans a folder for untranscribed files, probes their durations, creates a batch state, spawns the first background job, and returns a batch ID to use with check_batch_progress.
// start_batch tool handler: validates inputs, scans the folder for files with no
// sibling .txt transcript, sorts them shortest-first, persists a batch state file,
// and spawns the first background job. Returns the batch ID for check_batch_progress.
if (name === "start_batch") {
  const folderPath = args?.folder_path as string;
  const language = (args?.language as string) || "en";
  // Clamp the requested thread count to [1, SYSTEM_THREADS]; fall back to the default.
  const threads = Math.min(SYSTEM_THREADS, Math.max(1, Math.round((args?.threads as number) || WHISPER_THREADS)));
  if (!folderPath) return { content: [{ type: "text", text: "folder_path is required." }], isError: true };
  const pathError = validateInputPath(folderPath);
  if (pathError) return { content: [{ type: "text", text: pathError }], isError: true };
  if (!existsSync(folderPath)) return { content: [{ type: "text", text: `Folder not found: ${folderPath}` }], isError: true };
  const configError = validatePaths();
  if (configError) return { content: [{ type: "text", text: configError }], isError: true };
  // Refuse to start while another whisper process is active — batches run one job at a time.
  if (await isWhisperRunning()) {
    return { content: [{ type: "text", text: "A transcription is already running. Wait for it to finish before starting a batch." }], isError: true };
  }
  // Scan for untranscribed files.
  // A file counts as transcribed when a sibling .txt with the same basename exists.
  const allFiles = getFiles(folderPath, false);
  const untranscribed = allFiles.filter(f => !existsSync(f.replace(/\.[^.]+$/, ".txt")));
  if (untranscribed.length === 0) {
    return { content: [{ type: "text", text: `✅ All files in ${folderPath} are already transcribed. Nothing to do.` }] };
  }
  // Probe durations for sorting (files that fail to probe get 0 and sort first).
  const batchFiles: BatchFile[] = [];
  for (const f of untranscribed) {
    const info = await probeFile(f);
    batchFiles.push({ filePath: f, fileName: basename(f), durationSec: info?.durationSec ?? 0, status: "pending", });
  }
  // Shortest files first.
  batchFiles.sort((a, b) => a.durationSec - b.durationSec);
  ensureJobsDir();
  const batchId = `batch_${Date.now()}`;
  const batchPath = join(JOBS_DIR, `${batchId}.batch.json`);
  // Persist batch state to disk so check_batch_progress can pick it up later.
  const state: BatchState = { batchId, batchPath, folder: folderPath, startTime: new Date().toISOString(), files: batchFiles, currentIndex: 0, status: "running", model: WHISPER_MODEL, language, threads, };
  writeFileSync(batchPath, JSON.stringify(state, null, 2), "utf8");
  // Kick off the first background job immediately.
  await spawnNextBatchJob(state);
  const totalDuration = batchFiles.reduce((acc, f) => acc + f.durationSec, 0);
  return {
    content: [{
      type: "text",
      text: `⏳ Batch started!\n\n` +
        `Batch ID: ${batchId}\n` +
        `Folder: ${folderPath}\n` +
        `Files to process: ${batchFiles.length}\n` +
        `Total audio: ${formatDuration(totalDuration)}\n` +
        `Est. GPU time: ${estimateTime(totalDuration, hasVulkanDll())}\n\n` +
        `First file: ${batchFiles[0].fileName}\n\n` +
        `Call check_batch_progress with batch_id="${batchId}" to monitor.`,
    }],
  };
}
- src/index.ts:1009-1031 (schema)Input schema registration for the start_batch tool in ListToolsRequestSchema, defining folder_path (required), language (default 'en'), and threads parameters.
// Tool-list entries (excerpt): the check_config and start_batch objects as they
// appear in the ListToolsRequestSchema handler's tools array.
{
  name: "check_config",
  description: "Verify whisper-cli.exe, model, and FFmpeg are all available. Run this first if anything fails.",
  inputSchema: { type: "object", properties: {} },
},
{
  name: "start_batch",
  description:
    "Start an automated sequential batch transcription of all untranscribed files in a folder. " +
    "Scans for files without a matching .txt, sorts by duration (shortest first), " +
    "and processes them one at a time as background jobs. " +
    "Each file is validated after completion — empty or suspiciously short outputs are flagged. " +
    "Returns a batch ID to use with check_batch_progress.",
  inputSchema: {
    type: "object",
    properties: {
      folder_path: { type: "string", description: "Absolute Windows path to the folder." },
      language: { type: "string", description: "Language code. Defaults to en.", default: "en" },
      // The advertised defaults are interpolated from the host's detected thread counts.
      threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` },
    },
    required: ["folder_path"],
  },
},
- src/index.ts:886-1127 (registration)Registration of all tools including start_batch within the ListToolsRequestSchema handler. The tool list includes start_batch at lines 1015-1031.
// ListToolsRequestSchema handler — registers every tool this MCP server exposes:
// transcribe_audio, check_progress, transcribe_batch, generate_subtitles,
// check_config, start_batch, check_batch_progress, analyze_media, check_system,
// list_models, download_model, switch_model. The start_batch entry is the one
// relevant to this page; the rest are shown for context.
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: "transcribe_audio", description: "Transcribe a single audio or video file using whisper.cpp on Windows. " + "Natively supports mp3 and wav. Automatically converts mp4, mkv, avi, mov, " + "webm, m4a, flac, ogg etc. via FFmpeg — no manual conversion needed. " + "Can output plain text, timestamps, JSON, or SRT subtitle files. " + "For files that may take more than 4 minutes, set background=true to run as a detached job " + "and use check_progress to monitor it.", inputSchema: { type: "object", properties: { file_path: { type: "string", description: "Absolute Windows path, e.g. C:\\Users\\You\\Downloads\\recording.mp4" }, model: { type: "string", description: "Override model path. Leave blank to use active model." }, language: { type: "string", description: "Language code (e.g. en, ja, es, fr) or 'auto' to detect automatically. Defaults to en.", default: "en" }, output_format: { type: "string", enum: ["text", "timestamps", "json", "srt"], description: "text = plain (default), timestamps = with time codes, json = structured, srt = subtitle file saved next to source.", default: "text", }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, save_to_file: { type: "boolean", description: "Save transcript as .txt next to the source file.", default: false }, background: { type: "boolean", description: "Run as a detached background job. Returns a job ID immediately. Use check_progress to monitor. Recommended for files over 10 minutes.", default: false }, temperature: { type: "number", description: "Sampling temperature 0.0–1.0. Default 0.0 (deterministic). Higher values reduce hallucination on noisy audio at the cost of consistency." }, prompt: { type: "string", description: "Prior context string injected before transcription. Improves accuracy for domain-specific vocabulary, speaker names, or technical terms. 
Example: 'Names: Keemstar, DramaAlert.'" }, condition_on_prev_text: { type: "boolean", description: "Re-enable conditioning each segment on its own prior output (removes --max-context 0 flag). Default false (off). Only enable for highly structured audio where context continuity helps.", default: false }, no_speech_thold: { type: "number", description: "Confidence threshold below which segments are treated as silence rather than transcribed. Default 0.6.", default: 0.6 }, beam_size: { type: "number", description: "Beam search width. Higher = more accurate but slower. Default 5." }, best_of: { type: "number", description: "Number of candidate sequences to evaluate. Default 5." }, gpu_device: { type: "number", description: "GPU device index for multi-GPU systems. Use check_system to see available GPUs. Default 0." }, processors: { type: "number", description: "Number of parallel processors for chunk processing. Default 1." }, word_timestamps: { type: "boolean", description: "Output one word per timestamped segment (sets --max-len 1 --split-on-word). Useful for clip alignment and precise timecode search.", default: false }, max_segment_length: { type: "number", description: "Maximum segment length in characters. Controls line break frequency in output. Ignored when word_timestamps=true." }, split_on_word: { type: "boolean", description: "Split segments at word boundaries rather than mid-word. Defaults to false.", default: false }, diarize: { type: "boolean", description: "Stereo speaker diarization — labels left/right channel speakers in transcript. Requires stereo audio with speakers on separate channels.", default: false }, vad_model: { type: "string", description: "Absolute path to a Silero VAD model .bin file. When provided, voice activity detection strips silence before transcription — reduces hallucinations and speeds up processing. Download via download_model." }, offset_t: { type: "number", description: "Start transcription at this offset in milliseconds. 
Use to process a specific section of a file." }, duration: { type: "number", description: "Process only this many milliseconds of audio starting from offset_t (or the beginning). Use with offset_t to target a specific time window." }, }, required: ["file_path"], }, }, { name: "check_progress", description: "Check the status of a background transcription job started with transcribe_audio (background=true). " + "Returns current progress, elapsed time, last processed timestamp, and the transcript when complete. " + "Call this repeatedly until the job shows as complete or failed.", inputSchema: { type: "object", properties: { job_id: { type: "string", description: "Job ID returned by transcribe_audio when background=true." }, }, required: ["job_id"], }, }, { name: "transcribe_batch", description: "Transcribe multiple audio/video files in a folder interactively, one file at a time. " + "Shows a preview of each transcript and waits for confirmation before continuing. " + "Saves each transcript as a .txt file next to its source. " + "Files already transcribed (with matching .txt) are shown as done and skipped. " + "Supported formats: mp3, wav, mp4, mkv, avi, mov, webm, m4a, flac, ogg. " + "NOTE: For large unattended batch jobs, use whisper-cli.exe directly from the command line " + "— see TROUBLESHOOTING.md for the command syntax.", inputSchema: { type: "object", properties: { folder_path: { type: "string", description: "Absolute Windows path to the folder." }, file_index: { type: "number", description: "Which file to process (1-based). Omit to list files first.", }, language: { type: "string", description: "Language code. Defaults to en.", default: "en" }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, recursive: { type: "boolean", description: "Include subfolders. 
Defaults to false.", default: false }, }, required: ["folder_path"], }, }, { name: "generate_subtitles", description: "Generate subtitle files for an audio or video file using whisper.cpp. " + "Set language='auto' to detect the spoken language automatically. " + "Set translate_to_english=true to also generate an English translation subtitle file. " + "When both are requested, two .srt files are saved: one in the original language (e.g. film.ja.srt) " + "and one English translation (film.en.srt). " + "Load in VLC via Subtitle → Add Subtitle File. " + "Supports all standard formats plus .3gp and .ts.", inputSchema: { type: "object", properties: { file_path: { type: "string", description: "Absolute Windows path to the file." }, language: { type: "string", description: "Language code (e.g. ja, es, fr, de) or 'auto' to detect automatically. Defaults to en.", default: "en", }, translate_to_english: { type: "boolean", description: "Also generate an English translation .srt alongside the native language .srt. Only applies when language is not 'en'. Not available in background mode.", default: false, }, background: { type: "boolean", description: "Run as a detached background job — recommended for files over 10 minutes. Returns a job ID to use with check_progress. translate_to_english is not available in background mode.", default: false, }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, temperature: { type: "number", description: "Sampling temperature 0.0–1.0. Default 0.0." }, prompt: { type: "string", description: "Prior context string for domain-specific vocabulary or speaker names." }, beam_size: { type: "number", description: "Beam search width. Higher = more accurate, slower. Default 5." }, best_of: { type: "number", description: "Candidate sequences evaluated. Default 5." }, diarize: { type: "boolean", description: "Stereo speaker diarization. 
Requires stereo audio with speakers on separate channels.", default: false }, vad_model: { type: "string", description: "Path to Silero VAD model .bin. Strips silence before transcription. Download via download_model." }, }, required: ["file_path"], }, }, { name: "check_config", description: "Verify whisper-cli.exe, model, and FFmpeg are all available. Run this first if anything fails.", inputSchema: { type: "object", properties: {} }, }, { name: "start_batch", description: "Start an automated sequential batch transcription of all untranscribed files in a folder. " + "Scans for files without a matching .txt, sorts by duration (shortest first), " + "and processes them one at a time as background jobs. " + "Each file is validated after completion — empty or suspiciously short outputs are flagged. " + "Returns a batch ID to use with check_batch_progress.", inputSchema: { type: "object", properties: { folder_path: { type: "string", description: "Absolute Windows path to the folder." }, language: { type: "string", description: "Language code. Defaults to en.", default: "en" }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, }, required: ["folder_path"], }, }, { name: "check_batch_progress", description: "Check the status of a batch started with start_batch. " + "Automatically advances to the next file when the current one finishes. " + "Returns overall progress, current file, failed files, and elapsed time. " + "Call repeatedly until the batch shows as complete.", inputSchema: { type: "object", properties: { batch_id: { type: "string", description: "Batch ID returned by start_batch." }, }, required: ["batch_id"], }, }, { name: "analyze_media", description: "Analyze one or more media files using FFprobe before transcribing. " + "For a single file: returns duration, size, codec, and estimated transcription time on CPU and GPU. 
" + "For a folder: scans all supported media files and returns a sorted table with the same info for each. " + "Use this to plan batch work, estimate how long transcription will take, or check what's already been transcribed.", inputSchema: { type: "object", properties: { path: { type: "string", description: "Absolute Windows path to a single file or a folder.", }, sort_by: { type: "string", enum: ["duration", "name", "size"], description: "For folder scans: sort order. Defaults to duration (shortest first).", default: "duration", }, }, required: ["path"], }, }, { name: "check_system", description: "Detect GPU hardware and verify Vulkan acceleration is available. " + "Reports GPU name, VRAM, whether the Vulkan binary is installed, " + "and recommends the best Whisper model for your hardware. " + "Run this if you want to confirm GPU acceleration is working or diagnose why it isn't.", inputSchema: { type: "object", properties: {} }, }, { name: "list_models", description: "List all Whisper model files installed in your models directory. " + "Shows filename, size, whether it is currently active, quantization status, " + "and recommended use case for each model. " + "No network calls — reads local filesystem only.", inputSchema: { type: "object", properties: {} }, }, { name: "download_model", description: "Download a Whisper model from Hugging Face directly into your models directory. " + "Accepts a model name (e.g. large-v3-turbo, medium.en-q5_0) and handles the download automatically. " + "Downloads only from trusted Hugging Face namespaces (ggerganov/whisper.cpp and ggml-org). " + "After downloading, use switch_model to activate it for the current session.", inputSchema: { type: "object", properties: { model_name: { type: "string", description: "Model name to download, e.g. 'large-v3-turbo', 'medium.en-q5_0', 'large-v3-turbo-q5_0'. 
Use list_models to see what is already installed.", }, }, required: ["model_name"], }, }, { name: "switch_model", description: "Switch the active Whisper model for the current session without restarting Claude Desktop. " + "Accepts a model filename (e.g. ggml-large-v3-turbo.bin) or full path. " + "The model must already be installed in your models directory. " + "Use list_models to see installed models, download_model to add new ones. " + "Change is session-scoped — does not persist after Claude Desktop restarts.", inputSchema: { type: "object", properties: { model_name: { type: "string", description: "Model filename (e.g. ggml-large-v3-turbo.bin) or full path. Must be a .bin file in the configured models directory.", }, }, required: ["model_name"], }, }, ], })); - src/index.ts:376-391 (helper)The spawnNextBatchJob helper function called by the start_batch handler and readBatchProgress to advance the batch to the next pending file.
// Advance the batch to the next pending file: mark it running, spawn a detached
// transcription job for it, record the job ID, and persist the batch state.
// If no pending file remains, mark the whole batch complete instead.
async function spawnNextBatchJob(state: BatchState): Promise<void> {
  // Walk forward from currentIndex to the first file still pending.
  for (let i = state.currentIndex; i < state.files.length; i++) {
    if (state.files[i].status === "pending") {
      state.currentIndex = i;
      state.files[i].status = "running";
      const f = state.files[i];
      // Launch a detached whisper job for this file and remember its job ID.
      const { jobId } = await spawnDetached(f.filePath, state.model, state.language, state.threads);
      state.files[i].jobId = jobId;
      // Persist the updated state so progress checks see the running job.
      writeFileSync(state.batchPath, JSON.stringify(state, null, 2), "utf8");
      return;
    }
  }
  // Nothing left to run
  state.status = "complete";
  writeFileSync(state.batchPath, JSON.stringify(state, null, 2), "utf8");
}
- src/index.ts:393-499 (helper)The readBatchProgress helper that checks the status of a running batch, validates completed jobs, and advances to the next file.
// Report the status of a batch: if the current job's process has exited, finalize
// its output, validate the transcript, advance to the next pending file, and
// return a human-readable progress report.
async function readBatchProgress(batchId: string): Promise<string> {
  const batchPath = join(JOBS_DIR, `${batchId}.batch.json`);
  if (!existsSync(batchPath)) {
    return `❌ Batch not found: ${batchId}\n\nThe batch file may have been deleted or the ID is incorrect.`;
  }
  const state: BatchState = JSON.parse(readFileSync(batchPath, "utf8"));
  // Check current running job
  const running = state.files.find(f => f.status === "running");
  if (running && running.jobId) {
    const jobPath = join(JOBS_DIR, `${running.jobId}.json`);
    if (existsSync(jobPath)) {
      const job = JSON.parse(readFileSync(jobPath, "utf8"));
      const isRunning = await isPidRunning(job.pid);
      // NOTE(review): outputExists is unused below (finalOutputExists is recomputed
      // after the temp-file move) — looks vestigial; confirm against src/index.ts.
      const outputExists = existsSync(job.outputPath);
      if (!isRunning) {
        // Move temp output to final destination if needed.
        // spawnDetached writes to a sanitized JOBS_DIR temp path to avoid Unicode
        // filename issues. readJobProgress normally handles this move, but
        // readBatchProgress must do it too since it never calls readJobProgress.
        const ext = job.outputFormat === "srt" ? ".srt" : ".txt";
        const tmpOutput = `${job.tmpOutputBase}${ext}`;
        if (existsSync(tmpOutput) && tmpOutput !== job.outputPath) {
          try {
            writeFileSync(job.outputPath, readFileSync(tmpOutput, "utf8"), "utf8");
            unlinkSync(tmpOutput);
          } catch { /* ignore — validateTranscript will catch missing output */ }
        }
        // Clean up temp WAV if present
        if (job.isTmp && existsSync(job.transcribeFrom)) {
          try { unlinkSync(job.transcribeFrom); } catch { }
        }
        // Job finished — validate and advance
        const finalOutputExists = existsSync(job.outputPath);
        const validation = validateTranscript(job.outputPath, running.durationSec);
        if (finalOutputExists && validation.valid) {
          running.status = "complete";
        } else {
          running.status = "failed";
          running.failReason = validation.reason ?? "no output file";
        }
        // Advance to next
        state.currentIndex = state.files.indexOf(running) + 1;
        if (state.files.some(f => f.status === "pending")) {
          // spawnNextBatchJob persists the state file itself.
          await spawnNextBatchJob(state);
        } else {
          state.status = "complete";
          writeFileSync(batchPath, JSON.stringify(state, null, 2), "utf8");
        }
      } else {
        // Still running — update state file without advancing
        writeFileSync(batchPath, JSON.stringify(state, null, 2), "utf8");
      }
    }
  } else if (state.status !== "complete" && state.files.some(f => f.status === "pending")) {
    // No running job but pending files exist — advance
    await spawnNextBatchJob(state);
  }
  // Build status report
  const done = state.files.filter(f => f.status === "complete").length;
  const failed = state.files.filter(f => f.status === "failed");
  const pending = state.files.filter(f => f.status === "pending").length;
  const currentRunning = state.files.find(f => f.status === "running");
  const total = state.files.length;
  const elapsed = Math.round((Date.now() - new Date(state.startTime).getTime()) / 1000);
  let report = `Batch: ${batchId}\n`;
  report += `Folder: ${state.folder}\n`;
  report += `${"─".repeat(50)}\n`;
  report += `Progress: ${done}/${total} complete`;
  if (failed.length > 0) report += ` | ${failed.length} failed`;
  if (pending > 0) report += ` | ${pending} remaining`;
  report += `\nElapsed: ${formatDuration(elapsed)}\n`;
  if (currentRunning) {
    report += `\nCurrently processing: ${currentRunning.fileName}`;
    if (currentRunning.jobId) {
      const jobPath = join(JOBS_DIR, `${currentRunning.jobId}.json`);
      if (existsSync(jobPath)) {
        // Parse the job's log for the last emitted timestamp to show in-file progress.
        const job = JSON.parse(readFileSync(jobPath, "utf8"));
        const logContent = existsSync(job.logPath) ? readFileSync(job.logPath, "utf8") : "";
        const lastSec = parseLastTimestamp(logContent);
        if (lastSec > 0) report += ` (${formatDuration(lastSec)} / ${formatDuration(currentRunning.durationSec)})`;
      }
    }
  }
  if (failed.length > 0) {
    report += `\n\n⚠️ Failed files:\n`;
    for (const f of failed) {
      report += ` ❌ ${f.fileName} — ${f.failReason ?? "unknown reason"}\n`;
    }
    report += `\nRe-run failed files with transcribe_audio individually.`;
  }
  if (state.status === "complete") {
    report = `✅ Batch complete!\n\n` + report;
  } else {
    report += `\n\nCall check_batch_progress again to update.`;
  }
  return report;
}