check_progress
Check the status of a background transcription job. Returns progress, elapsed time, and the transcript when the job completes, or error details if it fails.
Instructions
Check the status of a background transcription job started with transcribe_audio (background=true). Returns current progress, elapsed time, last processed timestamp, and the transcript when complete. Call this repeatedly until the job shows as complete or failed.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| job_id | Yes | Job ID returned by transcribe_audio when background=true. | — |
Implementation Reference
- src/index.ts:1630-1639 (handler) — Handler function for the check_progress tool. Extracts job_id from arguments and delegates to readJobProgress() to get the status of a background transcription job.
// check_progress dispatch branch: requires a job_id argument; delegates to readJobProgress()
// and wraps the result in an MCP text response, returning isError=true on missing id or failure.
if (name === "check_progress") { const jobId = args?.job_id as string; if (!jobId) return { content: [{ type: "text", text: "job_id is required." }], isError: true }; try { const result = await readJobProgress(jobId); return { content: [{ type: "text", text: result }] }; } catch (err: any) { return { content: [{ type: "text", text: `Error reading job progress:\n\n${err?.message || String(err)}` }], isError: true }; } } - src/index.ts:931-943 (schema)Input schema definition for check_progress tool. Declares the tool name, description, and input schema requiring a single 'job_id' string parameter.
// Tool descriptor entry for check_progress: name, user-facing description, and a JSON
// input schema declaring the single required string parameter 'job_id'.
name: "check_progress", description: "Check the status of a background transcription job started with transcribe_audio (background=true). " + "Returns current progress, elapsed time, last processed timestamp, and the transcript when complete. " + "Call this repeatedly until the job shows as complete or failed.", inputSchema: { type: "object", properties: { job_id: { type: "string", description: "Job ID returned by transcribe_audio when background=true." }, }, required: ["job_id"], }, }, - src/index.ts:886-1127 (registration)Tool registration via ListToolsRequestSchema handler. The check_progress tool is registered alongside all other tools in the tools array.
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: [ { name: "transcribe_audio", description: "Transcribe a single audio or video file using whisper.cpp on Windows. " + "Natively supports mp3 and wav. Automatically converts mp4, mkv, avi, mov, " + "webm, m4a, flac, ogg etc. via FFmpeg — no manual conversion needed. " + "Can output plain text, timestamps, JSON, or SRT subtitle files. " + "For files that may take more than 4 minutes, set background=true to run as a detached job " + "and use check_progress to monitor it.", inputSchema: { type: "object", properties: { file_path: { type: "string", description: "Absolute Windows path, e.g. C:\\Users\\You\\Downloads\\recording.mp4" }, model: { type: "string", description: "Override model path. Leave blank to use active model." }, language: { type: "string", description: "Language code (e.g. en, ja, es, fr) or 'auto' to detect automatically. Defaults to en.", default: "en" }, output_format: { type: "string", enum: ["text", "timestamps", "json", "srt"], description: "text = plain (default), timestamps = with time codes, json = structured, srt = subtitle file saved next to source.", default: "text", }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, save_to_file: { type: "boolean", description: "Save transcript as .txt next to the source file.", default: false }, background: { type: "boolean", description: "Run as a detached background job. Returns a job ID immediately. Use check_progress to monitor. Recommended for files over 10 minutes.", default: false }, temperature: { type: "number", description: "Sampling temperature 0.0–1.0. Default 0.0 (deterministic). Higher values reduce hallucination on noisy audio at the cost of consistency." }, prompt: { type: "string", description: "Prior context string injected before transcription. Improves accuracy for domain-specific vocabulary, speaker names, or technical terms. 
Example: 'Names: Keemstar, DramaAlert.'" }, condition_on_prev_text: { type: "boolean", description: "Re-enable conditioning each segment on its own prior output (removes --max-context 0 flag). Default false (off). Only enable for highly structured audio where context continuity helps.", default: false }, no_speech_thold: { type: "number", description: "Confidence threshold below which segments are treated as silence rather than transcribed. Default 0.6.", default: 0.6 }, beam_size: { type: "number", description: "Beam search width. Higher = more accurate but slower. Default 5." }, best_of: { type: "number", description: "Number of candidate sequences to evaluate. Default 5." }, gpu_device: { type: "number", description: "GPU device index for multi-GPU systems. Use check_system to see available GPUs. Default 0." }, processors: { type: "number", description: "Number of parallel processors for chunk processing. Default 1." }, word_timestamps: { type: "boolean", description: "Output one word per timestamped segment (sets --max-len 1 --split-on-word). Useful for clip alignment and precise timecode search.", default: false }, max_segment_length: { type: "number", description: "Maximum segment length in characters. Controls line break frequency in output. Ignored when word_timestamps=true." }, split_on_word: { type: "boolean", description: "Split segments at word boundaries rather than mid-word. Defaults to false.", default: false }, diarize: { type: "boolean", description: "Stereo speaker diarization — labels left/right channel speakers in transcript. Requires stereo audio with speakers on separate channels.", default: false }, vad_model: { type: "string", description: "Absolute path to a Silero VAD model .bin file. When provided, voice activity detection strips silence before transcription — reduces hallucinations and speeds up processing. Download via download_model." }, offset_t: { type: "number", description: "Start transcription at this offset in milliseconds. 
Use to process a specific section of a file." }, duration: { type: "number", description: "Process only this many milliseconds of audio starting from offset_t (or the beginning). Use with offset_t to target a specific time window." }, }, required: ["file_path"], }, }, { name: "check_progress", description: "Check the status of a background transcription job started with transcribe_audio (background=true). " + "Returns current progress, elapsed time, last processed timestamp, and the transcript when complete. " + "Call this repeatedly until the job shows as complete or failed.", inputSchema: { type: "object", properties: { job_id: { type: "string", description: "Job ID returned by transcribe_audio when background=true." }, }, required: ["job_id"], }, }, { name: "transcribe_batch", description: "Transcribe multiple audio/video files in a folder interactively, one file at a time. " + "Shows a preview of each transcript and waits for confirmation before continuing. " + "Saves each transcript as a .txt file next to its source. " + "Files already transcribed (with matching .txt) are shown as done and skipped. " + "Supported formats: mp3, wav, mp4, mkv, avi, mov, webm, m4a, flac, ogg. " + "NOTE: For large unattended batch jobs, use whisper-cli.exe directly from the command line " + "— see TROUBLESHOOTING.md for the command syntax.", inputSchema: { type: "object", properties: { folder_path: { type: "string", description: "Absolute Windows path to the folder." }, file_index: { type: "number", description: "Which file to process (1-based). Omit to list files first.", }, language: { type: "string", description: "Language code. Defaults to en.", default: "en" }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, recursive: { type: "boolean", description: "Include subfolders. 
Defaults to false.", default: false }, }, required: ["folder_path"], }, }, { name: "generate_subtitles", description: "Generate subtitle files for an audio or video file using whisper.cpp. " + "Set language='auto' to detect the spoken language automatically. " + "Set translate_to_english=true to also generate an English translation subtitle file. " + "When both are requested, two .srt files are saved: one in the original language (e.g. film.ja.srt) " + "and one English translation (film.en.srt). " + "Load in VLC via Subtitle → Add Subtitle File. " + "Supports all standard formats plus .3gp and .ts.", inputSchema: { type: "object", properties: { file_path: { type: "string", description: "Absolute Windows path to the file." }, language: { type: "string", description: "Language code (e.g. ja, es, fr, de) or 'auto' to detect automatically. Defaults to en.", default: "en", }, translate_to_english: { type: "boolean", description: "Also generate an English translation .srt alongside the native language .srt. Only applies when language is not 'en'. Not available in background mode.", default: false, }, background: { type: "boolean", description: "Run as a detached background job — recommended for files over 10 minutes. Returns a job ID to use with check_progress. translate_to_english is not available in background mode.", default: false, }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, temperature: { type: "number", description: "Sampling temperature 0.0–1.0. Default 0.0." }, prompt: { type: "string", description: "Prior context string for domain-specific vocabulary or speaker names." }, beam_size: { type: "number", description: "Beam search width. Higher = more accurate, slower. Default 5." }, best_of: { type: "number", description: "Candidate sequences evaluated. Default 5." }, diarize: { type: "boolean", description: "Stereo speaker diarization. 
Requires stereo audio with speakers on separate channels.", default: false }, vad_model: { type: "string", description: "Path to Silero VAD model .bin. Strips silence before transcription. Download via download_model." }, }, required: ["file_path"], }, }, { name: "check_config", description: "Verify whisper-cli.exe, model, and FFmpeg are all available. Run this first if anything fails.", inputSchema: { type: "object", properties: {} }, }, { name: "start_batch", description: "Start an automated sequential batch transcription of all untranscribed files in a folder. " + "Scans for files without a matching .txt, sorts by duration (shortest first), " + "and processes them one at a time as background jobs. " + "Each file is validated after completion — empty or suspiciously short outputs are flagged. " + "Returns a batch ID to use with check_batch_progress.", inputSchema: { type: "object", properties: { folder_path: { type: "string", description: "Absolute Windows path to the folder." }, language: { type: "string", description: "Language code. Defaults to en.", default: "en" }, threads: { type: "number", description: `CPU threads. Defaults to ${WHISPER_THREADS} of ${SYSTEM_THREADS}.` }, }, required: ["folder_path"], }, }, { name: "check_batch_progress", description: "Check the status of a batch started with start_batch. " + "Automatically advances to the next file when the current one finishes. " + "Returns overall progress, current file, failed files, and elapsed time. " + "Call repeatedly until the batch shows as complete.", inputSchema: { type: "object", properties: { batch_id: { type: "string", description: "Batch ID returned by start_batch." }, }, required: ["batch_id"], }, }, { name: "analyze_media", description: "Analyze one or more media files using FFprobe before transcribing. " + "For a single file: returns duration, size, codec, and estimated transcription time on CPU and GPU. 
" + "For a folder: scans all supported media files and returns a sorted table with the same info for each. " + "Use this to plan batch work, estimate how long transcription will take, or check what's already been transcribed.", inputSchema: { type: "object", properties: { path: { type: "string", description: "Absolute Windows path to a single file or a folder.", }, sort_by: { type: "string", enum: ["duration", "name", "size"], description: "For folder scans: sort order. Defaults to duration (shortest first).", default: "duration", }, }, required: ["path"], }, }, { name: "check_system", description: "Detect GPU hardware and verify Vulkan acceleration is available. " + "Reports GPU name, VRAM, whether the Vulkan binary is installed, " + "and recommends the best Whisper model for your hardware. " + "Run this if you want to confirm GPU acceleration is working or diagnose why it isn't.", inputSchema: { type: "object", properties: {} }, }, { name: "list_models", description: "List all Whisper model files installed in your models directory. " + "Shows filename, size, whether it is currently active, quantization status, " + "and recommended use case for each model. " + "No network calls — reads local filesystem only.", inputSchema: { type: "object", properties: {} }, }, { name: "download_model", description: "Download a Whisper model from Hugging Face directly into your models directory. " + "Accepts a model name (e.g. large-v3-turbo, medium.en-q5_0) and handles the download automatically. " + "Downloads only from trusted Hugging Face namespaces (ggerganov/whisper.cpp and ggml-org). " + "After downloading, use switch_model to activate it for the current session.", inputSchema: { type: "object", properties: { model_name: { type: "string", description: "Model name to download, e.g. 'large-v3-turbo', 'medium.en-q5_0', 'large-v3-turbo-q5_0'. 
Use list_models to see what is already installed.", }, }, required: ["model_name"], }, }, { name: "switch_model", description: "Switch the active Whisper model for the current session without restarting Claude Desktop. " + "Accepts a model filename (e.g. ggml-large-v3-turbo.bin) or full path. " + "The model must already be installed in your models directory. " + "Use list_models to see installed models, download_model to add new ones. " + "Change is session-scoped — does not persist after Claude Desktop restarts.", inputSchema: { type: "object", properties: { model_name: { type: "string", description: "Model filename (e.g. ggml-large-v3-turbo.bin) or full path. Must be a .bin file in the configured models directory.", }, }, required: ["model_name"], }, }, ], })); - src/index.ts:261-337 (helper)Helper function readJobProgress() that implements the core logic for check_progress. Reads the job JSON file, parses the whisper log to extract the last timestamp, checks if the process is still running via tasklist, and returns status (complete/failed/in-progress) with appropriate details.
async function readJobProgress(jobId: string): Promise<string> { const jobPath = join(JOBS_DIR, `${jobId}.json`); if (!existsSync(jobPath)) { return `❌ Job not found: ${jobId}\n\nThe job file may have been deleted or the ID is incorrect.`; } const job: Job = JSON.parse(readFileSync(jobPath, "utf8")); // Read log let logContent = ""; if (existsSync(job.logPath)) { logContent = readFileSync(job.logPath, "utf8"); } const lastSec = parseLastTimestamp(logContent); const isRunning = await isPidRunning(job.pid); const ext = job.outputFormat === "srt" ? ".srt" : ".txt"; const tmpOutput = `${job.tmpOutputBase}${ext}`; const outputExists = existsSync(job.outputPath) || existsSync(tmpOutput); // Completed if (!isRunning && outputExists) { // Move temp output file to correct destination if needed const ext = job.outputFormat === "srt" ? ".srt" : ".txt"; const tmpOutput = `${job.tmpOutputBase}${ext}`; if (existsSync(tmpOutput) && tmpOutput !== job.outputPath) { try { writeFileSync(job.outputPath, readFileSync(tmpOutput, "utf8"), "utf8"); unlinkSync(tmpOutput); } catch { } } job.status = "complete"; writeFileSync(job.jobPath, JSON.stringify(job, null, 2), "utf8"); // Clean up tmp wav if present if (job.isTmp && existsSync(job.transcribeFrom)) { try { unlinkSync(job.transcribeFrom); } catch { } } const outputContent = readFileSync(job.outputPath, "utf8").trim(); const preview = job.outputFormat === "srt" ? outputContent.split("\n").slice(0, 20).join("\n") : outputContent.slice(0, 600); return ( `✅ Complete!\n\n` + `Source: ${basename(job.sourceFile)}\n` + `Output: ${job.outputPath}\n\n` + `Preview:\n${preview}${outputContent.length > 600 && job.outputFormat !== "srt" ? "..." 
: ""}` ); } // Failed if (!isRunning && !outputExists) { job.status = "failed"; writeFileSync(job.jobPath, JSON.stringify(job, null, 2), "utf8"); const lastLines = logContent.split(/\r?\n/).filter(l => l.trim()).slice(-5).join("\n"); return ( `❌ Failed or cancelled.\n\n` + `Source: ${basename(job.sourceFile)}\n` + `No output found at: ${job.outputPath}\n\n` + `Last log output:\n${lastLines || "(empty)"}` ); } // Still running const elapsed = Math.round((Date.now() - new Date(job.startTime).getTime()) / 1000); const progressLine = lastSec > 0 ? `Last segment: ${formatDuration(lastSec)}` : "Starting up..."; return ( `⏳ In progress...\n\n` + `Source: ${basename(job.sourceFile)}\n` + `Job ID: ${jobId}\n` + `Elapsed: ${formatDuration(elapsed)}\n` + `${progressLine}\n\n` + `Call check_progress with this job ID to get an update.` ); }