/**
 * Transcript Acquisition System
 * Multi-strategy approach that maximizes the chance of extracting a transcript
 * from a video, falling back through caption APIs, subtitle downloads, and
 * audio transcription before giving up.
 */
import { spawn } from 'child_process';
import { YoutubeTranscript } from 'youtube-transcript';
import { createLearnLogger } from './utils/custom-logger.js';
export class TranscriptAcquisition {
  /**
   * @param {object} [options]
   * @param {string[]} [options.strategies] - Strategy names to try, in order.
   * @param {number} [options.maxRetries=3] - Reserved for per-strategy retries (not used yet).
   * @param {number} [options.timeoutMs=30000] - Per-subprocess timeout in milliseconds.
   * @param {boolean} [options.fallbackToAudio=true] - Attempt raw-audio transcription
   *   when every configured strategy fails.
   */
  constructor(options = {}) {
    this.logger = createLearnLogger('TranscriptAcquisition');

    // Configuration. `??` (not `||`) so an explicit 0 is honored for numeric options.
    this.strategies = options.strategies || [
      'youtube_transcript_api',
      'yt_dlp_auto_subs',
      'yt_dlp_manual_subs',
      'whisper_ai_transcription',
      'assembly_ai_fallback'
    ];
    this.maxRetries = options.maxRetries ?? 3;
    this.timeoutMs = options.timeoutMs ?? 30000;
    this.fallbackToAudio = options.fallbackToAudio !== false; // Default true
  }

  /**
   * Acquire a transcript by trying each configured strategy in order.
   *
   * @param {string} videoUrl - Full URL of the video.
   * @param {string} videoId - Platform-specific video id.
   * @param {object} [metadata] - Optional hints (title, language, ...).
   * @returns {Promise<{transcript: Array, method: string, confidence: number,
   *   language: string, attempts: Array, fallbackUsed: boolean}>}
   * @throws {Error} When every strategy (and the audio fallback) fails.
   */
  async getTranscript(videoUrl, videoId, metadata = {}) {
    this.logger.info('Starting transcript acquisition', {
      videoUrl,
      videoId,
      strategiesCount: this.strategies.length,
      title: metadata.title
    });

    const results = {
      transcript: null,
      method: null,
      confidence: 0,
      language: metadata.language || 'unknown',
      attempts: [],
      fallbackUsed: false
    };

    // Try each strategy in order until one yields a NON-EMPTY transcript.
    for (const strategy of this.strategies) {
      try {
        this.logger.info(`Attempting strategy: ${strategy}`, { videoId });
        const result = await this.executeStrategy(strategy, videoUrl, videoId, metadata);

        // Require a non-empty transcript: an empty array is truthy and would
        // otherwise slip through as a false success.
        if (result.success && result.transcript && result.transcript.length > 0) {
          results.transcript = result.transcript;
          results.method = strategy;
          results.confidence = result.confidence ?? 0.8;
          results.language = result.language || results.language;

          this.logger.info('Transcript acquisition successful', {
            method: strategy,
            transcriptLength: result.transcript.length,
            confidence: results.confidence
          });
          return results;
        }

        results.attempts.push({
          strategy,
          success: false,
          error: result.error || 'No transcript returned'
        });
      } catch (error) {
        this.logger.warn(`Strategy ${strategy} failed`, {
          error: error.message,
          videoId
        });
        results.attempts.push({
          strategy,
          success: false,
          error: error.message
        });
      }
    }

    // If all strategies failed, try audio transcription as a last resort.
    if (this.fallbackToAudio) {
      try {
        this.logger.info('All transcript strategies failed, attempting audio transcription', { videoId });
        const audioResult = await this.transcribeFromAudio(videoUrl, videoId, metadata);

        if (audioResult.success) {
          results.transcript = audioResult.transcript;
          results.method = 'audio_transcription';
          results.confidence = audioResult.confidence ?? 0.6;
          results.fallbackUsed = true;

          this.logger.info('Audio transcription successful', {
            transcriptLength: audioResult.transcript.length
          });
          return results;
        }
      } catch (error) {
        this.logger.error('Audio transcription fallback failed', {
          error: error.message,
          videoId
        });
      }
    }

    // Complete failure.
    this.logger.error('All transcript acquisition strategies failed', {
      videoId,
      attemptsCount: results.attempts.length
    });
    throw new Error(`Failed to acquire transcript after ${results.attempts.length} attempts`);
  }

  /**
   * Dispatch a single named acquisition strategy.
   * @throws {Error} For an unrecognized strategy name.
   */
  async executeStrategy(strategy, videoUrl, videoId, metadata) {
    switch (strategy) {
      case 'youtube_transcript_api':
        return await this.getYouTubeTranscriptAPI(videoId);
      case 'yt_dlp_auto_subs':
        return await this.getYtDlpAutoSubs(videoUrl);
      case 'yt_dlp_manual_subs':
        return await this.getYtDlpManualSubs(videoUrl);
      case 'whisper_ai_transcription':
        return await this.getWhisperTranscription(videoUrl, videoId);
      case 'assembly_ai_fallback':
        return await this.getAssemblyAITranscription(videoUrl, videoId);
      default:
        throw new Error(`Unknown strategy: ${strategy}`);
    }
  }

  /**
   * Strategy 1: YouTube Transcript API (youtube-transcript package).
   * @returns {Promise<object>} `{success, transcript?, confidence?, error?}`.
   */
  async getYouTubeTranscriptAPI(videoId) {
    try {
      const transcriptData = await YoutubeTranscript.fetchTranscript(videoId, {
        lang: 'en',
        country: 'US'
      });

      if (!transcriptData || transcriptData.length === 0) {
        return { success: false, error: 'No transcript data returned' };
      }

      // Convert to our standard {start, duration, text} format (seconds).
      const transcript = transcriptData.map(item => ({
        start: item.offset / 1000, // Convert ms to seconds
        duration: item.duration / 1000,
        text: item.text
      }));

      return {
        success: true,
        transcript,
        confidence: 0.9,
        language: 'en',
        format: 'timestamped'
      };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }

  /**
   * Strategy 2: yt-dlp with auto-generated subtitles.
   * Auto-generated captions are less accurate, hence the lower confidence.
   */
  getYtDlpAutoSubs(videoUrl) {
    return this.runYtDlpSubtitles(videoUrl, '--write-auto-sub', 0.7, 'yt-dlp auto-subs');
  }

  /**
   * Strategy 3: yt-dlp with manually-authored subtitles (highly accurate).
   */
  getYtDlpManualSubs(videoUrl) {
    return this.runYtDlpSubtitles(videoUrl, '--write-sub', 0.95, 'yt-dlp manual subs');
  }

  /**
   * Shared runner for the two yt-dlp subtitle strategies (previously duplicated).
   *
   * FIXME: `--print filename` emits the media filename, not VTT content, so
   * parsing stdout as VTT will usually produce zero cues. The empty-cue guard
   * below makes that case a clean failure (instead of a false success) until
   * the subtitle file itself is read from disk.
   *
   * @param {string} videoUrl - Video URL passed to yt-dlp.
   * @param {string} subFlag - '--write-auto-sub' or '--write-sub'.
   * @param {number} confidence - Confidence to report on success.
   * @param {string} label - Human-readable label for error messages.
   * @returns {Promise<object>} Always resolves; never rejects.
   */
  runYtDlpSubtitles(videoUrl, subFlag, confidence, label) {
    return new Promise((resolve) => {
      const args = [
        subFlag,
        '--sub-lang', 'en',
        '--sub-format', 'vtt',
        '--skip-download',
        '--print', 'filename',
        videoUrl
      ];

      // Note: not named `process` — that would shadow the Node global.
      const child = spawn(this.findYtDlpPath(), args, {
        stdio: ['pipe', 'pipe', 'pipe']
      });

      let stdout = '';
      let stderr = '';
      let timer = null;
      let settled = false;

      // Resolve exactly once and cancel the watchdog so the timer neither
      // fires after completion nor keeps the event loop alive.
      const finish = (result) => {
        if (settled) return;
        settled = true;
        if (timer) clearTimeout(timer);
        resolve(result);
      };

      child.stdout.on('data', (data) => {
        stdout += data.toString();
      });
      child.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      // Without this, a missing yt-dlp binary crashes the whole process.
      child.on('error', (error) => {
        finish({ success: false, error: `${label} spawn failed: ${error.message}` });
      });

      child.on('close', (code) => {
        if (code === 0 && stdout.trim()) {
          try {
            const transcript = this.parseVTTContent(stdout);
            if (transcript.length === 0) {
              finish({ success: false, error: `${label} produced no subtitle cues` });
              return;
            }
            finish({
              success: true,
              transcript,
              confidence,
              language: 'en',
              format: 'timestamped'
            });
          } catch (error) {
            finish({ success: false, error: `VTT parsing failed: ${error.message}` });
          }
        } else {
          finish({ success: false, error: stderr || `${label} failed` });
        }
      });

      // Watchdog: kill the child and fail cleanly if it runs too long.
      timer = setTimeout(() => {
        child.kill();
        finish({ success: false, error: `${label} timeout` });
      }, this.timeoutMs);
    });
  }

  /**
   * Strategy 4: Whisper AI transcription (requires audio extraction).
   * The temporary audio file is cleaned up even if transcription throws.
   */
  async getWhisperTranscription(videoUrl, videoId) {
    let audioPath = null;
    try {
      // First extract audio, then transcribe it.
      audioPath = await this.extractAudio(videoUrl, videoId);
      return await this.runWhisperTranscription(audioPath);
    } catch (error) {
      return { success: false, error: error.message };
    } finally {
      // Cleanup runs on both success and failure paths.
      if (audioPath) {
        await this.cleanupFile(audioPath);
      }
    }
  }

  /**
   * Strategy 5: AssemblyAI transcription service.
   * Placeholder — always reports failure until the integration lands.
   */
  async getAssemblyAITranscription(videoUrl, videoId) {
    return { success: false, error: 'AssemblyAI integration not implemented' };
  }

  /**
   * Audio transcription fallback used when every strategy has failed.
   * Same extract→transcribe→cleanup pipeline as getWhisperTranscription.
   */
  async transcribeFromAudio(videoUrl, videoId, metadata) {
    let audioPath = null;
    try {
      this.logger.info('Attempting audio transcription fallback', { videoId });
      audioPath = await this.extractAudio(videoUrl, videoId);
      return await this.runWhisperTranscription(audioPath);
    } catch (error) {
      return { success: false, error: error.message };
    } finally {
      if (audioPath) {
        await this.cleanupFile(audioPath);
      }
    }
  }

  // Helper methods

  /**
   * Locate the yt-dlp executable.
   * Honors the YT_DLP_PATH environment variable, otherwise falls back to the
   * first entry of a list of known install locations.
   * @returns {string} Path (or bare command name) for yt-dlp.
   */
  findYtDlpPath() {
    if (process.env.YT_DLP_PATH) {
      return process.env.YT_DLP_PATH;
    }
    const paths = [
      '/Users/bretmeraki/Library/Python/3.9/bin/yt-dlp',
      '/usr/local/bin/yt-dlp',
      'yt-dlp'
    ];
    // TODO: probe the candidates with fs.access instead of assuming the first.
    return paths[0];
  }

  /**
   * Parse WebVTT subtitle text into [{start, duration, text}] (seconds).
   * Joins multi-line cue text and tolerates cue settings after the end
   * timestamp (e.g. "00:00:03.000 align:start").
   * @param {string} vttContent - Raw VTT document text.
   * @returns {Array<{start: number, duration: number, text: string}>}
   */
  parseVTTContent(vttContent) {
    const lines = vttContent.split('\n');
    const transcript = [];
    let i = 0;

    while (i < lines.length) {
      const line = lines[i].trim();

      // Timing lines look like: 00:00:00.000 --> 00:00:03.000 [settings...]
      if (line.includes('-->')) {
        const [startRaw, endRaw] = line.split('-->').map(t => t.trim());
        const start = this.parseVTTTimestamp(startRaw);
        // Drop optional cue settings that follow the end timestamp.
        const end = this.parseVTTTimestamp(endRaw.split(' ')[0]);

        // Collect every text line of the cue until a blank line or the next
        // timing line (the old code kept only the first line).
        const textLines = [];
        let j = i + 1;
        while (j < lines.length && lines[j].trim() && !lines[j].includes('-->')) {
          textLines.push(lines[j].trim());
          j++;
        }

        if (textLines.length > 0) {
          transcript.push({
            start,
            duration: end - start,
            text: textLines.join(' ')
          });
        }
        i = j;
      } else {
        i++;
      }
    }
    return transcript;
  }

  /**
   * Convert a VTT timestamp ("HH:MM:SS.mmm", hours optional) to seconds.
   * Keeps fractional seconds (the old version truncated milliseconds) and
   * tolerates the SRT comma decimal separator.
   * @param {string} timestamp
   * @returns {number} Seconds, possibly fractional.
   */
  parseVTTTimestamp(timestamp) {
    const parts = timestamp.split(':');
    const seconds = Number.parseFloat(parts.pop().replace(',', '.'));
    const minutes = Number.parseInt(parts.pop() ?? '0', 10);
    const hours = Number.parseInt(parts.pop() ?? '0', 10);
    return hours * 3600 + minutes * 60 + seconds;
  }

  /**
   * Extract audio from video (placeholder — would shell out to yt-dlp).
   * @throws {Error} Always, until implemented.
   */
  async extractAudio(videoUrl, videoId) {
    throw new Error('Audio extraction not implemented yet');
  }

  /**
   * Run Whisper transcription (placeholder — would use OpenAI Whisper or similar).
   * @throws {Error} Always, until implemented.
   */
  async runWhisperTranscription(audioPath) {
    throw new Error('Whisper transcription not implemented yet');
  }

  /**
   * Best-effort removal of a temporary file. Never throws: a failed cleanup
   * is logged but must not mask the result of the operation that created it.
   * @param {string} filePath - Path to delete; no-op when falsy.
   */
  async cleanupFile(filePath) {
    if (!filePath) return;
    try {
      // Function-scope dynamic import keeps the file's top-level imports unchanged.
      const { unlink } = await import('node:fs/promises');
      await unlink(filePath);
    } catch (error) {
      this.logger.warn('Failed to clean up temporary file', {
        filePath,
        error: error.message
      });
    }
  }
}