#!/usr/bin/env node
/**
* Guaranteed Transcript Test - Test robust transcript acquisition strategies
*/
import { pathToFileURL } from 'node:url';

import { TranscriptAcquisition } from './modules/transcript-acquisition.js';
import { createLearnLogger } from './modules/utils/custom-logger.js';
// Fixture videos covering different transcript-availability scenarios.
// Each row is [videoId, description, expectedStrategies]; the watch URL is
// derived from the id so the two fields always agree.
const TEST_VIDEOS = [
  [
    'scGlmUuq4WM',
    'Guitar tutorial (Arabic) - May have auto-generated subs',
    ['youtube_transcript_api', 'yt_dlp_auto_subs'],
  ],
  [
    'dQw4w9WgXcQ',
    'Rick Astley - Never Gonna Give You Up (Popular video with manual subs)',
    ['youtube_transcript_api', 'yt_dlp_manual_subs'],
  ],
  [
    'jNQXAC9IVRw',
    'Me at the zoo (First YouTube video - likely no subs)',
    ['yt_dlp_auto_subs', 'yt_dlp_manual_subs'],
  ],
].map(([id, description, expectedStrategies]) => ({
  url: `https://www.youtube.com/watch?v=${id}`,
  id,
  description,
  expectedStrategies,
}));
/**
 * Smoke test for the multi-strategy transcript acquisition module.
 *
 * Feeds every entry of TEST_VIDEOS to TranscriptAcquisition#getTranscript,
 * prints a per-video report to stdout and finishes with an aggregate summary.
 * Failures are reported, not thrown: one video failing never stops the run.
 */
class GuaranteedTranscriptTest {
  constructor() {
    this.logger = createLearnLogger('TranscriptTest');
    this.transcriptAcquisition = new TranscriptAcquisition({
      strategies: [
        'youtube_transcript_api',
        'yt_dlp_auto_subs',
        'yt_dlp_manual_subs',
      ],
      maxRetries: 2,
      timeoutMs: 20000,
      fallbackToAudio: false,
    });
  }

  /**
   * Whole-number percentage of `part` in `total`.
   *
   * @param {number} part - Numerator.
   * @param {number} total - Denominator.
   * @returns {number} Rounded percentage, or 0 when `total` is not positive
   *   (avoids printing "NaN%" for an empty test set).
   */
  static percent(part, total) {
    return total > 0 ? Math.round((part / total) * 100) : 0;
  }

  /**
   * Run the acquisition against every test video and print the results.
   *
   * The result of getTranscript is read as an object with `method`,
   * `confidence`, `language`, `transcript` (an array of segments with
   * `start`/`text`, or a plain string) and an optional `attempts` array of
   * `{ strategy, success, error }`.
   *
   * @returns {Promise<void>} Resolves once the summary has been printed.
   */
  async run() {
    // Built from an array so the banner text does not depend on source indentation.
    console.log([
      '',
      '🎯 GUARANTEED TRANSCRIPT ACQUISITION TEST',
      'Testing multi-strategy transcript acquisition system',
      'Strategies: YouTube API → yt-dlp auto-subs → yt-dlp manual subs',
      'Goal: Guarantee transcript extraction for maximum content mining',
      '',
    ].join('\n'));

    const results = {
      totalTests: TEST_VIDEOS.length,
      successful: 0,
      failed: 0,
      strategies: {},
      details: [],
    };

    for (const video of TEST_VIDEOS) {
      console.log(`\n${'='.repeat(80)}`);
      console.log(`🎬 Testing: ${video.description}`);
      console.log(`URL: ${video.url}`);
      console.log(`Expected strategies: ${video.expectedStrategies.join(', ')}`);
      console.log('');

      try {
        const startTime = Date.now();
        const transcriptResult = await this.transcriptAcquisition.getTranscript(
          video.url,
          video.id,
          { title: video.description },
        );
        const duration = Date.now() - startTime;

        const { transcript, method, confidence, attempts } = transcriptResult;
        const isSegmented = Array.isArray(transcript);
        // Computed once up front; a missing transcript fails here, before any
        // "SUCCESS" line is printed.
        const textLength = isSegmented
          ? transcript.map((segment) => segment.text).join(' ').length
          : transcript.length;

        console.log('✅ SUCCESS!');
        console.log(`Method: ${method}`);
        console.log(`Confidence: ${Math.round(confidence * 100)}%`);
        console.log(`Language: ${transcriptResult.language}`);
        console.log(`Duration: ${duration}ms`);

        if (isSegmented) {
          console.log(`Segments: ${transcript.length}`);
          console.log(`Total text length: ${textLength} chars`);
          // Show first few segments
          console.log('\n📝 Sample segments:');
          transcript.slice(0, 3).forEach((segment, index) => {
            const timestamp = this.formatTimestamp(segment.start);
            // A segment without text must not turn a successful acquisition
            // into a reported failure.
            const text = String(segment.text ?? '');
            console.log(`${index + 1}. [${timestamp}] ${text.substring(0, 80)}...`);
          });
        } else {
          console.log(`Text length: ${textLength} chars`);
          console.log(`Sample: ${transcript.substring(0, 150)}...`);
        }

        // Show attempts made
        if (attempts && attempts.length > 0) {
          console.log('\n🔄 Attempts made:');
          attempts.forEach((attempt, index) => {
            console.log(`${index + 1}. ${attempt.strategy}: ${attempt.success ? '✅' : '❌'} ${attempt.error || ''}`);
          });
        }

        results.successful++;
        results.strategies[method] = (results.strategies[method] || 0) + 1;
        results.details.push({
          video: video.description,
          success: true,
          method,
          confidence,
          duration,
          segmentCount: isSegmented ? transcript.length : 0,
          textLength,
        });
      } catch (error) {
        console.log('❌ FAILED!');
        console.log(`Error: ${error.message}`);
        results.failed++;
        results.details.push({
          video: video.description,
          success: false,
          error: error.message,
        });
      }
    }

    this.displaySummary(results);
  }

  /**
   * Print the aggregate report for a finished run.
   *
   * @param {object} results - Tally built by run().
   * @param {number} results.totalTests - Number of videos attempted.
   * @param {number} results.successful - Number of videos with a transcript.
   * @param {number} results.failed - Number of videos that threw.
   * @param {Object<string, number>} results.strategies - Success count per method.
   * @param {Array<object>} results.details - One entry per video, in run order.
   * @returns {void}
   */
  displaySummary(results) {
    const { totalTests, successful, failed } = results;
    const successPct = GuaranteedTranscriptTest.percent(successful, totalTests);
    const failedPct = GuaranteedTranscriptTest.percent(failed, totalTests);
    const divider = '='.repeat(80);

    console.log(`\n${divider}`);
    console.log('📊 TRANSCRIPT ACQUISITION SUMMARY');
    console.log(divider);

    console.log('\n🎯 OVERALL RESULTS:');
    console.log(`Total tests: ${totalTests}`);
    console.log(`Successful: ${successful} (${successPct}%)`);
    console.log(`Failed: ${failed} (${failedPct}%)`);

    const strategyCounts = Object.entries(results.strategies);
    if (strategyCounts.length > 0) {
      console.log('\n🛠️ STRATEGY EFFECTIVENESS:');
      for (const [strategy, count] of strategyCounts) {
        console.log(`${strategy}: ${count} successes`);
      }
    }

    console.log('\n📋 DETAILED RESULTS:');
    results.details.forEach((detail, index) => {
      console.log(`\n${index + 1}. ${detail.video}`);
      if (detail.success) {
        console.log(` ✅ Success via ${detail.method}`);
        console.log(` 📊 Confidence: ${Math.round(detail.confidence * 100)}%`);
        console.log(` ⏱️ Duration: ${detail.duration}ms`);
        console.log(` 📝 Content: ${detail.segmentCount} segments, ${detail.textLength} chars`);
      } else {
        console.log(` ❌ Failed: ${detail.error}`);
      }
    });

    console.log('\n🔒 TRANSCRIPT GUARANTEE ANALYSIS:');
    // An empty test set proves nothing, so it must not be reported as 100%.
    if (totalTests > 0 && successful === totalTests) {
      console.log('✅ 100% SUCCESS RATE - Transcript guarantee achieved!');
      console.log('🎯 The multi-strategy approach ensures reliable transcript acquisition');
      console.log('🚀 This enables consistent hyper-granular content mining');
    } else if (successful > 0) {
      console.log(`✅ ${successPct}% success rate`);
      console.log('🔧 Some strategies working - system provides good coverage');
      console.log('💡 Consider adding more fallback strategies for 100% guarantee');
    } else {
      console.log('❌ No transcripts acquired - system needs debugging');
      console.log('🔧 Check yt-dlp installation and network connectivity');
    }

    console.log('\n🌲 FOREST INTEGRATION IMPACT:');
    console.log('With guaranteed transcripts, Forest can now:');
    console.log('• Generate timestamped, specific tasks');
    console.log('• Reference exact moments in learning videos');
    console.log('• Extract step-by-step instructions with precision');
    console.log('• Identify common mistakes with context');
    console.log('• Create practice exercises based on video content');
    console.log('• Build skill progressions from actual demonstrations');

    console.log('\n🎯 NEXT STEPS:');
    if (successful > 0) {
      console.log('✅ System is working - ready for production use');
      console.log('🔧 Consider adding audio transcription fallback for 100% coverage');
      console.log('🚀 Integrate with LearnMCP for hyper-granular content mining');
    } else {
      console.log('🔧 Debug transcript acquisition strategies');
      console.log('📋 Verify yt-dlp installation and configuration');
      console.log('🌐 Check network connectivity and video accessibility');
    }
  }

  /**
   * Format a number of seconds as `m:ss` (minutes are not wrapped into hours).
   *
   * @param {number} seconds - Offset in seconds; numeric strings are accepted.
   * @returns {string} e.g. `1:15`; `0:00` when the input is missing,
   *   non-numeric or negative.
   */
  formatTimestamp(seconds) {
    const total = Number(seconds);
    if (!Number.isFinite(total) || total < 0) {
      return '0:00';
    }
    const mins = Math.floor(total / 60);
    const secs = Math.floor(total % 60);
    return `${mins}:${String(secs).padStart(2, '0')}`;
  }
}
// Run the test only when this file is the process entry point, not when it is
// imported. pathToFileURL() produces the same normalized file: URL form that
// import.meta.url holds, so the comparison also works for Windows paths and
// for paths that need percent-encoding (spaces, '#', non-ASCII characters).
// process.argv[1] is checked first because it can be absent (e.g. `node -e`).
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const test = new GuaranteedTranscriptTest();
  process.on('SIGINT', () => {
    console.log('\n🛑 Test interrupted by user');
    process.exit(0);
  });
  test.run().catch((error) => {
    console.error('❌ Guaranteed transcript test failed:', error.message);
    process.exit(1);
  });
}