/**
* Content Summarizer
* Extracts hyper-granular, actionable content for precise task generation
*/
import { createLearnLogger } from './utils/custom-logger.js';
import { TranscriptMiner } from './transcript-miner.js';
/** Words too common to be useful as tags in `generateTags()`. */
const TAG_STOP_WORDS = new Set([
  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
  'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did',
  'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those',
]);

/** Granularity score assigned to each granularity level reported by the transcript miner. */
const TRANSCRIPT_GRANULARITY_SCORES = new Map([
  ['ultra', 10],
  ['high', 8],
  ['medium', 6],
]);

/** Score used when the transcript miner reports an unknown (or no) granularity level. */
const DEFAULT_TRANSCRIPT_GRANULARITY_SCORE = 4;

/** Matches a "day 1" marker in lower-cased text without also matching "day 10", "day 12", ... */
const DAY_ONE_PATTERN = /\bday\s*1\b/;

/** Return `value` when it is an array, otherwise an empty array. */
const asArray = value => (Array.isArray(value) ? value : []);

/**
 * Run every global regex in `patterns` over `text` and collect all matches.
 * Matches are returned pattern by pattern, in text order within each pattern.
 *
 * @param {*} text - Text to scan (coerced to a string, as `RegExp.prototype.exec` would).
 * @param {RegExp[]} patterns - Regexes carrying the `g` flag (required by `matchAll`).
 * @returns {{match: RegExpMatchArray, patternIndex: number}[]} All matches with the index of their pattern.
 */
function collectPatternMatches(text, patterns) {
  const source = String(text);
  return patterns.flatMap((pattern, patternIndex) =>
    // matchAll works on a clone of the regex, so no `lastIndex` state leaks between calls.
    Array.from(source.matchAll(pattern), match => ({ match, patternIndex })),
  );
}

/**
 * Turns extracted source content (video transcripts, PDFs, articles) into a
 * structured, actionable summary: steps, techniques, resources, prerequisites,
 * outcomes, tags, difficulty and an estimated duration.
 */
export class ContentSummarizer {
  /**
   * @param {object} [options]
   * @param {number} [options.maxChunkSize=4000] - Chunk size in (roughly estimated) tokens.
   * @param {number} [options.overlapSize=200] - Chunk overlap in (roughly estimated) tokens; 0 disables overlap.
   * @param {string} [options.summaryLength='detailed'] - Default summary length option.
   * @param {string} [options.summaryStyle='actionable'] - Default summary style option.
   * @param {string} [options.extractionMode='hyper_granular'] - Default extraction mode.
   */
  constructor(options = {}) {
    this.logger = createLearnLogger('ContentSummarizer');
    // Configuration for hyper-granular extraction
    this.maxChunkSize = options.maxChunkSize || 4000; // tokens
    // `??` rather than `||` so an explicit overlap of 0 is honoured.
    this.overlapSize = options.overlapSize ?? 200; // tokens for overlap
    this.summaryLength = options.summaryLength || 'detailed';
    this.summaryStyle = options.summaryStyle || 'actionable';
    this.extractionMode = options.extractionMode || 'hyper_granular';
    // Initialize transcript miner
    this.transcriptMiner = new TranscriptMiner({
      miningSensitivity: 'high',
      actionThreshold: 0.6,
    });
  }

  /**
   * Extract hyper-granular, actionable content from one piece of extracted content.
   * Content with fewer than 50 characters of text is summarized from its metadata only.
   *
   * @param {object} extractedContent - Extracted source (`sourceId`, `type`, `url`, `content`, `metadata`).
   * @param {object} [options] - Per-call overrides (`length`, `style`, `focusAreas`, `maxTokens`, `extractionMode`).
   * @returns {Promise<object>} The summary record.
   * @throws Rethrows any error raised during analysis after logging it.
   */
  async summarize(extractedContent, options = {}) {
    try {
      this.logger.summarizationStart(extractedContent.sourceId, 'hyper_granular');
      const summaryOptions = {
        length: options.length || this.summaryLength,
        style: options.style || this.summaryStyle,
        focusAreas: options.focusAreas || [],
        maxTokens: options.maxTokens || 2000, // Increased for granular content
        extractionMode: options.extractionMode || this.extractionMode,
      };

      // Get the main text content
      const textContent = this.extractTextContent(extractedContent);
      if (!textContent || textContent.length < 50) {
        // For short content, extract what we can from metadata
        return this.extractFromMetadata(extractedContent, summaryOptions);
      }

      // Perform hyper-granular analysis
      const granularAnalysis = await this.performGranularAnalysis(textContent, extractedContent, summaryOptions);
      const summaryText = granularAnalysis.detailedSummary;
      const summaryData = {
        sourceId: extractedContent.sourceId,
        sourceType: extractedContent.type,
        sourceUrl: extractedContent.url,
        // Hyper-granular content structure
        summary: summaryText,
        summaryMethod: granularAnalysis.method,
        summaryOptions,
        // Granular extractions
        actionableSteps: granularAnalysis.actionableSteps,
        specificTechniques: granularAnalysis.specificTechniques,
        toolsAndResources: granularAnalysis.toolsAndResources,
        timeBasedBreakdown: granularAnalysis.timeBasedBreakdown,
        skillProgression: granularAnalysis.skillProgression,
        commonMistakes: granularAnalysis.commonMistakes,
        prerequisites: granularAnalysis.prerequisites,
        measurableOutcomes: granularAnalysis.measurableOutcomes,
        metadata: {
          title: extractedContent.metadata?.title || 'Unknown',
          author: extractedContent.metadata?.author || null,
          originalLength: textContent.length,
          summaryLength: summaryText.length,
          // textContent has at least 50 characters here, so the division is safe.
          compressionRatio: Math.round((summaryText.length / textContent.length) * 100) / 100,
          granularityScore: granularAnalysis.granularityScore,
          actionabilityScore: granularAnalysis.actionabilityScore,
          relevanceScore: this.calculateRelevanceScore(extractedContent, summaryOptions),
          created: new Date().toISOString(),
        },
        // Enhanced extractions
        keyPoints: granularAnalysis.keyPoints,
        tags: granularAnalysis.tags,
        difficulty: granularAnalysis.difficulty,
        estimatedDuration: granularAnalysis.estimatedDuration,
      };

      this.logger.summarizationComplete(
        extractedContent.sourceId,
        granularAnalysis.method,
        summaryData.metadata.compressionRatio,
        {
          granularityScore: granularAnalysis.granularityScore,
          actionabilityScore: granularAnalysis.actionabilityScore,
          actionableSteps: granularAnalysis.actionableSteps.length,
        },
      );
      return summaryData;
    } catch (error) {
      this.logger.error('Content summarization failed', {
        error: error.message,
        stack: error.stack,
      });
      throw error;
    }
  }

  /**
   * Perform hyper-granular analysis of content: try the Claude-powered analysis
   * first and fall back to the extractive analysis when it fails.
   *
   * @param {string} textContent - Main text of the content.
   * @param {object} extractedContent - Extracted source record.
   * @param {object} options - Resolved summary options.
   * @returns {Promise<object>} The analysis result.
   * @throws Rethrows errors from the extractive fallback after logging them.
   */
  async performGranularAnalysis(textContent, extractedContent, options) {
    try {
      // Try Claude-powered granular analysis first
      try {
        return await this.claudeGranularAnalysis(textContent, extractedContent, options);
      } catch (error) {
        this.logger.warn('Claude granular analysis failed, using advanced extractive', {
          error: error.message,
        });
      }
      // Advanced extractive granular analysis
      return await this.advancedExtractiveAnalysis(textContent, extractedContent, options);
    } catch (error) {
      this.logger.error('Granular analysis failed', { error: error.message });
      throw error;
    }
  }

  /**
   * Claude-powered hyper-granular analysis (placeholder).
   *
   * @throws {Error} Always; the integration is not implemented yet.
   */
  async claudeGranularAnalysis(textContent, extractedContent, options) {
    // This would integrate with Claude API for sophisticated analysis
    throw new Error('Claude granular analysis not implemented yet');
  }

  /**
   * Advanced extractive granular analysis. When the content carries a transcript
   * longer than 100 characters, transcript-miner results are merged with the
   * regex-based extractions; otherwise only the regex-based extractions are used.
   *
   * @param {string} textContent - Main text of the content.
   * @param {object} extractedContent - Extracted source record.
   * @param {object} options - Resolved summary options (currently unused here).
   * @returns {Promise<object>} Analysis with extractions, summary, tags, difficulty and scores (scores clamped to 1..10).
   */
  async advancedExtractiveAnalysis(textContent, extractedContent, options) {
    const analysis = {
      method: 'advanced_extractive',
      granularityScore: 0,
      actionabilityScore: 0,
    };

    // Check if we have transcript data for enhanced mining
    const transcript = extractedContent.content?.transcript;
    const hasTranscript = Boolean(transcript) && transcript.length > 100;

    if (hasTranscript) {
      this.logger.info('Using transcript miner for enhanced analysis', {
        transcriptLength: transcript.length,
        sourceType: extractedContent.type,
      });
      // Use transcript miner for hyper-granular extraction
      const transcriptResults = (await this.transcriptMiner.mineTranscript(transcript, extractedContent.metadata)) ?? {};
      const miningMetadata = transcriptResults.miningMetadata ?? {};

      // Merge transcript mining results with traditional extraction.
      // Missing result lists are treated as empty instead of crashing the analysis.
      analysis.actionableSteps = [
        ...asArray(transcriptResults.stepByStepInstructions).map(item => ({
          action: item.instruction,
          type: item.type,
          confidence: item.confidence,
          timestamp: item.timestamp,
          context: item.context,
        })),
        ...this.extractActionableSteps(textContent, extractedContent),
      ];
      analysis.specificTechniques = [
        ...asArray(transcriptResults.specificTechniques).map(item => ({
          name: item.technique,
          type: item.category,
          confidence: item.confidence,
          timestamp: item.timestamp,
        })),
        ...this.extractSpecificTechniques(textContent, extractedContent),
      ];
      analysis.toolsAndResources = [
        ...asArray(transcriptResults.toolsAndEquipment).map(item => ({
          name: item.tool,
          type: item.category,
          confidence: item.confidence,
          timestamp: item.timestamp,
        })),
        ...this.extractToolsAndResources(textContent, extractedContent),
      ];
      analysis.timeBasedBreakdown = asArray(transcriptResults.timeBasedActions).map(item => ({
        timestamp: item.timestamp,
        content: item.action,
        confidence: item.confidence,
        actionType: item.actionType,
      }));
      analysis.commonMistakes = asArray(transcriptResults.commonMistakes).map(item => ({
        mistake: item.mistake,
        type: 'transcript_extracted',
        confidence: item.confidence,
        timestamp: item.timestamp,
        severity: item.severity,
      }));

      // Add transcript-specific extractions
      analysis.practiceExercises = transcriptResults.practiceExercises || [];
      analysis.keyTerminology = transcriptResults.keyTerminology || [];
      analysis.transcriptMetadata = transcriptResults.miningMetadata;

      // Enhanced scoring with transcript data
      analysis.granularityScore =
        TRANSCRIPT_GRANULARITY_SCORES.get(miningMetadata.granularityLevel) ?? DEFAULT_TRANSCRIPT_GRANULARITY_SCORE;
      // A missing/non-numeric miner score must not turn the final score into NaN.
      const minedActionability = Number(miningMetadata.actionabilityScore);
      analysis.actionabilityScore = Number.isFinite(minedActionability) ? Math.min(10, minedActionability) : 0;
      analysis.method = 'transcript_enhanced_extractive';
    } else {
      // Fallback to traditional extraction methods
      this.logger.info('No transcript available, using traditional extraction', {
        contentLength: textContent.length,
        sourceType: extractedContent.type,
      });
      analysis.actionableSteps = this.extractActionableSteps(textContent, extractedContent);
      analysis.specificTechniques = this.extractSpecificTechniques(textContent, extractedContent);
      analysis.toolsAndResources = this.extractToolsAndResources(textContent, extractedContent);
      analysis.timeBasedBreakdown = this.extractTimeBasedBreakdown(textContent, extractedContent);
      analysis.commonMistakes = this.extractCommonMistakes(textContent, extractedContent);
      analysis.practiceExercises = [];
      analysis.keyTerminology = [];

      // Traditional scoring
      analysis.actionabilityScore +=
        analysis.actionableSteps.length * 2 +
        analysis.toolsAndResources.length * 1.5 +
        analysis.commonMistakes.length * 1;
      analysis.granularityScore +=
        analysis.specificTechniques.length * 1.5 +
        analysis.timeBasedBreakdown.length * 2;
    }

    // Common extractions for both paths
    analysis.skillProgression = this.extractSkillProgression(textContent, extractedContent);
    analysis.prerequisites = this.extractPrerequisites(textContent, extractedContent);
    analysis.measurableOutcomes = this.extractMeasurableOutcomes(textContent, extractedContent);

    // Create detailed summary
    analysis.detailedSummary = this.createDetailedSummary(analysis, textContent, extractedContent);

    // Extract enhanced metadata. Difficulty is assessed BEFORE tag generation
    // because generateEnhancedTags() reads analysis.difficulty.
    analysis.keyPoints = this.extractEnhancedKeyPoints(analysis);
    analysis.difficulty = this.assessDifficulty(analysis, extractedContent);
    analysis.tags = this.generateEnhancedTags(analysis, extractedContent);
    analysis.estimatedDuration = this.estimateDuration(analysis, extractedContent);

    // Normalize scores
    analysis.granularityScore = Math.min(10, Math.max(1, analysis.granularityScore));
    analysis.actionabilityScore = Math.min(10, Math.max(1, analysis.actionabilityScore));
    return analysis;
  }

  /**
   * Extract actionable steps from content.
   *
   * @param {string} textContent - Text to scan for step-like phrases.
   * @param {object} extractedContent - Extracted source record (metadata is used for YouTube content).
   * @returns {object[]} At most 10 steps, in extraction order.
   */
  extractActionableSteps(textContent, extractedContent) {
    const stepPatterns = [
      /(?:step|stage|phase|lesson)\s*\d+[:\-\s]([^.!?]+)/gi,
      /(?:first|second|third|next|then|finally)[,\s]+([^.!?]+)/gi,
      /(?:start by|begin with|make sure to|remember to)\s+([^.!?]+)/gi,
      /(?:practice|try|attempt|work on)\s+([^.!?]+)/gi,
    ];
    const steps = collectPatternMatches(textContent, stepPatterns)
      .map(({ match }) => match[1].trim())
      .filter(step => step.length > 10 && step.length < 200)
      .map(step => ({ action: step, type: 'procedural', confidence: 0.8 }));

    // Extract from video metadata for YouTube content
    if (extractedContent.type === 'youtube') {
      const title = extractedContent.metadata?.title || '';
      // Lower-cased so "Download"/"Visit" are recognised like every other (case-insensitive) pattern.
      const description = String(extractedContent.metadata?.description || '').toLowerCase();

      // Extract lesson numbers and topics
      const lessonMatch = title.match(/(?:lesson|day|part)\s*(\d+)/i);
      if (lessonMatch) {
        steps.push({
          action: `Complete ${title}`,
          type: 'lesson',
          sequence: Number.parseInt(lessonMatch[1], 10),
          confidence: 0.9,
        });
      }

      // Extract from description
      if (description.includes('download') || description.includes('visit')) {
        steps.push({
          action: 'Download supplementary materials from provided links',
          type: 'resource_acquisition',
          confidence: 0.7,
        });
      }
    }
    return steps.slice(0, 10); // Limit to the first 10 steps
  }

  /**
   * Extract specific techniques mentioned in content.
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record (keywords are used for YouTube content).
   * @returns {object[]} At most 8 techniques.
   */
  extractSpecificTechniques(textContent, extractedContent) {
    const techniquePatterns = [
      /(?:technique|method|approach|style)\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:use|apply|employ)\s+(?:the\s+)?([a-z\s]+(?:technique|method|approach))/gi,
      /([a-z\s]+(?:fingering|position|grip|stance|posture))/gi,
    ];
    const techniques = collectPatternMatches(textContent, techniquePatterns)
      .map(({ match }) => match[1].trim())
      .filter(name => name.length > 5 && name.length < 100)
      .map(name => ({ name, type: 'technique', confidence: 0.7 }));

    // Content-specific technique extraction
    if (extractedContent.type === 'youtube') {
      const techniqueMarkers = ['guitar', 'technique', 'method'];
      asArray(extractedContent.metadata?.keywords)
        .filter(keyword => typeof keyword === 'string')
        .filter(keyword => {
          const lowered = keyword.toLowerCase();
          return techniqueMarkers.some(marker => lowered.includes(marker));
        })
        .forEach(keyword => {
          techniques.push({ name: keyword, type: 'keyword_technique', confidence: 0.6 });
        });
    }
    return techniques.slice(0, 8);
  }

  /**
   * Extract tools and resources (URLs and named tools).
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record (URLs in `metadata.description` are added).
   * @returns {object[]} At most 6 resources.
   */
  extractToolsAndResources(textContent, extractedContent) {
    const resourcePatterns = [
      /(http[s]?:\/\/[^\s]+)/gi,
      /(?:download|visit|check out|go to)\s+([^\s.!?]+)/gi,
      /(?:use|need|require)\s+(?:a|an|the)?\s*([a-z\s]+(?:guitar|pick|tuner|metronome|app|software))/gi,
    ];
    const resources = collectPatternMatches(textContent, resourcePatterns)
      .map(({ match, patternIndex }) => ({ name: match[1].trim(), patternIndex }))
      .filter(({ name }) => name.length > 3)
      // Only the first pattern matches URLs; everything else is treated as a tool.
      .map(({ name, patternIndex }) => ({ name, type: patternIndex === 0 ? 'url' : 'tool', confidence: 0.8 }));

    // Extract from metadata
    const description = extractedContent.metadata?.description;
    if (typeof description === 'string') {
      const urlMatches = description.match(/http[s]?:\/\/[^\s]+/g) ?? [];
      urlMatches.forEach(url => {
        resources.push({ name: url, type: 'url', confidence: 0.9 });
      });
    }
    return resources.slice(0, 6);
  }

  /**
   * Extract time-based breakdown from time references in the text. For YouTube
   * content with a duration, 2 to 5 evenly spaced estimated segments are appended.
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record.
   * @returns {object[]} At most 8 entries.
   */
  extractTimeBasedBreakdown(textContent, extractedContent) {
    const timePatterns = [
      /(?:at|around|after)\s+(\d+:\d+|\d+\s*(?:minutes?|mins?|seconds?|secs?))/gi,
      /(?:minute|min)\s*(\d+)[:\-\s]([^.!?]+)/gi,
    ];
    const timeBreakdown = collectPatternMatches(textContent, timePatterns).map(({ match }) => ({
      timestamp: match[1],
      // Only the second pattern captures content; the first yields a placeholder.
      content: match[2] || 'Content at this time',
      confidence: 0.7,
    }));

    // For YouTube content, create estimated breakdown
    const duration = extractedContent.metadata?.duration;
    if (extractedContent.type === 'youtube' && duration) {
      // One segment per 5 minutes of duration, clamped to between 2 and 5 segments.
      const segments = Math.min(5, Math.max(2, Math.floor(duration / 300)));
      for (let i = 0; i < segments; i += 1) {
        const startTime = Math.floor((duration / segments) * i);
        const minutes = Math.floor(startTime / 60);
        const seconds = (startTime % 60).toString().padStart(2, '0');
        timeBreakdown.push({
          timestamp: `${minutes}:${seconds}`,
          content: `Segment ${i + 1}: Core content section`,
          confidence: 0.5,
        });
      }
    }
    return timeBreakdown.slice(0, 8);
  }

  /**
   * Extract skill progression information.
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record (the title may add one entry).
   * @returns {object[]} At most 5 entries.
   */
  extractSkillProgression(textContent, extractedContent) {
    const progressionPatterns = [
      /(?:beginner|novice|start)[s]?\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:intermediate|advanced|expert)[s]?\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:before|after)\s+(?:you\s+)?(?:can|should|must)\s+([^.!?]+)/gi,
    ];
    const progression = collectPatternMatches(textContent, progressionPatterns)
      .map(({ match }) => ({ skill: match[1].trim(), matchedText: match[0] }))
      .filter(({ skill }) => skill.length > 10)
      .map(({ skill, matchedText }) => ({
        skill,
        level: this.determineSkillLevel(matchedText),
        confidence: 0.7,
      }));

    // Extract from title for structured content
    const title = extractedContent.metadata?.title;
    if (title) {
      const levelMatch = title.match(/(beginner|intermediate|advanced|day\s*\d+|lesson\s*\d+|101)/i);
      if (levelMatch) {
        progression.push({
          skill: title,
          level: this.determineSkillLevel(levelMatch[1]),
          confidence: 0.9,
        });
      }
    }
    return progression.slice(0, 5);
  }

  /**
   * Extract text content from various content types.
   *
   * @param {object} extractedContent - Extracted source record.
   * @returns {string} The main text, or '' when none is available.
   */
  extractTextContent(extractedContent) {
    const content = extractedContent.content;
    switch (extractedContent.type) {
      case 'youtube':
        return content?.transcript || content?.description || '';
      case 'pdf':
      case 'article':
        return content?.text || '';
      default:
        return content?.text || content?.transcript || content?.description || '';
    }
  }

  /**
   * Chunk content into manageable pieces on sentence boundaries, carrying a few
   * trailing words of each chunk into the next one as overlap.
   *
   * @param {string} text - Text to split.
   * @returns {string[]} The chunks; a single-element array when the text already fits.
   */
  chunkContent(text) {
    // Rough token estimation: about 4 characters per token.
    const maxChars = this.maxChunkSize * 4;
    if (text.length <= maxChars) {
      return [text];
    }

    // Rough estimation of the overlap in words. Floored and checked explicitly:
    // `slice(-0)` would copy the WHOLE previous chunk instead of nothing.
    const overlapWordCount = Math.max(0, Math.floor(this.overlapSize / 4));
    const chunks = [];
    const sentences = text.split(/[.!?]+/).filter(s => s.trim().length > 0);
    let currentChunk = '';

    for (const sentence of sentences) {
      const sentenceWithPunctuation = `${sentence.trim()}.`;
      const fits = (currentChunk + sentenceWithPunctuation).length <= maxChars;

      if (fits) {
        currentChunk += (currentChunk ? ' ' : '') + sentenceWithPunctuation;
      } else if (currentChunk) {
        chunks.push(currentChunk.trim());
        // Add overlap from the end of current chunk
        const overlapWords = overlapWordCount > 0 ? currentChunk.split(' ').slice(-overlapWordCount) : [];
        currentChunk = [...overlapWords, sentenceWithPunctuation].join(' ');
      } else {
        // Single sentence is too long on its own: emit it as its own chunk
        chunks.push(sentenceWithPunctuation);
        currentChunk = '';
      }
    }

    if (currentChunk) {
      chunks.push(currentChunk.trim());
    }
    return chunks;
  }

  /**
   * Summarize using Claude (placeholder - would need actual Claude integration).
   *
   * @throws {Error} Always; the integration is not implemented yet.
   */
  async claudeSummarize(chunks, extractedContent, options) {
    // This would integrate with Claude API
    // For now, throw error to trigger fallback
    throw new Error('Claude integration not implemented yet');
  }

  /**
   * Extractive summarization fallback: score every sentence, keep the best ones
   * and return them in their original order.
   *
   * @param {string[]} chunks - Text chunks to summarize.
   * @param {object} extractedContent - Extracted source record (the title influences scoring).
   * @param {object} options - Summary options (`length`, `focusAreas`).
   * @returns {string} The selected sentences joined with spaces.
   */
  extractiveSummarize(chunks, extractedContent, options) {
    const allSentences = chunks
      .join(' ')
      .split(/[.!?]+/)
      .filter(s => s.trim().length > 0);

    // Score sentences based on various factors
    const scoredSentences = allSentences.map((sentence, index) => ({
      sentence: `${sentence.trim()}.`,
      score: this.scoreSentence(sentence, extractedContent, options),
      index,
    }));

    // Select the top sentences by score, then restore the original order
    const summaryLength = this.getSummaryLength(options.length, allSentences.length);
    return [...scoredSentences]
      .sort((a, b) => b.score - a.score)
      .slice(0, summaryLength)
      .sort((a, b) => a.index - b.index)
      .map(s => s.sentence)
      .join(' ');
  }

  /**
   * Score sentence for extractive summarization.
   *
   * @param {string} sentence - Sentence to score.
   * @param {object} extractedContent - Extracted source record (title words add to the score).
   * @param {object} options - Summary options (`focusAreas` words add to the score).
   * @returns {number} The score; higher means more relevant.
   */
  scoreSentence(sentence, extractedContent, options) {
    // Empty tokens (from leading/trailing whitespace or an empty title) are dropped
    // so they can neither inflate the word count nor "overlap" with each other.
    const tokenize = text => text.toLowerCase().split(/\s+/).filter(Boolean);
    const words = tokenize(sentence);
    let score = 0;

    // Length score (prefer medium-length sentences)
    const wordCount = words.length;
    if (wordCount >= 10 && wordCount <= 30) {
      score += 2;
    } else if (wordCount >= 5 && wordCount <= 50) {
      score += 1;
    }

    // Title/keyword overlap
    const title = extractedContent.metadata?.title?.toLowerCase() || '';
    const titleWords = new Set(tokenize(title));
    score += words.filter(word => titleWords.has(word)).length * 2;

    // Focus areas
    const focusAreas = asArray(options?.focusAreas);
    if (focusAreas.length > 0) {
      const focusWords = new Set(tokenize(focusAreas.join(' ')));
      score += words.filter(word => focusWords.has(word)).length * 3;
    }

    // Position score (prefer sentences from beginning and end)
    // This would need sentence position information
    return score;
  }

  /**
   * Get summary length (in sentences) based on option. Any option other than
   * 'short' or 'long' is treated as 'medium'.
   *
   * @param {string} lengthOption - 'short', 'medium' or 'long'.
   * @param {number} totalSentences - Number of sentences available.
   * @returns {number} Number of sentences to keep.
   */
  getSummaryLength(lengthOption, totalSentences) {
    switch (lengthOption) {
      case 'short':
        return Math.min(3, Math.ceil(totalSentences * 0.1));
      case 'long':
        return Math.min(10, Math.ceil(totalSentences * 0.3));
      case 'medium':
      default:
        return Math.min(6, Math.ceil(totalSentences * 0.2));
    }
  }

  /**
   * Extract key points from summary.
   *
   * @param {string} summary - Summary text.
   * @returns {string[]} The first five sentences, trimmed.
   */
  extractKeyPoints(summary) {
    return summary
      .split(/[.!?]+/)
      .filter(s => s.trim().length > 0)
      .slice(0, 5)
      .map(s => s.trim());
  }

  /**
   * Generate tags from content and summary: the content type, up to five metadata
   * keywords, and the five most frequent non-trivial words of the summary.
   *
   * @param {object} extractedContent - Extracted source record.
   * @param {string} summary - Summary text.
   * @returns {string[]} De-duplicated tags.
   */
  generateTags(extractedContent, summary) {
    const tags = [extractedContent.type, ...asArray(extractedContent.metadata?.keywords).slice(0, 5)];

    // A Map (not a plain object) so words such as "constructor" cannot collide
    // with inherited Object.prototype properties and corrupt the counts.
    const wordFreq = new Map();
    for (const word of summary.toLowerCase().split(/\s+/)) {
      if (word.length > 3 && !TAG_STOP_WORDS.has(word)) {
        wordFreq.set(word, (wordFreq.get(word) ?? 0) + 1);
      }
    }

    const topWords = [...wordFreq.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([word]) => word);
    tags.push(...topWords);
    return [...new Set(tags)]; // Remove duplicates
  }

  /**
   * Extract common mistakes from content.
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record (currently unused).
   * @returns {object[]} At most 5 mistakes.
   */
  extractCommonMistakes(textContent, extractedContent) {
    const mistakePatterns = [
      /(?:don't|avoid|never|mistake|error|wrong)\s+([^.!?]+)/gi,
      /(?:common|typical|frequent)\s+(?:mistake|error|problem)[s]?\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:be careful|watch out|make sure)\s+(?:not\s+to\s+)?([^.!?]+)/gi,
    ];
    return collectPatternMatches(textContent, mistakePatterns)
      .map(({ match }) => match[1].trim())
      .filter(mistake => mistake.length > 10 && mistake.length < 150)
      .map(mistake => ({ mistake, type: 'common_error', confidence: 0.7 }))
      .slice(0, 5);
  }

  /**
   * Extract prerequisites from content.
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record (currently unused).
   * @returns {object[]} At most 5 prerequisites.
   */
  extractPrerequisites(textContent, extractedContent) {
    const prereqPatterns = [
      /(?:before|first|prerequisite|requirement)[s]?\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:you need|you should|you must)\s+(?:to\s+)?(?:have|know|understand)\s+([^.!?]+)/gi,
      /(?:make sure|ensure)\s+(?:you\s+)?(?:have|know)\s+([^.!?]+)/gi,
    ];
    return collectPatternMatches(textContent, prereqPatterns)
      .map(({ match }) => match[1].trim())
      .filter(prereq => prereq.length > 5 && prereq.length < 100)
      .map(prereq => ({ requirement: prereq, type: 'prerequisite', confidence: 0.8 }))
      .slice(0, 5);
  }

  /**
   * Extract measurable outcomes from content.
   *
   * @param {string} textContent - Text to scan.
   * @param {object} extractedContent - Extracted source record (currently unused).
   * @returns {object[]} At most 6 outcomes, each flagged as measurable or not.
   */
  extractMeasurableOutcomes(textContent, extractedContent) {
    const outcomePatterns = [
      /(?:you will|you'll|you can|able to)\s+([^.!?]+)/gi,
      /(?:goal|objective|outcome|result)[s]?\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:by the end|after this|when you finish)\s+([^.!?]+)/gi,
    ];
    return collectPatternMatches(textContent, outcomePatterns)
      .map(({ match }) => match[1].trim())
      .filter(outcome => outcome.length > 10 && outcome.length < 150)
      .map(outcome => ({
        outcome,
        type: 'learning_outcome',
        measurable: this.isMeasurable(outcome),
        confidence: 0.7,
      }))
      .slice(0, 6);
  }

  /**
   * Create detailed summary (markdown-style text) from granular analysis.
   *
   * @param {object} analysis - Analysis with `actionableSteps`, `specificTechniques`, `toolsAndResources`, `prerequisites`.
   * @param {string} textContent - Main text (currently unused).
   * @param {object} extractedContent - Extracted source record (the title becomes the heading).
   * @returns {string} The summary text.
   */
  createDetailedSummary(analysis, textContent, extractedContent) {
    // Title and overview
    const title = extractedContent.metadata?.title || 'Learning Content';
    const parts = [`**${title}**`];

    // Actionable steps summary
    if (analysis.actionableSteps.length > 0) {
      parts.push('\n**Key Actions:**');
      analysis.actionableSteps.slice(0, 5).forEach((step, index) => {
        parts.push(`${index + 1}. ${step.action}`);
      });
    }

    // Techniques summary
    if (analysis.specificTechniques.length > 0) {
      parts.push('\n**Techniques Covered:**');
      analysis.specificTechniques.slice(0, 3).forEach(tech => {
        parts.push(`• ${tech.name}`);
      });
    }

    // Resources summary
    if (analysis.toolsAndResources.length > 0) {
      parts.push('\n**Resources:**');
      analysis.toolsAndResources.slice(0, 3).forEach(resource => {
        parts.push(`• ${resource.name}`);
      });
    }

    // Prerequisites
    if (analysis.prerequisites.length > 0) {
      parts.push('\n**Prerequisites:**');
      analysis.prerequisites.slice(0, 2).forEach(prereq => {
        parts.push(`• ${prereq.requirement}`);
      });
    }
    return parts.join('\n');
  }

  /**
   * Extract enhanced key points: the top 3 steps, 2 techniques and 2 outcomes.
   *
   * @param {object} analysis - Analysis with `actionableSteps`, `specificTechniques`, `measurableOutcomes`.
   * @returns {string[]} The key points.
   */
  extractEnhancedKeyPoints(analysis) {
    return [
      ...analysis.actionableSteps.slice(0, 3).map(step => step.action),
      ...analysis.specificTechniques.slice(0, 2).map(tech => `Technique: ${tech.name}`),
      ...analysis.measurableOutcomes.slice(0, 2).map(outcome => `Outcome: ${outcome.outcome}`),
    ];
  }

  /**
   * Generate enhanced tags from the content type, the assessed difficulty (when
   * already present on `analysis`), technique words, skill levels and keywords.
   *
   * @param {object} analysis - Analysis with `specificTechniques`, `skillProgression` and optionally `difficulty`.
   * @param {object} extractedContent - Extracted source record.
   * @returns {string[]} At most 10 unique tags.
   */
  generateEnhancedTags(analysis, extractedContent) {
    const tags = new Set();

    // Add content type
    tags.add(extractedContent.type);

    // Add difficulty level
    if (analysis.difficulty) {
      tags.add(analysis.difficulty);
    }

    // Add technique tags
    analysis.specificTechniques.forEach(tech => {
      tech.name
        .toLowerCase()
        .split(/\s+/)
        .filter(word => word.length > 3)
        .forEach(word => tags.add(word));
    });

    // Add skill progression tags
    analysis.skillProgression.forEach(skill => {
      tags.add(skill.level);
    });

    // Add from metadata
    asArray(extractedContent.metadata?.keywords)
      .slice(0, 5)
      .forEach(keyword => {
        tags.add(keyword.toLowerCase());
      });

    return Array.from(tags).slice(0, 10);
  }

  /**
   * Assess content difficulty from title markers, then nudge it upwards for
   * content with many prerequisites or techniques.
   *
   * @param {object} analysis - Analysis with `prerequisites` and `specificTechniques`.
   * @param {object} extractedContent - Extracted source record.
   * @returns {string} 'beginner', 'intermediate', 'advanced' or 'expert'.
   */
  assessDifficulty(analysis, extractedContent) {
    let difficultyScore = 5; // Default medium

    const title = extractedContent.metadata?.title?.toLowerCase() || '';
    // DAY_ONE_PATTERN instead of includes('day 1'), which also matched "day 10".."day 19".
    if (title.includes('101') || title.includes('beginner') || DAY_ONE_PATTERN.test(title)) {
      difficultyScore = 2;
    } else if (title.includes('advanced') || title.includes('expert')) {
      difficultyScore = 8;
    } else if (title.includes('intermediate')) {
      difficultyScore = 6;
    }

    // Adjust based on prerequisites
    if (analysis.prerequisites.length > 3) {
      difficultyScore += 1;
    }

    // Adjust based on technique complexity
    if (analysis.specificTechniques.length > 5) {
      difficultyScore += 1;
    }

    // Index i holds the level for difficulty score i + 1.
    const levels = [
      'beginner', 'beginner', 'beginner',
      'intermediate', 'intermediate', 'intermediate',
      'advanced', 'advanced',
      'expert', 'expert',
    ];
    return levels[Math.min(9, Math.max(0, difficultyScore - 1))];
  }

  /**
   * Estimate duration for content: the video duration (or 30 minutes when no
   * usable duration exists) plus 5 minutes per step and 10 per technique.
   *
   * @param {object} analysis - Analysis with `actionableSteps` and `specificTechniques`.
   * @param {object} extractedContent - Extracted source record; `metadata.duration` is treated as seconds.
   * @returns {string} The estimate, e.g. "45 minutes".
   */
  estimateDuration(analysis, extractedContent) {
    // A non-numeric duration falls back to the default instead of yielding "NaN minutes".
    const videoSeconds = Number(extractedContent.metadata?.duration);
    const baseMinutes = Number.isFinite(videoSeconds) && videoSeconds > 0 ? Math.ceil(videoSeconds / 60) : 30;

    const duration =
      baseMinutes +
      analysis.actionableSteps.length * 5 + // 5 minutes per step
      analysis.specificTechniques.length * 10; // 10 minutes per technique
    return `${duration} minutes`;
  }

  /**
   * Helper: Determine skill level from text.
   *
   * @param {string} text - Text containing a level marker.
   * @returns {string} 'beginner', 'intermediate' or 'advanced' ('intermediate' when no marker is found).
   */
  determineSkillLevel(text) {
    const lowerText = text.toLowerCase();
    const beginnerMarkers = ['beginner', 'novice', '101'];
    if (beginnerMarkers.some(marker => lowerText.includes(marker)) || DAY_ONE_PATTERN.test(lowerText)) {
      return 'beginner';
    }
    if (lowerText.includes('intermediate')) {
      return 'intermediate';
    }
    if (lowerText.includes('advanced') || lowerText.includes('expert')) {
      return 'advanced';
    }
    return 'intermediate'; // Default
  }

  /**
   * Helper: Check if outcome is measurable, i.e. mentions one of a fixed list of action words.
   *
   * @param {string} outcome - Outcome text.
   * @returns {boolean} True when an action word is present.
   */
  isMeasurable(outcome) {
    const measurableWords = ['play', 'perform', 'complete', 'achieve', 'master', 'learn', 'practice'];
    const lowered = outcome.toLowerCase();
    return measurableWords.some(word => lowered.includes(word));
  }

  /**
   * Extract from metadata when content is limited. Produces a record with the
   * same shape as the full summary, with fixed placeholder scores.
   *
   * @param {object} extractedContent - Extracted source record.
   * @param {object} options - Resolved summary options, echoed back as `summaryOptions`.
   * @returns {object} The summary record.
   */
  extractFromMetadata(extractedContent, options) {
    const metadata = extractedContent.metadata || {};
    const summary = `${metadata.title || 'Unknown'}: ${metadata.description || 'No description available'}`;
    return {
      sourceId: extractedContent.sourceId,
      sourceType: extractedContent.type,
      sourceUrl: extractedContent.url,
      summary,
      summaryMethod: 'metadata_extraction',
      // Included so this record matches the shape returned for full summaries.
      summaryOptions: options,
      actionableSteps: [{ action: `Study ${metadata.title || 'this content'}`, type: 'general', confidence: 0.5 }],
      specificTechniques: [],
      toolsAndResources: [],
      timeBasedBreakdown: [],
      skillProgression: [],
      commonMistakes: [],
      prerequisites: [],
      measurableOutcomes: [],
      metadata: {
        title: metadata.title || 'Unknown',
        author: metadata.author || null,
        originalLength: 0,
        // Length of the summary actually returned (including fallbacks and separator).
        summaryLength: summary.length,
        compressionRatio: 1,
        granularityScore: 2,
        actionabilityScore: 3,
        relevanceScore: 5,
        created: new Date().toISOString(),
      },
      keyPoints: [metadata.title || 'Unknown content'],
      tags: [extractedContent.type],
      difficulty: 'intermediate',
      estimatedDuration: '30 minutes',
    };
  }

  /**
   * Calculate relevance score for content from its text length, metadata
   * completeness and whether any focus area is mentioned in the text.
   *
   * @param {object} extractedContent - Extracted source record.
   * @param {object} options - Summary options (`focusAreas`).
   * @returns {number} Score clamped to 1..10.
   */
  calculateRelevanceScore(extractedContent, options) {
    let score = 5; // Base score

    // Content quality indicators
    const textContent = this.extractTextContent(extractedContent);
    if (textContent.length > 1000) score += 1;
    if (textContent.length > 5000) score += 1;

    // Metadata completeness
    if (extractedContent.metadata?.title) score += 1;
    if (extractedContent.metadata?.author) score += 0.5;
    if (extractedContent.metadata?.description) score += 0.5;

    // Focus area relevance: each area is matched on its own. Joining them into a
    // single string would require the text to contain that exact concatenation.
    const focusAreas = asArray(options?.focusAreas)
      .filter(area => typeof area === 'string')
      .map(area => area.trim().toLowerCase())
      .filter(area => area.length > 0);
    if (focusAreas.length > 0) {
      const content = textContent.toLowerCase();
      if (focusAreas.some(area => content.includes(area))) {
        score += 2;
      }
    }
    return Math.min(10, Math.max(1, score));
  }
}