LearnMCP Server

Overview Schema Related Servers Score Discussions

LearnMCP
modules

transcript-miner.js•22.3 kB

/**
 * Transcript Miner
 * Hyper-granular extraction from video transcripts for actionable content mining
 */

import { createLearnLogger } from './utils/custom-logger.js';

// Keyword lists used by the contains*Words() checks. Matching is a plain
// case-insensitive substring test, so short entries also hit inside longer
// words (e.g. 'amp' in "example", 'now' in "know"). That is intentional for
// compounds such as "fingerpicking"; treat the flags as coarse hints only.
const ACTION_WORDS = Object.freeze([
  'do', 'make', 'play', 'press', 'hold', 'move', 'place', 'put',
  'take', 'use', 'try', 'practice', 'start', 'begin', 'stop', 'end'
]);
const TECHNIQUE_WORDS = Object.freeze([
  'technique', 'method', 'way', 'approach', 'style',
  'grip', 'position', 'fingering', 'picking', 'strumming'
]);
const INSTRUCTION_WORDS = Object.freeze([
  'step', 'first', 'second', 'next', 'then', 'now',
  'should', 'need to', 'have to', 'must'
]);
const WARNING_WORDS = Object.freeze([
  "don't", 'avoid', 'never', 'mistake', 'error', 'wrong', 'careful', 'watch out'
]);
const EQUIPMENT_WORDS = Object.freeze([
  'guitar', 'pick', 'tuner', 'metronome', 'amp', 'pedal', 'string', 'capo', 'instrument'
]);

/**
 * Return `value` unless it is null or undefined, in which case return `fallback`.
 * Unlike `value || fallback`, legitimate falsy settings such as 0 are kept.
 */
function withDefault(value, fallback) {
  return value === undefined || value === null ? fallback : value;
}

/**
 * Coerce `value` to a finite number, returning `fallback` when that is not possible.
 * Numeric strings (e.g. "12.5") are converted so later arithmetic stays numeric.
 */
function toFiniteNumber(value, fallback) {
  const number = Number(value);
  return Number.isFinite(number) ? number : fallback;
}

/**
 * Count whitespace-separated words; empty or whitespace-only text counts as 0.
 */
function countWords(text) {
  const trimmed = text.trim();
  return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}

/**
 * Case-insensitive substring test of `text` against each entry of `terms`.
 */
function containsAnyTerm(text, terms) {
  const lower = text.toLowerCase();
  return terms.some(term => lower.includes(term));
}

/**
 * Run every global regex in `patterns` over `text` and collect the trimmed
 * first capture group of each match whose length lies strictly between
 * `minLength` and `maxLength`. Results are ordered by pattern, then by match
 * position.
 */
function collectCaptures(text, patterns, minLength, maxLength) {
  const captures = [];

  patterns.forEach(pattern => {
    // Global regexes carry state in lastIndex; always scan from the start.
    pattern.lastIndex = 0;

    let match;
    while ((match = pattern.exec(text)) !== null) {
      if (match[0] === '') {
        // A zero-length match would never advance and would loop forever.
        pattern.lastIndex += 1;
        continue;
      }

      const capture = match[1].trim();
      if (capture.length > minLength && capture.length < maxLength) {
        captures.push(capture);
      }
    }
  });

  return captures;
}

/**
 * Heuristic, regex-driven miner that turns a video transcript into lists of
 * instructions, techniques, equipment mentions, mistakes, exercises and terms.
 */
export class TranscriptMiner {
  /**
   * @param {object} [options]
   * @param {string} [options.miningSensitivity='high'] - low, medium, high, ultra
   * @param {number} [options.actionThreshold=0.6] - Confidence threshold for actions
   * @param {number} [options.timeSegmentSize=30] - Seconds per segment
   * @param {number} [options.contextWindow=3] - Segments before/after used as context
   *
   * Only `contextWindow` is read by the methods of this class; the other
   * three settings are stored but not consulted anywhere in this file.
   */
  constructor(options = {}) {
    this.logger = createLearnLogger('TranscriptMiner');

    // Mining configuration
    this.miningSensitivity = options.miningSensitivity || 'high';
    // 0 is a meaningful value for these two, so only null/undefined fall back.
    this.actionThreshold = withDefault(options.actionThreshold, 0.6);
    this.timeSegmentSize = options.timeSegmentSize || 30;
    this.contextWindow = withDefault(options.contextWindow, 3);
  }

  /**
   * Mine transcript for hyper-granular actionable content.
   *
   * @param {Array<object>|string} transcriptData - Array of `{ start, duration, text, offset }`
   *   items, or a plain-text transcript. Any other value with a `length` yields empty results.
   * @param {object} [videoMetadata] - `title` is logged; `duration` (seconds) is used to
   *   spread artificial timestamps over a plain-text transcript.
   * @returns {Promise<object>} Extraction lists plus `miningMetadata`.
   * @throws Re-throws any error raised during mining after logging it.
   */
  async mineTranscript(transcriptData, videoMetadata = {}) {
    const startedAt = Date.now();

    try {
      this.logger.info('Starting transcript mining', {
        transcriptLength: transcriptData.length,
        sensitivity: this.miningSensitivity,
        videoTitle: videoMetadata.title
      });

      // Parse transcript into structured segments
      const segments = this.parseTranscriptSegments(transcriptData, videoMetadata);

      // Extract different types of actionable content
      const miningResults = {
        // Core extractions
        stepByStepInstructions: this.extractStepByStepInstructions(segments),
        specificTechniques: this.extractSpecificTechniques(segments),
        toolsAndEquipment: this.extractToolsAndEquipment(segments),
        commonMistakes: this.extractCommonMistakes(segments),
        troubleshooting: this.extractTroubleshooting(segments),

        // Advanced extractions
        timeBasedActions: this.extractTimeBasedActions(segments),
        prerequisiteSkills: this.extractPrerequisiteSkills(segments),
        progressionMarkers: this.extractProgressionMarkers(segments),
        practiceExercises: this.extractPracticeExercises(segments),
        assessmentCriteria: this.extractAssessmentCriteria(segments),

        // Context and structure
        conceptualFramework: this.extractConceptualFramework(segments),
        learningObjectives: this.extractLearningObjectives(segments),
        keyTerminology: this.extractKeyTerminology(segments),

        // Metadata
        miningMetadata: {
          totalSegments: segments.length,
          processingTime: 0, // Elapsed milliseconds, filled in below
          confidenceScore: this.calculateOverallConfidence(segments),
          actionabilityScore: 0, // Will be calculated
          granularityLevel: this.assessGranularityLevel(segments),
          processedAt: startedAt // Epoch milliseconds when mining started
        }
      };

      // Calculate actionability score
      miningResults.miningMetadata.actionabilityScore =
        this.calculateActionabilityScore(miningResults);

      // Report a real duration rather than a wall-clock timestamp.
      miningResults.miningMetadata.processingTime = Date.now() - startedAt;

      this.logger.info('Transcript mining completed', {
        stepByStepCount: miningResults.stepByStepInstructions.length,
        techniquesCount: miningResults.specificTechniques.length,
        timeBasedActionsCount: miningResults.timeBasedActions.length,
        confidenceScore: miningResults.miningMetadata.confidenceScore,
        actionabilityScore: miningResults.miningMetadata.actionabilityScore
      });

      return miningResults;
    } catch (error) {
      this.logger.error('Transcript mining failed', {
        error: error.message,
        stack: error.stack
      });
      throw error;
    }
  }

  /**
   * Parse transcript into structured segments with timestamps.
   *
   * Array input is used item by item; string input is split into sentences
   * that are given evenly spaced artificial timestamps. Other input types
   * produce an empty list.
   *
   * @param {Array<object>|string} transcriptData
   * @param {object} [videoMetadata] - `duration` defaults to 600 seconds.
   * @returns {Array<object>} Processed segments with analysis flags.
   */
  parseTranscriptSegments(transcriptData, videoMetadata = {}) {
    let timestampedSegments = [];

    if (Array.isArray(transcriptData)) {
      // Timestamped transcript items. Coerce timing fields so that string
      // values cannot turn the endTime addition into string concatenation.
      timestampedSegments = transcriptData.map(item => ({
        start: toFiniteNumber(item.start, 0),
        duration: toFiniteNumber(item.duration, 0),
        text: item.text || '',
        offset: item.offset || 0
      }));
    } else if (typeof transcriptData === 'string') {
      // Plain text transcript: create artificial, evenly spaced segments
      const sentences = this.splitIntoSentences(transcriptData);
      const avgDuration = (videoMetadata.duration || 600) / sentences.length;

      timestampedSegments = sentences.map((sentence, index) => ({
        start: index * avgDuration,
        duration: avgDuration,
        text: sentence,
        offset: index
      }));
    }

    // Process each segment
    return timestampedSegments.map((segment, index) => ({
      id: `segment_${index}`,
      startTime: segment.start,
      endTime: segment.start + (segment.duration || 0),
      text: segment.text.trim(),
      sentences: this.splitIntoSentences(segment.text),

      // Analysis flags
      containsAction: this.containsActionWords(segment.text),
      containsTechnique: this.containsTechniqueWords(segment.text),
      containsInstruction: this.containsInstructionWords(segment.text),
      containsWarning: this.containsWarningWords(segment.text),
      containsEquipment: this.containsEquipmentWords(segment.text),

      // Context (raw neighbouring segments, not the processed ones)
      previousSegment: index > 0 ? timestampedSegments[index - 1] : null,
      nextSegment: index < timestampedSegments.length - 1 ? timestampedSegments[index + 1] : null,

      // Metadata
      wordCount: countWords(segment.text),
      confidence: this.calculateSegmentConfidence(segment.text)
    }));
  }

  /**
   * Extract step-by-step instructions from transcript.
   * Only segments flagged as instruction or action are scanned.
   *
   * @param {Array<object>} segments - Output of parseTranscriptSegments().
   * @returns {Array<object>} Up to 15 unique instructions, highest confidence first.
   */
  extractStepByStepInstructions(segments) {
    const instructionPatterns = [
      /(?:step|stage|phase)\s*(?:one|two|three|four|five|\d+)[:\-\s]([^.!?]+)/gi,
      /(?:first|second|third|fourth|fifth|next|then|now|finally)[,\s]+([^.!?]+)/gi,
      /(?:start by|begin with|make sure to|remember to|don't forget to)\s+([^.!?]+)/gi,
      /(?:you need to|you should|you must|you have to)\s+([^.!?]+)/gi,
      /(?:let's|let me|i'll|we'll)\s+([^.!?]+)/gi
    ];
    const instructions = [];

    segments.forEach(segment => {
      if (!segment.containsInstruction && !segment.containsAction) {
        return;
      }

      const captures = collectCaptures(segment.text, instructionPatterns, 10, 200);
      if (captures.length === 0) {
        return;
      }

      // Identical for every capture of this segment, so compute once.
      const timestamp = this.formatTimestamp(segment.startTime);
      const context = this.getSegmentContext(segment, segments);

      captures.forEach(instruction => {
        instructions.push({
          instruction,
          timestamp,
          startTime: segment.startTime,
          endTime: segment.endTime,
          context,
          type: this.classifyInstructionType(instruction),
          confidence: segment.confidence * 0.9,
          segmentId: segment.id
        });
      });
    });

    return this.deduplicateAndRank(instructions, 'instruction');
  }

  /**
   * Extract specific techniques mentioned in transcript.
   * Only segments flagged as containing technique words are scanned.
   *
   * @param {Array<object>} segments
   * @returns {Array<object>} Up to 15 unique techniques, highest confidence first.
   */
  extractSpecificTechniques(segments) {
    const techniquePatterns = [
      /(?:technique|method|approach|way to)\s+([^.!?]+)/gi,
      /(?:this is called|known as|referred to as)\s+([^.!?]+)/gi,
      /(?:use the|apply the|employ the)\s+([^.!?]+(?:technique|method|approach))/gi,
      /([a-z\s]+(?:grip|hold|position|stance|posture|fingering|picking))/gi,
      /(?:the\s+)?([a-z\s]+(?:scale|chord|progression|pattern))/gi
    ];
    const techniques = [];

    segments.forEach(segment => {
      if (!segment.containsTechnique) {
        return;
      }

      const captures = collectCaptures(segment.text, techniquePatterns, 3, 100);
      if (captures.length === 0) {
        return;
      }

      const timestamp = this.formatTimestamp(segment.startTime);
      const context = this.getSegmentContext(segment, segments);

      captures.forEach(technique => {
        techniques.push({
          technique,
          timestamp,
          startTime: segment.startTime,
          endTime: segment.endTime,
          context,
          category: this.categorizeTechnique(technique),
          confidence: segment.confidence * 0.8,
          segmentId: segment.id
        });
      });
    });

    return this.deduplicateAndRank(techniques, 'technique');
  }

  /**
   * Extract tools and equipment mentions.
   * Only segments flagged as containing equipment words are scanned.
   *
   * @param {Array<object>} segments
   * @returns {Array<object>} Up to 15 unique tools, highest confidence first.
   */
  extractToolsAndEquipment(segments) {
    const equipmentPatterns = [
      /(?:use|need|require|get|buy|have)\s+(?:a|an|the|some)?\s*([^.!?]+(?:guitar|pick|tuner|metronome|amp|pedal|string|capo))/gi,
      /(?:with|using|on)\s+(?:a|an|the|your)?\s*([^.!?]+(?:guitar|instrument|device|tool))/gi,
      /([a-z\s]+(?:brand|model|type))\s+(?:guitar|amp|pedal)/gi
    ];
    const equipment = [];

    segments.forEach(segment => {
      if (!segment.containsEquipment) {
        return;
      }

      const captures = collectCaptures(segment.text, equipmentPatterns, 3, 80);
      if (captures.length === 0) {
        return;
      }

      const timestamp = this.formatTimestamp(segment.startTime);
      const context = this.getSegmentContext(segment, segments);

      captures.forEach(tool => {
        equipment.push({
          tool,
          timestamp,
          startTime: segment.startTime,
          endTime: segment.endTime,
          context,
          category: this.categorizeEquipment(tool),
          confidence: segment.confidence * 0.85,
          segmentId: segment.id
        });
      });
    });

    return this.deduplicateAndRank(equipment, 'tool');
  }

  /**
   * Extract common mistakes and warnings.
   * Only segments flagged as containing warning words are scanned.
   *
   * @param {Array<object>} segments
   * @returns {Array<object>} Up to 15 unique mistakes, highest confidence first.
   */
  extractCommonMistakes(segments) {
    const mistakePatterns = [
      /(?:don't|avoid|never|mistake|error|wrong|incorrect)\s+([^.!?]+)/gi,
      /(?:common|typical|frequent)\s+(?:mistake|error|problem)[s]?\s*[:\-]?\s*([^.!?]+)/gi,
      /(?:be careful|watch out|make sure)\s+(?:not\s+to\s+)?([^.!?]+)/gi,
      /(?:this is wrong|that's incorrect|not like this)\s*[:\-]?\s*([^.!?]*)/gi
    ];
    const mistakes = [];

    segments.forEach(segment => {
      if (!segment.containsWarning) {
        return;
      }

      const captures = collectCaptures(segment.text, mistakePatterns, 5, 150);
      if (captures.length === 0) {
        return;
      }

      const timestamp = this.formatTimestamp(segment.startTime);
      const context = this.getSegmentContext(segment, segments);

      captures.forEach(mistake => {
        mistakes.push({
          mistake,
          timestamp,
          startTime: segment.startTime,
          endTime: segment.endTime,
          context,
          severity: this.assessMistakeSeverity(mistake),
          confidence: segment.confidence * 0.9,
          segmentId: segment.id
        });
      });
    });

    return this.deduplicateAndRank(mistakes, 'mistake');
  }

  /**
   * Extract time-based actions with precise timestamps.
   * Only segments flagged as action or instruction are scanned.
   *
   * @param {Array<object>} segments
   * @returns {Array<object>} The first 20 actions in transcript order
   *   (no ranking or de-duplication is applied).
   */
  extractTimeBasedActions(segments) {
    const timeBasedActions = [];

    segments.forEach(segment => {
      if (!segment.containsAction && !segment.containsInstruction) {
        return;
      }

      // Extract the main actions from this segment
      const actions = this.extractActionsFromText(segment.text);
      if (actions.length === 0) {
        return;
      }

      const timestamp = this.formatTimestamp(segment.startTime);
      const context = this.getSegmentContext(segment, segments);

      actions.forEach(action => {
        timeBasedActions.push({
          action,
          timestamp,
          startTime: segment.startTime,
          endTime: segment.endTime,
          duration: segment.endTime - segment.startTime,
          context,
          actionType: this.classifyActionType(action),
          confidence: segment.confidence,
          segmentId: segment.id
        });
      });
    });

    return timeBasedActions.slice(0, 20); // Limit result size
  }

  /**
   * Extract practice exercises from transcript.
   * Every segment is scanned, regardless of its analysis flags.
   *
   * @param {Array<object>} segments
   * @returns {Array<object>} Up to 15 unique exercises, highest confidence first.
   */
  extractPracticeExercises(segments) {
    const exercisePatterns = [
      /(?:practice|exercise|drill|work on)\s+([^.!?]+)/gi,
      /(?:try|attempt|do)\s+(?:this|these)\s+([^.!?]+)/gi,
      /(?:repeat|do again|practice)\s+([^.!?]+)/gi,
      /(?:exercise|drill)\s*\d*[:\-]?\s*([^.!?]+)/gi
    ];
    const exercises = [];

    segments.forEach(segment => {
      collectCaptures(segment.text, exercisePatterns, 10, 150).forEach(exercise => {
        exercises.push({
          exercise,
          timestamp: this.formatTimestamp(segment.startTime),
          startTime: segment.startTime,
          endTime: segment.endTime,
          difficulty: this.assessExerciseDifficulty(exercise),
          estimatedTime: this.estimateExerciseTime(exercise),
          confidence: segment.confidence * 0.8,
          segmentId: segment.id
        });
      });
    });

    return this.deduplicateAndRank(exercises, 'exercise');
  }

  /**
   * Extract key terminology and definitions.
   * Every segment is scanned, regardless of its analysis flags.
   *
   * @param {Array<object>} segments
   * @returns {Array<object>} Up to 15 unique terms, highest confidence first.
   */
  extractKeyTerminology(segments) {
    const termPatterns = [
      /(?:this is called|known as|referred to as|term|definition)\s+([^.!?]+)/gi,
      // NOTE(review): the `i` flag makes [A-Z] match lowercase letters too, so
      // this is not restricted to capitalised terms - confirm that is intended.
      /([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:is|means|refers to)/gi,
      /(?:the\s+)?([a-z]+(?:\s+[a-z]+)*)\s+(?:technique|method|approach|scale|chord)/gi
    ];
    const terminology = [];

    segments.forEach(segment => {
      collectCaptures(segment.text, termPatterns, 2, 50).forEach(term => {
        terminology.push({
          term,
          timestamp: this.formatTimestamp(segment.startTime),
          context: segment.text,
          definition: this.extractDefinition(term, segment.text),
          confidence: segment.confidence * 0.7,
          segmentId: segment.id
        });
      });
    });

    return this.deduplicateAndRank(terminology, 'term');
  }

  // Helper methods for content analysis

  /**
   * Split text into sentences on runs of '.', '!' or '?'.
   * The punctuation is dropped and blank pieces are discarded; the
   * remaining pieces are not trimmed.
   *
   * @param {string} text
   * @returns {string[]}
   */
  splitIntoSentences(text) {
    return text.split(/[.!?]+/).filter(s => s.trim().length > 0);
  }

  /**
   * Check if text contains action words (case-insensitive substring match).
   * @param {string} text
   * @returns {boolean}
   */
  containsActionWords(text) {
    return containsAnyTerm(text, ACTION_WORDS);
  }

  /**
   * Check if text contains technique words (case-insensitive substring match).
   * @param {string} text
   * @returns {boolean}
   */
  containsTechniqueWords(text) {
    return containsAnyTerm(text, TECHNIQUE_WORDS);
  }

  /**
   * Check if text contains instruction words (case-insensitive substring match).
   * @param {string} text
   * @returns {boolean}
   */
  containsInstructionWords(text) {
    return containsAnyTerm(text, INSTRUCTION_WORDS);
  }

  /**
   * Check if text contains warning words (case-insensitive substring match).
   * @param {string} text
   * @returns {boolean}
   */
  containsWarningWords(text) {
    return containsAnyTerm(text, WARNING_WORDS);
  }

  /**
   * Check if text contains equipment words (case-insensitive substring match).
   * @param {string} text
   * @returns {boolean}
   */
  containsEquipmentWords(text) {
    return containsAnyTerm(text, EQUIPMENT_WORDS);
  }

  /**
   * Calculate segment confidence based on content quality.
   * Starts at 0.5, adds 0.2 for 6-49 words and 0.1 each for action,
   * instruction and technique words; capped at 1.0.
   *
   * @param {string} text
   * @returns {number} Confidence in the range [0.5, 1.0].
   */
  calculateSegmentConfidence(text) {
    let confidence = 0.5; // Base confidence

    // Length factor
    const wordCount = countWords(text);
    if (wordCount > 5 && wordCount < 50) confidence += 0.2;

    // Content quality indicators
    if (this.containsActionWords(text)) confidence += 0.1;
    if (this.containsInstructionWords(text)) confidence += 0.1;
    if (this.containsTechniqueWords(text)) confidence += 0.1;

    return Math.min(1.0, confidence);
  }

  /**
   * Get context around a segment: up to `contextWindow` segments before and
   * after it, with the segment's own text wrapped in `**`.
   *
   * @param {object} segment - Segment to locate (matched by `id`).
   * @param {Array<object>} allSegments
   * @returns {string} Space-joined context text.
   */
  getSegmentContext(segment, allSegments) {
    const highlighted = `**${segment.text}**`;
    const index = allSegments.findIndex(s => s.id === segment.id);

    // Unknown segment: there are no meaningful neighbours to report.
    if (index === -1) {
      return highlighted;
    }

    const context = [];

    // Previous context
    for (let i = Math.max(0, index - this.contextWindow); i < index; i++) {
      context.push(allSegments[i].text);
    }

    // Current segment
    context.push(highlighted);

    // Next context
    const end = Math.min(allSegments.length, index + this.contextWindow + 1);
    for (let i = index + 1; i < end; i++) {
      context.push(allSegments[i].text);
    }

    return context.join(' ');
  }

  /**
   * Format timestamp for display as `m:ss` (minutes are not wrapped into hours).
   *
   * @param {number} seconds
   * @returns {string}
   */
  formatTimestamp(seconds) {
    const mins = Math.floor(seconds / 60);
    const secs = Math.floor(seconds % 60);
    return `${mins}:${secs.toString().padStart(2, '0')}`;
  }

  /**
   * Classify instruction type by keyword; the first matching rule wins.
   *
   * @param {string} instruction
   * @returns {string} 'practice', 'positioning', 'technique', 'setup' or 'general'.
   */
  classifyInstructionType(instruction) {
    const lower = instruction.toLowerCase();

    if (lower.includes('practice') || lower.includes('exercise')) return 'practice';
    if (lower.includes('position') || lower.includes('hold')) return 'positioning';
    if (lower.includes('play') || lower.includes('strum')) return 'technique';
    if (lower.includes('tune') || lower.includes('adjust')) return 'setup';

    return 'general';
  }

  /**
   * Categorize technique by keyword; the first matching rule wins.
   *
   * @param {string} technique
   * @returns {string} 'fingering', 'picking', 'harmony', 'melody' or 'general'.
   */
  categorizeTechnique(technique) {
    const lower = technique.toLowerCase();

    if (lower.includes('finger') || lower.includes('fret')) return 'fingering';
    if (lower.includes('pick') || lower.includes('strum')) return 'picking';
    if (lower.includes('chord') || lower.includes('progression')) return 'harmony';
    if (lower.includes('scale') || lower.includes('note')) return 'melody';

    return 'general';
  }

  /**
   * Categorize equipment by keyword; the first matching rule wins.
   *
   * @param {string} equipment
   * @returns {string} 'instrument', 'accessory', 'amplification', 'tool' or 'general'.
   */
  categorizeEquipment(equipment) {
    const lower = equipment.toLowerCase();

    if (lower.includes('guitar')) return 'instrument';
    if (lower.includes('pick') || lower.includes('capo')) return 'accessory';
    if (lower.includes('amp') || lower.includes('pedal')) return 'amplification';
    if (lower.includes('tuner') || lower.includes('metronome')) return 'tool';

    return 'general';
  }

  /**
   * Extract actions from text.
   *
   * @param {string} text
   * @returns {string[]} Action phrases between 6 and 99 characters long.
   */
  extractActionsFromText(text) {
    const actionPatterns = [
      /(?:you\s+)?(?:should|need to|have to|must|can)\s+([^.!?]+)/gi,
      /(?:let's|let me)\s+([^.!?]+)/gi,
      /(?:try to|attempt to)\s+([^.!?]+)/gi
    ];

    return collectCaptures(text, actionPatterns, 5, 100);
  }

  /**
   * Classify action type by keyword; the first matching rule wins.
   *
   * @param {string} action
   * @returns {string} 'practice', 'observation', 'performance', 'setup' or 'instruction'.
   */
  classifyActionType(action) {
    const lower = action.toLowerCase();

    if (lower.includes('practice') || lower.includes('repeat')) return 'practice';
    if (lower.includes('listen') || lower.includes('watch')) return 'observation';
    if (lower.includes('play') || lower.includes('perform')) return 'performance';
    if (lower.includes('adjust') || lower.includes('tune')) return 'setup';

    return 'instruction';
  }

  /**
   * Deduplicate and rank results.
   * Items are duplicates when their `keyField` strings are equal after
   * lower-casing and trimming; the first occurrence is kept.
   *
   * @param {Array<object>} items
   * @param {string} keyField - Name of the string property to compare.
   * @returns {Array<object>} At most 15 items, sorted by descending confidence.
   */
  deduplicateAndRank(items, keyField) {
    const unique = [];
    const seen = new Set();

    items.forEach(item => {
      const key = item[keyField].toLowerCase().trim();
      if (!seen.has(key)) {
        seen.add(key);
        unique.push(item);
      }
    });

    // Sort by confidence
    return unique
      .sort((a, b) => (b.confidence || 0) - (a.confidence || 0))
      .slice(0, 15); // Limit results
  }

  /**
   * Calculate overall confidence score: the mean segment confidence rounded
   * to one decimal place, or 0 when there are no segments.
   *
   * @param {Array<object>} segments
   * @returns {number}
   */
  calculateOverallConfidence(segments) {
    if (segments.length === 0) return 0;

    const avgConfidence =
      segments.reduce((sum, seg) => sum + seg.confidence, 0) / segments.length;
    return Math.round(avgConfidence * 10) / 10;
  }

  /**
   * Calculate actionability score as a weighted count of extracted items,
   * clamped to the range [0, 10].
   *
   * @param {object} miningResults - Must contain the five lists read below.
   * @returns {number}
   */
  calculateActionabilityScore(miningResults) {
    let score = 0;

    score += miningResults.stepByStepInstructions.length * 2;
    score += miningResults.timeBasedActions.length * 1.5;
    score += miningResults.practiceExercises.length * 1.5;
    score += miningResults.specificTechniques.length * 1;
    score += miningResults.toolsAndEquipment.length * 0.5;

    return Math.min(10, Math.max(0, score));
  }

  /**
   * Assess granularity level from the share of action and instruction
   * segments. A segment with both flags counts twice, so the ratio can
   * exceed 1.
   *
   * @param {Array<object>} segments
   * @returns {string} 'ultra', 'high', 'medium' or 'low'.
   */
  assessGranularityLevel(segments) {
    // Avoid 0 / 0; an empty transcript has the lowest granularity.
    if (segments.length === 0) return 'low';

    const totalActions = segments.filter(s => s.containsAction).length;
    const totalInstructions = segments.filter(s => s.containsInstruction).length;
    const ratio = (totalActions + totalInstructions) / segments.length;

    if (ratio > 0.7) return 'ultra';
    if (ratio > 0.5) return 'high';
    if (ratio > 0.3) return 'medium';
    return 'low';
  }

  // Placeholder methods for future enhancement. Each ignores its arguments
  // and returns a fixed value.
  extractTroubleshooting(segments) { return []; }
  extractPrerequisiteSkills(segments) { return []; }
  extractProgressionMarkers(segments) { return []; }
  extractAssessmentCriteria(segments) { return []; }
  extractConceptualFramework(segments) { return []; }
  extractLearningObjectives(segments) { return []; }
  assessMistakeSeverity(mistake) { return 'medium'; }
  assessExerciseDifficulty(exercise) { return 'medium'; }
  estimateExerciseTime(exercise) { return '5-10 minutes'; }
  extractDefinition(term, context) { return 'Definition not available'; }
}

Loading blob content...

Latest Blog Posts

Don't Use Large Strings as Cache Keys
By punkpeye on January 11, 2026.
markdown
node-js
cache
What are Claude Skills?
By punkpeye on January 10, 2026.
mcp
skills
How to Test MCP Streamable HTTP Endpoints Using cURL
By punkpeye on January 2, 2026.
tutorial
bash

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/BretMeraki/LearnMCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

transcript-miner.js•22.3 kB