// conversation-detector.ts
/**
* Advanced conversation detection and context analysis for SRT files
* Implements multiple algorithms for robust conversation boundary detection
*/
import { SRTSubtitle, SRTChunk, ConversationContext } from '../types/srt.js';
import { extractTextContent } from '../utils/style-tags.js';
/**
* Advanced conversation detection with multiple algorithms
* Returns only the first chunk with instructions to call subsequent chunks
*/
export function detectConversations(subtitles: SRTSubtitle[]): SRTChunk[] {
// Run the full four-pass pipeline, then return only the first chunk,
// annotated with instructions for retrieving the remaining chunks
const allChunks = detectAllConversations(subtitles);
return [createFirstChunkWithInstructions(allChunks)];
}
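/*
 * Usage sketch: `parseSRT` and `srtText` below are hypothetical stand-ins for
 * whatever produces the SRTSubtitle[] input; only the detector calls are real.
 *
 *   const subtitles = parseSRT(srtText);
 *   const [firstChunk] = detectConversations(subtitles);
 *   console.log(firstChunk.context?.instructionText);
 *   // e.g. "To get chunk 2 of 5, call: getNextChunk(2)"
 */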
/**
* Create first chunk with instructions to call subsequent chunks
*/
function createFirstChunkWithInstructions(allChunks: SRTChunk[]): SRTChunk {
if (allChunks.length === 0) {
throw new Error('No chunks detected');
}
const firstChunk = allChunks[0];
// Create a modified first chunk with instructions
return {
id: firstChunk.id,
startIndex: firstChunk.startIndex,
endIndex: firstChunk.endIndex,
subtitles: firstChunk.subtitles,
context: {
...firstChunk.context,
conversationId: firstChunk.context?.conversationId || 'chunk-1',
totalChunks: allChunks.length,
currentChunk: 1,
hasMoreChunks: allChunks.length > 1,
instructionText: `To get chunk 2 of ${allChunks.length}, call: getNextChunk(2)`
}
};
}
/**
 * Get a specific chunk by 1-based index (for subsequent chunk retrieval).
 * Note: this re-runs the full detection pipeline on every call.
 */
export function getNextChunk(chunkIndex: number, subtitles: SRTSubtitle[]): SRTChunk | null {
const allChunks = detectAllConversations(subtitles);
if (chunkIndex < 1 || chunkIndex > allChunks.length) {
return null;
}
const chunk = allChunks[chunkIndex - 1];
// Annotate the chunk with its position and an instruction for the next call
const hasMore = chunkIndex < allChunks.length;
chunk.context = {
...chunk.context,
conversationId: chunk.context?.conversationId || `chunk-${chunkIndex}`,
totalChunks: allChunks.length,
currentChunk: chunkIndex,
hasMoreChunks: hasMore,
instructionText: hasMore
? `To get chunk ${chunkIndex + 1} of ${allChunks.length}, call: getNextChunk(${chunkIndex + 1})`
: `This is the last chunk (${chunkIndex} of ${allChunks.length})`
};
return chunk;
}
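/*
 * Sketch of sequential retrieval using the context flags set above. Note that
 * each call re-runs detection, so callers may prefer to cache the parsed
 * subtitles. `process` is a hypothetical consumer.
 *
 *   let index = 1;
 *   let chunk = getNextChunk(index, subtitles);
 *   while (chunk) {
 *     process(chunk);
 *     chunk = chunk.context?.hasMoreChunks ? getNextChunk(++index, subtitles) : null;
 *   }
 */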
/**
* Internal function to detect all conversations (used by getNextChunk)
*/
function detectAllConversations(subtitles: SRTSubtitle[]): SRTChunk[] {
// First pass: Basic boundary detection
const initialChunks = detectBasicBoundaries(subtitles);
// Second pass: Semantic analysis and topic modeling
const semanticChunks = applySemanticAnalysis(initialChunks);
// Third pass: Speaker diarization and context-aware merging
const finalChunks = applySpeakerDiarization(semanticChunks);
// Fourth pass: Optimize chunk sizes and merge similar contexts
return optimizeChunkSizes(finalChunks);
}
/**
* Basic boundary detection using timing and speaker changes
*/
function detectBasicBoundaries(subtitles: SRTSubtitle[]): SRTChunk[] {
// Delegates to the threshold variant with the default 0.7 boundary score
return detectBasicBoundariesWithThreshold(subtitles, 0.7);
}
/**
* Calculate comprehensive boundary score using multiple factors
*/
function calculateBoundaryScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined,
currentContext?: ConversationContext
): number {
let score = 0;
// Speaker change detection (weight: 0.4)
const speakerScore = calculateSpeakerChangeScore(subtitle, nextSubtitle);
score += speakerScore * 0.4;
// Timing gap analysis (weight: 0.3)
const timingScore = calculateTimingScore(subtitle, nextSubtitle);
score += timingScore * 0.3;
// Semantic similarity analysis (weight: 0.2)
const semanticScore = calculateSemanticScore(subtitle, nextSubtitle, currentContext);
score += semanticScore * 0.2;
// Topic change detection (weight: 0.1)
const topicScore = calculateTopicChangeScore(subtitle, currentContext);
score += topicScore * 0.1;
return Math.min(score, 1.0);
}
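/*
 * Worked example of the weighting: a definite speaker change (1.0), a 6 s gap
 * (0.8), fairly dissimilar text (0.7), and a mild topic shift (0.1) combine as
 * 1.0*0.4 + 0.8*0.3 + 0.7*0.2 + 0.1*0.1 = 0.79, clearing the default 0.7
 * boundary threshold.
 */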
/**
* Calculate speaker change score
*/
function calculateSpeakerChangeScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined
): number {
if (!nextSubtitle) return 0;
const currentSpeaker = detectSpeaker(subtitle);
const nextSpeaker = detectSpeaker(nextSubtitle);
if (!currentSpeaker || !nextSpeaker) return 0;
// Exact speaker change
if (currentSpeaker !== nextSpeaker) {
return 1.0;
}
// Speaker confidence analysis
const currentConfidence = calculateSpeakerConfidence(subtitle);
const nextConfidence = calculateSpeakerConfidence(nextSubtitle);
// If confidence drops significantly, it might indicate a speaker change
if (nextConfidence < currentConfidence * 0.5) {
return 0.6;
}
return 0;
}
/**
* Calculate timing-based boundary score
*/
function calculateTimingScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined
): number {
if (!nextSubtitle) return 0;
const timeGap = calculateTimeGap(subtitle, nextSubtitle);
// Normalize gap to 0-1 score
if (timeGap > 10000) return 1.0; // 10+ seconds
if (timeGap > 5000) return 0.8; // 5-10 seconds
if (timeGap > 2000) return 0.5; // 2-5 seconds
if (timeGap > 1000) return 0.2; // 1-2 seconds
return 0;
}
/**
* Calculate semantic similarity score
*/
function calculateSemanticScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined,
_currentContext?: ConversationContext // reserved for future use; not consulted by the current scoring
): number {
if (!nextSubtitle) return 0;
const currentText = extractTextContent(subtitle.text);
const nextText = extractTextContent(nextSubtitle.text);
// Calculate semantic similarity using multiple methods
const cosineSimilarity = calculateCosineSimilarity(currentText, nextText);
const jaccardSimilarity = calculateJaccardSimilarity(currentText, nextText);
const levenshteinSimilarity = calculateLevenshteinSimilarity(currentText, nextText);
// Weighted combination
const semanticScore = (cosineSimilarity * 0.5) + (jaccardSimilarity * 0.3) + (levenshteinSimilarity * 0.2);
// Lower similarity indicates higher boundary probability
return 1 - semanticScore;
}
/**
* Calculate topic change score
*/
function calculateTopicChangeScore(
subtitle: SRTSubtitle,
currentContext?: ConversationContext
): number {
if (!currentContext) return 0;
const currentKeywords = extractContextKeywords(subtitle.text);
const previousKeywords = currentContext.previousContext || '';
if (!previousKeywords) return 0;
// Enhanced topic change detection
const keywordOverlap = calculateKeywordOverlap(currentKeywords, previousKeywords);
const sentimentChange = calculateSentimentChange(subtitle.text, currentContext);
// Combine keyword and sentiment analysis
return (1 - keywordOverlap) * 0.7 + sentimentChange * 0.3;
}
/**
 * Speaker label patterns with associated detection confidence,
 * shared by detectSpeaker and calculateSpeakerConfidence
 */
const SPEAKER_PATTERNS = [
{ pattern: /^([A-Z][a-z]+):\s*(.+)$/, confidence: 1.0 }, // "Speaker: text"
{ pattern: /^([A-Z][A-Z\s]+):\s*(.+)$/, confidence: 0.9 }, // "SPEAKER NAME: text"
{ pattern: /^([A-Z][a-z]+)\s*-\s*(.+)$/, confidence: 0.8 }, // "Speaker - text"
{ pattern: /^([A-Z][a-z]+)\s*:\s*(.+)$/, confidence: 0.7 }, // "Speaker: text" (loose)
{ pattern: /^([A-Z][a-z]+\s+[A-Z][a-z]+):\s*(.+)$/, confidence: 0.9 }, // "First Last: text"
{ pattern: /^([A-Z][a-z]+)\s*\(([^)]+)\):\s*(.+)$/, confidence: 0.6 }, // "Speaker (role): text"
];
/**
 * Enhanced speaker detection with confidence scoring
 */
function detectSpeaker(subtitle: SRTSubtitle): string | null {
if (!subtitle.text) return null;
const text = extractTextContent(subtitle.text);
let bestMatch = null;
let bestConfidence = 0;
for (const { pattern, confidence } of SPEAKER_PATTERNS) {
const match = text.match(pattern);
if (match && confidence > bestConfidence) {
bestMatch = match[1].trim();
bestConfidence = confidence;
}
}
// If no explicit speaker pattern found, try to detect from content
if (!bestMatch) {
// Look for name mentions in first person context
const namePatterns = [
/my name is ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/i am ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/i'm ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/this is ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/call me ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
];
for (const pattern of namePatterns) {
const match = text.match(pattern);
if (match) {
return match[1].trim();
}
}
// Look for first-person indicators to suggest a narrator/speaker
const firstPersonPatterns = [
/^i\s+/i, // "I am", "I have", etc.
/^my\s+/i, // "My name", "My life", etc.
/^i'm\s+/i, // "I'm doing", etc.
];
for (const pattern of firstPersonPatterns) {
if (pattern.test(text)) {
return 'Narrator'; // Generic first-person speaker
}
}
}
return bestMatch;
}
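/*
 * Representative inputs and outputs (subtitle objects abbreviated to their
 * text; a real SRTSubtitle also carries index and timing):
 *
 *   'Alice: hello there'   -> 'Alice'    (explicit label, confidence 1.0)
 *   'my name is Bob'       -> 'Bob'      (name-mention fallback)
 *   'I think we should go' -> 'Narrator' (first-person fallback)
 *   'hello there'          -> null
 */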
/**
* Calculate speaker detection confidence
*/
function calculateSpeakerConfidence(subtitle: SRTSubtitle): number {
const text = extractTextContent(subtitle.text);
for (const { pattern, confidence } of SPEAKER_PATTERNS) {
if (text.match(pattern)) {
return confidence;
}
}
return 0;
}
/**
* Calculate time gap between subtitles
*/
function calculateTimeGap(current: SRTSubtitle, next: SRTSubtitle): number {
const currentEndMs = (current.endTime.hours * 3600 + current.endTime.minutes * 60 + current.endTime.seconds) * 1000 + current.endTime.milliseconds;
const nextStartMs = (next.startTime.hours * 3600 + next.startTime.minutes * 60 + next.startTime.seconds) * 1000 + next.startTime.milliseconds;
return nextStartMs - currentEndMs;
}
/**
* Calculate cosine similarity between two texts
*/
function calculateCosineSimilarity(text1: string, text2: string): number {
const words1 = text1.toLowerCase().split(/\s+/).filter(word => word.length > 2);
const words2 = text2.toLowerCase().split(/\s+/).filter(word => word.length > 2);
const allWords = [...new Set([...words1, ...words2])];
const vector1 = allWords.map(word => words1.filter(w => w === word).length);
const vector2 = allWords.map(word => words2.filter(w => w === word).length);
const dotProduct = vector1.reduce((sum, val, i) => sum + val * vector2[i], 0);
const magnitude1 = Math.sqrt(vector1.reduce((sum, val) => sum + val * val, 0));
const magnitude2 = Math.sqrt(vector2.reduce((sum, val) => sum + val * val, 0));
if (magnitude1 === 0 || magnitude2 === 0) return 0;
return dotProduct / (magnitude1 * magnitude2);
}
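/*
 * Worked example: "the quick brown fox" vs "the quick red fox". After the
 * length-2 filter the vocabulary is [the, quick, brown, fox, red], the count
 * vectors are [1,1,1,1,0] and [1,1,0,1,1], and cosine = 3 / (2 * 2) = 0.75.
 */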
/**
* Calculate Jaccard similarity between two texts
*/
function calculateJaccardSimilarity(text1: string, text2: string): number {
const words1 = new Set(text1.toLowerCase().split(/\s+/).filter(word => word.length > 2));
const words2 = new Set(text2.toLowerCase().split(/\s+/).filter(word => word.length > 2));
const intersection = new Set([...words1].filter(word => words2.has(word)));
const union = new Set([...words1, ...words2]);
// Guard against division by zero when both texts are empty after filtering
if (union.size === 0) return 1;
return intersection.size / union.size;
}
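/*
 * Worked example: for the same pair of texts the intersection is
 * {the, quick, fox} and the union is {the, quick, brown, fox, red},
 * so Jaccard = 3/5 = 0.6.
 */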
/**
* Calculate Levenshtein similarity between two texts
*/
function calculateLevenshteinSimilarity(text1: string, text2: string): number {
const distance = levenshteinDistance(text1.toLowerCase(), text2.toLowerCase());
const maxLength = Math.max(text1.length, text2.length);
if (maxLength === 0) return 1;
return 1 - (distance / maxLength);
}
/**
* Calculate Levenshtein distance between two strings
*/
function levenshteinDistance(str1: string, str2: string): number {
const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
for (let i = 0; i <= str1.length; i++) matrix[0][i] = i;
for (let j = 0; j <= str2.length; j++) matrix[j][0] = j;
for (let j = 1; j <= str2.length; j++) {
for (let i = 1; i <= str1.length; i++) {
const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
matrix[j][i] = Math.min(
matrix[j][i - 1] + 1,
matrix[j - 1][i] + 1,
matrix[j - 1][i - 1] + indicator
);
}
}
return matrix[str2.length][str1.length];
}
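/*
 * Worked example: levenshteinDistance('kitten', 'sitting') === 3
 * (substitute k->s, substitute e->i, insert g), so
 * calculateLevenshteinSimilarity('kitten', 'sitting') = 1 - 3/7 ≈ 0.571.
 */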
/**
* Calculate keyword overlap between two keyword strings
*/
function calculateKeywordOverlap(keywords1: string, keywords2: string): number {
const words1 = new Set(keywords1.toLowerCase().split(/\s+/));
const words2 = new Set(keywords2.toLowerCase().split(/\s+/));
const intersection = new Set([...words1].filter(word => words2.has(word)));
const union = new Set([...words1, ...words2]);
return intersection.size / union.size;
}
/**
* Calculate sentiment change score
*/
function calculateSentimentChange(text: string, context: ConversationContext): number {
const currentSentiment = analyzeSentiment(text);
const previousSentiment = context.previousContext ? analyzeSentiment(context.previousContext) : 0;
return Math.abs(currentSentiment - previousSentiment);
}
/**
* Simple sentiment analysis
*/
function analyzeSentiment(text: string): number {
const positiveWords = ['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'love', 'like', 'happy', 'joy'];
const negativeWords = ['bad', 'terrible', 'awful', 'hate', 'dislike', 'sad', 'angry', 'frustrated', 'disappointed', 'horrible'];
const words = text.toLowerCase().split(/\s+/);
let score = 0;
for (const word of words) {
if (positiveWords.includes(word)) score += 1;
if (negativeWords.includes(word)) score -= 1;
}
return Math.max(-1, Math.min(1, score / words.length));
}
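/*
 * Worked example: "I love this great movie" scores +1 for "love" and +1 for
 * "great" over 5 words, giving 2/5 = 0.4; "I hate this awful movie" yields
 * -2/5 = -0.4. Results are clamped to [-1, 1].
 */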
/**
* Apply semantic analysis to chunks
*/
function applySemanticAnalysis(chunks: SRTChunk[]): SRTChunk[] {
return chunks.map(chunk => {
// Analyze semantic coherence within chunk
const coherenceScore = calculateChunkCoherence(chunk);
// If coherence is low, try to split the chunk
if (coherenceScore < 0.3 && chunk.subtitles.length > 3) {
return splitChunkBySemantics(chunk);
}
return chunk;
}).flat();
}
/**
* Apply speaker diarization to chunks
*/
function applySpeakerDiarization(chunks: SRTChunk[]): SRTChunk[] {
const merged: SRTChunk[] = [];
let currentChunk: SRTChunk | null = null;
for (const chunk of chunks) {
if (!currentChunk) {
currentChunk = chunk;
continue;
}
// Check if chunks should be merged based on speaker continuity
if (shouldMergeBySpeaker(currentChunk, chunk)) {
currentChunk = mergeChunks(currentChunk, chunk);
} else {
merged.push(currentChunk);
currentChunk = chunk;
}
}
if (currentChunk) {
merged.push(currentChunk);
}
return merged;
}
/**
* Optimize chunk sizes
*/
function optimizeChunkSizes(chunks: SRTChunk[]): SRTChunk[] {
return chunks.map(chunk => {
// Only overly large chunks are split; undersized chunks are left as is
// (this pass does not merge small chunks)
if (chunk.subtitles.length < 2) {
return chunk;
}
if (chunk.subtitles.length > 20) {
// Split large chunks
return splitLargeChunk(chunk);
}
return chunk;
}).flat();
}
/**
* Calculate chunk semantic coherence
*/
function calculateChunkCoherence(chunk: SRTChunk): number {
if (chunk.subtitles.length < 2) return 1;
let totalSimilarity = 0;
let comparisons = 0;
for (let i = 0; i < chunk.subtitles.length - 1; i++) {
const current = extractTextContent(chunk.subtitles[i].text);
const next = extractTextContent(chunk.subtitles[i + 1].text);
const similarity = calculateCosineSimilarity(current, next);
totalSimilarity += similarity;
comparisons++;
}
return comparisons > 0 ? totalSimilarity / comparisons : 1;
}
/**
* Split chunk by semantic boundaries
*/
function splitChunkBySemantics(chunk: SRTChunk): SRTChunk[] {
const subtitles = chunk.subtitles;
const splits: number[] = [];
// Find semantic boundaries within the chunk
for (let i = 1; i < subtitles.length - 1; i++) {
const prev = extractTextContent(subtitles[i - 1].text);
const current = extractTextContent(subtitles[i].text);
const next = extractTextContent(subtitles[i + 1].text);
const prevSimilarity = calculateCosineSimilarity(prev, current);
const nextSimilarity = calculateCosineSimilarity(current, next);
// If this subtitle is markedly more similar to the next than to the previous,
// treat it as the start of a new segment
if (nextSimilarity > prevSimilarity * 1.5) {
splits.push(i);
}
}
if (splits.length === 0) return [chunk];
// Create new chunks based on splits
const newChunks: SRTChunk[] = [];
let startIndex = 0;
for (const splitIndex of splits) {
newChunks.push(createChunk(subtitles.slice(startIndex, splitIndex), newChunks.length));
startIndex = splitIndex;
}
// Add final chunk
newChunks.push(createChunk(subtitles.slice(startIndex), newChunks.length));
return newChunks;
}
/**
* Check if chunks should be merged by speaker
*/
function shouldMergeBySpeaker(chunk1: SRTChunk, chunk2: SRTChunk): boolean {
const speaker1 = chunk1.context?.speaker;
const speaker2 = chunk2.context?.speaker;
// If both have the same speaker
if (speaker1 && speaker2 && speaker1 === speaker2) {
const timeGap = calculateTimeGap(
chunk1.subtitles[chunk1.subtitles.length - 1],
chunk2.subtitles[0]
);
return timeGap < 3000; // 3 seconds
}
// If one chunk has no speaker, merge if timing is close
if (!speaker1 || !speaker2) {
const timeGap = calculateTimeGap(
chunk1.subtitles[chunk1.subtitles.length - 1],
chunk2.subtitles[0]
);
return timeGap < 2000; // 2 seconds
}
return false;
}
/**
* Split large chunk into smaller ones
*/
function splitLargeChunk(chunk: SRTChunk): SRTChunk[] {
return splitLargeChunkWithSize(chunk, 10); // default: at most 10 subtitles per chunk
}
/**
* Analyze conversation context
*/
function analyzeConversationContext(
subtitle: SRTSubtitle,
nextSubtitle?: SRTSubtitle
): ConversationContext {
const speaker = detectSpeaker(subtitle);
const conversationId = generateConversationId(subtitle, speaker);
return {
speaker: speaker || undefined,
conversationId,
previousContext: extractContextKeywords(subtitle.text),
nextContext: nextSubtitle ? extractContextKeywords(nextSubtitle.text) : undefined
};
}
/**
* Generate unique conversation ID
*/
function generateConversationId(subtitle: SRTSubtitle, speaker: string | null): string {
const timestamp = `${subtitle.startTime.hours}:${subtitle.startTime.minutes}`;
const speakerPart = speaker ? speaker.toLowerCase().replace(/\s+/g, '-') : 'unknown';
return `${speakerPart}-${timestamp}`;
}
/**
* Extract context keywords from text
*/
function extractContextKeywords(text: string): string {
const cleanText = extractTextContent(text).toLowerCase();
// Simple keyword extraction (could be enhanced with NLP)
const keywords = cleanText
.split(/\s+/)
.filter(word => word.length > 3)
.slice(0, 5); // Take first 5 keywords
return keywords.join(' ');
}
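/*
 * Worked example: "The quick brown fox jumps over the lazy dog" keeps only
 * words longer than 3 characters (and at most the first five of them),
 * yielding "quick brown jumps over lazy"; "the", "fox", and "dog" are dropped.
 */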
/**
 * Check if subtitle represents a topic change
 * (currently unused; calculateTopicChangeScore is the weighted equivalent)
 */
function isTopicChange(subtitle: SRTSubtitle, currentContext: ConversationContext): boolean {
const currentKeywords = extractContextKeywords(subtitle.text);
const previousKeywords = currentContext.previousContext || '';
// Simple topic change detection based on keyword overlap
const currentSet = new Set(currentKeywords.split(' '));
const previousSet = new Set(previousKeywords.split(' '));
const intersection = new Set([...currentSet].filter(x => previousSet.has(x)));
const union = new Set([...currentSet, ...previousSet]);
// If less than 30% keyword overlap, consider it a topic change
return intersection.size / union.size < 0.3;
}
/**
* Create chunk from subtitle array
*/
function createChunk(subtitles: SRTSubtitle[], chunkIndex: number): SRTChunk {
const firstSubtitle = subtitles[0];
const lastSubtitle = subtitles[subtitles.length - 1];
return {
id: `chunk-${chunkIndex}`,
startIndex: firstSubtitle.index,
endIndex: lastSubtitle.index,
subtitles,
context: analyzeConversationContext(firstSubtitle, subtitles[1])
};
}
/**
* Advanced conversation detection with configurable parameters
*/
export function detectConversationsAdvanced(
subtitles: SRTSubtitle[],
options: {
boundaryThreshold?: number;
maxChunkSize?: number;
minChunkSize?: number;
enableSemanticAnalysis?: boolean;
enableSpeakerDiarization?: boolean;
} = {}
): SRTChunk[] {
const {
boundaryThreshold = 0.7,
maxChunkSize = 20,
minChunkSize = 2,
enableSemanticAnalysis = true,
enableSpeakerDiarization = true
} = options;
// First pass: Basic boundary detection with custom threshold
const initialChunks = detectBasicBoundariesWithThreshold(subtitles, boundaryThreshold);
let processedChunks = initialChunks;
// Second pass: Semantic analysis (optional)
if (enableSemanticAnalysis) {
processedChunks = applySemanticAnalysis(processedChunks);
}
// Third pass: Speaker diarization (optional)
if (enableSpeakerDiarization) {
processedChunks = applySpeakerDiarization(processedChunks);
}
// Fourth pass: Size optimization
processedChunks = optimizeChunkSizesWithLimits(processedChunks, maxChunkSize, minChunkSize);
return processedChunks;
}
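/*
 * Usage sketch (hypothetical `subtitles` value): a stricter boundary
 * threshold and smaller chunks, with semantic analysis disabled.
 *
 *   const chunks = detectConversationsAdvanced(subtitles, {
 *     boundaryThreshold: 0.8,
 *     maxChunkSize: 10,
 *     enableSemanticAnalysis: false,
 *   });
 */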
/**
* Basic boundary detection with custom threshold
*/
function detectBasicBoundariesWithThreshold(subtitles: SRTSubtitle[], threshold: number): SRTChunk[] {
const chunks: SRTChunk[] = [];
let currentChunk: SRTSubtitle[] = [];
let currentContext: ConversationContext | undefined;
for (let i = 0; i < subtitles.length; i++) {
const subtitle = subtitles[i];
const nextSubtitle = subtitles[i + 1];
const boundaryScore = calculateBoundaryScore(subtitle, nextSubtitle, currentContext);
if (boundaryScore > threshold && currentChunk.length > 0) {
chunks.push(createChunk(currentChunk, chunks.length));
currentChunk = [subtitle];
currentContext = analyzeConversationContext(subtitle, nextSubtitle);
} else {
currentChunk.push(subtitle);
if (!currentContext) {
currentContext = analyzeConversationContext(subtitle, nextSubtitle);
}
}
}
if (currentChunk.length > 0) {
chunks.push(createChunk(currentChunk, chunks.length));
}
return chunks;
}
/**
* Optimize chunk sizes with custom limits
*/
function optimizeChunkSizesWithLimits(chunks: SRTChunk[], maxSize: number, minSize: number): SRTChunk[] {
return chunks.map(chunk => {
if (chunk.subtitles.length < minSize) {
return chunk; // Undersized chunks are left as is (no merging implemented here)
}
if (chunk.subtitles.length > maxSize) {
return splitLargeChunkWithSize(chunk, maxSize);
}
return chunk;
}).flat();
}
/**
* Split large chunk with custom size limit
*/
function splitLargeChunkWithSize(chunk: SRTChunk, maxSize: number): SRTChunk[] {
const subtitles = chunk.subtitles;
const newChunks: SRTChunk[] = [];
for (let i = 0; i < subtitles.length; i += maxSize) {
const chunkSubtitles = subtitles.slice(i, i + maxSize);
newChunks.push(createChunk(chunkSubtitles, newChunks.length));
}
return newChunks;
}
/**
* Merge chunks with similar context (legacy function for backward compatibility)
*/
export function mergeSimilarChunks(chunks: SRTChunk[]): SRTChunk[] {
const merged: SRTChunk[] = [];
let currentChunk: SRTChunk | null = null;
for (const chunk of chunks) {
if (!currentChunk) {
currentChunk = chunk;
continue;
}
if (shouldMergeChunks(currentChunk, chunk)) {
currentChunk = mergeChunks(currentChunk, chunk);
} else {
merged.push(currentChunk);
currentChunk = chunk;
}
}
if (currentChunk) {
merged.push(currentChunk);
}
return merged;
}
/**
* Determine if two chunks should be merged
*/
function shouldMergeChunks(chunk1: SRTChunk, chunk2: SRTChunk): boolean {
// Merge if same speaker and close timing
if (chunk1.context?.speaker === chunk2.context?.speaker) {
const timeGap = calculateTimeGap(
chunk1.subtitles[chunk1.subtitles.length - 1],
chunk2.subtitles[0]
);
return timeGap < 2000; // 2 seconds
}
return false;
}
/**
* Merge two chunks
*/
function mergeChunks(chunk1: SRTChunk, chunk2: SRTChunk): SRTChunk {
// Generate a proper merged chunk ID
const mergedId = `chunk-${chunk1.startIndex}-${chunk2.endIndex}`;
return {
id: mergedId,
startIndex: chunk1.startIndex,
endIndex: chunk2.endIndex,
subtitles: [...chunk1.subtitles, ...chunk2.subtitles],
context: {
speaker: chunk1.context?.speaker || chunk2.context?.speaker,
conversationId: chunk1.context?.conversationId || chunk2.context?.conversationId || '',
previousContext: chunk1.context?.previousContext,
nextContext: chunk2.context?.nextContext,
isMergedChunk: true,
originalChunkIds: [chunk1.id, chunk2.id]
}
};
}