// conversation-detector.ts
/**
* Advanced conversation detection and context analysis for SRT files
* Implements multiple algorithms for robust conversation boundary detection
*/
import { SRTSubtitle, SRTChunk, ConversationContext } from '../types/srt.js';
import { extractTextContent } from '../utils/style-tags.js';
/**
* Advanced conversation detection with multiple algorithms
* Returns only the first chunk with instructions to call subsequent chunks
*/
export function detectConversations(subtitles: SRTSubtitle[]): SRTChunk[] {
// Run the full four-pass pipeline, then return only the first chunk,
// annotated with instructions for retrieving the remaining chunks
const allChunks = detectAllConversations(subtitles);
return [createFirstChunkWithInstructions(allChunks)];
}
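/*
 * Usage sketch: `parseSRT` and `srtText` below are hypothetical stand-ins for
 * whatever produces the SRTSubtitle[] input; only the detector calls are real.
 *
 *   const subtitles = parseSRT(srtText);
 *   const [firstChunk] = detectConversations(subtitles);
 *   console.log(firstChunk.context?.instructionText);
 *   // e.g. "To get chunk 2 of 5, call: getNextChunk(2)"
 */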
/**
* Create first chunk with instructions to call subsequent chunks
*/
function createFirstChunkWithInstructions(allChunks: SRTChunk[]): SRTChunk {
if (allChunks.length === 0) {
throw new Error('No chunks detected');
}
const firstChunk = allChunks[0];
// Create a modified first chunk with instructions
return {
id: firstChunk.id,
startIndex: firstChunk.startIndex,
endIndex: firstChunk.endIndex,
subtitles: firstChunk.subtitles,
context: {
...firstChunk.context,
conversationId: firstChunk.context?.conversationId || 'chunk-1',
totalChunks: allChunks.length,
currentChunk: 1,
hasMoreChunks: allChunks.length > 1,
instructionText: `To get chunk 2 of ${allChunks.length}, call: getNextChunk(2)`
}
};
}
/**
 * Get a specific chunk by 1-based index (for subsequent chunk retrieval).
 * Note: this re-runs the full detection pipeline on every call.
 */
export function getNextChunk(chunkIndex: number, subtitles: SRTSubtitle[]): SRTChunk | null {
const allChunks = detectAllConversations(subtitles);
if (chunkIndex < 1 || chunkIndex > allChunks.length) {
return null;
}
const chunk = allChunks[chunkIndex - 1];
// Annotate the chunk with its position and an instruction for the next call
const hasMore = chunkIndex < allChunks.length;
chunk.context = {
...chunk.context,
conversationId: chunk.context?.conversationId || `chunk-${chunkIndex}`,
totalChunks: allChunks.length,
currentChunk: chunkIndex,
hasMoreChunks: hasMore,
instructionText: hasMore
? `To get chunk ${chunkIndex + 1} of ${allChunks.length}, call: getNextChunk(${chunkIndex + 1})`
: `This is the last chunk (${chunkIndex} of ${allChunks.length})`
};
return chunk;
}
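/*
 * Sketch of sequential retrieval using the context flags set above. Note that
 * each call re-runs detection, so callers may prefer to cache the parsed
 * subtitles. `process` is a hypothetical consumer.
 *
 *   let index = 1;
 *   let chunk = getNextChunk(index, subtitles);
 *   while (chunk) {
 *     process(chunk);
 *     chunk = chunk.context?.hasMoreChunks ? getNextChunk(++index, subtitles) : null;
 *   }
 */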
/**
* Internal function to detect all conversations (used by getNextChunk)
*/
function detectAllConversations(subtitles: SRTSubtitle[]): SRTChunk[] {
// First pass: Basic boundary detection
const initialChunks = detectBasicBoundaries(subtitles);
// Second pass: Semantic analysis and topic modeling
const semanticChunks = applySemanticAnalysis(initialChunks);
// Third pass: Speaker diarization and context-aware merging
const finalChunks = applySpeakerDiarization(semanticChunks);
// Fourth pass: Optimize chunk sizes and merge similar contexts
return optimizeChunkSizes(finalChunks);
}
/**
* Basic boundary detection using timing and speaker changes
*/
function detectBasicBoundaries(subtitles: SRTSubtitle[]): SRTChunk[] {
// Delegates to the threshold variant with the default 0.7 boundary score
return detectBasicBoundariesWithThreshold(subtitles, 0.7);
}
/**
* Calculate comprehensive boundary score using multiple factors
*/
function calculateBoundaryScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined,
currentContext?: ConversationContext
): number {
let score = 0;
// Speaker change detection (weight: 0.4)
const speakerScore = calculateSpeakerChangeScore(subtitle, nextSubtitle);
score += speakerScore * 0.4;
// Timing gap analysis (weight: 0.3)
const timingScore = calculateTimingScore(subtitle, nextSubtitle);
score += timingScore * 0.3;
// Semantic similarity analysis (weight: 0.2)
const semanticScore = calculateSemanticScore(subtitle, nextSubtitle, currentContext);
score += semanticScore * 0.2;
// Topic change detection (weight: 0.1)
const topicScore = calculateTopicChangeScore(subtitle, currentContext);
score += topicScore * 0.1;
return Math.min(score, 1.0);
}
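/*
 * Worked example of the weighting: a definite speaker change (1.0), a 6 s gap
 * (0.8), fairly dissimilar text (0.7), and a mild topic shift (0.1) combine as
 * 1.0*0.4 + 0.8*0.3 + 0.7*0.2 + 0.1*0.1 = 0.79, clearing the default 0.7
 * boundary threshold.
 */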
/**
* Calculate speaker change score
*/
function calculateSpeakerChangeScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined
): number {
if (!nextSubtitle) return 0;
const currentSpeaker = detectSpeaker(subtitle);
const nextSpeaker = detectSpeaker(nextSubtitle);
if (!currentSpeaker || !nextSpeaker) return 0;
// Exact speaker change
if (currentSpeaker !== nextSpeaker) {
return 1.0;
}
// Speaker confidence analysis
const currentConfidence = calculateSpeakerConfidence(subtitle);
const nextConfidence = calculateSpeakerConfidence(nextSubtitle);
// If confidence drops significantly, it might indicate a speaker change
if (nextConfidence < currentConfidence * 0.5) {
return 0.6;
}
return 0;
}
/**
* Calculate timing-based boundary score
*/
function calculateTimingScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined
): number {
if (!nextSubtitle) return 0;
const timeGap = calculateTimeGap(subtitle, nextSubtitle);
// Normalize gap to 0-1 score
if (timeGap > 10000) return 1.0; // 10+ seconds
if (timeGap > 5000) return 0.8; // 5-10 seconds
if (timeGap > 2000) return 0.5; // 2-5 seconds
if (timeGap > 1000) return 0.2; // 1-2 seconds
return 0;
}
/**
* Calculate semantic similarity score
*/
function calculateSemanticScore(
subtitle: SRTSubtitle,
nextSubtitle: SRTSubtitle | undefined,
_currentContext?: ConversationContext // reserved for future use; not consulted by the current scoring
): number {
if (!nextSubtitle) return 0;
const currentText = extractTextContent(subtitle.text);
const nextText = extractTextContent(nextSubtitle.text);
// Calculate semantic similarity using multiple methods
const cosineSimilarity = calculateCosineSimilarity(currentText, nextText);
const jaccardSimilarity = calculateJaccardSimilarity(currentText, nextText);
const levenshteinSimilarity = calculateLevenshteinSimilarity(currentText, nextText);
// Weighted combination
const semanticScore = (cosineSimilarity * 0.5) + (jaccardSimilarity * 0.3) + (levenshteinSimilarity * 0.2);
// Lower similarity indicates higher boundary probability
return 1 - semanticScore;
}
/**
* Calculate topic change score
*/
function calculateTopicChangeScore(
subtitle: SRTSubtitle,
currentContext?: ConversationContext
): number {
if (!currentContext) return 0;
const currentKeywords = extractContextKeywords(subtitle.text);
const previousKeywords = currentContext.previousContext || '';
if (!previousKeywords) return 0;
// Enhanced topic change detection
const keywordOverlap = calculateKeywordOverlap(currentKeywords, previousKeywords);
const sentimentChange = calculateSentimentChange(subtitle.text, currentContext);
// Combine keyword and sentiment analysis
return (1 - keywordOverlap) * 0.7 + sentimentChange * 0.3;
}
/**
 * Speaker label patterns with associated detection confidence,
 * shared by detectSpeaker and calculateSpeakerConfidence
 */
const SPEAKER_PATTERNS = [
{ pattern: /^([A-Z][a-z]+):\s*(.+)$/, confidence: 1.0 }, // "Speaker: text"
{ pattern: /^([A-Z][A-Z\s]+):\s*(.+)$/, confidence: 0.9 }, // "SPEAKER NAME: text"
{ pattern: /^([A-Z][a-z]+)\s*-\s*(.+)$/, confidence: 0.8 }, // "Speaker - text"
{ pattern: /^([A-Z][a-z]+)\s*:\s*(.+)$/, confidence: 0.7 }, // "Speaker: text" (loose)
{ pattern: /^([A-Z][a-z]+\s+[A-Z][a-z]+):\s*(.+)$/, confidence: 0.9 }, // "First Last: text"
{ pattern: /^([A-Z][a-z]+)\s*\(([^)]+)\):\s*(.+)$/, confidence: 0.6 }, // "Speaker (role): text"
];
/**
 * Enhanced speaker detection with confidence scoring
 */
function detectSpeaker(subtitle: SRTSubtitle): string | null {
if (!subtitle.text) return null;
const text = extractTextContent(subtitle.text);
let bestMatch = null;
let bestConfidence = 0;
for (const { pattern, confidence } of SPEAKER_PATTERNS) {
const match = text.match(pattern);
if (match && confidence > bestConfidence) {
bestMatch = match[1].trim();
bestConfidence = confidence;
}
}
// If no explicit speaker pattern found, try to detect from content
if (!bestMatch) {
// Look for name mentions in first person context
const namePatterns = [
/my name is ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/i am ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/i'm ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/this is ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
/call me ([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)/i,
];
for (const pattern of namePatterns) {
const match = text.match(pattern);
if (match) {
return match[1].trim();
}
}
// Look for first-person indicators to suggest a narrator/speaker
const firstPersonPatterns = [
/^i\s+/i, // "I am", "I have", etc.
/^my\s+/i, // "My name", "My life", etc.
/^i'm\s+/i, // "I'm doing", etc.
];
for (const pattern of firstPersonPatterns) {
if (pattern.test(text)) {
return 'Narrator'; // Generic first-person speaker
}
}
}
return bestMatch;
}
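/*
 * Representative inputs and outputs (subtitle objects abbreviated to their
 * text; a real SRTSubtitle also carries index and timing):
 *
 *   'Alice: hello there'   -> 'Alice'    (explicit label, confidence 1.0)
 *   'my name is Bob'       -> 'Bob'      (name-mention fallback)
 *   'I think we should go' -> 'Narrator' (first-person fallback)
 *   'hello there'          -> null
 */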
/**
* Calculate speaker detection confidence
*/
function calculateSpeakerConfidence(subtitle: SRTSubtitle): number {
const text = extractTextContent(subtitle.text);
for (const { pattern, confidence } of SPEAKER_PATTERNS) {
if (text.match(pattern)) {
return confidence;
}
}
return 0;
}
/**
* Calculate time gap between subtitles
*/
function calculateTimeGap(current: SRTSubtitle, next: SRTSubtitle): number {
const currentEndMs = (current.endTime.hours * 3600 + current.endTime.minutes * 60 + current.endTime.seconds) * 1000 + current.endTime.milliseconds;
const nextStartMs = (next.startTime.hours * 3600 + next.startTime.minutes * 60 + next.startTime.seconds) * 1000 + next.startTime.milliseconds;
return nextStartMs - currentEndMs;
}
/**
* Calculate cosine similarity between two texts
*/
function calculateCosineSimilarity(text1: string, text2: string): number {
const words1 = text1.toLowerCase().split(/\s+/).filter(word => word.length > 2);
const words2 = text2.toLowerCase().split(/\s+/).filter(word => word.length > 2);
const allWords = [...new Set([...words1, ...words2])];
const vector1 = allWords.map(word => words1.filter(w => w === word).length);
const vector2 = allWords.map(word => words2.filter(w => w === word).length);
const dotProduct = vector1.reduce((sum, val, i) => sum + val * vector2[i], 0);
const magnitude1 = Math.sqrt(vector1.reduce((sum, val) => sum + val * val, 0));
const magnitude2 = Math.sqrt(vector2.reduce((sum, val) => sum + val * val, 0));
if (magnitude1 === 0 || magnitude2 === 0) return 0;
return dotProduct / (magnitude1 * magnitude2);
}
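/*
 * Worked example: "the quick brown fox" vs "the quick red fox". After the
 * length-2 filter the vocabulary is [the, quick, brown, fox, red], the count
 * vectors are [1,1,1,1,0] and [1,1,0,1,1], and cosine = 3 / (2 * 2) = 0.75.
 */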
/**
* Calculate Jaccard similarity between two texts
*/
function calculateJaccardSimilarity(text1: string, text2: string): number {
const words1 = new Set(text1.toLowerCase().split(/\s+/).filter(word => word.length > 2));
const words2 = new Set(text2.toLowerCase().split(/\s+/).filter(word => word.length > 2));
const intersection = new Set([...words1].filter(word => words2.has(word)));
const union = new Set([...words1, ...words2]);
// Guard against division by zero when both texts are empty after filtering
if (union.size === 0) return 1;
return intersection.size / union.size;
}
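/*
 * Worked example: for the same pair of texts the intersection is
 * {the, quick, fox} and the union is {the, quick, brown, fox, red},
 * so Jaccard = 3/5 = 0.6.
 */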
/**
* Calculate Levenshtein similarity between two texts
*/
function calculateLevenshteinSimilarity(text1: string, text2: string): number {
const distance = levenshteinDistance(text1.toLowerCase(), text2.toLowerCase());
const maxLength = Math.max(text1.length, text2.length);
if (maxLength === 0) return 1;
return 1 - (distance / maxLength);
}
/**
* Calculate Levenshtein distance between two strings
*/
function levenshteinDistance(str1: string, str2: string): number {
const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
for (let i = 0; i <= str1.length; i++) matrix[0][i] = i;
for (let j = 0; j <= str2.length; j++) matrix[j][0] = j;
for (let j = 1; j <= str2.length; j++) {
for (let i = 1; i <= str1.length; i++) {
const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
matrix[j][i] = Math.min(
matrix[j][i - 1] + 1,
matrix[j - 1][i] + 1,
matrix[j - 1][i - 1] + indicator
);
}
}
return matrix[str2.length][str1.length];
}
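/*
 * Worked example: levenshteinDistance('kitten', 'sitting') === 3
 * (substitute k->s, substitute e->i, insert g), so
 * calculateLevenshteinSimilarity('kitten', 'sitting') = 1 - 3/7 ≈ 0.571.
 */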
/**
* Calculate keyword overlap between two keyword strings
*/
function calculateKeywordOverlap(keywords1: string, keywords2: string): number {
const words1 = new Set(keywords1.toLowerCase().split(/\s+/));
const words2 = new Set(keywords2.toLowerCase().split(/\s+/));
const intersection = new Set([...words1].filter(word => words2.has(word)));
const union = new Set([...words1, ...words2]);
return intersection.size / union.size;
}
/**
* Calculate sentiment change score
*/
function calculateSentimentChange(text: string, context: ConversationContext): number {
const currentSentiment = analyzeSentiment(text);
const previousSentiment = context.previousContext ? analyzeSentiment(context.previousContext) : 0;
return Math.abs(currentSentiment - previousSentiment);
}
/**
* Simple sentiment analysis
*/
function analyzeSentiment(text: string): number {
const positiveWords = ['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'love', 'like', 'happy', 'joy'];
const negativeWords = ['bad', 'terrible', 'awful', 'hate', 'dislike', 'sad', 'angry', 'frustrated', 'disappointed', 'horrible'];
const words = text.toLowerCase().split(/\s+/);
let score = 0;
for (const word of words) {
if (positiveWords.includes(word)) score += 1;
if (negativeWords.includes(word)) score -= 1;
}
return Math.max(-1, Math.min(1, score / words.length));
}
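/*
 * Worked example: "I love this great movie" scores +1 for "love" and +1 for
 * "great" over 5 words, giving 2/5 = 0.4; "I hate this awful movie" yields
 * -2/5 = -0.4. Results are clamped to [-1, 1].
 */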
/**
* Apply semantic analysis to chunks
*/
function applySemanticAnalysis(chunks: SRTChunk[]): SRTChunk[] {
return chunks.map(chunk => {
// Analyze semantic coherence within chunk
const coherenceScore = calculateChunkCoherence(chunk);
// If coherence is low, try to split the chunk
if (coherenceScore < 0.3 && chunk.subtitles.length > 3) {
return splitChunkBySemantics(chunk);
}
return chunk;
}).flat();
}
/**
* Apply speaker diarization to chunks
*/
function applySpeakerDiarization(chunks: SRTChunk[]): SRTChunk[] {
const merged: SRTChunk[] = [];
let currentChunk: SRTChunk | null = null;
for (const chunk of chunks) {
if (!currentChunk) {
currentChunk = chunk;
continue;
}
// Check if chunks should be merged based on speaker continuity
if (shouldMergeBySpeaker(currentChunk, chunk)) {
currentChunk = mergeChunks(currentChunk, chunk);
} else {
merged.push(currentChunk);
currentChunk = chunk;
}
}
if (currentChunk) {
merged.push(currentChunk);
}
return merged;
}
/**
* Optimize chunk sizes
*/
function optimizeChunkSizes(chunks: SRTChunk[]): SRTChunk[] {
return chunks.map(chunk => {
// Only overly large chunks are split; undersized chunks are left as is
// (this pass does not merge small chunks)
if (chunk.subtitles.length < 2) {
return chunk;
}
if (chunk.subtitles.length > 20) {
// Split large chunks
return splitLargeChunk(chunk);
}
return chunk;
}).flat();
}
/**
* Calculate chunk semantic coherence
*/
function calculateChunkCoherence(chunk: SRTChunk): number {
if (chunk.subtitles.length < 2) return 1;
let totalSimilarity = 0;
let comparisons = 0;
for (let i = 0; i < chunk.subtitles.length - 1; i++) {
const current = extractTextContent(chunk.subtitles[i].text);
const next = extractTextContent(chunk.subtitles[i + 1].text);
const similarity = calculateCosineSimilarity(current, next);
totalSimilarity += similarity;
comparisons++;
}
return comparisons > 0 ? totalSimilarity / comparisons : 1;
}
/**
* Split chunk by semantic boundaries
*/
function splitChunkBySemantics(chunk: SRTChunk): SRTChunk[] {
const subtitles = chunk.subtitles;
const splits: number[] = [];
// Find semantic boundaries within the chunk
for (let i = 1; i < subtitles.length - 1; i++) {
const prev = extractTextContent(subtitles[i - 1].text);
const current = extractTextContent(subtitles[i].text);
const next = extractTextContent(subtitles[i + 1].text);
const prevSimilarity = calculateCosineSimilarity(prev, current);
const nextSimilarity = calculateCosineSimilarity(current, next);
// If this subtitle is markedly more similar to the next than to the previous,
// treat it as the start of a new segment
if (nextSimilarity > prevSimilarity * 1.5) {
splits.push(i);
}
}
if (splits.length === 0) return [chunk];
// Create new chunks based on splits
const newChunks: SRTChunk[] = [];
let startIndex = 0;
for (const splitIndex of splits) {
newChunks.push(createChunk(subtitles.slice(startIndex, splitIndex), newChunks.length));
startIndex = splitIndex;
}
// Add final chunk
newChunks.push(createChunk(subtitles.slice(startIndex), newChunks.length));
return newChunks;
}
/**
* Check if chunks should be merged by speaker
*/
function shouldMergeBySpeaker(chunk1: SRTChunk, chunk2: SRTChunk): boolean {
const speaker1 = chunk1.context?.speaker;
const speaker2 = chunk2.context?.speaker;
// If both have the same speaker
if (speaker1 && speaker2 && speaker1 === speaker2) {
const timeGap = calculateTimeGap(
chunk1.subtitles[chunk1.subtitles.length - 1],
chunk2.subtitles[0]
);
return timeGap < 3000; // 3 seconds
}
// If one chunk has no speaker, merge if timing is close
if (!speaker1 || !speaker2) {
const timeGap = calculateTimeGap(
chunk1.subtitles[chunk1.subtitles.length - 1],
chunk2.subtitles[0]
);
return timeGap < 2000; // 2 seconds
}
return false;
}
/**
* Split large chunk into smaller ones
*/
function splitLargeChunk(chunk: SRTChunk): SRTChunk[] {
return splitLargeChunkWithSize(chunk, 10); // default: at most 10 subtitles per chunk
}
/**
* Analyze conversation context
*/
function analyzeConversationContext(
subtitle: SRTSubtitle,
nextSubtitle?: SRTSubtitle
): ConversationContext {
const speaker = detectSpeaker(subtitle);
const conversationId = generateConversationId(subtitle, speaker);
return {
speaker: speaker || undefined,
conversationId,
previousContext: extractContextKeywords(subtitle.text),
nextContext: nextSubtitle ? extractContextKeywords(nextSubtitle.text) : undefined
};
}
/**
* Generate unique conversation ID
*/
function generateConversationId(subtitle: SRTSubtitle, speaker: string | null): string {
const timestamp = `${subtitle.startTime.hours}:${subtitle.startTime.minutes}`;
const speakerPart = speaker ? speaker.toLowerCase().replace(/\s+/g, '-') : 'unknown';
return `${speakerPart}-${timestamp}`;
}
/**
* Extract context keywords from text
*/
function extractContextKeywords(text: string): string {
const cleanText = extractTextContent(text).toLowerCase();
// Simple keyword extraction (could be enhanced with NLP)
const keywords = cleanText
.split(/\s+/)
.filter(word => word.length > 3)
.slice(0, 5); // Take first 5 keywords
return keywords.join(' ');
}
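/*
 * Worked example: "The quick brown fox jumps over the lazy dog" keeps only
 * words longer than 3 characters (and at most the first five of them),
 * yielding "quick brown jumps over lazy"; "the", "fox", and "dog" are dropped.
 */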
/**
 * Check if subtitle represents a topic change
 * (currently unused; calculateTopicChangeScore is the weighted equivalent)
 */
function isTopicChange(subtitle: SRTSubtitle, currentContext: ConversationContext): boolean {
const currentKeywords = extractContextKeywords(subtitle.text);
const previousKeywords = currentContext.previousContext || '';
// Simple topic change detection based on keyword overlap
const currentSet = new Set(currentKeywords.split(' '));
const previousSet = new Set(previousKeywords.split(' '));
const intersection = new Set([...currentSet].filter(x => previousSet.has(x)));
const union = new Set([...currentSet, ...previousSet]);
// If less than 30% keyword overlap, consider it a topic change
return intersection.size / union.size < 0.3;
}
/**
* Create chunk from subtitle array
*/
function createChunk(subtitles: SRTSubtitle[], chunkIndex: number): SRTChunk {
const firstSubtitle = subtitles[0];
const lastSubtitle = subtitles[subtitles.length - 1];
return {
id: `chunk-${chunkIndex}`,
startIndex: firstSubtitle.index,
endIndex: lastSubtitle.index,
subtitles,
context: analyzeConversationContext(firstSubtitle, subtitles[1])
};
}
/**
* Advanced conversation detection with configurable parameters
*/
export function detectConversationsAdvanced(
subtitles: SRTSubtitle[],
options: {
boundaryThreshold?: number;
maxChunkSize?: number;
minChunkSize?: number;
enableSemanticAnalysis?: boolean;
enableSpeakerDiarization?: boolean;
} = {}
): SRTChunk[] {
const {
boundaryThreshold = 0.7,
maxChunkSize = 20,
minChunkSize = 2,
enableSemanticAnalysis = true,
enableSpeakerDiarization = true
} = options;
// First pass: Basic boundary detection with custom threshold
const initialChunks = detectBasicBoundariesWithThreshold(subtitles, boundaryThreshold);
let processedChunks = initialChunks;
// Second pass: Semantic analysis (optional)
if (enableSemanticAnalysis) {
processedChunks = applySemanticAnalysis(processedChunks);
}
// Third pass: Speaker diarization (optional)
if (enableSpeakerDiarization) {
processedChunks = applySpeakerDiarization(processedChunks);
}
// Fourth pass: Size optimization
processedChunks = optimizeChunkSizesWithLimits(processedChunks, maxChunkSize, minChunkSize);
return processedChunks;
}
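/*
 * Usage sketch (hypothetical `subtitles` value): a stricter boundary
 * threshold and smaller chunks, with semantic analysis disabled.
 *
 *   const chunks = detectConversationsAdvanced(subtitles, {
 *     boundaryThreshold: 0.8,
 *     maxChunkSize: 10,
 *     enableSemanticAnalysis: false,
 *   });
 */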
/**
* Basic boundary detection with custom threshold
*/
function detectBasicBoundariesWithThreshold(subtitles: SRTSubtitle[], threshold: number): SRTChunk[] {
const chunks: SRTChunk[] = [];
let currentChunk: SRTSubtitle[] = [];
let currentContext: ConversationContext | undefined;
for (let i = 0; i < subtitles.length; i++) {
const subtitle = subtitles[i];
const nextSubtitle = subtitles[i + 1];
const boundaryScore = calculateBoundaryScore(subtitle, nextSubtitle, currentContext);
if (boundaryScore > threshold && currentChunk.length > 0) {
chunks.push(createChunk(currentChunk, chunks.length));
currentChunk = [subtitle];
currentContext = analyzeConversationContext(subtitle, nextSubtitle);
} else {
currentChunk.push(subtitle);
if (!currentContext) {
currentContext = analyzeConversationContext(subtitle, nextSubtitle);
}
}
}
if (currentChunk.length > 0) {
chunks.push(createChunk(currentChunk, chunks.length));
}
return chunks;
}
/**
* Optimize chunk sizes with custom limits
*/
function optimizeChunkSizesWithLimits(chunks: SRTChunk[], maxSize: number, minSize: number): SRTChunk[] {
return chunks.map(chunk => {
if (chunk.subtitles.length < minSize) {
return chunk; // Undersized chunks are left as is (no merging implemented here)
}
if (chunk.subtitles.length > maxSize) {
return splitLargeChunkWithSize(chunk, maxSize);
}
return chunk;
}).flat();
}
/**
* Split large chunk with custom size limit
*/
function splitLargeChunkWithSize(chunk: SRTChunk, maxSize: number): SRTChunk[] {
const subtitles = chunk.subtitles;
const newChunks: SRTChunk[] = [];
for (let i = 0; i < subtitles.length; i += maxSize) {
const chunkSubtitles = subtitles.slice(i, i + maxSize);
newChunks.push(createChunk(chunkSubtitles, newChunks.length));
}
return newChunks;
}
/**
* Merge chunks with similar context (legacy function for backward compatibility)
*/
export function mergeSimilarChunks(chunks: SRTChunk[]): SRTChunk[] {
const merged: SRTChunk[] = [];
let currentChunk: SRTChunk | null = null;
for (const chunk of chunks) {
if (!currentChunk) {
currentChunk = chunk;
continue;
}
if (shouldMergeChunks(currentChunk, chunk)) {
currentChunk = mergeChunks(currentChunk, chunk);
} else {
merged.push(currentChunk);
currentChunk = chunk;
}
}
if (currentChunk) {
merged.push(currentChunk);
}
return merged;
}
/**
* Determine if two chunks should be merged
*/
function shouldMergeChunks(chunk1: SRTChunk, chunk2: SRTChunk): boolean {
// Merge if same speaker and close timing
if (chunk1.context?.speaker === chunk2.context?.speaker) {
const timeGap = calculateTimeGap(
chunk1.subtitles[chunk1.subtitles.length - 1],
chunk2.subtitles[0]
);
return timeGap < 2000; // 2 seconds
}
return false;
}
/**
* Merge two chunks
*/
function mergeChunks(chunk1: SRTChunk, chunk2: SRTChunk): SRTChunk {
// Generate a proper merged chunk ID
const mergedId = `chunk-${chunk1.startIndex}-${chunk2.endIndex}`;
return {
id: mergedId,
startIndex: chunk1.startIndex,
endIndex: chunk2.endIndex,
subtitles: [...chunk1.subtitles, ...chunk2.subtitles],
context: {
speaker: chunk1.context?.speaker || chunk2.context?.speaker,
conversationId: chunk1.context?.conversationId || chunk2.context?.conversationId || '',
previousContext: chunk1.context?.previousContext,
nextContext: chunk2.context?.nextContext,
isMergedChunk: true,
originalChunkIds: [chunk1.id, chunk2.id]
}
};
}