// conversation-translation-workflow.js (13.9 kB)
#!/usr/bin/env node
/**
* Conversation Translation Workflow
* Detects conversation chunks and translates them individually for better context preservation
*/
import fs from 'fs';
import { parseSRTFile } from './dist/parsers/srt-parser.js';
import { detectConversations } from './dist/chunking/conversation-detector.js';
/**
 * Conversation-aware SRT translation workflow.
 *
 * Loads an SRT file, groups its entries into conversation chunks, applies a
 * (simulated) context-aware translation to each chunk, rebuilds the SRT
 * document, and writes both the translated file and a JSON workflow report.
 */
class ConversationTranslationWorkflow {
  /**
   * @param {object} [options] - Optional overrides; the defaults preserve the
   *   previously hard-coded paths and languages for backward compatibility.
   * @param {string} [options.inputFile] - Source SRT file path.
   * @param {string} [options.outputFile] - Destination for the translated SRT.
   * @param {string} [options.reportFile] - Destination for the JSON report.
   * @param {string} [options.targetLanguage] - Translation target language code.
   * @param {string} [options.sourceLanguage] - Source language code.
   */
  constructor({
    inputFile = '/home/omd/Documents/Mix/SRT-MCP/Example.srt',
    outputFile = '/home/omd/Documents/Mix/SRT-MCP/Example_Conversation_Translated.srt',
    reportFile = '/home/omd/Documents/Mix/SRT-MCP/conversation-translation-report.json',
    targetLanguage = 'es',
    sourceLanguage = 'en',
  } = {}) {
    this.inputFile = inputFile;
    this.outputFile = outputFile;
    this.reportFile = reportFile;
    this.targetLanguage = targetLanguage;
    this.sourceLanguage = sourceLanguage;
  }

  /**
   * Runs the full pipeline: parse -> chunk -> translate -> reconstruct ->
   * save -> report.
   * @returns {Promise<void>}
   * @throws {Error} When the input file cannot be read or parsed; errors are
   *   logged and rethrown so callers can react.
   */
  async processWorkflow() {
    try {
      console.log('Starting Conversation Translation Workflow...');
      console.log(`Input: ${this.inputFile}`);
      console.log(`Target Language: ${this.targetLanguage}`);

      // Step 1: load and parse the source SRT file.
      console.log('\nStep 1: Loading and parsing SRT file...');
      const content = fs.readFileSync(this.inputFile, 'utf8');
      const parseResult = parseSRTFile(content);
      if (!parseResult.success || !parseResult.file) {
        throw new Error('Failed to parse SRT file');
      }
      console.log(`Successfully parsed ${parseResult.file.subtitles.length} subtitle entries`);

      // Step 2: group subtitles into conversation chunks.
      console.log('\nStep 2: Detecting conversation chunks...');
      const chunks = detectConversations(parseResult.file.subtitles);
      console.log(`Detected ${chunks.length} conversation chunks`);

      // Step 3: translate each chunk independently so its conversational
      // context is preserved.
      console.log('\nStep 3: Translating conversation chunks...');
      const translatedChunks = [];
      for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        console.log(`\nProcessing Chunk ${i + 1}/${chunks.length}:`);
        console.log(`  Range: ${chunk.startIndex}-${chunk.endIndex} (${chunk.subtitles.length} entries)`);
        console.log(`  Speaker: ${chunk.context?.speaker || 'Unknown'}`);
        console.log(`  Context: ${chunk.context?.previousContext || 'N/A'}`);
        const translatedChunk = await this.translateChunk(chunk, i + 1);
        translatedChunks.push(translatedChunk);
        console.log(`  Translated ${translatedChunk.subtitles.length} entries`);
      }

      // Step 4: merge the chunks back into one SRT document.
      console.log('\nStep 4: Reconstructing translated SRT file...');
      const finalSRT = this.reconstructSRT(translatedChunks);

      // Step 5: persist the translated subtitles.
      console.log('\nStep 5: Saving translated file...');
      fs.writeFileSync(this.outputFile, finalSRT, 'utf8');

      // Step 6: summarize what was done.
      console.log('\nStep 6: Generating workflow report...');
      this.generateWorkflowReport(chunks, translatedChunks);

      console.log('\nWorkflow completed successfully!');
      console.log(`Output saved to: ${this.outputFile}`);
    } catch (error) {
      console.error('Workflow error:', error);
      throw error; // rethrow so the caller's handler sees the failure
    }
  }

  /**
   * Translates every subtitle of one conversation chunk.
   * @param {object} chunk - Conversation chunk with a `subtitles` array.
   * @param {number} chunkNumber - 1-based chunk position (used in markers).
   * @returns {Promise<object>} Shallow copy of the chunk with translated
   *   subtitles plus `translated` / `translationLanguage` flags.
   */
  async translateChunk(chunk, chunkNumber) {
    const translatedSubtitles = [];
    for (let i = 0; i < chunk.subtitles.length; i++) {
      const subtitle = chunk.subtitles[i];
      const nextSubtitle = chunk.subtitles[i + 1];
      const prevSubtitle = chunk.subtitles[i - 1];
      // Pass `i` explicitly so context analysis avoids an O(n^2) indexOf scan.
      const context = this.analyzeSubtitleContext(subtitle, nextSubtitle, prevSubtitle, chunk, i);
      const translatedSubtitle = await this.translateSubtitle(subtitle, context, chunkNumber, i + 1);
      translatedSubtitles.push(translatedSubtitle);
    }
    return {
      ...chunk,
      subtitles: translatedSubtitles,
      translated: true,
      translationLanguage: this.targetLanguage,
    };
  }

  /**
   * Derives conversational features for a single subtitle.
   * @param {object} subtitle - Entry whose `text` is inspected.
   * @param {object|undefined} nextSubtitle - Following entry (currently unused;
   *   kept for interface stability).
   * @param {object|undefined} prevSubtitle - Preceding entry (currently unused;
   *   kept for interface stability).
   * @param {object} chunk - Owning conversation chunk.
   * @param {number|null} [index] - Known position of `subtitle` within the
   *   chunk; falls back to an indexOf lookup when omitted (backward compatible).
   * @returns {object} Feature flags used by translation and timing logic.
   */
  analyzeSubtitleContext(subtitle, nextSubtitle, prevSubtitle, chunk, index = null) {
    const position = index ?? chunk.subtitles.indexOf(subtitle);
    const speaker = this.extractSpeaker(subtitle.text);
    return {
      isFirstInChunk: position === 0,
      isLastInChunk: position === chunk.subtitles.length - 1,
      isQuestion: subtitle.text.includes('?'),
      isExclamation: subtitle.text.includes('!'),
      // Italic markup is treated as narration; everything else as dialogue.
      isNarration: subtitle.text.includes('<i>'),
      isDialogue: !subtitle.text.includes('<i>'),
      hasSpeaker: speaker !== null,
      speaker,
      chunkSize: chunk.subtitles.length,
      conversationId: chunk.context?.conversationId,
      previousContext: chunk.context?.previousContext,
      nextContext: chunk.context?.nextContext,
    };
  }

  /**
   * Translates one subtitle, decorating it with context markers and
   * conversation-aware timing shifts.
   *
   * Markers are prepended innermost-first, so the final order is:
   * [Speaker: X] [Narration] [Q] [Chunk N] <translated text>.
   * @returns {Promise<object>} New subtitle object; the original text is kept
   *   in `originalText` and metadata in `translationContext`.
   */
  async translateSubtitle(subtitle, context, chunkNumber, subtitleNumber) {
    const baseTranslation = this.simulateTranslation(subtitle.text);
    let adjustedTranslation = baseTranslation;
    if (context.isFirstInChunk) {
      adjustedTranslation = `[Chunk ${chunkNumber}] ${adjustedTranslation}`;
    }
    if (context.isQuestion) {
      adjustedTranslation = `[Q] ${adjustedTranslation}`;
    }
    if (context.isNarration) {
      adjustedTranslation = `[Narration] ${adjustedTranslation}`;
    }
    if (context.hasSpeaker) {
      adjustedTranslation = `[Speaker: ${context.speaker}] ${adjustedTranslation}`;
    }
    const timingAdjustments = this.calculateConversationTiming(subtitle, context);
    return {
      ...subtitle,
      text: adjustedTranslation,
      originalText: subtitle.text,
      translationContext: {
        chunkNumber,
        subtitleNumber,
        isQuestion: context.isQuestion,
        isNarration: context.isNarration,
        hasSpeaker: context.hasSpeaker,
        speaker: context.speaker,
        timingAdjustments,
      },
      startTime: this.adjustTiming(subtitle.startTime, timingAdjustments.startOffset),
      endTime: this.adjustTiming(subtitle.endTime, timingAdjustments.endOffset),
    };
  }

  /**
   * Placeholder translation: prefixes the text with "[Traducido]". Italic
   * markup is stripped and re-wrapped so the tag also encloses the marker.
   * @param {string} text - Original subtitle text.
   * @returns {string} Simulated translation.
   */
  simulateTranslation(text) {
    // The question/exclamation branches previously ran no-op punctuation
    // replacements (e.g. /\?/g -> '?'); the output is simply the prefixed
    // text, so the dead replaces were removed.
    if (text.includes('?') || text.includes('!')) {
      return `[Traducido] ${text}`;
    }
    if (text.includes('<i>')) {
      return `<i>[Traducido] ${text.replace(/<i>/g, '').replace(/<\/i>/g, '')}</i>`;
    }
    return `[Traducido] ${text}`;
  }

  /**
   * Computes start/end offsets (in ms) that pad subtitle timing around
   * conversational events. Offsets are cumulative across conditions.
   * @param {object} subtitle - Currently unused; kept for interface stability.
   * @param {object} context - Flags from analyzeSubtitleContext.
   * @returns {{startOffset: number, endOffset: number}}
   */
  calculateConversationTiming(subtitle, context) {
    const adjustments = {
      startOffset: 0,
      endOffset: 0,
    };
    if (context.isFirstInChunk) {
      adjustments.startOffset += 100; // pause before a new conversation
    }
    if (context.isLastInChunk) {
      adjustments.endOffset += 200; // pause after a conversation
    }
    if (context.isQuestion) {
      adjustments.endOffset += 300; // longer pause for questions
    }
    if (context.isExclamation) {
      adjustments.endOffset += 250; // pause for exclamations
    }
    if (context.isNarration) {
      adjustments.startOffset += 150; // pause before narration
      adjustments.endOffset += 200; // pause after narration
    }
    if (context.hasSpeaker) {
      adjustments.startOffset += 100; // pause for speaker identification
    }
    return adjustments;
  }

  /**
   * Shifts an { hours, minutes, seconds, milliseconds } timestamp by `offset`
   * milliseconds. Clamped at zero so a negative offset can never produce
   * negative time components (a latent bug in the previous version).
   * @param {{hours: number, minutes: number, seconds: number, milliseconds: number}} time
   * @param {number} offset - Shift in milliseconds (may be negative).
   * @returns {{hours: number, minutes: number, seconds: number, milliseconds: number}}
   */
  adjustTiming(time, offset) {
    const totalMs = Math.max(
      0,
      (time.hours * 3600 + time.minutes * 60 + time.seconds) * 1000 + time.milliseconds + offset
    );
    return {
      hours: Math.floor(totalMs / 3600000),
      minutes: Math.floor((totalMs % 3600000) / 60000),
      seconds: Math.floor((totalMs % 60000) / 1000),
      milliseconds: totalMs % 1000,
    };
  }

  /**
   * Extracts a speaker label from subtitle text.
   * Recognized forms: "Name: ...", "ALL CAPS: ...", "Name - ...", and
   * "<b>Speaker N:</b>" (which yields just the number).
   * @param {string} text
   * @returns {string|null} Speaker label, or null when no pattern matches.
   */
  extractSpeaker(text) {
    const speakerPatterns = [
      /^([A-Z][a-z]+):\s*(.+)$/,
      /^([A-Z][A-Z\s]+):\s*(.+)$/,
      /^([A-Z][a-z]+)\s*-\s*(.+)$/,
      /<b>Speaker (\d+):<\/b>/i,
    ];
    for (const pattern of speakerPatterns) {
      const match = text.match(pattern);
      if (match) {
        return match[1].trim();
      }
    }
    return null;
  }

  /**
   * Flattens translated chunks back into a single SRT document, ordered by
   * the subtitles' original indices.
   * @param {object[]} translatedChunks
   * @returns {string} SRT-formatted text (entries separated by blank lines).
   */
  reconstructSRT(translatedChunks) {
    const allSubtitles = [];
    translatedChunks.forEach((chunk) => {
      allSubtitles.push(...chunk.subtitles);
    });
    // Restore global ordering; chunking may have regrouped entries.
    allSubtitles.sort((a, b) => a.index - b.index);
    return allSubtitles
      .map((subtitle) => {
        const startTime = this.formatTime(subtitle.startTime);
        const endTime = this.formatTime(subtitle.endTime);
        return `${subtitle.index}\n${startTime} --> ${endTime}\n${subtitle.text}\n`;
      })
      .join('\n');
  }

  /**
   * Formats a timestamp object as the SRT "HH:MM:SS,mmm" string.
   * @param {{hours: number, minutes: number, seconds: number, milliseconds: number}} time
   * @returns {string}
   */
  formatTime(time) {
    return `${time.hours.toString().padStart(2, '0')}:${time.minutes.toString().padStart(2, '0')}:${time.seconds.toString().padStart(2, '0')},${time.milliseconds.toString().padStart(3, '0')}`;
  }

  /**
   * Builds, logs, and saves a JSON summary of the workflow run.
   * Division-by-zero cases (no chunks / no subtitles) now yield 0 instead of
   * NaN, which previously also broke `.toFixed` on the logged average.
   * @param {object[]} originalChunks - Chunks as detected (pre-translation).
   * @param {object[]} translatedChunks - Chunks after translation.
   */
  generateWorkflowReport(originalChunks, translatedChunks) {
    const totalSubtitles = translatedChunks.reduce((sum, chunk) => sum + chunk.subtitles.length, 0);
    // Counts subtitles across all chunks matching `predicate`.
    const countSubtitles = (predicate) =>
      translatedChunks.reduce((sum, chunk) => sum + chunk.subtitles.filter(predicate).length, 0);
    const totalAdjustments = translatedChunks.reduce(
      (sum, chunk) =>
        sum +
        chunk.subtitles.reduce(
          (subSum, sub) =>
            subSum +
            (sub.translationContext?.timingAdjustments?.startOffset || 0) +
            (sub.translationContext?.timingAdjustments?.endOffset || 0),
          0
        ),
      0
    );
    const report = {
      workflow: 'Conversation Translation Workflow',
      inputFile: this.inputFile,
      outputFile: this.outputFile,
      targetLanguage: this.targetLanguage,
      sourceLanguage: this.sourceLanguage,
      processing: {
        totalChunks: originalChunks.length,
        totalSubtitles,
        // Guard: no chunks detected -> report "0.0" rather than NaN.
        averageChunkSize:
          originalChunks.length > 0 ? (totalSubtitles / originalChunks.length).toFixed(1) : '0.0',
      },
      translation: {
        translatedChunks: translatedChunks.filter((chunk) => chunk.translated).length,
        questionsTranslated: countSubtitles((sub) => sub.translationContext?.isQuestion),
        narrationTranslated: countSubtitles((sub) => sub.translationContext?.isNarration),
        speakersIdentified: countSubtitles((sub) => sub.translationContext?.hasSpeaker),
      },
      timing: {
        totalAdjustments,
        // Guard: empty file -> average 0 rather than NaN.
        averageAdjustment: totalSubtitles > 0 ? totalAdjustments / totalSubtitles : 0,
      },
    };
    console.log('\nWORKFLOW REPORT:');
    console.log(`  Total Chunks: ${report.processing.totalChunks}`);
    console.log(`  Total Subtitles: ${report.processing.totalSubtitles}`);
    console.log(`  Average Chunk Size: ${report.processing.averageChunkSize} entries`);
    console.log(`  Questions Translated: ${report.translation.questionsTranslated}`);
    console.log(`  Narration Translated: ${report.translation.narrationTranslated}`);
    console.log(`  Speakers Identified: ${report.translation.speakersIdentified}`);
    console.log(`  Total Timing Adjustments: ${report.timing.totalAdjustments}ms`);
    console.log(`  Average Adjustment: ${report.timing.averageAdjustment.toFixed(1)}ms`);
    fs.writeFileSync(this.reportFile, JSON.stringify(report, null, 2));
    console.log(`\nDetailed report saved to ${this.reportFile}`);
  }
}
// Main execution
/**
 * Entry point: builds a workflow instance with default options and runs the
 * full pipeline. Errors propagate to the caller's rejection handler.
 * @returns {Promise<void>}
 */
async function main() {
  await new ConversationTranslationWorkflow().processWorkflow();
}
// Run the workflow only when this file is executed directly (not imported).
const invokedDirectly = import.meta.url === `file://${process.argv[1]}`;
if (invokedDirectly) {
  main().catch((error) => console.error(error));
}
export default ConversationTranslationWorkflow;