#!/usr/bin/env node
// process-example-srt.js
/**
* Process Example.srt with MCP Tools
* Handles large SRT files by processing them in chunks
*/
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
/**
 * Re-times the entries of an SRT subtitle file.
 *
 * The whole input file is read into memory and parsed; the parsed entries are
 * then re-timed in slices of `chunkSize` entries (with progress logging) and
 * written back out as a single SRT file.
 */
class ExampleSRTProcessor {
  /**
   * @param {object} [options]
   * @param {string} [options.inputFile] - Path of the SRT file to read.
   * @param {string} [options.outputFile] - Path of the SRT file to write.
   * @param {number} [options.chunkSize=50] - Number of entries re-timed per chunk.
   * @throws {RangeError} If `chunkSize` is not a positive integer.
   */
  constructor({
    inputFile = '/home/omd/Documents/Mix/SRT-MCP/Example.srt',
    outputFile = '/home/omd/Documents/Mix/SRT-MCP/Example_Processed.srt',
    chunkSize = 50,
  } = {}) {
    // A step of 0 (or less) would make the chunk loop in processFile() spin forever.
    if (!Number.isInteger(chunkSize) || chunkSize < 1) {
      throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
    }
    this.inputFile = inputFile;
    this.outputFile = outputFile;
    this.chunkSize = chunkSize;
    this.processedCount = 0;
    this.totalCount = 0;
  }

  /**
   * Reads `inputFile`, re-times every entry and writes the result to `outputFile`.
   * Progress is logged to the console; any error is logged and re-thrown.
   *
   * @returns {Promise<void>}
   */
  async processFile() {
    try {
      console.log(`Starting to process ${this.inputFile}...`);
      // The file is loaded in one go; only the re-timing below is chunked.
      const content = await fs.promises.readFile(this.inputFile, 'utf8');
      console.log(`File loaded: ${content.length} characters`);

      const srtData = this.parseSRT(content);
      this.totalCount = srtData.length;
      // Reset so that calling processFile() twice does not report > 100 %.
      this.processedCount = 0;
      console.log(`Found ${this.totalCount} subtitle entries`);

      const totalChunks = Math.ceil(srtData.length / this.chunkSize);
      const processedEntries = [];
      for (let i = 0; i < srtData.length; i += this.chunkSize) {
        const chunk = srtData.slice(i, i + this.chunkSize);
        console.log(`Processing chunk ${Math.floor(i / this.chunkSize) + 1}/${totalChunks} (${chunk.length} entries)`);
        // Hand over the first entry of the next chunk so the last entry of this
        // chunk is timed the same way regardless of where the chunk boundary falls.
        const processedChunk = await this.processChunk(chunk, srtData[i + this.chunkSize]);
        processedEntries.push(...processedChunk);
        this.processedCount += chunk.length;
        console.log(`Progress: ${this.processedCount}/${this.totalCount} (${Math.round((this.processedCount / this.totalCount) * 100)}%)`);
      }

      const outputContent = this.writeSRT(processedEntries);
      await fs.promises.writeFile(this.outputFile, outputContent, 'utf8');
      console.log(`Processing complete! Output saved to ${this.outputFile}`);
      console.log(`Processed ${processedEntries.length} subtitle entries`);
    } catch (error) {
      console.error('Error processing file:', error);
      throw error;
    }
  }

  /**
   * Splits SRT text into entries. Blocks are separated by blank lines; a block
   * with fewer than three lines (index, timing, text) is skipped.
   *
   * @param {string} content - Raw SRT text (LF, CRLF or CR line endings).
   * @returns {{index: number, timing: string, text: string}[]}
   */
  parseSRT(content) {
    // Normalise line endings first, otherwise CRLF files leave a stray '\r'
    // at the end of every timing and text line.
    const normalized = content.replace(/\r\n?/g, '\n');
    const entries = [];
    for (const block of normalized.split(/\n\s*\n/)) {
      const trimmed = block.trim();
      if (!trimmed) continue;
      const lines = trimmed.split('\n');
      if (lines.length < 3) continue;
      entries.push({
        index: Number.parseInt(lines[0], 10),
        timing: lines[1].trim(),
        text: lines.slice(2).join('\n'),
      });
    }
    return entries;
  }

  /**
   * Serialises entries back to SRT text: each entry is written as
   * index, timing and text lines, and entries are separated by a blank line.
   *
   * @param {{index: number, timing: string, text: string}[]} entries
   * @returns {string}
   */
  writeSRT(entries) {
    return entries
      .map((entry) => `${entry.index}\n${entry.timing}\n${entry.text}\n`)
      .join('\n');
  }

  /**
   * Re-times every entry of a chunk.
   *
   * @param {{index: number, timing: string, text: string}[]} chunk
   * @param {{timing: string}} [followingEntry] - Entry that comes right after
   *   the chunk in the full file, used as the look-ahead for the chunk's last
   *   entry. When omitted, the last entry has no look-ahead.
   * @returns {Promise<object[]>} Copies of the entries with an adjusted `timing`.
   */
  async processChunk(chunk, followingEntry) {
    return chunk.map((entry, i) => {
      const nextEntry = chunk[i + 1] ?? followingEntry;
      const adjustments = this.calculateTimingAdjustments(entry, nextEntry, {
        isConversation: this.detectConversation(entry.text),
        isConversationEnd: this.detectConversationEnd(entry.text),
        isConversationStart: this.detectConversationStart(entry.text),
      });
      return {
        ...entry,
        timing: this.adjustTiming(entry.timing, adjustments),
      };
    });
  }

  /**
   * Heuristic: does the text look like part of a conversation?
   *
   * @param {string} text
   * @returns {boolean} True when any of the patterns below matches.
   */
  detectConversation(text) {
    const conversationPatterns = [
      /<i>.*<\/i>/, // Italic text often indicates narration
      /{\\an\d+}/, // Position tags
      /^[A-Z][^.!?]*[?!]/, // Questions or exclamations
      /^[A-Z][^.!?]*\.$/ // Statements
    ];
    return conversationPatterns.some((pattern) => pattern.test(text));
  }

  /**
   * Heuristic: does the trimmed text end with '.', '?', '!' or a closing
   * italic tag?
   *
   * @param {string} text
   * @returns {boolean}
   */
  detectConversationEnd(text) {
    const endPatterns = [
      /\.$/,
      /\?$/,
      /!$/,
      /<i>.*<\/i>$/ // Italic endings often indicate narration end
    ];
    const trimmed = text.trim();
    return endPatterns.some((pattern) => pattern.test(trimmed));
  }

  /**
   * Heuristic: does the trimmed text open with a capitalised question or
   * exclamation, a capitalised "Label:" prefix, or contain an italic span?
   *
   * @param {string} text
   * @returns {boolean}
   */
  detectConversationStart(text) {
    const startPatterns = [
      /^[A-Z][^.!?]*[?!]/,
      /^[A-Z][^.!?]*:/,
      /<i>.*<\/i>/
    ];
    const trimmed = text.trim();
    return startPatterns.some((pattern) => pattern.test(trimmed));
  }

  /**
   * Works out by how many milliseconds the start and end of an entry should
   * be shifted. All offsets are additive.
   *
   * @param {{text: string, timing: string}} entry
   * @param {{timing: string}} [nextEntry] - Entry that follows, if any.
   * @param {{isConversation: boolean}} context - Only `isConversation` is read.
   * @returns {{startOffset: number, endOffset: number}}
   */
  calculateTimingAdjustments(entry, nextEntry, context) {
    const adjustments = {
      startOffset: 0,
      endOffset: 0
    };
    const { text } = entry;

    if (text.includes('?')) {
      adjustments.endOffset += 300; // 300ms pause for questions
    }
    if (text.includes('!')) {
      adjustments.endOffset += 200; // 200ms pause for exclamations
    }
    if (text.includes('.')) {
      adjustments.endOffset += 150; // 150ms pause for statements
    }
    if (text.includes('<i>')) {
      adjustments.startOffset += 100; // 100ms pause before narration
      adjustments.endOffset += 200; // 200ms pause after narration
    }
    if (text.includes('{\\an')) {
      adjustments.startOffset += 50; // 50ms pause for positioned text
    }
    if (context.isConversation && nextEntry) {
      // A NaN gap (unparseable timing) compares false and adds no buffer.
      const timeGap = this.calculateTimeGap(entry.timing, nextEntry.timing);
      if (timeGap < 500) { // Less than 500ms gap
        adjustments.endOffset += 200; // Add buffer
      }
    }
    return adjustments;
  }

  /**
   * Milliseconds between the end of one entry and the start of the next.
   *
   * @param {string} currentTiming - "start --> end" line of the current entry.
   * @param {string} [nextTiming] - "start --> end" line of the next entry.
   * @returns {number} The gap in ms (negative when the entries overlap), 0 when
   *   `nextTiming` is missing, NaN when a timestamp cannot be parsed.
   */
  calculateTimeGap(currentTiming, nextTiming) {
    if (!nextTiming) return 0;
    // Pass the complete timestamps on: parseTime() needs the ",mmm" part, and
    // dropping it here would turn every gap into NaN.
    const currentEnd = currentTiming.split(/\s*-->\s*/)[1];
    const nextStart = nextTiming.split(/\s*-->\s*/)[0];
    return this.parseTime(nextStart) - this.parseTime(currentEnd);
  }

  /**
   * Shifts both timestamps of a timing line.
   *
   * @param {string} timing - "HH:MM:SS,mmm --> HH:MM:SS,mmm".
   * @param {{startOffset: number, endOffset: number}} adjustments - Offsets in ms.
   * @returns {string} The shifted timing line, or `timing` unchanged when it
   *   cannot be parsed (so a malformed block is passed through rather than
   *   written out as "NaN:NaN:NaN,NaN").
   */
  adjustTiming(timing, adjustments) {
    const [start, end] = timing.split(/\s*-->\s*/);
    const startTime = this.parseTime(start);
    const endTime = this.parseTime(end);
    if (Number.isNaN(startTime) || Number.isNaN(endTime)) {
      return timing;
    }
    const adjustedStart = startTime + adjustments.startOffset;
    const adjustedEnd = endTime + adjustments.endOffset;
    return `${this.formatTime(adjustedStart)} --> ${this.formatTime(adjustedEnd)}`;
  }

  /**
   * Converts an "HH:MM:SS,mmm" timestamp to milliseconds. A '.' is accepted in
   * place of the ',' and anything after the millisecond digits is ignored.
   *
   * @param {string} timeStr
   * @returns {number} Milliseconds, or NaN when the input is not a timestamp.
   */
  parseTime(timeStr) {
    const match = /^\s*(\d+):(\d+):(\d+)[,.](\d+)/.exec(String(timeStr));
    if (!match) return Number.NaN;
    const [hours, minutes, seconds, ms] = match.slice(1).map(Number);
    return (hours * 3600 + minutes * 60 + seconds) * 1000 + ms;
  }

  /**
   * Converts milliseconds to an "HH:MM:SS,mmm" timestamp.
   *
   * @param {number} milliseconds - Rounded to an integer and clamped at 0.
   * @returns {string}
   */
  formatTime(milliseconds) {
    const total = Math.max(0, Math.round(milliseconds));
    const totalSeconds = Math.floor(total / 1000);
    const pad = (value, width) => String(value).padStart(width, '0');
    const hours = Math.floor(totalSeconds / 3600);
    const minutes = Math.floor((totalSeconds % 3600) / 60);
    const seconds = totalSeconds % 60;
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(total % 1000, 3)}`;
  }
}
/**
 * Script entry point: builds a processor with its default settings and runs
 * it over the input file.
 *
 * @returns {Promise<void>} Settles when processing has finished; rejects with
 *   whatever processFile() rejects with.
 */
async function main() {
  await new ExampleSRTProcessor().processFile();
}
// Run only when this file is the entry script, not when it is imported.
// import.meta.url is a percent-encoded file URL, so the script path has to be
// converted with pathToFileURL() before comparing; plain string concatenation
// never matches for paths containing spaces or non-ASCII characters.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((error) => {
    console.error(error);
    // Signal the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });
}
export default ExampleSRTProcessor;