show-chunks.js•5.96 kB
#!/usr/bin/env node
/**
* Display conversation chunks in a readable format
*/
import fs from 'fs';
import { pathToFileURL } from 'url';
import { parseSRTFile } from './dist/parsers/srt-parser.js';
import { detectConversations } from './dist/chunking/conversation-detector.js';
/**
 * Loads an SRT file, splits it into conversation chunks and prints a
 * readable report to the console: a preview of the first chunks followed by
 * aggregate statistics.
 */
class ChunkDisplay {
  /**
   * @param {string} [inputFile] - Path of the SRT file to analyze. Defaults to
   *   the example file this script was originally hard-wired to, so existing
   *   `new ChunkDisplay()` callers keep working.
   */
  constructor(inputFile = '/home/omd/Documents/Mix/SRT-MCP/Example.srt') {
    this.inputFile = inputFile;
  }

  /**
   * Reads and parses `this.inputFile`, detects conversation chunks, prints a
   * preview of up to ten chunks and then the aggregate statistics.
   *
   * @returns {Promise<void>}
   * @throws {Error} Any read/parse/detection error is logged with
   *   `console.error` and then rethrown unchanged.
   */
  async showChunks() {
    const maxChunksShown = 10;
    const maxSubtitlesShown = 3;
    const maxTextLength = 80;

    try {
      console.log(`Loading and analyzing ${this.inputFile}...`);

      // Read and parse the SRT file.
      const content = fs.readFileSync(this.inputFile, 'utf8');
      const parseResult = parseSRTFile(content);
      if (!parseResult.success || !parseResult.file) {
        throw new Error('Failed to parse SRT file');
      }
      const { subtitles } = parseResult.file;
      console.log(`Successfully parsed ${subtitles.length} subtitle entries`);

      // Detect conversation chunks.
      const chunks = detectConversations(subtitles);
      // Guard the division so an empty result prints 0.0 instead of NaN.
      const averageSize = chunks.length > 0 ? subtitles.length / chunks.length : 0;
      console.log(`\n=== CONVERSATION CHUNKS DETECTED ===`);
      console.log(`Total Chunks: ${chunks.length}`);
      console.log(`Average Chunk Size: ${averageSize.toFixed(1)} entries\n`);

      // Display only the leading chunks so large files stay readable.
      chunks.slice(0, maxChunksShown).forEach((chunk, position) => {
        console.log(`--- CHUNK ${position + 1} ---`);
        console.log(`ID: ${chunk.id}`);
        console.log(`Range: ${chunk.startIndex} - ${chunk.endIndex} (${chunk.subtitles.length} entries)`);
        console.log(`Speaker: ${chunk.context?.speaker || 'Unknown'}`);
        console.log(`Conversation ID: ${chunk.context?.conversationId || 'N/A'}`);
        console.log(`Previous Context: ${chunk.context?.previousContext || 'N/A'}`);
        console.log(`Next Context: ${chunk.context?.nextContext || 'N/A'}`);

        // Show the first few subtitles, truncating long lines.
        console.log(`\nFirst 3 subtitles in chunk:`);
        for (const subtitle of chunk.subtitles.slice(0, maxSubtitlesShown)) {
          const ellipsis = subtitle.text.length > maxTextLength ? '...' : '';
          console.log(` ${subtitle.index}: ${subtitle.text.substring(0, maxTextLength)}${ellipsis}`);
        }
        if (chunk.subtitles.length > maxSubtitlesShown) {
          console.log(` ... and ${chunk.subtitles.length - maxSubtitlesShown} more subtitles`);
        }
        console.log(''); // Empty line for readability
      });
      if (chunks.length > maxChunksShown) {
        console.log(`... and ${chunks.length - maxChunksShown} more chunks`);
      }

      this.generateChunkStatistics(chunks);
    } catch (error) {
      console.error('Error displaying chunks:', error);
      throw error;
    }
  }

  /**
   * Prints aggregate statistics for the given chunks: size average/min/max,
   * a size distribution, speaker counts and simple text-based content counts.
   *
   * @param {Array<{subtitles: Array<{text: string}>, context?: {speaker?: string}}>} chunks
   *   Chunks to summarize. An empty array prints a short notice instead of
   *   NaN/Infinity figures.
   * @returns {void}
   */
  generateChunkStatistics(chunks) {
    console.log(`\n=== CHUNK STATISTICS ===`);
    if (chunks.length === 0) {
      // Every figure below divides by, or takes min/max over, the chunk list.
      console.log('No chunks to analyze.');
      return;
    }

    const chunkSizes = chunks.map((chunk) => chunk.subtitles.length);
    const totalSize = chunkSizes.reduce((sum, size) => sum + size, 0);
    const avgSize = totalSize / chunkSizes.length;
    // reduce() rather than Math.min(...sizes): spreading a very large array
    // can exceed the engine's argument-count limit.
    const minSize = chunkSizes.reduce((smallest, size) => Math.min(smallest, size), Infinity);
    const maxSize = chunkSizes.reduce((largest, size) => Math.max(largest, size), -Infinity);
    console.log(`Average Chunk Size: ${avgSize.toFixed(1)} entries`);
    console.log(`Smallest Chunk: ${minSize} entries`);
    console.log(`Largest Chunk: ${maxSize} entries`);

    // Size distribution: each size lands in the first range whose inclusive
    // upper bound it does not exceed (so 50 counts as '21-50 entries').
    const sizeRanges = [
      { label: '1-5 entries', upperBound: 5, count: 0 },
      { label: '6-10 entries', upperBound: 10, count: 0 },
      { label: '11-20 entries', upperBound: 20, count: 0 },
      { label: '21-50 entries', upperBound: 50, count: 0 },
      { label: '50+ entries', upperBound: Infinity, count: 0 },
    ];
    for (const size of chunkSizes) {
      sizeRanges.find((range) => size <= range.upperBound).count += 1;
    }
    console.log(`\nSize Distribution:`);
    for (const { label, count } of sizeRanges) {
      const percentage = ((count / chunks.length) * 100).toFixed(1);
      console.log(` ${label}: ${count} chunks (${percentage}%)`);
    }

    // Speaker analysis: one pass collects distinct speakers and the number of
    // chunks that carry a (truthy) speaker.
    const speakers = new Set();
    let chunksWithSpeaker = 0;
    for (const chunk of chunks) {
      const speaker = chunk.context?.speaker;
      if (speaker) {
        speakers.add(speaker);
        chunksWithSpeaker += 1;
      }
    }
    console.log(`\nSpeaker Analysis:`);
    console.log(` Unique Speakers: ${speakers.size}`);
    console.log(` Chunks with Speaker: ${chunksWithSpeaker}`);
    console.log(` Chunks without Speaker: ${chunks.length - chunksWithSpeaker}`);

    // Content analysis: count subtitles whose text contains '?', '!' or an
    // opening '<i>' tag (reported as italic narration).
    let questions = 0;
    let exclamations = 0;
    let narration = 0;
    for (const chunk of chunks) {
      for (const { text } of chunk.subtitles) {
        if (text.includes('?')) questions += 1;
        if (text.includes('!')) exclamations += 1;
        if (text.includes('<i>')) narration += 1;
      }
    }
    console.log(`\nContent Analysis:`);
    console.log(` Questions: ${questions}`);
    console.log(` Exclamations: ${exclamations}`);
    console.log(` Narration (italic): ${narration}`);
  }
}
/**
 * Script entry point: creates a ChunkDisplay with its default settings and
 * runs its chunk report.
 *
 * @returns {Promise<void>} Settles when showChunks() does; a rejection from
 *   it propagates to the caller.
 */
async function main() {
  await new ChunkDisplay().showChunks();
}
// Run main() only when this file is the script Node was started with, not
// when it is imported as a module. pathToFileURL() builds the same
// percent-encoded file: URL form that import.meta.url uses, so paths with
// spaces, non-ASCII characters or Windows drive letters still match; the
// undefined check covers launches that have no script path in argv.
if (process.argv[1] !== undefined && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((error) => {
    console.error(error);
    // Report the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });
}
export default ChunkDisplay;