// claude-integration.ts • 22.3 kB
/**
* Claude-specific integration for SRT chunking functions
* Leverages Claude's Todo tool for progress tracking and task management
*/
import { SRTSubtitle, SRTChunk } from '../types/srt.js';
import { detectConversations, detectConversationsAdvanced } from '../chunking/conversation-detector.js';
import { AIModelIntegration, AIModelConfig, ChunkingIntegrationOptions } from './ai-model-integration.js';
/**
 * Configuration shape required by the Claude integration.
 *
 * Extends the generic `AIModelConfig` and pins several fields to literal
 * types, so a value of this type must carry exactly those literals.
 */
export interface ClaudeConfig extends AIModelConfig {
  /** Always the literal `'claude'`. */
  modelType: 'claude';
  /** Always the literal `true`. */
  supportsTodoTool: true;
  /** Claude's context limit (literal type `200000`). */
  maxContextSize: 200000;
  /** Chunk size limit considered optimal for Claude (literal type `15`). */
  chunkSizeLimit: 15;
  /** How chunks are scheduled for processing. */
  processingStrategy: 'sequential' | 'parallel' | 'batch';
  /** Always the literal `true`. */
  contextOptimization: true;
  /** Settings that only apply to Claude requests. */
  claudeSpecific: {
    useAnthropicFormat: boolean;
    enableReasoning: boolean;
    maxTokensPerRequest: number;
    temperature: number;
  };
}
/**
 * In-memory todo store used to track SRT processing steps for Claude.
 *
 * Todos are keyed by a generated id of the form `claude-todo-<n>`, where `<n>`
 * comes from a counter that keeps increasing for the lifetime of the instance.
 */
export class ClaudeTodoIntegration {
  private todos: Map<string, any> = new Map();
  private todoCounter = 0;

  /**
   * Registers a batch of todos and assigns each one a fresh id.
   *
   * Missing `status`, `priority` and `category` fall back to `'pending'`,
   * `'medium'` and `'processing'`. Each todo's metadata is extended with
   * `claudeOptimized: true` and a `createdAt` ISO timestamp.
   *
   * @param todos - Todo descriptions to register.
   * @returns A human-readable summary sentence (not the todos themselves).
   */
  async createTodoList(todos: any[]): Promise<string> {
    const prepared: any[] = [];
    for (const source of todos) {
      this.todoCounter += 1;
      prepared.push({
        id: `claude-todo-${this.todoCounter}`,
        content: source.content,
        status: source.status || 'pending',
        priority: source.priority || 'medium',
        category: source.category || 'processing',
        metadata: {
          ...source.metadata,
          claudeOptimized: true,
          createdAt: new Date().toISOString()
        }
      });
    }

    // Only store once the whole batch has been built successfully.
    for (const entry of prepared) {
      this.todos.set(entry.id, entry);
    }

    return `Created ${prepared.length} Claude-optimized todos for SRT processing`;
  }

  /**
   * Sets the status of a stored todo and stamps it with `updatedAt`.
   *
   * @param todoId - Id of the todo to update.
   * @param status - New status value.
   * @returns `true` when the todo exists and was updated, otherwise `false`.
   */
  async updateTodoStatus(todoId: string, status: string): Promise<boolean> {
    const existing = this.todos.get(todoId);
    if (!existing) {
      return false;
    }

    existing.status = status;
    existing.updatedAt = new Date().toISOString();
    this.todos.set(todoId, existing);
    return true;
  }

  /**
   * Lists every stored todo in insertion order.
   *
   * @returns A new array holding the stored todo objects.
   */
  async getTodoList(): Promise<any[]> {
    return [...this.todos.values()];
  }

  /**
   * Convenience wrapper that sets a todo's status to `'completed'`.
   *
   * @param todoId - Id of the todo to complete.
   * @returns `true` when the todo exists, otherwise `false`.
   */
  async markTodoComplete(todoId: string): Promise<boolean> {
    const updated = await this.updateTodoStatus(todoId, 'completed');
    return updated;
  }

  /**
   * Looks up a single todo.
   *
   * @param todoId - Id of the todo to fetch.
   * @returns The stored todo, or `null` when none matches.
   */
  async getTodoById(todoId: string): Promise<any | null> {
    const found = this.todos.get(todoId);
    return found ? found : null;
  }
}
/**
 * Claude-specific SRT processing integration.
 *
 * Orchestrates conversation chunking, plan creation and per-chunk processing,
 * and mirrors progress into a `ClaudeTodoIntegration` todo list.
 *
 * NOTE: chunk processing is currently simulated (see `simulateClaudeProcessing`);
 * no Claude API call is made by this class.
 */
export class ClaudeSRTIntegration extends AIModelIntegration {
  private claudeConfig: ClaudeConfig;
  private claudeTodoIntegration: ClaudeTodoIntegration;

  /**
   * @param config - Claude configuration; also forwarded to the base class.
   * @param options - Chunking options forwarded to the base class.
   */
  constructor(config: ClaudeConfig, options: ChunkingIntegrationOptions) {
    super(config, options);
    this.claudeConfig = config;
    this.claudeTodoIntegration = new ClaudeTodoIntegration();
  }

  /**
   * Process SRT subtitles with Claude-specific optimizations.
   *
   * Creates a todo list, chunks the subtitles, builds a processing plan and
   * processes every chunk. Failures of individual chunks do not abort the run;
   * they are collected in `results.errors` and `success` is still `true`.
   *
   * @param subtitles - Subtitles to process.
   * @param processingType - Kind of processing to plan for.
   * @param claudeOptions - Optional per-call settings; `useReasoning` adds an
   *   overall `reasoning` summary to the result.
   * @returns The chunks, plan, per-chunk results, todos and metadata.
   * @throws Re-throws any error raised by chunking, planning or execution after
   *   cancelling todos that are still in progress.
   */
  async processSRTWithClaude(
    subtitles: SRTSubtitle[],
    processingType: 'translation' | 'analysis' | 'conversation-detection',
    claudeOptions?: {
      useReasoning?: boolean;
      temperature?: number;
      maxTokens?: number;
    }
  ): Promise<ClaudeProcessingResult> {
    // Create Claude-optimized todo list
    const claudeTodos = await this.createClaudeTodos(processingType);
    try {
      // Step 1: Detect conversations with Claude-optimized chunking
      const chunks = await this.detectConversationsForClaude(subtitles);
      // Step 2: Create Claude-specific processing plan
      const claudePlan = await this.createClaudeProcessingPlan(chunks, processingType);
      // Step 3: Execute processing with Claude Todo tool integration
      const results = await this.executeClaudeProcessing(
        chunks,
        claudePlan,
        claudeTodos,
        claudeOptions
      );
      return {
        success: true,
        chunks,
        claudePlan,
        results,
        claudeTodos,
        claudeMetadata: this.generateClaudeMetadata(chunks, results),
        reasoning: claudeOptions?.useReasoning ? await this.generateClaudeReasoning(chunks) : undefined
      };
    } catch (error) {
      await this.handleClaudeError(error, claudeTodos);
      throw error;
    }
  }

  /**
   * Create the five standard todos for one processing run.
   *
   * @param processingType - Processing type interpolated into the todo text.
   * @returns The todo objects created by this call, in creation order. These
   *   are the same object instances held by the todo store, so later status
   *   updates are visible through the returned array.
   */
  private async createClaudeTodos(processingType: string): Promise<any[]> {
    const baseTodos = [
      {
        content: `Analyze SRT file structure for ${processingType}`,
        status: 'pending',
        priority: 'high',
        category: 'analysis',
        metadata: {
          claudeTask: 'file-analysis',
          reasoning: 'Claude needs to understand the file structure before processing'
        }
      },
      {
        content: 'Detect conversation boundaries using advanced algorithms',
        status: 'pending',
        priority: 'high',
        category: 'chunking',
        metadata: {
          claudeTask: 'conversation-detection',
          reasoning: 'Claude will use semantic analysis to identify natural conversation boundaries'
        }
      },
      {
        content: 'Optimize chunks for Claude context limits',
        status: 'pending',
        priority: 'high',
        category: 'optimization',
        metadata: {
          claudeTask: 'context-optimization',
          reasoning: 'Chunks must be sized appropriately for Claude\'s context window'
        }
      },
      {
        content: `Execute ${processingType} with Claude reasoning`,
        status: 'pending',
        priority: 'medium',
        category: 'execution',
        metadata: {
          claudeTask: 'ai-processing',
          reasoning: 'Claude will apply its reasoning capabilities to the processing task'
        }
      },
      {
        content: 'Validate results using Claude quality assessment',
        status: 'pending',
        priority: 'medium',
        category: 'validation',
        metadata: {
          claudeTask: 'quality-validation',
          reasoning: 'Claude will assess the quality and coherence of the results'
        }
      }
    ];

    // createTodoList() returns a human-readable summary sentence, not JSON, so
    // parsing its result always throws. Recover the created entries instead by
    // diffing the store's ids before and after the call.
    const existingIds = new Set<string>(
      (await this.claudeTodoIntegration.getTodoList()).map(todo => todo.id)
    );
    await this.claudeTodoIntegration.createTodoList(baseTodos);
    const allTodos = await this.claudeTodoIntegration.getTodoList();
    return allTodos.filter(todo => !existingIds.has(todo.id));
  }

  /**
   * Detect conversations using chunking parameters tuned for Claude.
   *
   * @param subtitles - Subtitles to split into chunks.
   * @returns Chunks produced by `detectConversationsAdvanced`.
   */
  private async detectConversationsForClaude(subtitles: SRTSubtitle[]): Promise<SRTChunk[]> {
    // Claude-optimized chunking parameters
    const claudeOptions = {
      boundaryThreshold: 0.65, // Slightly more aggressive for Claude
      maxChunkSize: this.claudeConfig.chunkSizeLimit,
      minChunkSize: 2,
      enableSemanticAnalysis: true,
      enableSpeakerDiarization: true
    };
    return detectConversationsAdvanced(subtitles, claudeOptions);
  }

  /**
   * Create the Claude-specific processing plan for a set of chunks.
   *
   * @param chunks - Chunks to plan for.
   * @param processingType - Processing type that drives instructions and reasoning.
   * @returns Plan with one entry per chunk plus settings copied from the config.
   */
  private async createClaudeProcessingPlan(
    chunks: SRTChunk[],
    processingType: string
  ): Promise<ClaudeProcessingPlan> {
    const plan: ClaudeProcessingPlan = {
      totalChunks: chunks.length,
      claudeStrategy: this.determineClaudeStrategy(chunks),
      reasoningRequired: this.requiresClaudeReasoning(processingType),
      chunks: chunks.map((chunk, index) => ({
        chunkId: chunk.id,
        index,
        claudePriority: this.calculateClaudePriority(chunk),
        estimatedTokens: this.estimateClaudeTokens(chunk),
        claudeInstructions: this.generateClaudeInstructions(chunk, processingType),
        reasoningSteps: this.generateClaudeReasoningSteps(chunk, processingType)
      })),
      estimatedClaudeTime: this.estimateClaudeProcessingTime(chunks),
      contextOptimization: this.claudeConfig.contextOptimization,
      claudeSpecific: {
        useAnthropicFormat: this.claudeConfig.claudeSpecific.useAnthropicFormat,
        enableReasoning: this.claudeConfig.claudeSpecific.enableReasoning,
        maxTokensPerRequest: this.claudeConfig.claudeSpecific.maxTokensPerRequest,
        temperature: this.claudeConfig.claudeSpecific.temperature
      }
    };
    return plan;
  }

  /**
   * Process every chunk and keep the run's todos in sync.
   *
   * A chunk that throws is recorded in `errors` and processing continues with
   * the next chunk.
   *
   * @param chunks - Chunks to process, in order.
   * @param plan - Plan created for these chunks.
   * @param todos - Todos of this run, in the order produced by `createClaudeTodos`.
   * @param options - Per-call options forwarded to chunk processing.
   * @returns Aggregated responses, reasoning, errors and metrics.
   */
  private async executeClaudeProcessing(
    chunks: SRTChunk[],
    plan: ClaudeProcessingPlan,
    todos: any[],
    options?: { useReasoning?: boolean; temperature?: number; maxTokens?: number }
  ): Promise<ClaudeProcessingResults> {
    const results: ClaudeProcessingResults = {
      processedChunks: [],
      claudeResponses: [],
      reasoning: [],
      errors: [],
      warnings: [],
      totalProcessingTime: 0,
      claudeMetrics: {
        totalTokensUsed: 0,
        averageTokensPerChunk: 0,
        reasoningSteps: 0,
        qualityScore: 0
      }
    };
    const startTime = Date.now();

    // Update todo status. Ids are taken from this run's todos rather than
    // hard-coded, because the id counter keeps growing across runs.
    await this.setTodoStatusAt(todos, 0, 'completed');
    await this.setTodoStatusAt(todos, 1, 'completed');
    await this.setTodoStatusAt(todos, 2, 'in_progress');

    // Process chunks with Claude
    for (const chunk of chunks) {
      try {
        const chunkResult = await this.processChunkWithClaude(chunk, plan, options);
        results.processedChunks.push(chunkResult);
        results.claudeResponses.push(chunkResult.claudeResponse);
        if (chunkResult.reasoning) {
          results.reasoning.push(chunkResult.reasoning);
        }
        results.claudeMetrics.totalTokensUsed += chunkResult.tokensUsed;
        results.claudeMetrics.reasoningSteps += chunkResult.reasoningSteps || 0;
      } catch (error) {
        results.errors.push({
          chunkId: chunk.id,
          error: error instanceof Error ? error.message : 'Unknown error',
          claudeContext: this.getClaudeErrorContext(chunk)
        });
      }
    }

    results.totalProcessingTime = Date.now() - startTime;
    // Guard against 0 / 0 = NaN when there is nothing to process.
    results.claudeMetrics.averageTokensPerChunk =
      chunks.length > 0 ? results.claudeMetrics.totalTokensUsed / chunks.length : 0;
    results.claudeMetrics.qualityScore = this.calculateClaudeQualityScore(results);

    // Update final todo status. The optimization todo is closed here as well so
    // that a successful run leaves nothing in progress.
    await this.setTodoStatusAt(todos, 2, 'completed');
    await this.setTodoStatusAt(todos, 3, 'completed');
    await this.setTodoStatusAt(todos, 4, 'completed');
    return results;
  }

  /**
   * Update the status of the todo at `index`, if such a todo exists.
   *
   * @param todos - Todos of the current run.
   * @param index - Position of the todo within `todos`.
   * @param status - New status value.
   */
  private async setTodoStatusAt(todos: any[], index: number, status: string): Promise<void> {
    const todoId: unknown = todos[index]?.id;
    if (typeof todoId === 'string') {
      await this.claudeTodoIntegration.updateTodoStatus(todoId, status);
    }
  }

  /**
   * Process an individual chunk.
   *
   * @param chunk - Chunk to process.
   * @param plan - Plan for the run; `reasoningRequired` controls whether
   *   per-chunk reasoning is generated.
   * @param options - Per-call options forwarded to the processor.
   * @returns Response, optional reasoning, token estimate, timing and quality score.
   */
  private async processChunkWithClaude(
    chunk: SRTChunk,
    plan: ClaudeProcessingPlan,
    options?: { useReasoning?: boolean; temperature?: number; maxTokens?: number }
  ): Promise<ClaudeChunkResult> {
    const startTime = Date.now();
    // Simulate Claude processing (replace with actual Claude API calls)
    const claudeResponse = await this.simulateClaudeProcessing(chunk, plan, options);
    const reasoning = plan.reasoningRequired ?
      await this.generateClaudeReasoningForChunk(chunk) : undefined;
    return {
      chunkId: chunk.id,
      claudeResponse,
      reasoning,
      tokensUsed: this.estimateClaudeTokens(chunk),
      reasoningSteps: reasoning?.steps?.length ?? 0,
      processingTime: Date.now() - startTime,
      qualityScore: this.calculateChunkQualityScore(chunk, claudeResponse)
    };
  }

  /**
   * Simulate Claude processing (replace with actual Claude API integration).
   *
   * @param chunk - Chunk being "processed".
   * @param plan - Plan for the run; `reasoningRequired` adds a reasoning string.
   * @param options - Currently unused.
   * @returns A canned response with a fixed confidence of 0.85.
   */
  private async simulateClaudeProcessing(
    chunk: SRTChunk,
    plan: ClaudeProcessingPlan,
    options?: { useReasoning?: boolean; temperature?: number; maxTokens?: number }
  ): Promise<ClaudeResponse> {
    const startTime = Date.now();
    // This would be replaced with actual Claude API calls
    const content = `Claude processed chunk ${chunk.id} with ${chunk.subtitles.length} subtitles`;
    const tokensUsed = this.estimateClaudeTokens(chunk);
    return {
      content,
      reasoning: plan.reasoningRequired ? 'Applied Claude reasoning to analyze content' : undefined,
      confidence: 0.85,
      tokensUsed,
      // Elapsed milliseconds, not an absolute timestamp.
      processingTime: Date.now() - startTime
    };
  }

  /**
   * Determine the Claude processing strategy.
   *
   * @param chunks - Chunks to be processed.
   * @returns `'sequential'` when the estimated total exceeds 80% of the context
   *   size, `'batch'` for more than 10 chunks, otherwise `'parallel'`.
   */
  private determineClaudeStrategy(chunks: SRTChunk[]): 'sequential' | 'parallel' | 'batch' {
    const totalContextSize = chunks.reduce((sum, chunk) =>
      sum + this.estimateClaudeTokens(chunk), 0);
    if (totalContextSize > this.claudeConfig.maxContextSize * 0.8) {
      return 'sequential'; // Process one at a time for large files
    } else if (chunks.length > 10) {
      return 'batch'; // Process in batches for many chunks
    } else {
      return 'parallel'; // Process in parallel for small files
    }
  }

  /**
   * Check if Claude reasoning is required.
   *
   * @param processingType - Processing type to check.
   * @returns `true` for `'translation'` and `'analysis'`.
   */
  private requiresClaudeReasoning(processingType: string): boolean {
    return processingType === 'translation' || processingType === 'analysis';
  }

  /**
   * Calculate the Claude-specific priority of a chunk.
   *
   * @param chunk - Chunk to rate.
   * @returns `'high'` when the chunk contains a question mark, has a speaker or
   *   is highly complex; `'medium'` for more than 8 subtitles or medium
   *   complexity; otherwise `'low'`.
   */
  private calculateClaudePriority(chunk: SRTChunk): 'low' | 'medium' | 'high' {
    const subtitleCount = chunk.subtitles.length;
    const hasSpeaker = !!chunk.context?.speaker;
    const isQuestion = chunk.subtitles.some(s => s.text.includes('?'));
    const complexity = this.assessClaudeComplexity(chunk);
    if (isQuestion || hasSpeaker || complexity === 'high') return 'high';
    if (subtitleCount > 8 || complexity === 'medium') return 'medium';
    return 'low';
  }

  /**
   * Assess the complexity of a chunk from its size and formatting.
   *
   * Scoring: +2 for more than 500 characters, +2 for more than 50 words,
   * +1 for `<i>`/`<b>`/`<u>` tags, +1 for more than 10 subtitles.
   *
   * @param chunk - Chunk to assess.
   * @returns `'high'` for a score of 4 or more, `'medium'` for 2 or more, else `'low'`.
   */
  private assessClaudeComplexity(chunk: SRTChunk): 'low' | 'medium' | 'high' {
    const textLength = chunk.subtitles.reduce((sum, s) => sum + s.text.length, 0);
    const wordCount = chunk.subtitles.reduce((sum, s) => {
      // Trim first: splitting '' or padded text on whitespace yields empty
      // tokens that would otherwise be counted as words.
      const trimmed = s.text.trim();
      return sum + (trimmed === '' ? 0 : trimmed.split(/\s+/).length);
    }, 0);
    const hasComplexFormatting = chunk.subtitles.some(s =>
      s.text.includes('<i>') || s.text.includes('<b>') || s.text.includes('<u>'));
    let complexity = 0;
    if (textLength > 500) complexity += 2;
    if (wordCount > 50) complexity += 2;
    if (hasComplexFormatting) complexity += 1;
    if (chunk.subtitles.length > 10) complexity += 1;
    if (complexity >= 4) return 'high';
    if (complexity >= 2) return 'medium';
    return 'low';
  }

  /**
   * Estimate the Claude token count of a chunk.
   *
   * @param chunk - Chunk whose subtitle text is measured.
   * @returns Total text length divided by 4, rounded up.
   */
  private estimateClaudeTokens(chunk: SRTChunk): number {
    const textLength = chunk.subtitles.reduce((sum, s) => sum + s.text.length, 0);
    // Rough estimation: 1 token ≈ 4 characters
    return Math.ceil(textLength / 4);
  }

  /**
   * Generate Claude instructions for a chunk.
   *
   * @param chunk - Chunk the instructions are for (not inspected).
   * @param processingType - Adds two type-specific instructions for known types.
   * @returns Three generic instructions plus any type-specific ones.
   */
  private generateClaudeInstructions(chunk: SRTChunk, processingType: string): string[] {
    const instructions = [
      'Analyze the conversation context and speaker information',
      'Maintain consistency with previous chunks if applicable',
      'Apply appropriate processing based on content type'
    ];
    switch (processingType) {
      case 'translation':
        instructions.push('Detect source language and apply appropriate translation strategy');
        instructions.push('Maintain speaker voice consistency across the conversation');
        break;
      case 'analysis':
        instructions.push('Extract key information and themes');
        instructions.push('Identify conversation patterns and speaker dynamics');
        break;
      case 'conversation-detection':
        instructions.push('Validate conversation boundaries');
        instructions.push('Assess chunk coherence and speaker consistency');
        break;
    }
    return instructions;
  }

  /**
   * Generate Claude reasoning steps for a chunk.
   *
   * @param chunk - Chunk the steps are for (not inspected).
   * @param processingType - `'translation'` adds two extra steps.
   * @returns The list of reasoning steps.
   */
  private generateClaudeReasoningSteps(chunk: SRTChunk, processingType: string): string[] {
    const steps = [
      'Analyze the context and content of this chunk',
      'Consider the relationship to previous and next chunks',
      'Apply appropriate processing strategy based on content type'
    ];
    if (processingType === 'translation') {
      steps.push('Identify source language and cultural context');
      steps.push('Apply translation principles while maintaining meaning');
    }
    return steps;
  }

  /**
   * Estimate the processing time for a set of chunks.
   *
   * @param chunks - Chunks to estimate for.
   * @returns Estimated milliseconds: 2000 per chunk, scaled by 1.5 when
   *   reasoning is enabled and by the average complexity factor; 0 for no chunks.
   */
  private estimateClaudeProcessingTime(chunks: SRTChunk[]): number {
    // Averaging over zero chunks would divide by zero and yield NaN.
    if (chunks.length === 0) return 0;
    const baseTimePerChunk = 2000; // 2 seconds base
    const reasoningFactor = this.claudeConfig.claudeSpecific.enableReasoning ? 1.5 : 1.0;
    const complexityFactor = chunks.reduce((sum, chunk) => {
      const complexity = this.assessClaudeComplexity(chunk);
      return sum + (complexity === 'high' ? 2 : complexity === 'medium' ? 1.5 : 1);
    }, 0) / chunks.length;
    return chunks.length * baseTimePerChunk * reasoningFactor * complexityFactor;
  }

  /**
   * Generate the overall reasoning summary for a run.
   *
   * @param chunks - Chunks of the run (not inspected).
   * @returns A fixed reasoning summary with confidence 0.9.
   */
  private async generateClaudeReasoning(chunks: SRTChunk[]): Promise<ClaudeReasoning> {
    return {
      overallStrategy: 'Apply Claude reasoning to understand conversation flow and context',
      keyInsights: [
        'Conversation boundaries detected using semantic analysis',
        'Speaker consistency maintained across chunks',
        'Context optimization applied for Claude processing'
      ],
      reasoningSteps: [
        'Analyze overall conversation structure',
        'Identify key themes and topics',
        'Assess speaker dynamics and relationships',
        'Optimize processing strategy based on content complexity'
      ],
      confidence: 0.9
    };
  }

  /**
   * Generate reasoning for a specific chunk.
   *
   * @param chunk - Chunk to describe.
   * @returns Fixed steps plus insights derived from subtitle count, speaker and complexity.
   */
  private async generateClaudeReasoningForChunk(chunk: SRTChunk): Promise<ClaudeChunkReasoning> {
    return {
      chunkId: chunk.id,
      reasoning: `Claude analyzed chunk ${chunk.id} with ${chunk.subtitles.length} subtitles`,
      steps: [
        'Analyzed conversation context and speaker information',
        'Assessed content complexity and processing requirements',
        'Applied appropriate Claude reasoning for optimal results'
      ],
      confidence: 0.85,
      insights: [
        `Chunk contains ${chunk.subtitles.length} subtitles`,
        `Speaker: ${chunk.context?.speaker || 'Unknown'}`,
        `Complexity: ${this.assessClaudeComplexity(chunk)}`
      ]
    };
  }

  /**
   * Calculate the quality score of a whole run.
   *
   * @param results - Results whose errors and warnings are counted.
   * @returns 0.9 minus 0.1 per error and 0.05 per warning, floored at 0.
   */
  private calculateClaudeQualityScore(results: ClaudeProcessingResults): number {
    const errorPenalty = results.errors.length * 0.1;
    const warningPenalty = results.warnings.length * 0.05;
    const baseScore = 0.9;
    return Math.max(0, baseScore - errorPenalty - warningPenalty);
  }

  /**
   * Calculate the quality score of a single chunk.
   *
   * @param chunk - Chunk that was processed (not inspected).
   * @param response - Response whose confidence contributes to the score.
   * @returns 0.8 plus 20% of the response confidence, capped at 1.0.
   */
  private calculateChunkQualityScore(chunk: SRTChunk, response: ClaudeResponse): number {
    const baseScore = 0.8;
    const confidenceBonus = response.confidence * 0.2;
    return Math.min(1.0, baseScore + confidenceBonus);
  }

  /**
   * Describe a chunk for inclusion in an error record.
   *
   * @param chunk - Chunk that failed.
   * @returns One-line description with id, subtitle count and speaker.
   */
  private getClaudeErrorContext(chunk: SRTChunk): string {
    return `Chunk ${chunk.id} with ${chunk.subtitles.length} subtitles, speaker: ${chunk.context?.speaker || 'Unknown'}`;
  }

  /**
   * Handle a failed run by cancelling todos that are still in progress.
   *
   * @param error - The error that aborted the run (currently not inspected;
   *   the caller re-throws it).
   * @param todos - Todos of the failed run.
   */
  private async handleClaudeError(error: unknown, todos: any[]): Promise<void> {
    // Mark in-progress todos as cancelled
    for (const todo of todos) {
      if (todo.status === 'in_progress') {
        await this.claudeTodoIntegration.updateTodoStatus(todo.id, 'cancelled');
      }
    }
  }

  /**
   * Generate summary metadata for a run.
   *
   * @param chunks - All chunks of the run.
   * @param results - Results produced for those chunks.
   * @returns Counts, metrics, the active config and the efficiency ratio.
   */
  private generateClaudeMetadata(chunks: SRTChunk[], results: ClaudeProcessingResults): ClaudeMetadata {
    return {
      totalChunks: chunks.length,
      processedChunks: results.processedChunks.length,
      claudeMetrics: results.claudeMetrics,
      reasoningApplied: results.reasoning.length > 0,
      qualityScore: results.claudeMetrics.qualityScore,
      claudeConfig: this.claudeConfig,
      processingEfficiency: this.calculateClaudeEfficiency(chunks, results)
    };
  }

  /**
   * Calculate Claude processing efficiency.
   *
   * @param chunks - All chunks of the run.
   * @param results - Results carrying the total token count.
   * @returns Tokens used divided by (chunk count × max context size); 0 when
   *   that denominator is not positive.
   */
  private calculateClaudeEfficiency(chunks: SRTChunk[], results: ClaudeProcessingResults): number {
    const totalTokens = results.claudeMetrics.totalTokensUsed;
    const maxPossibleTokens = chunks.length * this.claudeConfig.maxContextSize;
    // Avoid 0 / 0 = NaN for an empty chunk list.
    if (maxPossibleTokens <= 0) return 0;
    return totalTokens / maxPossibleTokens;
  }
}
/**
* Claude-specific types
*/
/** Complete outcome of one `processSRTWithClaude` run. */
export interface ClaudeProcessingResult {
  /** Whether the run completed. */
  success: boolean;
  /** Chunks produced by conversation detection. */
  chunks: SRTChunk[];
  /** Plan that was created for the chunks. */
  claudePlan: ClaudeProcessingPlan;
  /** Aggregated per-chunk results and metrics. */
  results: ClaudeProcessingResults;
  /** Todos tracked for this run. */
  claudeTodos: any[];
  /** Summary metadata for the run. */
  claudeMetadata: ClaudeMetadata;
  /** Overall reasoning summary; only present when it was requested. */
  reasoning?: ClaudeReasoning;
}
/** Processing plan built for a set of chunks before execution. */
export interface ClaudeProcessingPlan {
  /** Number of chunks covered by the plan. */
  totalChunks: number;
  /** Scheduling strategy chosen for the chunks. */
  claudeStrategy: 'sequential' | 'parallel' | 'batch';
  /** Whether per-chunk reasoning should be generated. */
  reasoningRequired: boolean;
  /** One plan entry per chunk. */
  chunks: ClaudeChunkPlan[];
  /** Estimated total processing time. */
  estimatedClaudeTime: number;
  /** Context-optimization flag for the run. */
  contextOptimization: boolean;
  /** Claude request settings for the run. */
  claudeSpecific: {
    useAnthropicFormat: boolean;
    enableReasoning: boolean;
    maxTokensPerRequest: number;
    temperature: number;
  };
}
/** Plan entry describing how a single chunk should be processed. */
export interface ClaudeChunkPlan {
  /** Id of the chunk this entry belongs to. */
  chunkId: string;
  /** Position of the chunk within the planned chunk list. */
  index: number;
  /** Processing priority assigned to the chunk. */
  claudePriority: 'low' | 'medium' | 'high';
  /** Estimated token count of the chunk. */
  estimatedTokens: number;
  /** Instructions to follow when processing the chunk. */
  claudeInstructions: string[];
  /** Reasoning steps suggested for the chunk. */
  reasoningSteps: string[];
}
/** Aggregated results of processing every chunk of a run. */
export interface ClaudeProcessingResults {
  /** Results of the chunks that were processed without throwing. */
  processedChunks: ClaudeChunkResult[];
  /** Responses collected from the processed chunks. */
  claudeResponses: ClaudeResponse[];
  /** Per-chunk reasoning, for chunks that produced any. */
  reasoning: ClaudeChunkReasoning[];
  /** One record per chunk whose processing threw. */
  errors: ClaudeError[];
  /** Non-fatal issues collected during the run. */
  warnings: ClaudeWarning[];
  /** Wall-clock duration of the run, as a `Date.now()` difference. */
  totalProcessingTime: number;
  /** Token, reasoning and quality metrics for the run. */
  claudeMetrics: ClaudeMetrics;
}
/** Result of processing a single chunk. */
export interface ClaudeChunkResult {
  /** Id of the processed chunk. */
  chunkId: string;
  /** Response produced for the chunk. */
  claudeResponse: ClaudeResponse;
  /** Reasoning for the chunk; absent when none was generated. */
  reasoning?: ClaudeChunkReasoning;
  /** Token count attributed to the chunk. */
  tokensUsed: number;
  /** Number of reasoning steps recorded for the chunk. */
  reasoningSteps: number;
  /** Duration of processing the chunk, as a `Date.now()` difference. */
  processingTime: number;
  /** Quality score assigned to the chunk. */
  qualityScore: number;
}
/** Response returned by the chunk processor. */
export interface ClaudeResponse {
  /** Response text. */
  content: string;
  /** Reasoning text; absent when reasoning was not required. */
  reasoning?: string;
  /** Confidence value reported with the response. */
  confidence: number;
  /** Token count attributed to the response. */
  tokensUsed: number;
  /** Timing value reported with the response (derived from `Date.now()`). */
  processingTime: number;
}
/** Reasoning recorded for a single chunk. */
export interface ClaudeChunkReasoning {
  /** Id of the chunk the reasoning refers to. */
  chunkId: string;
  /** Summary sentence of the reasoning. */
  reasoning: string;
  /** Individual reasoning steps. */
  steps: string[];
  /** Confidence value attached to the reasoning. */
  confidence: number;
  /** Observations about the chunk. */
  insights: string[];
}
/** Overall reasoning summary for a whole run. */
export interface ClaudeReasoning {
  /** One-sentence description of the strategy. */
  overallStrategy: string;
  /** Key observations about the run. */
  keyInsights: string[];
  /** Reasoning steps at run level. */
  reasoningSteps: string[];
  /** Confidence value attached to the summary. */
  confidence: number;
}
/** Record of a chunk whose processing failed. */
export interface ClaudeError {
  /** Id of the failed chunk. */
  chunkId: string;
  /** Error message. */
  error: string;
  /** Short description of the chunk for diagnostics. */
  claudeContext: string;
}
/** Record of a non-fatal issue tied to a chunk. */
export interface ClaudeWarning {
  /** Id of the affected chunk. */
  chunkId: string;
  /** Warning message. */
  warning: string;
  /** Short description of the chunk for diagnostics. */
  claudeContext: string;
}
/** Numeric metrics collected over a processing run. */
export interface ClaudeMetrics {
  /** Sum of the token counts of all processed chunks. */
  totalTokensUsed: number;
  /** Total tokens divided by the number of chunks. */
  averageTokensPerChunk: number;
  /** Sum of the reasoning step counts of all processed chunks. */
  reasoningSteps: number;
  /** Quality score of the run. */
  qualityScore: number;
}
/** Summary metadata attached to a processing result. */
export interface ClaudeMetadata {
  /** Number of chunks in the run. */
  totalChunks: number;
  /** Number of chunks that produced a result. */
  processedChunks: number;
  /** Metrics collected during the run. */
  claudeMetrics: ClaudeMetrics;
  /** Whether any per-chunk reasoning was recorded. */
  reasoningApplied: boolean;
  /** Quality score of the run. */
  qualityScore: number;
  /** Configuration the run was executed with. */
  claudeConfig: ClaudeConfig;
  /** Ratio of tokens used to the maximum possible token budget. */
  processingEfficiency: number;
}