// chat-optimization-example.ts • 14.4 kB
/**
 * Complete Example: Chat Application Optimization
 * 
 * This example demonstrates how to use GEPA to optimize prompts for a customer service
 * chat application, improving response quality, empathy, and problem-solving effectiveness.
 */
import { 
  EvolutionEngine, 
  ParetoFrontier, 
  ReflectionEngine, 
  PromptMutator,
  PerformanceTracker,
  LLMAdapter 
} from '../src/index';
import {
  PromptCandidate,
  TaskContext,
  EvolutionConfig,
  ParetoObjective,
  ExecutionTrajectory
} from '../src/types/gepa';
// 1. Define Custom Objectives for Chat Application
const chatObjectives: ParetoObjective[] = [
  {
    name: 'helpfulness',
    weight: 0.4,
    direction: 'maximize',
    extractor: (candidate: PromptCandidate) => {
      // Extract helpfulness score from task performance
      return candidate.taskPerformance.get('helpfulness-evaluation') || 0;
    }
  },
  {
    name: 'empathy',
    weight: 0.3,
    direction: 'maximize',
    extractor: (candidate: PromptCandidate) => {
      // Extract empathy score from evaluation metrics
      return candidate.taskPerformance.get('empathy-score') || 0;
    }
  },
  {
    name: 'response_time',
    weight: 0.2,
    direction: 'minimize',
    extractor: (candidate: PromptCandidate) => {
      // Extract average response generation time
      const responseTime = candidate.taskPerformance.get('avg-response-time') || 1000;
      return responseTime / 1000; // Normalize to seconds
    }
  },
  {
    name: 'safety',
    weight: 0.1,
    direction: 'maximize',
    extractor: (candidate: PromptCandidate) => {
      // Extract safety compliance score
      return candidate.taskPerformance.get('safety-score') || 0.5;
    }
  }
];
// 2. Custom LLM Adapter for Chat Evaluation
class ChatEvaluationAdapter extends LLMAdapter {
  async evaluatePrompt(prompt: string, context: TaskContext): Promise<any> {
    // Simulate customer service scenarios
    const testScenarios = [
      {
        scenario: "Customer complaining about delayed delivery",
        expectedBehavior: "empathetic, solution-focused, proactive"
      },
      {
        scenario: "Technical support request for complex issue",
        expectedBehavior: "patient, detailed, educational"
      },
      {
        scenario: "Billing dispute requiring careful handling",
        expectedBehavior: "understanding, thorough, precise"
      }
    ];
    const results = [];
    
    for (const test of testScenarios) {
      const startTime = Date.now();
      
      // Generate response using the candidate prompt
      const response = await this.callLLM(`
        ${prompt}
        
        Customer Message: ${test.scenario}
        
        Please provide a helpful customer service response.
      `);
      
      const responseTime = Date.now() - startTime;
      
      // Evaluate response quality
      const evaluation = await this.evaluateResponse(response.content, test);
      
      results.push({
        scenario: test.scenario,
        response: response.content,
        responseTime,
        scores: evaluation
      });
    }
    // Aggregate scores
    const avgHelpfulness = results.reduce((sum, r) => sum + r.scores.helpfulness, 0) / results.length;
    const avgEmpathy = results.reduce((sum, r) => sum + r.scores.empathy, 0) / results.length;
    const avgResponseTime = results.reduce((sum, r) => sum + r.responseTime, 0) / results.length;
    const avgSafety = results.reduce((sum, r) => sum + r.scores.safety, 0) / results.length;
    return {
      score: (avgHelpfulness + avgEmpathy + avgSafety) / 3, // Overall fitness
      trajectory: {
        id: `eval-${Date.now()}`,
        promptId: 'candidate-prompt',
        taskId: context.description,
        startTime: Date.now(),
        endTime: Date.now(),
        steps: results.map(r => ({
          action: `evaluate-${r.scenario}`,
          input: r.scenario,
          output: r.response,
          timestamp: Date.now(),
          duration: r.responseTime
        })),
        finalResult: {
          success: avgHelpfulness > 0.7,
          score: avgHelpfulness,
          output: results,
          metrics: {
            'helpfulness-evaluation': avgHelpfulness,
            'empathy-score': avgEmpathy,
            'avg-response-time': avgResponseTime,
            'safety-score': avgSafety
          }
        }
      }
    };
  }
  private async evaluateResponse(response: string, testCase: any): Promise<any> {
    // LLM-based evaluation of response quality
    const evaluationPrompt = `
      Evaluate this customer service response on a scale of 0-1 for each criterion:
      
      Customer Scenario: ${testCase.scenario}
      Expected Behavior: ${testCase.expectedBehavior}
      
      Agent Response: ${response}
      
      Rate the response for:
      1. Helpfulness (0-1): How well does it address the customer's need?
      2. Empathy (0-1): How empathetic and understanding is the tone?
      3. Safety (0-1): How safe and appropriate is the response?
      
      Respond in JSON format:
      {
        "helpfulness": 0.8,
        "empathy": 0.9,
        "safety": 1.0,
        "reasoning": "Brief explanation of scores"
      }
    `;
    try {
      const evaluation = await this.callLLM(evaluationPrompt);
      return JSON.parse(evaluation.content);
    } catch (error) {
      // Fallback scoring if evaluation fails
      return {
        helpfulness: 0.5,
        empathy: 0.5,
        safety: 0.8,
        reasoning: "Evaluation failed, using default scores"
      };
    }
  }
}
// 3. Chat-Specific Task Context Generator
class ChatTaskContextGenerator {
  static generateContexts(): TaskContext[] {
    return [
      {
        description: 'Handle customer complaints with empathy',
        category: 'customer-service',
        difficulty: 'medium',
        requiredCapabilities: ['empathy', 'problem-solving', 'communication'],
        expectedDuration: 30
      },
      {
        description: 'Provide technical support for complex issues',
        category: 'technical-support',
        difficulty: 'hard',
        requiredCapabilities: ['technical-knowledge', 'patience', 'education'],
        expectedDuration: 60
      },
      {
        description: 'Process billing inquiries accurately',
        category: 'billing-support',
        difficulty: 'medium',
        requiredCapabilities: ['attention-to-detail', 'math', 'policy-knowledge'],
        expectedDuration: 45
      }
    ];
  }
}
// 4. Main Chat Optimization Function
export async function optimizeChatPrompts(): Promise<void> {
  console.log('š Starting GEPA Chat Application Optimization');
  
  // Initialize performance tracking
  const performanceTracker = new PerformanceTracker({
    enableRealTimeMonitoring: true,
    memoryTrackingEnabled: true,
    tokenUsageTrackingEnabled: true
  });
  // Track the complete optimization process
  const optimizationMetricId = performanceTracker.startMetricCollection(
    'chat-optimization',
    { category: 'evolution', tags: { type: 'chat-application' } }
  );
  try {
    // 1. Setup Components
    const llmAdapter = new ChatEvaluationAdapter();
    const paretoFrontier = new ParetoFrontier({
      objectives: chatObjectives,
      maxSize: 50,
      samplingStrategy: { name: 'ucb', parameters: { confidence: 1.96 } }
    });
    
    const reflectionEngine = new ReflectionEngine({
      llmAdapter,
      trajectoryStore: new InMemoryTrajectoryStore(),
      config: {
        confidenceThreshold: 0.7,
        batchSize: 5
      }
    });
    
    const promptMutator = new PromptMutator({
      maxMutationsPerGeneration: 8,
      mutationRate: 0.3,
      crossoverRate: 0.6,
      enableCaching: true
    });
    // 2. Configure Evolution
    const evolutionConfig: EvolutionConfig = {
      taskDescription: 'Optimize customer service chat responses for helpfulness, empathy, and efficiency',
      seedPrompt: `You are a professional customer service representative. 
        Be helpful, empathetic, and solution-focused in all interactions.
        Always maintain a friendly and professional tone.`,
      maxGenerations: 12,
      populationSize: 25,
      mutationRate: 0.35
    };
    // 3. Initialize Evolution Engine
    const evolutionEngine = new EvolutionEngine({
      llmAdapter,
      paretoFrontier,
      reflectionEngine,
      promptMutator,
      trajectoryStore: new InMemoryTrajectoryStore(),
      config: evolutionConfig
    });
    // 4. Setup Real-time Monitoring
    performanceTracker.subscribeToMetrics((metric) => {
      if (metric.category === 'evolution') {
        console.log(`š ${metric.name}: ${metric.duration}ms`);
      }
    });
    // 5. Run Evolution Process
    console.log('𧬠Starting evolution process...');
    const evolutionResult = await evolutionEngine.startEvolution({
      taskDescription: evolutionConfig.taskDescription,
      seedPrompt: evolutionConfig.seedPrompt,
      config: evolutionConfig
    });
    // 6. Analyze Results
    console.log('\nš Evolution Results:');
    console.log(`- Generations: ${evolutionResult.generations}`);
    console.log(`- Convergence: ${evolutionResult.convergenceAchieved}`);
    console.log(`- Total Rollouts: ${evolutionResult.totalRollouts}`);
    
    const bestPrompt = evolutionResult.bestPrompt;
    console.log(`\nš Best Prompt (Score: ${bestPrompt.averageScore.toFixed(3)}):`);
    console.log(`"${bestPrompt.content}"`);
    // 7. Performance Analysis
    const finalMetric = performanceTracker.endMetricCollection(optimizationMetricId);
    console.log(`\nā±ļø  Total optimization time: ${finalMetric.duration}ms`);
    // 8. Generate Detailed Analysis
    const analysisResults = await analyzeOptimizationResults(
      evolutionResult,
      performanceTracker,
      paretoFrontier
    );
    console.log('\nš Detailed Analysis:');
    console.log(JSON.stringify(analysisResults, null, 2));
    // 9. Save Results for Production Use
    await saveOptimizedPrompt(bestPrompt, analysisResults);
  } catch (error) {
    console.error('ā Optimization failed:', error);
    throw error;
  } finally {
    // Cleanup
    await evolutionEngine.shutdown();
    performanceTracker.clearMetrics();
  }
}
// 5. Results Analysis Function
async function analyzeOptimizationResults(
  evolutionResult: any,
  performanceTracker: PerformanceTracker,
  paretoFrontier: ParetoFrontier
): Promise<any> {
  
  const convergenceMetrics = paretoFrontier.getConvergenceMetrics();
  const frontierStats = paretoFrontier.getStatistics();
  
  return {
    evolution: {
      totalGenerations: evolutionResult.generations,
      convergenceAchieved: evolutionResult.convergenceAchieved,
      finalScore: evolutionResult.bestPrompt.averageScore,
      improvementOverSeed: evolutionResult.bestPrompt.averageScore - 0.5 // Assume seed score
    },
    
    paretoAnalysis: {
      frontierSize: frontierStats.frontierSize,
      diversity: convergenceMetrics.diversity,
      hypervolume: convergenceMetrics.hypervolume,
      objectives: Object.fromEntries(frontierStats.objectives)
    },
    
    performance: {
      memoryUsage: performanceTracker.analyzeMemoryUsage(),
      executionStats: performanceTracker.calculateStatistics('evolution'),
      resourceMetrics: performanceTracker.getResourceUsageMetrics?.() || {}
    },
    
    recommendations: [
      evolutionResult.bestPrompt.averageScore > 0.8 
        ? 'Excellent optimization achieved - ready for production'
        : 'Consider additional generations or parameter tuning',
      
      convergenceMetrics.diversity < 0.1 
        ? 'Low diversity detected - increase mutation rate'
        : 'Good genetic diversity maintained',
        
      frontierStats.frontierSize > 30 
        ? 'Large frontier - consider increasing selection pressure'
        : 'Appropriate frontier size for objectives'
    ]
  };
}
// 6. Production Deployment Helper
async function saveOptimizedPrompt(
  bestPrompt: PromptCandidate, 
  analysis: any
): Promise<void> {
  
  const deploymentPackage = {
    optimizedPrompt: bestPrompt.content,
    metadata: {
      version: '1.0.0',
      optimizationDate: new Date().toISOString(),
      score: bestPrompt.averageScore,
      generation: bestPrompt.generation,
      rollouts: bestPrompt.rolloutCount
    },
    performance: analysis.performance,
    validation: {
      helpfulnessScore: bestPrompt.taskPerformance.get('helpfulness-evaluation'),
      empathyScore: bestPrompt.taskPerformance.get('empathy-score'),
      safetyScore: bestPrompt.taskPerformance.get('safety-score'),
      avgResponseTime: bestPrompt.taskPerformance.get('avg-response-time')
    },
    deploymentInstructions: [
      '1. Test in staging environment with real customer scenarios',
      '2. Monitor performance metrics for first 48 hours',
      '3. Collect user feedback and satisfaction scores',
      '4. Consider A/B testing against current prompt',
      '5. Plan regular re-optimization cycles (monthly recommended)'
    ]
  };
  // Save to file for production deployment
  const fs = await import('fs/promises');
  await fs.writeFile(
    './optimized-chat-prompt.json',
    JSON.stringify(deploymentPackage, null, 2)
  );
  
  console.log('š¾ Optimized prompt saved to optimized-chat-prompt.json');
}
// 7. Simple In-Memory Trajectory Store for Example
class InMemoryTrajectoryStore {
  private trajectories = new Map<string, ExecutionTrajectory>();
  async save(trajectory: ExecutionTrajectory): Promise<void> {
    this.trajectories.set(trajectory.id, trajectory);
  }
  async query(filter: any): Promise<ExecutionTrajectory[]> {
    const results = Array.from(this.trajectories.values());
    
    if (filter.promptId) {
      return results.filter(t => t.promptId === filter.promptId);
    }
    
    return results.slice(0, filter.limit || 100);
  }
  async load(id: string): Promise<ExecutionTrajectory | null> {
    return this.trajectories.get(id) || null;
  }
}
// 8. Example Usage
if (require.main === module) {
  optimizeChatPrompts()
    .then(() => {
      console.log('ā
 Chat optimization completed successfully!');
      process.exit(0);
    })
    .catch((error) => {
      console.error('ā Chat optimization failed:', error);
      process.exit(1);
    });
}
export {
  ChatEvaluationAdapter,
  ChatTaskContextGenerator,
  InMemoryTrajectoryStore,
  optimizeChatPrompts
};