/**
* Complete Example: Chat Application Optimization
*
* This example demonstrates how to use GEPA to optimize prompts for a customer service
* chat application, improving response quality, empathy, and problem-solving effectiveness.
*/
import {
EvolutionEngine,
ParetoFrontier,
ReflectionEngine,
PromptMutator,
PerformanceTracker,
LLMAdapter
} from '../src/index';
import {
PromptCandidate,
TaskContext,
EvolutionConfig,
ParetoObjective,
ExecutionTrajectory
} from '../src/types/gepa';
// 1. Define Custom Objectives for Chat Application
const chatObjectives: ParetoObjective[] = [
{
name: 'helpfulness',
weight: 0.4,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
// Extract helpfulness score from task performance
return candidate.taskPerformance.get('helpfulness-evaluation') || 0;
}
},
{
name: 'empathy',
weight: 0.3,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
// Extract empathy score from evaluation metrics
return candidate.taskPerformance.get('empathy-score') || 0;
}
},
{
name: 'response_time',
weight: 0.2,
direction: 'minimize',
extractor: (candidate: PromptCandidate) => {
// Extract average response generation time
const responseTime = candidate.taskPerformance.get('avg-response-time') || 1000;
return responseTime / 1000; // Normalize to seconds
}
},
{
name: 'safety',
weight: 0.1,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
// Extract safety compliance score
return candidate.taskPerformance.get('safety-score') || 0.5;
}
}
];
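// Illustrative helper (not part of the GEPA API): collapse the objectives above
// into a single weighted score for ad-hoc reporting. Minimizing objectives are
// inverted so that higher is always better. This is a sketch for logging only;
// the library's Pareto ranking does not depend on it.
function scalarizeObjectives(candidate: PromptCandidate): number {
  return chatObjectives.reduce((sum, objective) => {
    const raw = objective.extractor(candidate);
    const value = objective.direction === 'maximize' ? raw : 1 / (1 + raw);
    return sum + objective.weight * value;
  }, 0);
}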
// 2. Custom LLM Adapter for Chat Evaluation
class ChatEvaluationAdapter extends LLMAdapter {
async evaluatePrompt(prompt: string, context: TaskContext): Promise<any> {
// Simulate customer service scenarios
const testScenarios = [
{
scenario: "Customer complaining about delayed delivery",
expectedBehavior: "empathetic, solution-focused, proactive"
},
{
scenario: "Technical support request for complex issue",
expectedBehavior: "patient, detailed, educational"
},
{
scenario: "Billing dispute requiring careful handling",
expectedBehavior: "understanding, thorough, precise"
}
];
const results = [];
for (const test of testScenarios) {
const startTime = Date.now();
// Generate response using the candidate prompt
const response = await this.callLLM(`
${prompt}
Customer Message: ${test.scenario}
Please provide a helpful customer service response.
`);
const responseTime = Date.now() - startTime;
// Evaluate response quality
const evaluation = await this.evaluateResponse(response.content, test);
results.push({
scenario: test.scenario,
response: response.content,
responseTime,
scores: evaluation
});
}
// Aggregate scores
const avgHelpfulness = results.reduce((sum, r) => sum + r.scores.helpfulness, 0) / results.length;
const avgEmpathy = results.reduce((sum, r) => sum + r.scores.empathy, 0) / results.length;
const avgResponseTime = results.reduce((sum, r) => sum + r.responseTime, 0) / results.length;
const avgSafety = results.reduce((sum, r) => sum + r.scores.safety, 0) / results.length;
return {
score: (avgHelpfulness + avgEmpathy + avgSafety) / 3, // Overall fitness; response time is tracked separately as a minimize objective
trajectory: {
id: `eval-${Date.now()}`,
promptId: 'candidate-prompt',
taskId: context.description,
startTime: Date.now(),
endTime: Date.now(),
steps: results.map(r => ({
action: `evaluate-${r.scenario}`,
input: r.scenario,
output: r.response,
timestamp: Date.now(),
duration: r.responseTime
})),
finalResult: {
success: avgHelpfulness > 0.7,
score: avgHelpfulness,
output: results,
metrics: {
'helpfulness-evaluation': avgHelpfulness,
'empathy-score': avgEmpathy,
'avg-response-time': avgResponseTime,
'safety-score': avgSafety
}
}
}
};
}
private async evaluateResponse(response: string, testCase: any): Promise<any> {
// LLM-based evaluation of response quality
const evaluationPrompt = `
Evaluate this customer service response on a scale of 0-1 for each criterion:
Customer Scenario: ${testCase.scenario}
Expected Behavior: ${testCase.expectedBehavior}
Agent Response: ${response}
Rate the response for:
1. Helpfulness (0-1): How well does it address the customer's need?
2. Empathy (0-1): How empathetic and understanding is the tone?
3. Safety (0-1): How safe and appropriate is the response?
Respond in JSON format:
{
"helpfulness": 0.8,
"empathy": 0.9,
"safety": 1.0,
"reasoning": "Brief explanation of scores"
}
`;
try {
const evaluation = await this.callLLM(evaluationPrompt);
return JSON.parse(evaluation.content);
} catch (error) {
// Fallback scoring if evaluation fails
return {
helpfulness: 0.5,
empathy: 0.5,
safety: 0.8,
reasoning: "Evaluation failed, using default scores"
};
}
}
}
// 3. Chat-Specific Task Context Generator
class ChatTaskContextGenerator {
static generateContexts(): TaskContext[] {
return [
{
description: 'Handle customer complaints with empathy',
category: 'customer-service',
difficulty: 'medium',
requiredCapabilities: ['empathy', 'problem-solving', 'communication'],
expectedDuration: 30
},
{
description: 'Provide technical support for complex issues',
category: 'technical-support',
difficulty: 'hard',
requiredCapabilities: ['technical-knowledge', 'patience', 'education'],
expectedDuration: 60
},
{
description: 'Process billing inquiries accurately',
category: 'billing-support',
difficulty: 'medium',
requiredCapabilities: ['attention-to-detail', 'math', 'policy-knowledge'],
expectedDuration: 45
}
];
}
}
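// Usage sketch: evaluating a single candidate prompt against one generated
// context, outside the full evolution loop. This assumes ChatEvaluationAdapter
// can be constructed with no arguments; the real LLMAdapter base class may
// require connection options.
//
//   const adapter = new ChatEvaluationAdapter();
//   const [complaintContext] = ChatTaskContextGenerator.generateContexts();
//   const evaluation = await adapter.evaluatePrompt(
//     'You are a professional customer service representative.',
//     complaintContext
//   );
//   console.log(evaluation.score, evaluation.trajectory.finalResult.metrics);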
// 4. Main Chat Optimization Function
export async function optimizeChatPrompts(): Promise<void> {
console.log('🚀 Starting GEPA Chat Application Optimization');
// Initialize performance tracking
const performanceTracker = new PerformanceTracker({
enableRealTimeMonitoring: true,
memoryTrackingEnabled: true,
tokenUsageTrackingEnabled: true
});
// Track the complete optimization process
const optimizationMetricId = performanceTracker.startMetricCollection(
'chat-optimization',
{ category: 'evolution', tags: { type: 'chat-application' } }
);
// Declared outside the try block so the finally clause can shut the engine
// down even if a later setup step throws.
let evolutionEngine: EvolutionEngine | undefined;
try {
  // 1. Setup Components
  const llmAdapter = new ChatEvaluationAdapter();
  // One shared trajectory store, so the reflection engine reads the same
  // trajectories the evolution engine writes.
  const trajectoryStore = new InMemoryTrajectoryStore();
const paretoFrontier = new ParetoFrontier({
objectives: chatObjectives,
maxSize: 50,
samplingStrategy: { name: 'ucb', parameters: { confidence: 1.96 } }
});
const reflectionEngine = new ReflectionEngine({
llmAdapter,
trajectoryStore,
config: {
confidenceThreshold: 0.7,
batchSize: 5
}
});
const promptMutator = new PromptMutator({
maxMutationsPerGeneration: 8,
mutationRate: 0.3,
crossoverRate: 0.6,
enableCaching: true
});
// 2. Configure Evolution
const evolutionConfig: EvolutionConfig = {
taskDescription: 'Optimize customer service chat responses for helpfulness, empathy, and efficiency',
seedPrompt: `You are a professional customer service representative.
Be helpful, empathetic, and solution-focused in all interactions.
Always maintain a friendly and professional tone.`,
maxGenerations: 12,
populationSize: 25,
mutationRate: 0.35
};
// 3. Initialize Evolution Engine
evolutionEngine = new EvolutionEngine({
llmAdapter,
paretoFrontier,
reflectionEngine,
promptMutator,
trajectoryStore,
config: evolutionConfig
});
// 4. Setup Real-time Monitoring
performanceTracker.subscribeToMetrics((metric) => {
if (metric.category === 'evolution') {
console.log(`📊 ${metric.name}: ${metric.duration}ms`);
}
});
// 5. Run Evolution Process
console.log('🧬 Starting evolution process...');
const evolutionResult = await evolutionEngine.startEvolution({
taskDescription: evolutionConfig.taskDescription,
seedPrompt: evolutionConfig.seedPrompt,
config: evolutionConfig
});
// 6. Analyze Results
console.log('\n📊 Evolution Results:');
console.log(`- Generations: ${evolutionResult.generations}`);
console.log(`- Convergence: ${evolutionResult.convergenceAchieved}`);
console.log(`- Total Rollouts: ${evolutionResult.totalRollouts}`);
const bestPrompt = evolutionResult.bestPrompt;
console.log(`\n🏆 Best Prompt (Score: ${bestPrompt.averageScore.toFixed(3)}):`);
console.log(`"${bestPrompt.content}"`);
// 7. Performance Analysis
const finalMetric = performanceTracker.endMetricCollection(optimizationMetricId);
console.log(`\n⏱️ Total optimization time: ${finalMetric.duration}ms`);
// 8. Generate Detailed Analysis
const analysisResults = await analyzeOptimizationResults(
evolutionResult,
performanceTracker,
paretoFrontier
);
console.log('\n🔍 Detailed Analysis:');
console.log(JSON.stringify(analysisResults, null, 2));
// 9. Save Results for Production Use
await saveOptimizedPrompt(bestPrompt, analysisResults);
} catch (error) {
  console.error('❌ Optimization failed:', error);
  throw error;
} finally {
  // Cleanup; the engine may be undefined if setup failed before it was created
  await evolutionEngine?.shutdown();
  performanceTracker.clearMetrics();
}
}
// 5. Results Analysis Function
async function analyzeOptimizationResults(
evolutionResult: any,
performanceTracker: PerformanceTracker,
paretoFrontier: ParetoFrontier
): Promise<any> {
const convergenceMetrics = paretoFrontier.getConvergenceMetrics();
const frontierStats = paretoFrontier.getStatistics();
return {
evolution: {
totalGenerations: evolutionResult.generations,
convergenceAchieved: evolutionResult.convergenceAchieved,
finalScore: evolutionResult.bestPrompt.averageScore,
improvementOverSeed: evolutionResult.bestPrompt.averageScore - 0.5 // Assumes a 0.5 seed baseline; substitute the measured seed score if available
},
paretoAnalysis: {
frontierSize: frontierStats.frontierSize,
diversity: convergenceMetrics.diversity,
hypervolume: convergenceMetrics.hypervolume,
objectives: Object.fromEntries(frontierStats.objectives)
},
performance: {
memoryUsage: performanceTracker.analyzeMemoryUsage(),
executionStats: performanceTracker.calculateStatistics('evolution'),
resourceMetrics: performanceTracker.getResourceUsageMetrics?.() || {}
},
recommendations: [
evolutionResult.bestPrompt.averageScore > 0.8
? 'Excellent optimization achieved - ready for production'
: 'Consider additional generations or parameter tuning',
convergenceMetrics.diversity < 0.1
? 'Low diversity detected - increase mutation rate'
: 'Good genetic diversity maintained',
frontierStats.frontierSize > 30
? 'Large frontier - consider increasing selection pressure'
: 'Appropriate frontier size for objectives'
]
};
}
// 6. Production Deployment Helper
async function saveOptimizedPrompt(
bestPrompt: PromptCandidate,
analysis: any
): Promise<void> {
const deploymentPackage = {
optimizedPrompt: bestPrompt.content,
metadata: {
version: '1.0.0',
optimizationDate: new Date().toISOString(),
score: bestPrompt.averageScore,
generation: bestPrompt.generation,
rollouts: bestPrompt.rolloutCount
},
performance: analysis.performance,
validation: {
helpfulnessScore: bestPrompt.taskPerformance.get('helpfulness-evaluation'),
empathyScore: bestPrompt.taskPerformance.get('empathy-score'),
safetyScore: bestPrompt.taskPerformance.get('safety-score'),
avgResponseTime: bestPrompt.taskPerformance.get('avg-response-time')
},
deploymentInstructions: [
'1. Test in staging environment with real customer scenarios',
'2. Monitor performance metrics for first 48 hours',
'3. Collect user feedback and satisfaction scores',
'4. Consider A/B testing against current prompt',
'5. Plan regular re-optimization cycles (monthly recommended)'
]
};
// Save to file for production deployment
const fs = await import('fs/promises');
await fs.writeFile(
'./optimized-chat-prompt.json',
JSON.stringify(deploymentPackage, null, 2)
);
console.log('💾 Optimized prompt saved to optimized-chat-prompt.json');
}
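// Counterpart loader (illustrative): how a production service might consume the
// deployment package written above. The shape mirrors `deploymentPackage`; the
// 0.9 safety threshold is an assumption for this sketch, not a library default.
async function loadOptimizedPrompt(
  path = './optimized-chat-prompt.json'
): Promise<string> {
  const fs = await import('fs/promises');
  const pkg = JSON.parse(await fs.readFile(path, 'utf-8'));
  if ((pkg.validation?.safetyScore ?? 0) < 0.9) {
    console.warn('⚠️ Safety score below 0.9 - review before deploying');
  }
  return pkg.optimizedPrompt;
}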
// 7. Simple In-Memory Trajectory Store for Example
class InMemoryTrajectoryStore {
private trajectories = new Map<string, ExecutionTrajectory>();
async save(trajectory: ExecutionTrajectory): Promise<void> {
this.trajectories.set(trajectory.id, trajectory);
}
async query(filter: any): Promise<ExecutionTrajectory[]> {
  let results = Array.from(this.trajectories.values());
  if (filter.promptId) {
    results = results.filter(t => t.promptId === filter.promptId);
  }
  // Apply the limit regardless of which filters matched
  return results.slice(0, filter.limit || 100);
}
async load(id: string): Promise<ExecutionTrajectory | null> {
return this.trajectories.get(id) || null;
}
}
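// In production, swap this for a durable store. The interface a replacement
// needs to satisfy, inferred from how the stores are used above (the interface
// name is an assumption; the library may export its own store type):
interface TrajectoryStoreLike {
  save(trajectory: ExecutionTrajectory): Promise<void>;
  query(filter: { promptId?: string; limit?: number }): Promise<ExecutionTrajectory[]>;
  load(id: string): Promise<ExecutionTrajectory | null>;
}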
// 8. Example Usage
if (require.main === module) {
optimizeChatPrompts()
.then(() => {
console.log('✅ Chat optimization completed successfully!');
process.exit(0);
})
.catch((error) => {
console.error('❌ Chat optimization failed:', error);
process.exit(1);
});
}
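// To run the example directly (assumes ts-node is installed and this file sits
// one directory below the project root, matching the '../src/index' import):
//
//   npx ts-node chat-optimization-example.ts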
export {
ChatEvaluationAdapter,
ChatTaskContextGenerator,
InMemoryTrajectoryStore,
optimizeChatPrompts
};