// code-generation-example.ts
/**
* Complete Example: Code Generation Prompt Optimization
*
* This example demonstrates optimizing prompts for code generation tasks,
* focusing on code quality, correctness, efficiency, and maintainability.
*/
import {
EvolutionEngine,
ParetoFrontier,
ReflectionEngine,
PromptMutator,
PerformanceTracker,
LLMAdapter
} from '../src/index';
import {
PromptCandidate,
TaskContext,
EvolutionConfig,
ParetoObjective,
ExecutionTrajectory
} from '../src/types/gepa';
// 1. Code Quality Evaluation Objectives
const codeGenerationObjectives: ParetoObjective[] = [
{
name: 'correctness',
weight: 0.35,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('correctness-score') || 0;
}
},
{
name: 'code_quality',
weight: 0.25,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('quality-score') || 0;
}
},
{
name: 'efficiency',
weight: 0.20,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('efficiency-score') || 0;
}
},
{
name: 'maintainability',
weight: 0.15,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('maintainability-score') || 0;
}
},
{
name: 'generation_time',
weight: 0.05,
direction: 'minimize',
extractor: (candidate: PromptCandidate) => {
const time = candidate.taskPerformance.get('generation-time') || 5000;
return time / 1000; // Normalize to seconds
}
}
];
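// Illustrative only: a minimal sketch of collapsing the weighted objectives above into a
// single scalar for quick candidate comparisons. This helper is an assumption for this
// example (not part of the GEPA API); the Pareto frontier itself never needs a scalar.
// Minimize-direction objectives (generation_time, in seconds) are inverted so smaller
// values contribute more.
function compositeObjectiveScore(candidate: PromptCandidate): number {
  return codeGenerationObjectives.reduce((total, objective) => {
    const raw = objective.extractor(candidate);
    const contribution = objective.direction === 'maximize' ? raw : 1 / (1 + raw);
    return total + objective.weight * contribution;
  }, 0);
}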
// 2. Code Generation Test Scenarios
interface CodeTestScenario {
id: string;
description: string;
language: string;
difficulty: 'easy' | 'medium' | 'hard';
requirements: string[];
testCases: Array<{
input: any;
expectedOutput: any;
description: string;
}>;
qualityCriteria: string[];
}
const codeTestScenarios: CodeTestScenario[] = [
{
id: 'fibonacci-recursive',
description: 'Generate a recursive fibonacci function',
language: 'typescript',
difficulty: 'easy',
requirements: [
'Function should be recursive',
'Handle edge cases (n <= 1)',
'Include proper type annotations',
'Add JSDoc documentation'
],
testCases: [
{ input: 0, expectedOutput: 0, description: 'Base case: fib(0)' },
{ input: 1, expectedOutput: 1, description: 'Base case: fib(1)' },
{ input: 5, expectedOutput: 5, description: 'Normal case: fib(5)' },
{ input: 10, expectedOutput: 55, description: 'Larger case: fib(10)' }
],
qualityCriteria: [
'Correct recursive implementation',
'Proper error handling',
'Clear variable names',
'Comprehensive documentation'
]
},
{
id: 'binary-search',
description: 'Implement binary search algorithm',
language: 'typescript',
difficulty: 'medium',
requirements: [
'Efficient O(log n) implementation',
'Handle sorted arrays',
'Return correct index or -1',
'Include comprehensive tests'
],
testCases: [
{
input: { arr: [1, 3, 5, 7, 9], target: 5 },
expectedOutput: 2,
description: 'Find element in middle'
},
{
input: { arr: [1, 3, 5, 7, 9], target: 1 },
expectedOutput: 0,
description: 'Find first element'
},
{
input: { arr: [1, 3, 5, 7, 9], target: 9 },
expectedOutput: 4,
description: 'Find last element'
},
{
input: { arr: [1, 3, 5, 7, 9], target: 4 },
expectedOutput: -1,
description: 'Element not found'
}
],
qualityCriteria: [
'Correct algorithm implementation',
'Optimal time complexity',
'Edge case handling',
'Clean, readable code'
]
},
{
id: 'async-api-client',
description: 'Create an async API client with error handling',
language: 'typescript',
difficulty: 'hard',
requirements: [
'Generic request/response handling',
'Retry logic with exponential backoff',
'Comprehensive error handling',
'Type-safe implementation',
'Configurable timeout'
],
testCases: [
{
input: { url: '/api/users', method: 'GET' },
expectedOutput: { success: true, data: [] },
description: 'Successful GET request'
},
{
input: { url: '/api/invalid', method: 'GET' },
expectedOutput: { success: false, error: 'Not Found' },
description: 'Handle 404 error'
}
],
qualityCriteria: [
'Robust error handling',
'Proper async/await usage',
'Type safety',
'Configurable behavior',
'Production-ready code'
]
}
];
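// A minimal sketch of how a scenario's testCases could be executed against a real,
// compiled implementation rather than the keyword heuristics used by the adapter below.
// `candidateFn` is a hypothetical function obtained from the generated code (e.g. via a
// sandboxed eval or a separate build step); it is not produced anywhere in this example.
function runScenarioTests(
  scenario: CodeTestScenario,
  candidateFn: (input: any) => any
): number {
  const passed = scenario.testCases.filter(tc => {
    try {
      return JSON.stringify(candidateFn(tc.input)) === JSON.stringify(tc.expectedOutput);
    } catch {
      return false; // A thrown error counts as a failed test case
    }
  }).length;
  return passed / scenario.testCases.length; // Fraction of test cases passed, in [0, 1]
}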
// 3. Code Evaluation LLM Adapter
class CodeGenerationAdapter extends LLMAdapter {
private scenarios = codeTestScenarios;
async evaluatePrompt(prompt: string, context: TaskContext): Promise<any> {
const results = [];
for (const scenario of this.scenarios) {
const startTime = Date.now();
try {
// Generate code using the candidate prompt
const codeResult = await this.generateCode(prompt, scenario);
const generationTime = Date.now() - startTime;
// Evaluate the generated code
const evaluation = await this.evaluateGeneratedCode(codeResult.code, scenario);
results.push({
scenarioId: scenario.id,
generatedCode: codeResult.code,
generationTime,
evaluation,
success: evaluation.overall > 0.7
});
} catch (error) {
results.push({
scenarioId: scenario.id,
generatedCode: '',
generationTime: Date.now() - startTime,
evaluation: this.getFailureEvaluation(),
success: false,
error: error.message
});
}
}
// Aggregate scores
const avgCorrectness = this.calculateAverage(results, 'correctness');
const avgQuality = this.calculateAverage(results, 'quality');
const avgEfficiency = this.calculateAverage(results, 'efficiency');
const avgMaintainability = this.calculateAverage(results, 'maintainability');
const avgGenerationTime = results.reduce((sum, r) => sum + r.generationTime, 0) / results.length;
const overallScore = (avgCorrectness + avgQuality + avgEfficiency + avgMaintainability) / 4;
return {
score: overallScore,
trajectory: this.createTrajectory(context, results, {
'correctness-score': avgCorrectness,
'quality-score': avgQuality,
'efficiency-score': avgEfficiency,
'maintainability-score': avgMaintainability,
'generation-time': avgGenerationTime
})
};
}
private async generateCode(prompt: string, scenario: CodeTestScenario): Promise<{ code: string }> {
const codePrompt = `
${prompt}
Task: ${scenario.description}
Language: ${scenario.language}
Difficulty: ${scenario.difficulty}
Requirements:
${scenario.requirements.map(req => `- ${req}`).join('\n')}
Test Cases:
${scenario.testCases.map(tc => `- ${tc.description}: ${JSON.stringify(tc.input)} -> ${JSON.stringify(tc.expectedOutput)}`).join('\n')}
Please provide a complete, production-ready implementation that satisfies all requirements and passes all test cases.
Return only the code, properly formatted and commented.
`;
const response = await this.callLLM(codePrompt);
return { code: response.content };
}
private async evaluateGeneratedCode(
code: string,
scenario: CodeTestScenario
): Promise<any> {
// Syntax and structure analysis
const syntaxScore = await this.evaluateSyntax(code, scenario.language);
// Functional correctness (simulated)
const correctnessScore = await this.evaluateCorrectness(code, scenario);
// Code quality assessment
const qualityScore = await this.evaluateQuality(code, scenario.qualityCriteria);
// Performance and efficiency
const efficiencyScore = await this.evaluateEfficiency(code, scenario.difficulty);
// Maintainability assessment
const maintainabilityScore = await this.evaluateMaintainability(code);
return {
syntax: syntaxScore,
correctness: correctnessScore,
quality: qualityScore,
efficiency: efficiencyScore,
maintainability: maintainabilityScore,
overall: (correctnessScore * 0.4 + qualityScore * 0.3 + efficiencyScore * 0.2 + maintainabilityScore * 0.1)
};
}
private async evaluateSyntax(code: string, language: string): Promise<number> {
// Simplified syntax evaluation
const basicChecks = [
code.includes('function') || code.includes('=>') || code.includes('class'),
code.includes('{') && code.includes('}'),
!code.includes('undefined') || code.includes('null'),
code.length > 50 && code.length < 5000
];
return basicChecks.filter(Boolean).length / basicChecks.length;
}
private async evaluateCorrectness(
code: string,
scenario: CodeTestScenario
): Promise<number> {
// Simulate test execution by checking for key patterns
let score = 0.5; // Base score
// Check for required patterns based on scenario
switch (scenario.id) {
case 'fibonacci-recursive':
if (code.includes('fibonacci') && code.includes('return') &&
(code.includes('fibonacci(n-1)') || code.includes('fib(n-1)'))) {
score += 0.3;
}
if (code.includes('n <= 1') || code.includes('n < 2')) {
score += 0.2;
}
break;
case 'binary-search':
if (code.includes('while') || code.includes('recursiv')) {
score += 0.2;
}
if (code.includes('mid') && code.includes('left') && code.includes('right')) {
score += 0.3;
}
break;
case 'async-api-client':
if (code.includes('async') && code.includes('await')) {
score += 0.2;
}
if (code.includes('try') && code.includes('catch')) {
score += 0.2;
}
if (code.includes('retry') || code.includes('timeout')) {
score += 0.1;
}
break;
}
return Math.min(score, 1.0);
}
private async evaluateQuality(
code: string,
criteria: string[]
): Promise<number> {
let qualityScore = 0;
const totalCriteria = criteria.length;
// Check documentation
if (code.includes('/**') || code.includes('//')) {
qualityScore += 1;
}
// Check for meaningful variable names (not single letters except loop counters)
const hasGoodNaming = !/\b[a-z]\b/.test(code.replace(/\bfor\s*\([^)]*[ij]\b/g, ''));
if (hasGoodNaming) {
qualityScore += 1;
}
// Check for proper structure
if (code.includes('\n') && code.split('\n').length > 5) {
qualityScore += 1;
}
// Check for type annotations (TypeScript)
if (code.includes(':') && (code.includes('number') || code.includes('string') || code.includes('boolean'))) {
qualityScore += 1;
}
return Math.min(qualityScore / Math.max(totalCriteria, 4), 1.0);
}
private async evaluateEfficiency(code: string, difficulty: string): Promise<number> {
let efficiencyScore = 0.7; // Base efficiency score
// Penalize obvious inefficiencies
if (code.includes('while(true)') && !code.includes('break')) {
efficiencyScore -= 0.3;
}
// Reward efficient patterns
if (code.includes('O(log n)') || (code.includes('binary') && code.includes('search'))) {
efficiencyScore += 0.2;
}
// Adjust based on difficulty
switch (difficulty) {
case 'easy':
return Math.max(efficiencyScore, 0.6);
case 'medium':
return efficiencyScore;
case 'hard':
return Math.max(efficiencyScore - 0.1, 0.4);
}
return efficiencyScore;
}
private async evaluateMaintainability(code: string): Promise<number> {
let maintainabilityScore = 0;
// Check for modularity (more than one function suggests decomposition into helpers)
if (code.split('function').length > 2) {
maintainabilityScore += 0.25;
}
// Check for comments/documentation
if (code.includes('//') || code.includes('/**')) {
maintainabilityScore += 0.25;
}
// Check for consistent formatting: all indented lines use spaces or all use tabs
const lines = code.split('\n');
const indentedLines = lines.filter(line => /^[ \t]/.test(line));
const hasConsistentIndentation =
indentedLines.length === 0 ||
indentedLines.every(line => line.startsWith(' ')) ||
indentedLines.every(line => line.startsWith('\t'));
if (hasConsistentIndentation) {
maintainabilityScore += 0.25;
}
// Check for reasonable function length
const avgFunctionLength = this.calculateAverageFunctionLength(code);
if (avgFunctionLength > 0 && avgFunctionLength < 50) {
maintainabilityScore += 0.25;
}
return maintainabilityScore;
}
private calculateAverageFunctionLength(code: string): number {
const functions = code.split(/function|=>|\basync\s+function/);
if (functions.length <= 1) return 0;
const totalLines = functions.slice(1).reduce((sum, func) => {
return sum + func.split('\n').length;
}, 0);
return totalLines / (functions.length - 1);
}
private calculateAverage(results: any[], metric: string): number {
const values = results
.filter(r => r.evaluation && r.evaluation[metric] !== undefined)
.map(r => r.evaluation[metric]);
return values.length > 0 ? values.reduce((sum, val) => sum + val, 0) / values.length : 0;
}
private getFailureEvaluation() {
return {
syntax: 0,
correctness: 0,
quality: 0,
efficiency: 0,
maintainability: 0,
overall: 0
};
}
private createTrajectory(context: TaskContext, results: any[], metrics: any): ExecutionTrajectory {
return {
id: `code-eval-${Date.now()}`,
promptId: 'candidate-prompt',
taskId: context.description,
startTime: Date.now() - 10000,
endTime: Date.now(),
steps: results.map((result, index) => ({
action: `generate-code-${result.scenarioId}`,
input: result.scenarioId,
output: result.generatedCode,
timestamp: Date.now() - (results.length - index) * 1000,
duration: result.generationTime,
success: result.success,
error: result.error
})),
finalResult: {
success: results.every(r => r.success),
score: metrics['correctness-score'],
output: results,
metrics
}
};
}
}
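// A minimal standalone usage sketch for the adapter outside the evolution loop, assuming
// (as in optimizeCodeGenerationPrompts below) that the constructor takes no arguments.
// The TaskContext fields mirror those used in testOptimizedCodePrompt further down.
async function evaluateSinglePrompt(prompt: string): Promise<number> {
  const adapter = new CodeGenerationAdapter();
  const context: TaskContext = {
    description: 'Ad-hoc code generation check',
    category: 'algorithm-implementation',
    difficulty: 'medium',
    requiredCapabilities: ['algorithms'],
    expectedDuration: 60
  };
  const result = await adapter.evaluatePrompt(prompt, context);
  return result.score; // Average of correctness, quality, efficiency, and maintainability
}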
// 4. Code Generation Optimization Function
export async function optimizeCodeGenerationPrompts(): Promise<void> {
console.log('🚀 Starting GEPA Code Generation Optimization');
const performanceTracker = new PerformanceTracker({
enableRealTimeMonitoring: true,
memoryTrackingEnabled: true,
tokenUsageTrackingEnabled: true
});
const optimizationMetricId = performanceTracker.startMetricCollection(
'code-generation-optimization',
{ category: 'evolution', tags: { type: 'code-generation' } }
);
// Declared outside the try block so the finally clause can shut the engine down
let evolutionEngine: EvolutionEngine | undefined;
try {
// 1. Setup Components
const llmAdapter = new CodeGenerationAdapter();
const paretoFrontier = new ParetoFrontier({
objectives: codeGenerationObjectives,
maxSize: 40,
samplingStrategy: { name: 'epsilon-greedy', parameters: { epsilon: 0.15 } }
});
const reflectionEngine = new ReflectionEngine({
llmAdapter,
trajectoryStore: new InMemoryTrajectoryStore(),
config: {
confidenceThreshold: 0.75,
batchSize: 3
}
});
const promptMutator = new PromptMutator({
maxMutationsPerGeneration: 6,
mutationRate: 0.25,
crossoverRate: 0.7,
enableCaching: true
});
// 2. Configure Evolution for Code Generation
const evolutionConfig: EvolutionConfig = {
taskDescription: 'Optimize code generation prompts for correctness, quality, and maintainability',
seedPrompt: `You are an expert software engineer and coding assistant.
When generating code, follow these principles:
- Write clean, readable, and well-documented code
- Include proper error handling and edge cases
- Use meaningful variable and function names
- Follow language-specific best practices
- Optimize for both correctness and performance
- Include comprehensive comments and documentation`,
maxGenerations: 15,
populationSize: 20,
mutationRate: 0.3
};
// 3. Initialize Evolution Engine
evolutionEngine = new EvolutionEngine({
llmAdapter,
paretoFrontier,
reflectionEngine,
promptMutator,
trajectoryStore: new InMemoryTrajectoryStore(),
config: evolutionConfig
});
// 4. Monitor Evolution Progress
performanceTracker.subscribeToMetrics((metric) => {
if (metric.category === 'evolution') {
console.log(`📊 ${metric.name}: ${metric.duration}ms`);
}
});
// 5. Run Evolution
console.log('🧬 Starting code generation evolution...');
const evolutionResult = await evolutionEngine.startEvolution({
taskDescription: evolutionConfig.taskDescription,
seedPrompt: evolutionConfig.seedPrompt,
config: evolutionConfig
});
// 6. Analyze Results
console.log('\n📊 Code Generation Optimization Results:');
console.log(`- Generations: ${evolutionResult.generations}`);
console.log(`- Convergence: ${evolutionResult.convergenceAchieved}`);
console.log(`- Total Evaluations: ${evolutionResult.totalRollouts}`);
const bestPrompt = evolutionResult.bestPrompt;
console.log(`\n🏆 Best Code Generation Prompt (Score: ${bestPrompt.averageScore.toFixed(3)}):`);
console.log(`"${bestPrompt.content}"`);
// 7. Detailed Analysis
const codeAnalysis = await analyzeCodeOptimizationResults(
evolutionResult,
performanceTracker,
paretoFrontier
);
console.log('\n📈 Code Generation Analysis:');
console.log(JSON.stringify(codeAnalysis, null, 2));
// 8. Test the Optimized Prompt
await testOptimizedCodePrompt(bestPrompt, llmAdapter);
// 9. Save Results
await saveOptimizedCodePrompt(bestPrompt, codeAnalysis);
const finalMetric = performanceTracker.endMetricCollection(optimizationMetricId);
console.log(`\n⏱️ Total optimization time: ${finalMetric.duration}ms`);
} catch (error) {
console.error('❌ Code generation optimization failed:', error);
throw error;
} finally {
// Engine may be undefined if setup failed before it was created
await evolutionEngine?.shutdown();
}
}
// 5. Results Analysis
async function analyzeCodeOptimizationResults(
evolutionResult: any,
performanceTracker: PerformanceTracker,
paretoFrontier: ParetoFrontier
): Promise<any> {
const bestPrompt = evolutionResult.bestPrompt;
const frontierStats = paretoFrontier.getStatistics();
return {
optimization: {
finalScore: bestPrompt.averageScore,
generations: evolutionResult.generations,
convergenceAchieved: evolutionResult.convergenceAchieved
},
codeQualityMetrics: {
correctness: bestPrompt.taskPerformance.get('correctness-score'),
quality: bestPrompt.taskPerformance.get('quality-score'),
efficiency: bestPrompt.taskPerformance.get('efficiency-score'),
maintainability: bestPrompt.taskPerformance.get('maintainability-score'),
avgGenerationTime: bestPrompt.taskPerformance.get('generation-time')
},
paretoAnalysis: {
frontierSize: frontierStats.frontierSize,
objectiveStats: Object.fromEntries(frontierStats.objectives)
},
performance: performanceTracker.analyzeMemoryUsage(),
recommendations: generateCodeOptimizationRecommendations(bestPrompt, frontierStats)
};
}
// 6. Test the Optimized Prompt
async function testOptimizedCodePrompt(
optimizedPrompt: PromptCandidate,
adapter: CodeGenerationAdapter
): Promise<void> {
console.log('\n🧪 Testing optimized code generation prompt...');
const testContext: TaskContext = {
description: 'Generate a simple sorting algorithm',
category: 'algorithm-implementation',
difficulty: 'medium',
requiredCapabilities: ['algorithms', 'optimization', 'testing'],
expectedDuration: 60
};
try {
const testResult = await adapter.evaluatePrompt(optimizedPrompt.content, testContext);
console.log('✅ Test Results:');
console.log(`- Overall Score: ${testResult.score.toFixed(3)}`);
console.log(`- Generation successful: ${testResult.trajectory.finalResult.success}`);
console.log(`- Code quality metrics:`, testResult.trajectory.finalResult.metrics);
} catch (error) {
console.error('❌ Test failed:', error.message);
}
}
// 7. Save Optimized Prompt
async function saveOptimizedCodePrompt(
bestPrompt: PromptCandidate,
analysis: any
): Promise<void> {
const deployment = {
optimizedPrompt: bestPrompt.content,
metadata: {
version: '1.0.0',
type: 'code-generation',
optimizationDate: new Date().toISOString(),
performance: analysis.codeQualityMetrics
},
usageGuidelines: [
'Use for general programming tasks across multiple languages',
'Effective for algorithms, data structures, and utility functions',
'Include specific requirements in task description',
'Monitor generated code quality in production'
],
integrationExample: `
// Example usage in a code generation system
const codeGenerator = new CodeGenerator({
prompt: "${bestPrompt.content}",
temperature: 0.7,
maxTokens: 2000
});
const result = await codeGenerator.generate({
task: "Implement a binary search tree",
language: "typescript",
requirements: ["Generic types", "Full CRUD operations"]
});
`
};
const fs = await import('fs/promises');
await fs.writeFile(
'./optimized-code-generation-prompt.json',
JSON.stringify(deployment, null, 2)
);
console.log('💾 Optimized code generation prompt saved');
}
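// A minimal sketch of reading the saved artifact back at deployment time. The shape of
// the parsed object mirrors the `deployment` structure written by saveOptimizedCodePrompt
// above; the loader itself is illustrative and not used elsewhere in this example.
async function loadOptimizedCodePrompt(
  path = './optimized-code-generation-prompt.json'
): Promise<{ prompt: string; metadata: any }> {
  const fs = await import('fs/promises');
  const deployment = JSON.parse(await fs.readFile(path, 'utf-8'));
  return { prompt: deployment.optimizedPrompt, metadata: deployment.metadata };
}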
function generateCodeOptimizationRecommendations(
bestPrompt: PromptCandidate,
frontierStats: any
): string[] {
const recommendations = [];
const correctness = bestPrompt.taskPerformance.get('correctness-score') || 0;
const quality = bestPrompt.taskPerformance.get('quality-score') || 0;
const efficiency = bestPrompt.taskPerformance.get('efficiency-score') || 0;
if (correctness > 0.9) {
recommendations.push('Excellent correctness achieved - suitable for production use');
} else if (correctness > 0.7) {
recommendations.push('Good correctness - consider additional testing scenarios');
} else {
recommendations.push('Low correctness - needs more optimization or revised approach');
}
if (quality > 0.8) {
recommendations.push('High code quality standards met');
} else {
recommendations.push('Code quality could be improved - focus on documentation and structure');
}
if (efficiency > 0.7) {
recommendations.push('Efficient code generation - good algorithmic choices');
} else {
recommendations.push('Consider optimizing for better algorithmic efficiency');
}
return recommendations;
}
// 8. Simple Trajectory Store Implementation
class InMemoryTrajectoryStore {
private trajectories = new Map<string, ExecutionTrajectory>();
async save(trajectory: ExecutionTrajectory): Promise<void> {
this.trajectories.set(trajectory.id, trajectory);
}
async query(filter: any): Promise<ExecutionTrajectory[]> {
const results = Array.from(this.trajectories.values());
return results.slice(0, filter.limit || 100);
}
async load(id: string): Promise<ExecutionTrajectory | null> {
return this.trajectories.get(id) || null;
}
}
// Example usage
if (require.main === module) {
optimizeCodeGenerationPrompts()
.then(() => {
console.log('✅ Code generation optimization completed!');
process.exit(0);
})
.catch((error) => {
console.error('❌ Optimization failed:', error);
process.exit(1);
});
}
export {
CodeGenerationAdapter,
codeTestScenarios,
optimizeCodeGenerationPrompts
};