/**
* Prompt optimization engine using APE/OPRO patterns
*
* This is PRODUCTION code. Evaluation is done via actual LLM calls, with a
* lightweight heuristic used only as a fallback when the LLM call fails.
*/
import OpenAI from 'openai';
import type { OptimizePromptResult, PromptRecord } from './types.js';
interface OptimizationConfig {
maxIterations: number;
targetScore: number;
convergenceThreshold: number;
convergenceWindow: number;
}
const DEFAULT_CONFIG: OptimizationConfig = {
maxIterations: 10,
targetScore: 0.95,
convergenceThreshold: 0.02,
convergenceWindow: 3
};
// Evaluation criteria with weights
const EVALUATION_CRITERIA = {
clarity: { weight: 0.25, description: 'How clear and unambiguous is the instruction?' },
specificity: { weight: 0.25, description: 'Does it provide specific guidance without being overly restrictive?' },
completeness: { weight: 0.20, description: 'Does it cover all necessary aspects of the task?' },
structure: { weight: 0.15, description: 'Is it well-organized with appropriate formatting?' },
effectiveness: { weight: 0.15, description: 'How likely is it to produce the desired output?' }
};
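// Weights sum to 1.0, so the weighted score stays in [0, 1]. As an illustrative
// (hypothetical) example, ratings of clarity 8, specificity 7, completeness 6,
// structure 9, effectiveness 7 would yield:
// 0.8*0.25 + 0.7*0.25 + 0.6*0.20 + 0.9*0.15 + 0.7*0.15 = 0.735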
// Pattern-based improvements that don't require LLM calls
const IMPROVEMENT_PATTERNS = [
{
name: 'add_structure',
check: (p: string) => !['1.', 'step', 'first'].some(k => p.toLowerCase().includes(k)), // case-insensitive, matching the other checks
apply: (p: string) => `${p}\n\nProvide your response in a structured format with clear sections.`,
expectedImprovement: 0.15
},
{
name: 'add_chain_of_thought',
check: (p: string) => !p.toLowerCase().includes('step by step') && !p.toLowerCase().includes('think through'),
apply: (p: string) => `${p}\n\nThink through this step by step, showing your reasoning.`,
expectedImprovement: 0.20
},
{
name: 'add_constraints',
check: (p: string) => p.length < 150 && !p.includes('Requirements'),
apply: (p: string) => `${p}\n\nRequirements:\n- Be specific and precise\n- Support claims with evidence\n- Stay focused on the core question`,
expectedImprovement: 0.10
},
{
name: 'add_output_format',
check: (p: string) => !p.toLowerCase().includes('format') && !p.toLowerCase().includes('respond with'),
apply: (p: string) => `${p}\n\nFormat your response clearly with headers where appropriate.`,
expectedImprovement: 0.08
},
{
name: 'add_context_request',
check: (p: string) => !p.toLowerCase().includes('context') && !p.toLowerCase().includes('background'),
apply: (p: string) => `${p}\n\nConsider relevant context and background information.`,
expectedImprovement: 0.05
}
];
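// Example (hypothetical input): "Summarize this article" matches the
// add_chain_of_thought check, so applyPatterns() would append
// "Think through this step by step, showing your reasoning." to it.
// Patterns are applied in order and later checks see the already-extended
// prompt, so a pattern can be skipped once an earlier one has satisfied its check.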
export class PromptOptimizer {
private openai: OpenAI;
private config: OptimizationConfig;
private history: Array<{ prompt: string; score: number }> = [];
constructor(openai: OpenAI, config: Partial<OptimizationConfig> = {}) {
this.openai = openai;
this.config = { ...DEFAULT_CONFIG, ...config };
}
/**
* Optimize a prompt using pattern-based and LLM-based approaches
*/
async optimize(
originalPrompt: string,
similarPrompts: PromptRecord[] = [],
domain: string = 'general'
): Promise<OptimizePromptResult> {
const improvements: string[] = [];
let currentPrompt = originalPrompt;
let iterations = 0;
const scores: number[] = [];
// Score the original prompt first
const originalScore = await this.scorePrompt(originalPrompt, domain);
improvements.push(`Original score: ${(originalScore * 100).toFixed(1)}%`);
// Phase 1: Apply pattern-based improvements (fast, no API calls)
const { improved: patternImproved, applied } = this.applyPatterns(currentPrompt);
if (applied.length > 0) {
currentPrompt = patternImproved;
improvements.push(...applied.map(p => `Applied pattern: ${p}`));
}
// Phase 2: If we have similar high-performing prompts, learn from them
if (similarPrompts.length > 0) {
const { improved: ragImproved, insights } = await this.learnFromSimilar(
currentPrompt,
similarPrompts
);
if (ragImproved !== currentPrompt) {
currentPrompt = ragImproved;
improvements.push(`RAG improvement: ${insights}`);
}
}
// Score after pattern/RAG improvements
let currentScore = await this.scorePrompt(currentPrompt, domain);
scores.push(currentScore);
this.history = [{ prompt: originalPrompt, score: originalScore }];
if (currentPrompt !== originalPrompt) {
// Record the pattern/RAG-improved prompt so the OPRO meta-prompt can see it too
this.history.push({ prompt: currentPrompt, score: currentScore });
}
// Phase 3: OPRO-style iterative optimization
while (iterations < this.config.maxIterations) {
iterations++;
const candidate = await this.generateCandidate(currentPrompt, domain);
const candidateScore = await this.scorePrompt(candidate, domain);
scores.push(candidateScore);
this.history.push({ prompt: candidate, score: candidateScore });
// Check if improvement
if (candidateScore > currentScore) {
const improvement = candidateScore - currentScore;
currentPrompt = candidate;
currentScore = candidateScore;
improvements.push(`Iteration ${iterations}: +${(improvement * 100).toFixed(1)}% (now ${(currentScore * 100).toFixed(1)}%)`);
// Include evaluation reasoning if available
const evaluation = this.getLastEvaluation();
if (evaluation?.reasoning) {
improvements.push(` Reason: ${evaluation.reasoning.slice(0, 100)}...`);
}
} else {
improvements.push(`Iteration ${iterations}: No improvement (${(candidateScore * 100).toFixed(1)}% vs ${(currentScore * 100).toFixed(1)}%)`);
}
// Check convergence
if (this.checkConvergence(scores)) {
improvements.push('Converged - stopping optimization');
break;
}
// Check if target reached
if (currentScore >= this.config.targetScore) {
improvements.push(`Target score ${(this.config.targetScore * 100).toFixed(0)}% reached!`);
break;
}
}
const finalScore = currentScore;
return {
original_prompt: originalPrompt,
optimized_prompt: currentPrompt,
improvements_made: improvements,
iterations,
estimated_improvement: finalScore - originalScore,
similar_prompts_used: similarPrompts.length
};
}
/**
* Apply pattern-based improvements
*/
applyPatterns(prompt: string): { improved: string; applied: string[] } {
let improved = prompt;
const applied: string[] = [];
for (const pattern of IMPROVEMENT_PATTERNS) {
if (pattern.check(improved)) {
improved = pattern.apply(improved);
applied.push(pattern.name);
}
}
return { improved, applied };
}
/**
* Learn from similar high-performing prompts
* CONSERVATIVE: Only adds clarity/structure, never changes the core request
*/
async learnFromSimilar(
prompt: string,
similarPrompts: PromptRecord[]
): Promise<{ improved: string; insights: string }> {
// Sort by success rate
const sorted = [...similarPrompts].sort(
(a, b) => b.metrics.success_rate - a.metrics.success_rate
);
// Extract patterns from top performers
const topPrompts = sorted.slice(0, 3).map(p => p.prompt_text);
// Use LLM to synthesize improvements - CONSERVATIVE mode
const response = await this.openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `You are a prompt clarity specialist. Your job is to make prompts clearer and more specific WITHOUT changing what the user is asking for.
RULES:
1. PRESERVE the exact intent - if they say "fix bug", don't change it to "refactor" or add new features
2. NEVER add requirements the user didn't mention (no "add tests", "add comments", "implement X" unless asked)
3. ONLY add clarity: specify what kind of output, ask for explanation of changes, request step-by-step approach
4. Keep it concise - don't bloat a simple request
5. If the prompt is already clear, return it unchanged`
},
{
role: 'user',
content: `Original prompt to clarify:
"${prompt}"
Reference patterns from effective prompts (for style, not content):
${topPrompts.map((p, i) => `${i + 1}. ${p}`).join('\n')}
Clarify the original prompt while keeping the EXACT same request. Output only the improved prompt:`
}
],
max_tokens: 300,
temperature: 0.3 // Lower temp for more conservative output
});
const improved = response.choices[0]?.message?.content?.trim() || prompt;
// Safety check: if the improved version is way longer or seems to have added scope, reject it
if (improved.length > prompt.length * 3) {
return { improved: prompt, insights: 'Skipped - would add too much scope' };
}
return {
improved,
insights: `Clarified based on ${topPrompts.length} effective prompts`
};
}
/**
* Generate a candidate using OPRO-style meta-prompting
* CONSERVATIVE: Improves clarity without changing scope
*/
async generateCandidate(currentPrompt: string, domain: string): Promise<string> {
// Build meta-prompt with history
const historyText = this.history
.slice(-5)
.map(h => `Prompt (score: ${h.score.toFixed(2)}): ${h.prompt.slice(0, 100)}...`)
.join('\n');
const response = await this.openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `You are a prompt clarity specialist for "${domain}" tasks. Make prompts clearer WITHOUT changing their scope.
STRICT RULES:
- NEVER add new requirements or features the user didn't ask for
- NEVER change "fix" to "refactor" or "implement" to "redesign"
- ONLY improve: clarity, specificity about the existing request, output format
- Keep the improved prompt concise - don't add fluff
- If the prompt is already good, return it unchanged`
},
{
role: 'user',
content: `Previous attempts and scores:
${historyText}
Current prompt:
${currentPrompt}
Make this clearer while preserving the EXACT intent. Output only the improved prompt:`
}
],
max_tokens: 300,
temperature: 0.4 // Lower temp for conservative output
});
const result = response.choices[0]?.message?.content?.trim() || currentPrompt;
// Safety: reject if it added too much
if (result.length > currentPrompt.length * 2.5) {
return currentPrompt;
}
return result;
}
/**
* Score a prompt using actual LLM-based evaluation
* This is production code - the LLM is the primary evaluator; the heuristic
* below is used only as a fallback if the evaluation call fails
*/
async scorePrompt(prompt: string, domain: string = 'general'): Promise<number> {
const criteriaList = Object.entries(EVALUATION_CRITERIA)
.map(([name, { description }]) => `- ${name}: ${description}`)
.join('\n');
try {
const response = await this.openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `You are an expert prompt engineer evaluating prompt quality. Rate prompts on a 0-10 scale for each criterion. Be critical but fair. Output ONLY valid JSON.`
},
{
role: 'user',
content: `Evaluate this prompt for the "${domain}" domain:
---
${prompt}
---
Rate each criterion (0-10):
${criteriaList}
Output JSON format:
{
"clarity": <0-10>,
"specificity": <0-10>,
"completeness": <0-10>,
"structure": <0-10>,
"effectiveness": <0-10>,
"reasoning": "<brief explanation of strengths and weaknesses>"
}`
}
],
max_tokens: 300,
temperature: 0.3, // Low temp for consistent evaluation
response_format: { type: 'json_object' }
});
const evaluation = JSON.parse(response.choices[0]?.message?.content || '{}');
// Calculate weighted score
let weightedScore = 0;
for (const [criterion, { weight }] of Object.entries(EVALUATION_CRITERIA)) {
// Coerce to a number and clamp to the 0-10 range in case the model drifts
const score = Math.min(10, Math.max(0, Number(evaluation[criterion]) || 0));
weightedScore += (score / 10) * weight;
}
// Store reasoning for potential use
this.lastEvaluation = {
scores: evaluation,
reasoning: evaluation.reasoning,
weightedScore
};
return weightedScore;
} catch (e) {
console.error('[Optimizer] Failed to score prompt:', e);
// Fallback to quick heuristic only if LLM fails
return this.quickHeuristicFallback(prompt);
}
}
/**
* Fallback heuristic ONLY used if LLM evaluation fails
*/
private quickHeuristicFallback(prompt: string): number {
let score = 0.5;
const length = prompt.length;
if (length > 50 && length < 500) score += 0.1;
if (prompt.includes('\n')) score += 0.05;
if (prompt.match(/\d\./)) score += 0.05;
if (prompt.toLowerCase().includes('step by step')) score += 0.1;
return Math.min(score, 1.0);
}
// Store last evaluation for debugging/insight
private lastEvaluation: {
scores: Record<string, number>;
reasoning: string;
weightedScore: number;
} | null = null;
/**
* Get the last evaluation details
*/
getLastEvaluation() {
return this.lastEvaluation;
}
/**
* Check if optimization has converged
*/
checkConvergence(scores: number[]): boolean {
if (scores.length < this.config.convergenceWindow) return false;
const recent = scores.slice(-this.config.convergenceWindow);
const maxRecent = Math.max(...recent);
const minRecent = Math.min(...recent);
return (maxRecent - minRecent) < this.config.convergenceThreshold;
}
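// Example (hypothetical scores): with the default window of 3 and threshold of 0.02,
// [0.70, 0.80, 0.81, 0.80] converges because the last three scores span only 0.01,
// while [0.70, 0.78, 0.80, 0.83] does not (span 0.05 >= 0.02).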
/**
* Get suggestions without full optimization
*/
getSuggestions(prompt: string): Array<{ type: string; description: string; example: string; expectedImprovement: number }> {
const suggestions: Array<{ type: string; description: string; example: string; expectedImprovement: number }> = [];
for (const pattern of IMPROVEMENT_PATTERNS) {
if (pattern.check(prompt)) {
suggestions.push({
type: pattern.name,
description: `Add ${pattern.name.replace(/_/g, ' ')}`,
example: pattern.apply(prompt).slice(prompt.length).trim(),
expectedImprovement: pattern.expectedImprovement
});
}
}
return suggestions;
}
}
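/*
* Usage sketch (illustrative only; the import path './optimizer.js' is assumed,
* and the calls below make real OpenAI API requests, so run them in an async
* context with OPENAI_API_KEY configured):
*
* import OpenAI from 'openai';
* import { PromptOptimizer } from './optimizer.js';
*
* const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment
* const optimizer = new PromptOptimizer(openai, { maxIterations: 5 });
*
* // Full optimization loop (pattern pass + LLM scoring + OPRO iterations)
* const result = await optimizer.optimize('Fix the login bug', [], 'coding');
* console.log(result.optimized_prompt, result.estimated_improvement);
*
* // Cheap, call-free suggestions from the pattern library
* console.log(optimizer.getSuggestions('Fix the login bug'));
*/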