/**
* Prompt optimization engine using APE/OPRO patterns
*
* This is PRODUCTION code. Evaluation is done via actual LLM calls, with a
* lightweight heuristic used only as a fallback when the LLM call fails.
*/
import OpenAI from 'openai';
import type { OptimizePromptResult, PromptRecord } from './types.js';
interface OptimizationConfig {
maxIterations: number;
targetScore: number;
convergenceThreshold: number;
convergenceWindow: number;
}
const DEFAULT_CONFIG: OptimizationConfig = {
maxIterations: 10,
targetScore: 0.95,
convergenceThreshold: 0.02,
convergenceWindow: 3
};
// Evaluation criteria with weights
const EVALUATION_CRITERIA = {
clarity: { weight: 0.25, description: 'How clear and unambiguous is the instruction?' },
specificity: { weight: 0.25, description: 'Does it provide specific guidance without being overly restrictive?' },
completeness: { weight: 0.20, description: 'Does it cover all necessary aspects of the task?' },
structure: { weight: 0.15, description: 'Is it well-organized with appropriate formatting?' },
effectiveness: { weight: 0.15, description: 'How likely is it to produce the desired output?' }
};
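// Weights sum to 1.0, so the weighted score stays in [0, 1]. As an illustrative
// (hypothetical) example, ratings of clarity 8, specificity 7, completeness 6,
// structure 9, effectiveness 7 would yield:
// 0.8*0.25 + 0.7*0.25 + 0.6*0.20 + 0.9*0.15 + 0.7*0.15 = 0.735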
// Pattern-based improvements that don't require LLM calls
const IMPROVEMENT_PATTERNS = [
{
name: 'add_structure',
check: (p: string) => !['1.', 'step', 'first'].some(k => p.toLowerCase().includes(k)), // case-insensitive, matching the other checks
apply: (p: string) => `${p}\n\nProvide your response in a structured format with clear sections.`,
expectedImprovement: 0.15
},
{
name: 'add_chain_of_thought',
check: (p: string) => !p.toLowerCase().includes('step by step') && !p.toLowerCase().includes('think through'),
apply: (p: string) => `${p}\n\nThink through this step by step, showing your reasoning.`,
expectedImprovement: 0.20
},
{
name: 'add_constraints',
check: (p: string) => p.length < 150 && !p.includes('Requirements'),
apply: (p: string) => `${p}\n\nRequirements:\n- Be specific and precise\n- Support claims with evidence\n- Stay focused on the core question`,
expectedImprovement: 0.10
},
{
name: 'add_output_format',
check: (p: string) => !p.toLowerCase().includes('format') && !p.toLowerCase().includes('respond with'),
apply: (p: string) => `${p}\n\nFormat your response clearly with headers where appropriate.`,
expectedImprovement: 0.08
},
{
name: 'add_context_request',
check: (p: string) => !p.toLowerCase().includes('context') && !p.toLowerCase().includes('background'),
apply: (p: string) => `${p}\n\nConsider relevant context and background information.`,
expectedImprovement: 0.05
}
];
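// Example (hypothetical input): "Summarize this article" matches the
// add_chain_of_thought check, so applyPatterns() would append
// "Think through this step by step, showing your reasoning." to it.
// Patterns are applied in order and later checks see the already-extended
// prompt, so a pattern can be skipped once an earlier one has satisfied its check.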
export class PromptOptimizer {
private openai: OpenAI;
private config: OptimizationConfig;
private history: Array<{ prompt: string; score: number }> = [];
constructor(openai: OpenAI, config: Partial<OptimizationConfig> = {}) {
this.openai = openai;
this.config = { ...DEFAULT_CONFIG, ...config };
}
/**
* Optimize a prompt using pattern-based and LLM-based approaches
*/
async optimize(
originalPrompt: string,
similarPrompts: PromptRecord[] = [],
domain: string = 'general'
): Promise<OptimizePromptResult> {
const improvements: string[] = [];
let currentPrompt = originalPrompt;
let iterations = 0;
const scores: number[] = [];
// Score the original prompt first
const originalScore = await this.scorePrompt(originalPrompt, domain);
improvements.push(`Original score: ${(originalScore * 100).toFixed(1)}%`);
// Phase 1: Apply pattern-based improvements (fast, no API calls)
const { improved: patternImproved, applied } = this.applyPatterns(currentPrompt);
if (applied.length > 0) {
currentPrompt = patternImproved;
improvements.push(...applied.map(p => `Applied pattern: ${p}`));
}
// Phase 2: If we have similar high-performing prompts, learn from them
if (similarPrompts.length > 0) {
const { improved: ragImproved, insights } = await this.learnFromSimilar(
currentPrompt,
similarPrompts
);
if (ragImproved !== currentPrompt) {
currentPrompt = ragImproved;
improvements.push(`RAG improvement: ${insights}`);
}
}
// Score after pattern/RAG improvements
let currentScore = await this.scorePrompt(currentPrompt, domain);
scores.push(currentScore);
this.history = [{ prompt: originalPrompt, score: originalScore }];
if (currentPrompt !== originalPrompt) {
// Record the pattern/RAG-improved prompt so the OPRO meta-prompt can see it too
this.history.push({ prompt: currentPrompt, score: currentScore });
}
// Phase 3: OPRO-style iterative optimization
while (iterations < this.config.maxIterations) {
iterations++;
const candidate = await this.generateCandidate(currentPrompt, domain);
const candidateScore = await this.scorePrompt(candidate, domain);
scores.push(candidateScore);
this.history.push({ prompt: candidate, score: candidateScore });
// Check if improvement
if (candidateScore > currentScore) {
const improvement = candidateScore - currentScore;
currentPrompt = candidate;
currentScore = candidateScore;
improvements.push(`Iteration ${iterations}: +${(improvement * 100).toFixed(1)}% (now ${(currentScore * 100).toFixed(1)}%)`);
// Include evaluation reasoning if available
const evaluation = this.getLastEvaluation();
if (evaluation?.reasoning) {
improvements.push(` Reason: ${evaluation.reasoning.slice(0, 100)}...`);
}
} else {
improvements.push(`Iteration ${iterations}: No improvement (${(candidateScore * 100).toFixed(1)}% vs ${(currentScore * 100).toFixed(1)}%)`);
}
// Check convergence
if (this.checkConvergence(scores)) {
improvements.push('Converged - stopping optimization');
break;
}
// Check if target reached
if (currentScore >= this.config.targetScore) {
improvements.push(`Target score ${(this.config.targetScore * 100).toFixed(0)}% reached!`);
break;
}
}
const finalScore = currentScore;
return {
original_prompt: originalPrompt,
optimized_prompt: currentPrompt,
improvements_made: improvements,
iterations,
estimated_improvement: finalScore - originalScore,
similar_prompts_used: similarPrompts.length
};
}
/**
* Apply pattern-based improvements
*/
applyPatterns(prompt: string): { improved: string; applied: string[] } {
let improved = prompt;
const applied: string[] = [];
for (const pattern of IMPROVEMENT_PATTERNS) {
if (pattern.check(improved)) {
improved = pattern.apply(improved);
applied.push(pattern.name);
}
}
return { improved, applied };
}
/**
* Learn from similar high-performing prompts
* CONSERVATIVE: Only adds clarity/structure, never changes the core request
*/
async learnFromSimilar(
prompt: string,
similarPrompts: PromptRecord[]
): Promise<{ improved: string; insights: string }> {
// Sort by success rate
const sorted = [...similarPrompts].sort(
(a, b) => b.metrics.success_rate - a.metrics.success_rate
);
// Extract patterns from top performers
const topPrompts = sorted.slice(0, 3).map(p => p.prompt_text);
// Use LLM to synthesize improvements - CONSERVATIVE mode
const response = await this.openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `You are a prompt clarity specialist. Your job is to make prompts clearer and more specific WITHOUT changing what the user is asking for.
RULES:
1. PRESERVE the exact intent - if they say "fix bug", don't change it to "refactor" or add new features
2. NEVER add requirements the user didn't mention (no "add tests", "add comments", "implement X" unless asked)
3. ONLY add clarity: specify what kind of output, ask for explanation of changes, request step-by-step approach
4. Keep it concise - don't bloat a simple request
5. If the prompt is already clear, return it unchanged`
},
{
role: 'user',
content: `Original prompt to clarify:
"${prompt}"
Reference patterns from effective prompts (for style, not content):
${topPrompts.map((p, i) => `${i + 1}. ${p}`).join('\n')}
Clarify the original prompt while keeping the EXACT same request. Output only the improved prompt:`
}
],
max_tokens: 300,
temperature: 0.3 // Lower temp for more conservative output
});
const improved = response.choices[0]?.message?.content?.trim() || prompt;
// Safety check: if the improved version is way longer or seems to have added scope, reject it
if (improved.length > prompt.length * 3) {
return { improved: prompt, insights: 'Skipped - would add too much scope' };
}
return {
improved,
insights: `Clarified based on ${topPrompts.length} effective prompts`
};
}
/**
* Generate a candidate using OPRO-style meta-prompting
* CONSERVATIVE: Improves clarity without changing scope
*/
async generateCandidate(currentPrompt: string, domain: string): Promise<string> {
// Build meta-prompt with history
const historyText = this.history
.slice(-5)
.map(h => `Prompt (score: ${h.score.toFixed(2)}): ${h.prompt.slice(0, 100)}...`)
.join('\n');
const response = await this.openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `You are a prompt clarity specialist for "${domain}" tasks. Make prompts clearer WITHOUT changing their scope.
STRICT RULES:
- NEVER add new requirements or features the user didn't ask for
- NEVER change "fix" to "refactor" or "implement" to "redesign"
- ONLY improve: clarity, specificity about the existing request, output format
- Keep the improved prompt concise - don't add fluff
- If the prompt is already good, return it unchanged`
},
{
role: 'user',
content: `Previous attempts and scores:
${historyText}
Current prompt:
${currentPrompt}
Make this clearer while preserving the EXACT intent. Output only the improved prompt:`
}
],
max_tokens: 300,
temperature: 0.4 // Lower temp for conservative output
});
const result = response.choices[0]?.message?.content?.trim() || currentPrompt;
// Safety: reject if it added too much
if (result.length > currentPrompt.length * 2.5) {
return currentPrompt;
}
return result;
}
/**
* Score a prompt using actual LLM-based evaluation
* This is production code - the LLM is the primary evaluator; the heuristic
* below is used only as a fallback if the evaluation call fails
*/
async scorePrompt(prompt: string, domain: string = 'general'): Promise<number> {
const criteriaList = Object.entries(EVALUATION_CRITERIA)
.map(([name, { description }]) => `- ${name}: ${description}`)
.join('\n');
try {
const response = await this.openai.chat.completions.create({
model: 'gpt-4o-mini',
messages: [
{
role: 'system',
content: `You are an expert prompt engineer evaluating prompt quality. Rate prompts on a 0-10 scale for each criterion. Be critical but fair. Output ONLY valid JSON.`
},
{
role: 'user',
content: `Evaluate this prompt for the "${domain}" domain:
---
${prompt}
---
Rate each criterion (0-10):
${criteriaList}
Output JSON format:
{
"clarity": <0-10>,
"specificity": <0-10>,
"completeness": <0-10>,
"structure": <0-10>,
"effectiveness": <0-10>,
"reasoning": "<brief explanation of strengths and weaknesses>"
}`
}
],
max_tokens: 300,
temperature: 0.3, // Low temp for consistent evaluation
response_format: { type: 'json_object' }
});
const evaluation = JSON.parse(response.choices[0]?.message?.content || '{}');
// Calculate weighted score
let weightedScore = 0;
for (const [criterion, { weight }] of Object.entries(EVALUATION_CRITERIA)) {
// Coerce to a number and clamp to the 0-10 range in case the model drifts
const score = Math.min(10, Math.max(0, Number(evaluation[criterion]) || 0));
weightedScore += (score / 10) * weight;
}
// Store reasoning for potential use
this.lastEvaluation = {
scores: evaluation,
reasoning: evaluation.reasoning,
weightedScore
};
return weightedScore;
} catch (e) {
console.error('[Optimizer] Failed to score prompt:', e);
// Fallback to quick heuristic only if LLM fails
return this.quickHeuristicFallback(prompt);
}
}
/**
* Fallback heuristic ONLY used if LLM evaluation fails
*/
private quickHeuristicFallback(prompt: string): number {
let score = 0.5;
const length = prompt.length;
if (length > 50 && length < 500) score += 0.1;
if (prompt.includes('\n')) score += 0.05;
if (prompt.match(/\d\./)) score += 0.05;
if (prompt.toLowerCase().includes('step by step')) score += 0.1;
return Math.min(score, 1.0);
}
// Store last evaluation for debugging/insight
private lastEvaluation: {
scores: Record<string, number>;
reasoning: string;
weightedScore: number;
} | null = null;
/**
* Get the last evaluation details
*/
getLastEvaluation() {
return this.lastEvaluation;
}
/**
* Check if optimization has converged
*/
checkConvergence(scores: number[]): boolean {
if (scores.length < this.config.convergenceWindow) return false;
const recent = scores.slice(-this.config.convergenceWindow);
const maxRecent = Math.max(...recent);
const minRecent = Math.min(...recent);
return (maxRecent - minRecent) < this.config.convergenceThreshold;
}
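// Example (hypothetical scores): with the default window of 3 and threshold of 0.02,
// [0.70, 0.80, 0.81, 0.80] converges because the last three scores span only 0.01,
// while [0.70, 0.78, 0.80, 0.83] does not (span 0.05 >= 0.02).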
/**
* Get suggestions without full optimization
*/
getSuggestions(prompt: string): Array<{ type: string; description: string; example: string; expectedImprovement: number }> {
const suggestions: Array<{ type: string; description: string; example: string; expectedImprovement: number }> = [];
for (const pattern of IMPROVEMENT_PATTERNS) {
if (pattern.check(prompt)) {
suggestions.push({
type: pattern.name,
description: `Add ${pattern.name.replace(/_/g, ' ')}`,
example: pattern.apply(prompt).slice(prompt.length).trim(),
expectedImprovement: pattern.expectedImprovement
});
}
}
return suggestions;
}
}
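/*
* Usage sketch (illustrative only; the import path './optimizer.js' is assumed,
* and the calls below make real OpenAI API requests, so run them in an async
* context with OPENAI_API_KEY configured):
*
* import OpenAI from 'openai';
* import { PromptOptimizer } from './optimizer.js';
*
* const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment
* const optimizer = new PromptOptimizer(openai, { maxIterations: 5 });
*
* // Full optimization loop (pattern pass + LLM scoring + OPRO iterations)
* const result = await optimizer.optimize('Fix the login bug', [], 'coding');
* console.log(result.optimized_prompt, result.estimated_improvement);
*
* // Cheap, call-free suggestions from the pattern library
* console.log(optimizer.getSuggestions('Fix the login bug'));
*/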