// multimodal-optimization-example.ts
/**
* Complete Example: Multi-modal Prompt Evolution
*
* This example demonstrates optimizing prompts for multi-modal tasks that involve
* both text and images, such as image description, visual question answering,
* and content moderation.
*/
import {
EvolutionEngine,
ParetoFrontier,
ReflectionEngine,
PromptMutator,
PerformanceTracker,
LLMAdapter
} from '../src/index';
import {
PromptCandidate,
TaskContext,
EvolutionConfig,
ParetoObjective,
ExecutionTrajectory
} from '../src/types/gepa';
// 1. Multi-modal Objectives
const multimodalObjectives: ParetoObjective[] = [
{
name: 'visual_accuracy',
weight: 0.30,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('visual-accuracy') || 0;
}
},
{
name: 'description_quality',
weight: 0.25,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('description-quality') || 0;
}
},
{
name: 'contextual_understanding',
weight: 0.20,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('contextual-understanding') || 0;
}
},
{
name: 'safety_compliance',
weight: 0.15,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
return candidate.taskPerformance.get('safety-compliance') || 0;
}
},
{
name: 'response_time',
weight: 0.10,
direction: 'minimize',
extractor: (candidate: PromptCandidate) => {
const time = candidate.taskPerformance.get('response-time') || 3000;
return time / 1000; // Normalize to seconds
}
}
];
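// Illustration only: a minimal sketch of how these weighted objectives could be
// collapsed into a single scalar score. The helper below is hypothetical and not
// part of the GEPA API; minimized objectives are negated so higher is always better.
export function scalarizeObjectives(candidate: PromptCandidate, objectives: ParetoObjective[]): number {
  return objectives.reduce((total, objective) => {
    const value = objective.extractor(candidate);
    const signed = objective.direction === 'maximize' ? value : -value;
    return total + objective.weight * signed;
  }, 0);
}
// Example: scalarizeObjectives(candidate, multimodalObjectives)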
// 2. Multi-modal Test Scenarios
interface MultimodalTestScenario {
id: string;
type: 'image-description' | 'visual-qa' | 'content-moderation' | 'scene-analysis';
description: string;
imageMetadata: {
type: string;
complexity: 'low' | 'medium' | 'high';
expectedElements: string[];
safetyRating: 'safe' | 'warning' | 'unsafe';
};
groundTruth: {
description?: string;
answers?: Record<string, string>;
safetyAssessment?: string;
keyElements: string[];
};
evaluationCriteria: string[];
}
const multimodalScenarios: MultimodalTestScenario[] = [
{
id: 'nature-landscape',
type: 'image-description',
description: 'Mountain landscape with lake and forest',
imageMetadata: {
type: 'landscape',
complexity: 'medium',
expectedElements: ['mountains', 'lake', 'trees', 'sky', 'reflection'],
safetyRating: 'safe'
},
groundTruth: {
description: 'A serene mountain lake surrounded by dense forest with snow-capped peaks reflected in the calm water',
keyElements: ['lake', 'mountains', 'forest', 'reflection', 'peaceful scene']
},
evaluationCriteria: [
'Accurate identification of main elements',
'Descriptive language quality',
'Spatial relationship understanding',
'Atmospheric description'
]
},
{
id: 'urban-street-scene',
type: 'visual-qa',
description: 'Busy city street with traffic and pedestrians',
imageMetadata: {
type: 'urban',
complexity: 'high',
expectedElements: ['cars', 'people', 'buildings', 'traffic lights', 'sidewalk'],
safetyRating: 'safe'
},
groundTruth: {
answers: {
'How many cars are visible?': '3-5 cars',
'What time of day does it appear to be?': 'Daytime',
'Are there pedestrians?': 'Yes, multiple pedestrians',
'What type of area is this?': 'Commercial/downtown area'
},
keyElements: ['traffic', 'pedestrians', 'urban environment', 'commercial area']
},
evaluationCriteria: [
'Accurate counting and observation',
'Contextual understanding',
'Time and environment assessment',
'Detail attention'
]
},
{
id: 'content-safety-check',
type: 'content-moderation',
description: 'Image requiring safety assessment',
imageMetadata: {
type: 'mixed-content',
complexity: 'medium',
expectedElements: ['people', 'objects', 'setting'],
safetyRating: 'warning'
},
groundTruth: {
safetyAssessment: 'Contains potentially sensitive content that requires careful handling',
keyElements: ['safety analysis', 'content classification', 'risk assessment']
},
evaluationCriteria: [
'Accurate safety assessment',
'Appropriate content classification',
'Risk level identification',
'Compliance with safety guidelines'
]
},
{
id: 'complex-scene-analysis',
type: 'scene-analysis',
description: 'Indoor scene with multiple objects and activities',
imageMetadata: {
type: 'interior',
complexity: 'high',
expectedElements: ['furniture', 'people', 'activities', 'lighting', 'decorations'],
safetyRating: 'safe'
},
groundTruth: {
description: 'Living room with family members engaged in various activities',
keyElements: ['social interaction', 'domestic setting', 'multiple activities', 'casual atmosphere']
},
evaluationCriteria: [
'Scene understanding and context',
'Activity recognition',
'Relationship interpretation',
'Comprehensive analysis'
]
}
];
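// Illustration only: the scenarios above can be grouped by task type, which is useful
// when evaluating a single capability in isolation (e.g. just the content-moderation
// cases). This helper is a hypothetical convenience, not part of the GEPA API.
export function scenariosByType(type: MultimodalTestScenario['type']): MultimodalTestScenario[] {
  return multimodalScenarios.filter(scenario => scenario.type === type);
}
// Example: const moderationOnly = scenariosByType('content-moderation');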
// 3. Multi-modal LLM Adapter
class MultimodalAdapter extends LLMAdapter {
private scenarios = multimodalScenarios;
async evaluatePrompt(prompt: string, context: TaskContext): Promise<any> {
const results = [];
for (const scenario of this.scenarios) {
const startTime = Date.now();
try {
// Simulate multi-modal analysis
const analysisResult = await this.analyzeMultimodalScenario(prompt, scenario);
const responseTime = Date.now() - startTime;
// Evaluate the analysis
const evaluation = await this.evaluateMultimodalResponse(
analysisResult,
scenario
);
results.push({
scenarioId: scenario.id,
response: analysisResult,
responseTime,
evaluation,
success: evaluation.overall > 0.7
});
} catch (error) {
results.push({
scenarioId: scenario.id,
response: '',
responseTime: Date.now() - startTime,
evaluation: this.getFailureEvaluation(),
success: false,
error: error.message
});
}
}
// Aggregate metrics
const metrics = this.aggregateMultimodalMetrics(results);
const overallScore = this.calculateOverallScore(metrics);
return {
score: overallScore,
trajectory: this.createMultimodalTrajectory(context, results, metrics)
};
}
private async analyzeMultimodalScenario(
prompt: string,
scenario: MultimodalTestScenario
): Promise<any> {
// Create task-specific prompt based on scenario type
let taskPrompt = '';
switch (scenario.type) {
case 'image-description':
taskPrompt = `
${prompt}
Task: Provide a detailed description of the image showing: ${scenario.description}
Image contains: ${scenario.imageMetadata.expectedElements.join(', ')}
Complexity level: ${scenario.imageMetadata.complexity}
Please provide a comprehensive description that captures the scene, mood, and important details.
`;
break;
case 'visual-qa':
taskPrompt = `
${prompt}
Task: Answer questions about the image showing: ${scenario.description}
Questions to answer:
${Object.keys(scenario.groundTruth.answers || {}).map(q => `- ${q}`).join('\n')}
Please provide accurate, specific answers based on what you observe in the image.
`;
break;
case 'content-moderation':
taskPrompt = `
${prompt}
Task: Assess the safety and appropriateness of this image for content moderation.
Image description: ${scenario.description}
Expected safety rating: ${scenario.imageMetadata.safetyRating}
Provide a safety assessment including content classification and any warnings needed.
`;
break;
case 'scene-analysis':
taskPrompt = `
${prompt}
Task: Perform comprehensive scene analysis of the image showing: ${scenario.description}
Analyze:
- Main elements and objects
- Activities and interactions
- Spatial relationships
- Context and setting
Provide detailed analysis covering all aspects of the scene.
`;
break;
}
const response = await this.callLLM(taskPrompt);
return {
type: scenario.type,
content: response.content,
analysis: this.parseResponseForScenario(response.content, scenario)
};
}
private parseResponseForScenario(response: string, scenario: MultimodalTestScenario): any {
// Parse response based on scenario type
const analysis: any = {
raw: response,
elements: [],
confidence: 0.8
};
// Extract mentioned elements
const lowerResponse = response.toLowerCase();
scenario.imageMetadata.expectedElements.forEach(element => {
if (lowerResponse.includes(element.toLowerCase())) {
analysis.elements.push(element);
}
});
// Type-specific parsing
switch (scenario.type) {
case 'image-description':
analysis.descriptiveWords = this.extractDescriptiveLanguage(response);
analysis.sceneUnderstanding = this.assessSceneUnderstanding(response, scenario);
break;
case 'visual-qa':
analysis.answers = this.extractAnswers(response, scenario.groundTruth.answers || {});
analysis.accuracy = this.assessAnswerAccuracy(analysis.answers, scenario.groundTruth.answers || {});
break;
case 'content-moderation':
analysis.safetyAssessment = this.extractSafetyAssessment(response);
analysis.riskLevel = this.assessRiskLevel(response, scenario.imageMetadata.safetyRating);
break;
case 'scene-analysis':
analysis.comprehensiveness = this.assessComprehensiveness(response, scenario);
analysis.contextualInsight = this.assessContextualInsight(response);
break;
}
return analysis;
}
private async evaluateMultimodalResponse(
result: any,
scenario: MultimodalTestScenario
): Promise<any> {
const evaluation: any = {
visual_accuracy: 0,
description_quality: 0,
contextual_understanding: 0,
safety_compliance: 0.8, // Default safe
overall: 0
};
// Visual accuracy assessment
evaluation.visual_accuracy = this.evaluateVisualAccuracy(result.analysis, scenario);
// Description quality
evaluation.description_quality = this.evaluateDescriptionQuality(result.content, scenario);
// Contextual understanding
evaluation.contextual_understanding = this.evaluateContextualUnderstanding(result.analysis, scenario);
// Safety compliance
evaluation.safety_compliance = this.evaluateSafetyCompliance(result.analysis, scenario);
// Overall score
evaluation.overall = (
evaluation.visual_accuracy * 0.3 +
evaluation.description_quality * 0.25 +
evaluation.contextual_understanding * 0.25 +
evaluation.safety_compliance * 0.2
);
return evaluation;
}
private evaluateVisualAccuracy(analysis: any, scenario: MultimodalTestScenario): number {
const expectedElements = scenario.imageMetadata.expectedElements;
const identifiedElements = analysis.elements || [];
// Calculate element identification accuracy
const correctIdentifications = identifiedElements.filter(element =>
expectedElements.some(expected =>
expected.toLowerCase().includes(element.toLowerCase()) ||
element.toLowerCase().includes(expected.toLowerCase())
)
).length;
const accuracy = correctIdentifications / Math.max(expectedElements.length, 1);
// Bonus for additional relevant details
const detailBonus = scenario.groundTruth.keyElements.filter(key =>
analysis.raw.toLowerCase().includes(key.toLowerCase())
).length * 0.1;
return Math.min(accuracy + detailBonus, 1.0);
}
private evaluateDescriptionQuality(content: string, scenario: MultimodalTestScenario): number {
let qualityScore = 0.5; // Base score
// Check for descriptive language
const descriptiveWords = ['beautiful', 'serene', 'vibrant', 'detailed', 'clear', 'bright', 'peaceful'];
const usedDescriptiveWords = descriptiveWords.filter(word =>
content.toLowerCase().includes(word)
).length;
qualityScore += Math.min(usedDescriptiveWords * 0.1, 0.3);
// Check for spatial relationships
const spatialWords = ['above', 'below', 'beside', 'in front of', 'behind', 'surrounding'];
const usedSpatialWords = spatialWords.filter(word =>
content.toLowerCase().includes(word)
).length;
qualityScore += Math.min(usedSpatialWords * 0.05, 0.2);
// Length and detail appropriate to complexity
const wordCount = content.split(' ').length;
const expectedLength = scenario.imageMetadata.complexity === 'high' ? 100 :
scenario.imageMetadata.complexity === 'medium' ? 60 : 30;
if (wordCount >= expectedLength * 0.8 && wordCount <= expectedLength * 2) {
qualityScore += 0.2;
}
return Math.min(qualityScore, 1.0);
}
private evaluateContextualUnderstanding(analysis: any, scenario: MultimodalTestScenario): number {
let contextScore = 0.6; // Base score
// Check for scene-specific understanding
switch (scenario.type) {
case 'scene-analysis':
if (analysis.comprehensiveness > 0.7) contextScore += 0.2;
if (analysis.contextualInsight > 0.7) contextScore += 0.2;
break;
case 'visual-qa':
if (analysis.accuracy > 0.8) contextScore += 0.3;
break;
case 'image-description':
if (analysis.sceneUnderstanding > 0.7) contextScore += 0.3;
break;
}
return Math.min(contextScore, 1.0);
}
private evaluateSafetyCompliance(analysis: any, scenario: MultimodalTestScenario): number {
// Base safety score
let safetyScore = 0.8;
if (scenario.type === 'content-moderation') {
// More rigorous safety evaluation for moderation tasks
if (analysis.safetyAssessment && analysis.riskLevel !== undefined) {
safetyScore = analysis.riskLevel;
}
}
// Check for inappropriate content flags
const inappropriateTerms = ['explicit', 'violent', 'harmful', 'dangerous'];
const containsInappropriate = inappropriateTerms.some(term =>
analysis.raw.toLowerCase().includes(term)
);
if (containsInappropriate && scenario.imageMetadata.safetyRating === 'safe') {
safetyScore -= 0.3; // Penalty for false positives
}
return Math.max(safetyScore, 0.0);
}
// Helper methods for analysis parsing
private extractDescriptiveLanguage(response: string): string[] {
const descriptivePattern = /\b(beautiful|stunning|serene|vibrant|peaceful|dramatic|colorful|clear|bright)\b/gi;
return response.match(descriptivePattern) || [];
}
private assessSceneUnderstanding(response: string, scenario: MultimodalTestScenario): number {
const keyElements = scenario.groundTruth.keyElements;
const mentionedElements = keyElements.filter(element =>
response.toLowerCase().includes(element.toLowerCase())
);
return mentionedElements.length / keyElements.length;
}
private extractAnswers(response: string, expectedAnswers: Record<string, string>): Record<string, string> {
const answers: Record<string, string> = {};
// Simple extraction - in practice would use more sophisticated parsing
Object.keys(expectedAnswers).forEach(question => {
const questionStart = response.toLowerCase().indexOf(question.toLowerCase());
if (questionStart !== -1) {
const answerStart = questionStart + question.length;
const answerEnd = response.indexOf('\n', answerStart);
const answer = response.substring(answerStart, answerEnd === -1 ? answerStart + 100 : answerEnd).trim();
answers[question] = answer;
}
});
return answers;
}
private assessAnswerAccuracy(answers: Record<string, string>, expected: Record<string, string>): number {
const totalQuestions = Object.keys(expected).length;
let correctAnswers = 0;
Object.entries(expected).forEach(([question, expectedAnswer]) => {
const providedAnswer = answers[question];
if (providedAnswer && this.isAnswerSimilar(providedAnswer, expectedAnswer)) {
correctAnswers++;
}
});
return totalQuestions > 0 ? correctAnswers / totalQuestions : 0;
}
private isAnswerSimilar(provided: string, expected: string): boolean {
const normalizedProvided = provided.toLowerCase().replace(/[^\w\s]/g, '');
const normalizedExpected = expected.toLowerCase().replace(/[^\w\s]/g, '');
// Simple similarity check - could be enhanced with more sophisticated matching
return normalizedProvided.includes(normalizedExpected) ||
normalizedExpected.includes(normalizedProvided) ||
this.calculateSimilarity(normalizedProvided, normalizedExpected) > 0.6;
}
private calculateSimilarity(str1: string, str2: string): number {
const words1 = str1.split(' ');
const words2 = str2.split(' ');
const intersection = words1.filter(word => words2.includes(word));
const union = [...new Set([...words1, ...words2])];
return intersection.length / union.length;
}
private extractSafetyAssessment(response: string): any {
const lower = response.toLowerCase();
// Check 'unsafe' before 'safe' so the substring match does not misclassify unsafe content
const classification = lower.includes('unsafe') ? 'needs-review' :
lower.includes('warning') ? 'warning' :
lower.includes('safe') ? 'safe' : 'needs-review';
return {
classification,
confidence: 0.8,
details: response.substring(0, 200)
};
}
private assessRiskLevel(response: string, expectedRating: string): number {
const lower = response.toLowerCase();
// Same ordering caveat: test 'unsafe' before 'safe'
const responseLevel = lower.includes('unsafe') ? 'unsafe' :
lower.includes('warning') ? 'warning' :
lower.includes('safe') ? 'safe' : 'unsafe';
return responseLevel === expectedRating ? 0.9 : 0.6;
}
private assessComprehensiveness(response: string, scenario: MultimodalTestScenario): number {
const criteria = scenario.evaluationCriteria;
const mentionedCriteria = criteria.filter(criterion =>
response.toLowerCase().includes(criterion.toLowerCase().split(' ')[0])
);
return mentionedCriteria.length / criteria.length;
}
private assessContextualInsight(response: string): number {
const insightWords = ['interaction', 'relationship', 'context', 'atmosphere', 'mood', 'setting'];
const usedInsightWords = insightWords.filter(word =>
response.toLowerCase().includes(word)
);
return Math.min(usedInsightWords.length / insightWords.length, 1.0);
}
private aggregateMultimodalMetrics(results: any[]): any {
const validResults = results.filter(r => r.evaluation);
if (validResults.length === 0) {
return {
'visual-accuracy': 0,
'description-quality': 0,
'contextual-understanding': 0,
'safety-compliance': 0.5,
'response-time': 5000
};
}
return {
'visual-accuracy': this.average(validResults, 'visual_accuracy'),
'description-quality': this.average(validResults, 'description_quality'),
'contextual-understanding': this.average(validResults, 'contextual_understanding'),
'safety-compliance': this.average(validResults, 'safety_compliance'),
'response-time': validResults.reduce((sum, r) => sum + r.responseTime, 0) / validResults.length
};
}
private average(results: any[], metric: string): number {
const values = results.map(r => r.evaluation[metric]).filter(v => v !== undefined);
return values.length > 0 ? values.reduce((sum, val) => sum + val, 0) / values.length : 0;
}
private calculateOverallScore(metrics: any): number {
return (metrics['visual-accuracy'] * 0.3 +
metrics['description-quality'] * 0.25 +
metrics['contextual-understanding'] * 0.25 +
metrics['safety-compliance'] * 0.2);
}
private getFailureEvaluation() {
return {
visual_accuracy: 0,
description_quality: 0,
contextual_understanding: 0,
safety_compliance: 0.5,
overall: 0
};
}
private createMultimodalTrajectory(context: TaskContext, results: any[], metrics: any): ExecutionTrajectory {
return {
id: `multimodal-eval-${Date.now()}`,
promptId: 'candidate-prompt',
taskId: context.description,
startTime: Date.now() - 15000,
endTime: Date.now(),
steps: results.map((result, index) => ({
action: `analyze-${result.scenarioId}`,
input: result.scenarioId,
output: result.response?.content || '',
timestamp: Date.now() - (results.length - index) * 2000,
duration: result.responseTime,
success: result.success,
error: result.error
})),
finalResult: {
success: results.filter(r => r.success).length > results.length / 2,
score: metrics['visual-accuracy'],
output: results,
metrics
}
};
}
}
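// Illustration only: a minimal sketch showing the adapter exercised outside the
// evolution loop. It assumes the base LLMAdapter needs no constructor arguments;
// the function name and prompt text are illustrative.
export async function runAdapterSmokeTest(): Promise<void> {
  const adapter = new MultimodalAdapter();
  const context: TaskContext = {
    description: 'Adapter smoke test',
    category: 'multi-modal-analysis',
    difficulty: 'medium',
    requiredCapabilities: ['visual-perception'],
    expectedDuration: 10
  };
  const result = await adapter.evaluatePrompt('Describe the image precisely and flag any safety concerns.', context);
  console.log(`Smoke-test score: ${result.score.toFixed(3)}`);
}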
// 4. Main Multi-modal Optimization Function
export async function optimizeMultimodalPrompts(): Promise<void> {
console.log('🚀 Starting GEPA Multi-modal Optimization');
const performanceTracker = new PerformanceTracker({
enableRealTimeMonitoring: true,
memoryTrackingEnabled: true,
tokenUsageTrackingEnabled: true
});
const optimizationMetricId = performanceTracker.startMetricCollection(
'multimodal-optimization',
{ category: 'evolution', tags: { type: 'multimodal' } }
);
let evolutionEngine: EvolutionEngine | undefined;
try {
// 1. Setup Components
const llmAdapter = new MultimodalAdapter();
const paretoFrontier = new ParetoFrontier({
objectives: multimodalObjectives,
maxSize: 35,
samplingStrategy: { name: 'ucb', parameters: { confidence: 2.0 } }
});
const reflectionEngine = new ReflectionEngine({
llmAdapter,
trajectoryStore: new InMemoryTrajectoryStore(),
config: {
confidenceThreshold: 0.75,
batchSize: 4
}
});
const promptMutator = new PromptMutator({
maxMutationsPerGeneration: 7,
mutationRate: 0.3,
crossoverRate: 0.65,
enableCaching: true
});
// 2. Configure Evolution for Multi-modal Tasks
const evolutionConfig: EvolutionConfig = {
taskDescription: 'Optimize prompts for multi-modal visual understanding and analysis',
seedPrompt: `You are an expert vision AI assistant specializing in multi-modal analysis.
When analyzing images, focus on:
- Accurate visual perception and element identification
- Detailed, contextual descriptions that capture scene understanding
- Appropriate safety and content assessment
- Clear, helpful responses that address the specific task
- Professional, respectful tone in all interactions
Always consider the broader context and provide insights that demonstrate
comprehensive understanding of visual and textual information.`,
maxGenerations: 18,
populationSize: 22,
mutationRate: 0.32
};
// 3. Initialize Evolution Engine
evolutionEngine = new EvolutionEngine({
llmAdapter,
paretoFrontier,
reflectionEngine,
promptMutator,
trajectoryStore: new InMemoryTrajectoryStore(),
config: evolutionConfig
});
// 4. Monitor Progress
performanceTracker.subscribeToMetrics((metric) => {
if (metric.category === 'evolution') {
console.log(`📊 ${metric.name}: ${metric.duration}ms`);
}
});
// 5. Run Multi-modal Evolution
console.log('🧬 Starting multi-modal evolution...');
const evolutionResult = await evolutionEngine.startEvolution({
taskDescription: evolutionConfig.taskDescription,
seedPrompt: evolutionConfig.seedPrompt,
config: evolutionConfig
});
// 6. Analyze Results
console.log('\n📊 Multi-modal Optimization Results:');
console.log(`- Generations: ${evolutionResult.generations}`);
console.log(`- Convergence: ${evolutionResult.convergenceAchieved}`);
console.log(`- Total Evaluations: ${evolutionResult.totalRollouts}`);
const bestPrompt = evolutionResult.bestPrompt;
console.log(`\n🏆 Best Multi-modal Prompt (Score: ${bestPrompt.averageScore.toFixed(3)}):`);
console.log(`"${bestPrompt.content}"`);
// 7. Detailed Analysis
const multimodalAnalysis = await analyzeMultimodalResults(
evolutionResult,
performanceTracker,
paretoFrontier
);
console.log('\n🔍 Multi-modal Analysis:');
console.log(JSON.stringify(multimodalAnalysis, null, 2));
// 8. Test Scenarios
await testMultimodalScenarios(bestPrompt, llmAdapter);
// 9. Save Results
await saveOptimizedMultimodalPrompt(bestPrompt, multimodalAnalysis);
const finalMetric = performanceTracker.endMetricCollection(optimizationMetricId);
console.log(`\n⏱️ Total optimization time: ${finalMetric.duration}ms`);
} catch (error) {
console.error('❌ Multi-modal optimization failed:', error);
throw error;
} finally {
await evolutionEngine?.shutdown();
}
}
// Supporting functions...
async function analyzeMultimodalResults(evolutionResult: any, performanceTracker: PerformanceTracker, paretoFrontier: ParetoFrontier): Promise<any> {
const bestPrompt = evolutionResult.bestPrompt;
return {
optimization: {
finalScore: bestPrompt.averageScore,
generations: evolutionResult.generations,
convergenceAchieved: evolutionResult.convergenceAchieved
},
multimodalMetrics: {
visualAccuracy: bestPrompt.taskPerformance.get('visual-accuracy'),
descriptionQuality: bestPrompt.taskPerformance.get('description-quality'),
contextualUnderstanding: bestPrompt.taskPerformance.get('contextual-understanding'),
safetyCompliance: bestPrompt.taskPerformance.get('safety-compliance'),
responseTime: bestPrompt.taskPerformance.get('response-time')
},
paretoAnalysis: paretoFrontier.getStatistics(),
performance: performanceTracker.analyzeMemoryUsage()
};
}
async function testMultimodalScenarios(optimizedPrompt: PromptCandidate, adapter: MultimodalAdapter): Promise<void> {
console.log('\n🧪 Testing optimized multi-modal prompt...');
const testContext: TaskContext = {
description: 'Test multi-modal understanding capabilities',
category: 'multi-modal-analysis',
difficulty: 'medium',
requiredCapabilities: ['visual-perception', 'description', 'safety-assessment'],
expectedDuration: 45
};
try {
const testResult = await adapter.evaluatePrompt(optimizedPrompt.content, testContext);
console.log('✅ Multi-modal Test Results:');
console.log(`- Overall Score: ${testResult.score.toFixed(3)}`);
console.log(`- Visual Accuracy: ${testResult.trajectory.finalResult.metrics['visual-accuracy']?.toFixed(3)}`);
console.log(`- Description Quality: ${testResult.trajectory.finalResult.metrics['description-quality']?.toFixed(3)}`);
console.log(`- Safety Compliance: ${testResult.trajectory.finalResult.metrics['safety-compliance']?.toFixed(3)}`);
} catch (error) {
console.error('❌ Multi-modal test failed:', error.message);
}
}
async function saveOptimizedMultimodalPrompt(bestPrompt: PromptCandidate, analysis: any): Promise<void> {
const deployment = {
optimizedPrompt: bestPrompt.content,
metadata: {
version: '1.0.0',
type: 'multimodal-analysis',
optimizationDate: new Date().toISOString(),
performance: analysis.multimodalMetrics
},
usageGuidelines: [
'Suitable for image description, visual QA, and content moderation',
'Optimized for safety compliance and accuracy',
'Include specific visual analysis requirements in prompts',
'Monitor response quality in production environments'
],
integrationExample: `
const multimodalAnalyzer = new MultimodalAnalyzer({
prompt: "${bestPrompt.content}",
maxTokens: 1500,
temperature: 0.7
});
const result = await multimodalAnalyzer.analyze({
imageUrl: "https://example.com/image.jpg",
task: "describe",
safetyCheck: true
});
`
};
const fs = await import('fs/promises');
await fs.writeFile(
'./optimized-multimodal-prompt.json',
JSON.stringify(deployment, null, 2)
);
console.log('💾 Optimized multi-modal prompt saved');
}
class InMemoryTrajectoryStore {
private trajectories = new Map<string, ExecutionTrajectory>();
async save(trajectory: ExecutionTrajectory): Promise<void> {
this.trajectories.set(trajectory.id, trajectory);
}
async query(filter: any): Promise<ExecutionTrajectory[]> {
return Array.from(this.trajectories.values()).slice(0, filter.limit || 100);
}
async load(id: string): Promise<ExecutionTrajectory | null> {
return this.trajectories.get(id) || null;
}
}
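// Illustration only: a quick round-trip through the in-memory store. Note that
// query() here ignores filter fields other than `limit`; a production store would
// persist trajectories and support real filtering.
//
//   const store = new InMemoryTrajectoryStore();
//   await store.save(trajectory);
//   const recent = await store.query({ limit: 10 });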
// Example usage
if (require.main === module) {
optimizeMultimodalPrompts()
.then(() => {
console.log('✅ Multi-modal optimization completed!');
process.exit(0);
})
.catch((error) => {
console.error('❌ Optimization failed:', error);
process.exit(1);
});
}
export {
MultimodalAdapter,
multimodalScenarios,
optimizeMultimodalPrompts
};