pareto-frontier.test.ts•33.8 kB
/**
* Comprehensive test suite for Pareto Frontier Management System
*
* This file implements TEST-FIRST development for multi-objective optimization
* of prompt candidates using Pareto dominance relationships.
*
* Tests cover:
* - Domination relationship testing (isDominated)
* - Adding candidates and maintaining Pareto optimality
* - Sampling strategies (uniform, UCB, ε-greedy)
* - Performance with 1000+ candidates
* - Edge cases (empty frontier, single candidate)
* - Multi-objective optimization scenarios
* - Computational complexity (O(n log n))
*/
import { describe, it, expect, beforeEach } from 'vitest';
import type { PromptCandidate } from '../types/gepa';
import { TestDataGenerator, samplePromptCandidates } from '../test/fixtures/sample-data';
import {
ParetoFrontier,
type ParetoObjective,
type SamplingStrategy,
type ParetoFrontierConfig
} from './pareto-frontier';
describe('Pareto Frontier Management System', () => {
let paretoFrontier: ParetoFrontier;
let defaultObjectives: ParetoObjective[];
let config: ParetoFrontierConfig;
beforeEach(() => {
// Define multi-objective optimization targets
defaultObjectives = [
{
name: 'performance',
weight: 1.0,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => candidate.averageScore
},
{
name: 'efficiency',
weight: 0.8,
direction: 'minimize',
extractor: (candidate: PromptCandidate) => {
// Mock token efficiency calculation
const tokenCount = candidate.content.split(' ').length;
return tokenCount / (candidate.averageScore + 0.1);
}
},
{
name: 'diversity',
weight: 0.6,
direction: 'maximize',
extractor: (candidate: PromptCandidate) => {
// Mock diversity score based on content length and generation
return candidate.content.length * 0.001 + candidate.generation * 0.1;
}
}
];
config = {
objectives: defaultObjectives,
maxSize: 100,
samplingStrategy: { name: 'uniform' },
archiveEnabled: true
};
paretoFrontier = new ParetoFrontier(config);
});
describe('Domination Relationship Testing', () => {
it('should correctly identify when one candidate dominates another', () => {
const candidate1 = TestDataGenerator.generatePromptCandidate({
averageScore: 0.9,
content: 'Short prompt',
generation: 1
});
const candidate2 = TestDataGenerator.generatePromptCandidate({
averageScore: 0.7,
content: 'Much longer prompt with more tokens that reduces efficiency',
generation: 0
});
// candidate1 should dominate candidate2 (higher performance, better efficiency)
expect(paretoFrontier.isDominated(candidate2, candidate1)).toBe(true);
expect(paretoFrontier.isDominated(candidate1, candidate2)).toBe(false);
});
it('should identify non-dominated candidates correctly', () => {
const candidate1 = TestDataGenerator.generatePromptCandidate({
averageScore: 0.9,
content: 'Very long prompt that reduces efficiency but has high performance',
generation: 1
});
const candidate2 = TestDataGenerator.generatePromptCandidate({
averageScore: 0.7,
content: 'Short',
generation: 3
});
// Trade-off: candidate1 has better performance but worse efficiency
// candidate2 has worse performance but better efficiency and diversity
expect(paretoFrontier.isDominated(candidate1, candidate2)).toBe(false);
expect(paretoFrontier.isDominated(candidate2, candidate1)).toBe(false);
});
it('should handle identical candidates as non-dominating', () => {
const candidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.8,
content: 'Test prompt',
generation: 1
});
const identicalCandidate = { ...candidate, id: 'different-id' };
expect(paretoFrontier.isDominated(candidate, identicalCandidate)).toBe(false);
expect(paretoFrontier.isDominated(identicalCandidate, candidate)).toBe(false);
});
it('should correctly compute dominated candidates list', async () => {
// Add several candidates to frontier
const candidates = [
TestDataGenerator.generatePromptCandidate({ averageScore: 0.9, content: 'A', generation: 1 }),
TestDataGenerator.generatePromptCandidate({ averageScore: 0.7, content: 'B', generation: 2 }),
TestDataGenerator.generatePromptCandidate({ averageScore: 0.6, content: 'C', generation: 0 }),
];
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
// Superior candidate that should dominate some existing ones
const superiorCandidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.95,
content: 'Superior',
generation: 2
});
const dominated = paretoFrontier.getDominatedCandidates(superiorCandidate);
expect(dominated.length).toBeGreaterThan(0);
});
});
describe('Adding Candidates and Maintaining Pareto Optimality', () => {
it('should add non-dominated candidates to the frontier', async () => {
const initialSize = paretoFrontier.size();
const candidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.9,
content: 'Excellent prompt',
generation: 1
});
const added = await paretoFrontier.addCandidate(candidate);
expect(added).toBe(true);
expect(paretoFrontier.size()).toBe(initialSize + 1);
expect(paretoFrontier.getFrontier()).toHaveLength(1);
});
it('should reject dominated candidates', async () => {
// Add a superior candidate first
const superiorCandidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.95,
content: 'Short',
generation: 3
});
await paretoFrontier.addCandidate(superiorCandidate);
const initialSize = paretoFrontier.size();
// Try to add dominated candidate
const dominatedCandidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.7,
content: 'Much longer prompt with significantly more tokens',
generation: 1
});
const added = await paretoFrontier.addCandidate(dominatedCandidate);
expect(added).toBe(false);
expect(paretoFrontier.size()).toBe(initialSize);
});
it('should remove dominated candidates when adding superior ones', async () => {
// Add initial candidates
const candidates = [
TestDataGenerator.generatePromptCandidate({ averageScore: 0.7, content: 'Long prompt', generation: 1 }),
TestDataGenerator.generatePromptCandidate({ averageScore: 0.8, content: 'Medium', generation: 2 }),
];
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
const initialSize = paretoFrontier.size();
// Add superior candidate that dominates existing ones
const superiorCandidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.95,
content: 'Best',
generation: 3
});
await paretoFrontier.addCandidate(superiorCandidate);
// Frontier should be smaller due to dominated candidates being removed
expect(paretoFrontier.size()).toBeLessThanOrEqual(initialSize);
// Superior candidate should be in frontier
const frontier = paretoFrontier.getFrontier();
expect(frontier.some(point => point.candidate.id === superiorCandidate.id)).toBe(true);
});
it('should maintain optimal frontier size limit', async () => {
const limitedConfig = { ...config, maxSize: 5 };
const limitedFrontier = new ParetoFrontier(limitedConfig);
// Add more candidates than the limit
const candidates = TestDataGenerator.generatePromptCandidates(10);
for (const candidate of candidates) {
await limitedFrontier.addCandidate(candidate);
}
expect(limitedFrontier.size()).toBeLessThanOrEqual(limitedConfig.maxSize!);
});
});
describe('Sampling Strategies', () => {
beforeEach(async () => {
// Populate frontier with diverse candidates
const candidates = [
TestDataGenerator.generatePromptCandidate({
averageScore: 0.9,
rolloutCount: 10,
content: 'Short prompt A',
generation: 1
}),
TestDataGenerator.generatePromptCandidate({
averageScore: 0.8,
rolloutCount: 5,
content: 'This is a much longer prompt B with more tokens for efficiency testing',
generation: 2
}),
TestDataGenerator.generatePromptCandidate({
averageScore: 0.7,
rolloutCount: 20,
content: 'Medium length prompt C',
generation: 3
}),
TestDataGenerator.generatePromptCandidate({
averageScore: 0.85,
rolloutCount: 3,
content: 'Another different prompt D',
generation: 0
}),
];
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
});
describe('Uniform Sampling', () => {
it('should sample candidates uniformly from frontier', async () => {
const samples = [];
const sampleSize = 100;
for (let i = 0; i < sampleSize; i++) {
const sample = await paretoFrontier.sampleUniform();
if (sample) samples.push(sample.id);
}
expect(samples.length).toBe(sampleSize);
// Check that all frontier candidates were sampled (with high probability)
const uniqueSamples = new Set(samples);
expect(uniqueSamples.size).toBeGreaterThan(1);
});
it('should return null for empty frontier', async () => {
paretoFrontier.clear();
const sample = await paretoFrontier.sampleUniform();
expect(sample).toBeNull();
});
});
describe('UCB (Upper Confidence Bound) Sampling', () => {
it('should prefer candidates with high uncertainty', async () => {
const samples = [];
const sampleSize = 50;
for (let i = 0; i < sampleSize; i++) {
const sample = await paretoFrontier.sampleUCB(1.96);
if (sample) samples.push(sample);
}
expect(samples.length).toBe(sampleSize);
// Candidates with fewer rollouts should be sampled more frequently
const rolloutCounts = samples.map(s => s.rolloutCount);
const avgRollouts = rolloutCounts.reduce((a, b) => a + b, 0) / rolloutCounts.length;
// The average should be skewed towards lower rollout counts
expect(avgRollouts).toBeLessThan(15); // Threshold based on test data
});
it('should handle different confidence levels', async () => {
const conservativeSample = await paretoFrontier.sampleUCB(0.5);
const aggressiveSample = await paretoFrontier.sampleUCB(3.0);
expect(conservativeSample).not.toBeNull();
expect(aggressiveSample).not.toBeNull();
});
});
describe('ε-Greedy Sampling', () => {
it('should exploit best candidate most of the time', async () => {
const samples = [];
const sampleSize = 100;
const epsilon = 0.1;
for (let i = 0; i < sampleSize; i++) {
const sample = await paretoFrontier.sampleEpsilonGreedy(epsilon);
if (sample) samples.push(sample);
}
expect(samples.length).toBe(sampleSize);
// Count samples of the best candidate (averageScore = 0.9)
const bestCandidateId = paretoFrontier.getFrontier()
.reduce((best, current) =>
current.candidate.averageScore > best.candidate.averageScore ? current : best
).candidate.id;
const bestSamples = samples.filter(s => s.id === bestCandidateId).length;
const exploitationRate = bestSamples / sampleSize;
// Should exploit roughly (1 - epsilon) of the time
expect(exploitationRate).toBeGreaterThan(0.8);
});
it('should explore with specified epsilon rate', async () => {
const samples = [];
const sampleSize = 1000;
const epsilon = 0.3;
for (let i = 0; i < sampleSize; i++) {
const sample = await paretoFrontier.sampleEpsilonGreedy(epsilon);
if (sample) samples.push(sample);
}
const uniqueSamples = new Set(samples.map(s => s.id));
// With epsilon=0.3, we should see exploration (multiple different candidates)
expect(uniqueSamples.size).toBeGreaterThan(1);
});
});
describe('Strategy Selection', () => {
it('should use default strategy when none specified', async () => {
const sample = await paretoFrontier.sampleCandidate();
expect(sample).not.toBeNull();
});
it('should use specified strategy parameters', async () => {
const ucbStrategy: SamplingStrategy = {
name: 'ucb',
parameters: { confidence: 2.0 }
};
const sample = await paretoFrontier.sampleCandidate(ucbStrategy);
expect(sample).not.toBeNull();
});
it('should handle invalid strategies gracefully', async () => {
const invalidStrategy = { name: 'unknown' as any } as SamplingStrategy;
const sample = await paretoFrontier.sampleCandidate(invalidStrategy);
expect(sample).not.toBeNull(); // Should fall back to uniform
});
});
});
describe('Performance with Large Datasets', () => {
it('should handle 1000+ candidates efficiently', async () => {
const startTime = performance.now();
// Generate and add 1000 candidates
const candidates = TestDataGenerator.generatePromptCandidates(1000);
let addedCount = 0;
for (const candidate of candidates) {
const added = await paretoFrontier.addCandidate(candidate);
if (added) addedCount++;
}
const endTime = performance.now();
const executionTime = endTime - startTime;
// Should complete within reasonable time (adjust threshold as needed)
expect(executionTime).toBeLessThan(5000); // 5 seconds
// Frontier should contain non-dominated subset
expect(paretoFrontier.size()).toBeGreaterThan(0);
expect(paretoFrontier.size()).toBeLessThanOrEqual(candidates.length);
expect(addedCount).toBeGreaterThanOrEqual(paretoFrontier.size()); // Some added candidates may have been dominated later
// eslint-disable-next-line no-console
console.log(`Processed ${candidates.length} candidates in ${executionTime.toFixed(2)}ms`);
// eslint-disable-next-line no-console
console.log(`Frontier size: ${paretoFrontier.size()}`);
// eslint-disable-next-line no-console
console.log(`Acceptance rate: ${(addedCount / candidates.length * 100).toFixed(2)}%`);
});
it('should maintain reasonable computational complexity', async () => {
const sizes = [100, 200, 400];
const times: number[] = [];
for (const size of sizes) {
paretoFrontier.clear();
const candidates = TestDataGenerator.generatePromptCandidates(size);
const startTime = performance.now();
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
const endTime = performance.now();
times.push(endTime - startTime);
}
// Check that algorithm completes in reasonable time
for (const time of times) {
expect(time).toBeLessThan(1000); // Should complete within 1 second
}
// Check that time doesn't grow exponentially
for (let i = 1; i < times.length; i++) {
const sizeRatio = sizes[i] / sizes[i - 1];
const timeRatio = times[i] / times[i - 1];
// Time should not grow exponentially - allow generous margin for test environment variations
expect(timeRatio).toBeLessThan(Math.pow(sizeRatio, 3.0)); // Very generous bound
}
// eslint-disable-next-line no-console
console.log('Complexity analysis:', {
sizes,
times: times.map(t => `${t.toFixed(2)}ms`),
ratios: times.slice(1).map((t, i) => (t / times[i]).toFixed(2))
});
});
it('should efficiently sample from large frontiers', async () => {
// Add many candidates to create large frontier
const candidates = TestDataGenerator.generatePromptCandidates(500);
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
const startTime = performance.now();
// Perform many sampling operations
const samples = [];
for (let i = 0; i < 1000; i++) {
const sample = await paretoFrontier.sampleUniform();
if (sample) samples.push(sample);
}
const endTime = performance.now();
const samplingTime = endTime - startTime;
expect(samples.length).toBe(1000);
expect(samplingTime).toBeLessThan(1000); // Should be very fast
// eslint-disable-next-line no-console
console.log(`1000 samples from ${paretoFrontier.size()}-candidate frontier in ${samplingTime.toFixed(2)}ms`);
});
});
describe('Edge Cases', () => {
describe('Empty Frontier', () => {
it('should handle empty frontier gracefully', () => {
expect(paretoFrontier.size()).toBe(0);
expect(paretoFrontier.getFrontier()).toHaveLength(0);
});
it('should return null when sampling from empty frontier', async () => {
expect(await paretoFrontier.sampleUniform()).toBeNull();
expect(await paretoFrontier.sampleUCB(1.0)).toBeNull();
expect(await paretoFrontier.sampleEpsilonGreedy(0.1)).toBeNull();
expect(await paretoFrontier.sampleCandidate()).toBeNull();
});
it('should compute zero hypervolume for empty frontier', () => {
expect(paretoFrontier.computeHypervolume()).toBe(0);
});
it('should provide meaningful statistics for empty frontier', () => {
const stats = paretoFrontier.getStatistics();
expect(stats.totalCandidates).toBe(0);
expect(stats.frontierSize).toBe(0);
expect(stats.averageRank).toBe(0);
expect(stats.objectives.size).toBe(0);
});
});
describe('Single Candidate', () => {
beforeEach(async () => {
const candidate = TestDataGenerator.generatePromptCandidate({
averageScore: 0.8,
content: 'Single candidate',
generation: 1
});
await paretoFrontier.addCandidate(candidate);
});
it('should handle single candidate frontier correctly', () => {
expect(paretoFrontier.size()).toBe(1);
expect(paretoFrontier.getFrontier()).toHaveLength(1);
});
it('should always sample the single candidate', async () => {
const uniform = await paretoFrontier.sampleUniform();
const ucb = await paretoFrontier.sampleUCB(1.0);
const epsilon = await paretoFrontier.sampleEpsilonGreedy(0.5);
expect(uniform).not.toBeNull();
expect(ucb).not.toBeNull();
expect(epsilon).not.toBeNull();
// All should return the same candidate
expect(uniform!.id).toBe(ucb!.id);
expect(ucb!.id).toBe(epsilon!.id);
});
it('should compute positive hypervolume for single candidate', () => {
expect(paretoFrontier.computeHypervolume()).toBeGreaterThan(0);
});
});
describe('Identical Objectives', () => {
it('should handle candidates with identical objective values', async () => {
const candidates = [
TestDataGenerator.generatePromptCandidate({ averageScore: 0.8, content: 'Same', generation: 1 }),
TestDataGenerator.generatePromptCandidate({ averageScore: 0.8, content: 'Same', generation: 1 }),
TestDataGenerator.generatePromptCandidate({ averageScore: 0.8, content: 'Same', generation: 1 }),
];
let addedCount = 0;
for (const candidate of candidates) {
const added = await paretoFrontier.addCandidate(candidate);
if (added) addedCount++;
}
// First candidate should be added, others should be rejected (not strictly better)
expect(addedCount).toBe(1);
expect(paretoFrontier.size()).toBe(1);
});
});
describe('Invalid Objectives', () => {
it('should handle candidates with missing objective values gracefully', async () => {
const candidateWithNaN = TestDataGenerator.generatePromptCandidate({
averageScore: NaN,
content: 'Invalid score'
});
// Should handle gracefully (either reject or use default)
const added = await paretoFrontier.addCandidate(candidateWithNaN);
expect(typeof added).toBe('boolean');
});
it('should handle extreme objective values', async () => {
const extremeCandidates = [
TestDataGenerator.generatePromptCandidate({ averageScore: Number.MAX_VALUE }),
TestDataGenerator.generatePromptCandidate({ averageScore: Number.MIN_VALUE }),
TestDataGenerator.generatePromptCandidate({ averageScore: 0 }),
TestDataGenerator.generatePromptCandidate({ averageScore: 1 }),
];
for (const candidate of extremeCandidates) {
const added = await paretoFrontier.addCandidate(candidate);
expect(typeof added).toBe('boolean');
}
expect(paretoFrontier.size()).toBeGreaterThan(0);
});
});
});
describe('Multi-Objective Optimization Scenarios', () => {
describe('Three-Objective Optimization', () => {
beforeEach(async () => {
// Performance vs Efficiency vs Diversity trade-off
const candidates = [
// High performance, low efficiency, medium diversity
TestDataGenerator.generatePromptCandidate({
id: 'high-perf',
averageScore: 0.95,
content: 'Very detailed and comprehensive prompt that achieves excellent results but uses many tokens',
generation: 1
}),
// Medium performance, high efficiency, low diversity
TestDataGenerator.generatePromptCandidate({
id: 'high-eff',
averageScore: 0.75,
content: 'Short',
generation: 0
}),
// Low performance, medium efficiency, high diversity
TestDataGenerator.generatePromptCandidate({
id: 'high-div',
averageScore: 0.6,
content: 'Experimental approach with novel techniques',
generation: 5
}),
// Balanced trade-off
TestDataGenerator.generatePromptCandidate({
id: 'balanced',
averageScore: 0.8,
content: 'Moderate length prompt',
generation: 2
})
];
await Promise.all(candidates.map(c => paretoFrontier.addCandidate(c)));
});
it('should identify non-dominated solutions across all objectives', () => {
const frontier = paretoFrontier.getFrontier();
expect(frontier.length).toBeGreaterThan(1);
// Each frontier member should be non-dominated
for (let i = 0; i < frontier.length; i++) {
for (let j = 0; j < frontier.length; j++) {
if (i !== j) {
expect(paretoFrontier.isDominated(
frontier[i].candidate,
frontier[j].candidate
)).toBe(false);
}
}
}
});
it('should provide diverse sampling across trade-off regions', async () => {
const samples = [];
for (let i = 0; i < 100; i++) {
const sample = await paretoFrontier.sampleUniform();
if (sample) samples.push(sample);
}
// Should sample from different trade-off regions
const uniqueIds = new Set(samples.map(s => s.id));
expect(uniqueIds.size).toBeGreaterThan(1);
});
it('should compute meaningful hypervolume', () => {
const hypervolume = paretoFrontier.computeHypervolume();
expect(hypervolume).toBeGreaterThan(0);
// Adding a dominated candidate shouldn't increase hypervolume
const dominated = TestDataGenerator.generatePromptCandidate({
averageScore: 0.5,
content: 'Very long and inefficient prompt that performs poorly',
generation: 0
});
paretoFrontier.addCandidate(dominated);
const newHypervolume = paretoFrontier.computeHypervolume();
expect(newHypervolume).toBeLessThanOrEqual(hypervolume);
});
});
describe('Convergence Analysis', () => {
it('should track convergence metrics over time', async () => {
const generations = 10;
const candidatesPerGeneration = 20;
for (let gen = 0; gen < generations; gen++) {
const candidates = TestDataGenerator.generatePromptCandidates(candidatesPerGeneration);
for (const candidate of candidates) {
candidate.generation = gen;
await paretoFrontier.addCandidate(candidate);
}
const metrics = paretoFrontier.getConvergenceMetrics();
expect(metrics.diversity).toBeGreaterThanOrEqual(0);
expect(metrics.spacing).toBeGreaterThanOrEqual(0);
expect(metrics.spread).toBeGreaterThanOrEqual(0);
expect(metrics.hypervolume).toBeGreaterThanOrEqual(0);
}
const finalStats = paretoFrontier.getStatistics();
expect(finalStats.convergenceHistory.length).toBeGreaterThan(0);
});
it('should show improving convergence metrics', async () => {
const initialMetrics = paretoFrontier.getConvergenceMetrics();
// Add high-quality diverse candidates
const qualityCandidates = [
TestDataGenerator.generatePromptCandidate({
averageScore: 0.95,
content: 'A',
generation: 3
}),
TestDataGenerator.generatePromptCandidate({
averageScore: 0.85,
content: 'Very different approach with unique characteristics',
generation: 4
}),
TestDataGenerator.generatePromptCandidate({
averageScore: 0.92,
content: 'Medium length',
generation: 5
})
];
for (const candidate of qualityCandidates) {
await paretoFrontier.addCandidate(candidate);
}
const improvedMetrics = paretoFrontier.getConvergenceMetrics();
// Hypervolume should improve with better candidates
expect(improvedMetrics.hypervolume).toBeGreaterThanOrEqual(initialMetrics.hypervolume);
});
});
describe('Archive Optimization', () => {
it('should optimize archive size while preserving diversity', async () => {
// Fill frontier with many candidates
const candidates = TestDataGenerator.generatePromptCandidates(50);
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
const originalSize = paretoFrontier.size();
const targetSize = Math.min(10, originalSize);
if (originalSize > targetSize) {
const optimizedArchive = await paretoFrontier.optimizeArchive(targetSize);
expect(optimizedArchive.length).toBeLessThanOrEqual(targetSize);
expect(optimizedArchive.length).toBeGreaterThan(0);
// All archived candidates should be from the original frontier
const frontierIds = new Set(paretoFrontier.getFrontier().map(p => p.candidate.id));
for (const point of optimizedArchive) {
expect(frontierIds.has(point.candidate.id)).toBe(true);
}
}
});
});
});
describe('Statistics and Analytics', () => {
beforeEach(async () => {
// Add diverse candidates for meaningful statistics
const candidates = [
...samplePromptCandidates,
...TestDataGenerator.generatePromptCandidates(10)
];
for (const candidate of candidates) {
await paretoFrontier.addCandidate(candidate);
}
});
it('should provide comprehensive frontier statistics', () => {
const stats = paretoFrontier.getStatistics();
expect(stats.totalCandidates).toBeGreaterThan(0);
expect(stats.frontierSize).toBeGreaterThan(0);
expect(stats.frontierSize).toBeLessThanOrEqual(stats.totalCandidates);
expect(stats.averageRank).toBeGreaterThan(0);
expect(stats.objectives.size).toBe(defaultObjectives.length);
// Check objective statistics
for (const obj of defaultObjectives) {
const objStats = stats.objectives.get(obj.name);
expect(objStats).toBeDefined();
expect(objStats!.min).toBeLessThanOrEqual(objStats!.max);
expect(objStats!.mean).toBeGreaterThanOrEqual(objStats!.min);
expect(objStats!.mean).toBeLessThanOrEqual(objStats!.max);
expect(objStats!.std).toBeGreaterThanOrEqual(0);
}
});
it('should track convergence history', () => {
const stats = paretoFrontier.getStatistics();
expect(Array.isArray(stats.convergenceHistory)).toBe(true);
const metrics = paretoFrontier.getConvergenceMetrics();
expect(typeof metrics.diversity).toBe('number');
expect(typeof metrics.spacing).toBe('number');
expect(typeof metrics.spread).toBe('number');
expect(typeof metrics.hypervolume).toBe('number');
expect(typeof metrics.generationalDistance).toBe('number');
});
it('should compute hypervolume with different reference points', () => {
const defaultHV = paretoFrontier.computeHypervolume();
const customReference = new Map<string, number>([
['performance', 0.5],
['efficiency', 1000],
['diversity', 0]
]);
const customHV = paretoFrontier.computeHypervolume(customReference);
expect(defaultHV).toBeGreaterThan(0);
expect(customHV).toBeGreaterThan(0);
// Different reference points should give different hypervolumes
expect(defaultHV).not.toBe(customHV);
});
});
describe('Integration with Sample Data', () => {
it('should work with predefined sample candidates', async () => {
for (const candidate of samplePromptCandidates) {
const added = await paretoFrontier.addCandidate(candidate);
expect(typeof added).toBe('boolean');
}
expect(paretoFrontier.size()).toBeGreaterThan(0);
expect(paretoFrontier.size()).toBeLessThanOrEqual(samplePromptCandidates.length);
// Should be able to sample from frontier
const sample = await paretoFrontier.sampleCandidate();
expect(sample).not.toBeNull();
// Sample should be one of the original candidates
const originalIds = samplePromptCandidates.map(c => c.id);
expect(originalIds).toContain(sample!.id);
});
it('should handle TestDataGenerator candidates', async () => {
const generatedCandidates = TestDataGenerator.generatePromptCandidates(20);
for (const candidate of generatedCandidates) {
const added = await paretoFrontier.addCandidate(candidate);
expect(typeof added).toBe('boolean');
}
expect(paretoFrontier.size()).toBeGreaterThan(0);
// Test various sampling strategies
const uniform = await paretoFrontier.sampleUniform();
const ucb = await paretoFrontier.sampleUCB(1.96);
const epsilon = await paretoFrontier.sampleEpsilonGreedy(0.1);
expect(uniform).not.toBeNull();
expect(ucb).not.toBeNull();
expect(epsilon).not.toBeNull();
});
});
describe('Error Handling and Robustness', () => {
it('should handle concurrent modifications gracefully', async () => {
const candidates = TestDataGenerator.generatePromptCandidates(10);
// Simulate concurrent additions
const promises = candidates.map(candidate =>
paretoFrontier.addCandidate(candidate)
);
const results = await Promise.all(promises);
expect(results.every(r => typeof r === 'boolean')).toBe(true);
expect(paretoFrontier.size()).toBeGreaterThan(0);
expect(paretoFrontier.size()).toBeLessThanOrEqual(candidates.length);
});
it('should validate candidate data integrity', async () => {
const invalidCandidate = {
...TestDataGenerator.generatePromptCandidate(),
averageScore: -1 // Invalid score
};
// Should either reject invalid candidate or handle gracefully
const added = await paretoFrontier.addCandidate(invalidCandidate);
expect(typeof added).toBe('boolean');
});
it('should handle memory constraints gracefully', async () => {
// Create a frontier with very limited size
const constrainedConfig = { ...config, maxSize: 3 };
const constrainedFrontier = new ParetoFrontier(constrainedConfig);
// Add many candidates
const candidates = TestDataGenerator.generatePromptCandidates(20);
for (const candidate of candidates) {
await constrainedFrontier.addCandidate(candidate);
expect(constrainedFrontier.size()).toBeLessThanOrEqual(3);
}
// Should maintain functionality even with constraints
const sample = await constrainedFrontier.sampleCandidate();
expect(sample).not.toBeNull();
});
});
});