// Single Agent Puppet Production Pipeline
// Implements the detailed specification from the attached document
import OpenAI from 'openai';
import fs from 'fs/promises';
import path from 'path';
// Constants (locked specifications)
export const PIPELINE_CONSTANTS = {
ANGLES: ['front', 'left', 'right', 'back', '3q-left', '3q-right'],
EMOTIONS: ['neutral', 'happy', 'sad', 'angry', 'surprised', 'disgust', 'fear', 'smirk'],
MOUTH_STATES: ['closed', 'open-small', 'open-wide', 'tongue-out', 'teeth-showing'],
LIGHTING: 'soft even studio',
BACKGROUND: 'plain light gray',
OUTPUT_SIZE: '1024x1024',
STYLE_LOCK: 'same character proportions, color palette, and materials across all shots',
FRAMING: 'bust',
CAMERA_HEIGHT: 'eye-level'
};
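// Full Cartesian expansion: 6 angles x 8 emotions x 5 mouth states = 240 shots per character.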
// QC Thresholds
export const QC_THRESHOLDS = {
PALETTE_LOCK: 0.95,
PROPORTIONS_LOCK: 0.97,
ACCESSORIES_PRESENT: 1.0,
ANGLE_MATCH: 0.92,
EMOTION_MATCH: 0.80,
MOUTH_STATE_MATCH: 0.95,
ARTIFACT_CHECK: 0.90,
BACKGROUND_LOCK: 0.98,
OVERALL_PASS_RATE: 0.80
};
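// Illustrative helper (not part of the original spec): QC score keys are lower-case
// ("palette_lock") while the threshold keys above are upper-case; this sketch shows the
// intended lookup. Unknown keys (no matching threshold) are treated as passing.
export function meetsThreshold(scoreKey: string, score: number): boolean {
  const threshold = QC_THRESHOLDS[scoreKey.toUpperCase() as keyof typeof QC_THRESHOLDS];
  return !threshold || score >= threshold;
}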
// Data Contracts
export interface CharacterIdentifiers {
character_id: string;
name: string;
source_reference: string;
anatomy: {
species_or_type: string;
height_relative: string;
proportions_notes: string;
silhouette_keywords: string[];
};
colors_materials: {
primary_palette: string[];
secondary_palette: string[];
materials: string[];
};
surface_features: {
fur_pattern: string;
scars_markings: string;
eye_details: {
iris_color: string;
pupil_shape: string;
};
mouth_teeth_tongue: {
teeth: string;
tongue: string;
};
};
costume_baseline: {
garment: string;
footwear: string;
logo_text: string | null;
};
accessories: string[];
mechanics: {
mouth_states_allowed: string[];
jaw_hinge_visibility: string;
ear_flex: string;
eye_gaze_rules: string;
};
forbidden_changes: string[];
notes: string;
}
export interface ShotSpec {
character_id: string;
angle: string;
emotion: string;
mouth_state: string;
lighting: string;
background: string;
framing: string;
camera_height: string;
notes: string;
}
export interface QCItem {
filename: string;
shot_spec: Partial<ShotSpec>;
scores: {
palette_lock: number;
proportions_lock: number;
accessories_present: number;
angle_match: number;
emotion_match: number;
mouth_state_match: number;
artifact_check: number;
background_lock: number;
};
status: 'pass' | 'auto-retry' | 'fail';
notes: string;
retry_prompt_delta?: string;
}
export interface QCReport {
batch_id: string;
pass_rate: number;
items: QCItem[];
}
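// Example (illustrative only; character_id is a placeholder): a single ShotSpec as
// produced by the Cartesian expansion in generateShotPlan, using the locked constants.
export const EXAMPLE_SHOT_SPEC: ShotSpec = {
  character_id: 'puppet_example',
  angle: 'front',
  emotion: 'neutral',
  mouth_state: 'closed',
  lighting: PIPELINE_CONSTANTS.LIGHTING,
  background: PIPELINE_CONSTANTS.BACKGROUND,
  framing: PIPELINE_CONSTANTS.FRAMING,
  camera_height: PIPELINE_CONSTANTS.CAMERA_HEIGHT,
  notes: 'lock palette and proportions; no background props'
};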
export class SingleAgentPuppetPipeline {
private openai: OpenAI;
private basePath: string;
constructor(openaiApiKey: string, basePath: string = './puppet-pipeline') {
this.openai = new OpenAI({ apiKey: openaiApiKey });
this.basePath = basePath;
// Fire-and-forget: directory creation errors are logged rather than thrown from the constructor
this.initializeDirectories().catch(err => console.error('Failed to initialize directories:', err));
}
private async initializeDirectories() {
const dirs = [
'01_input/reference',
'02_captions',
'03_specs',
'04_generations',
'05_qc',
'06_delivery'
];
for (const dir of dirs) {
await fs.mkdir(path.join(this.basePath, dir), { recursive: true });
}
}
// Step 1: Enhanced Vision Analysis for Image Conditioning Compensation
async describeReference(referenceImagePath: string): Promise<any> {
const prompt = `You are a forensic character analyst for precise puppet reproduction. Since this description will replace image conditioning, be EXTREMELY detailed.
MANDATORY ANALYSIS (pixel-level precision):
ANATOMY & PROPORTIONS:
- Head-to-body ratio (exact measurements in pixels if visible)
- Limb proportions relative to torso
- Ear shape, size, position, angle, texture
- Tail presence, length, thickness, curve direction
- Overall silhouette outline description
- Posture and stance characteristics
COLOR PALETTE (CRITICAL - provide exact hex codes):
- Primary colors: dominant surface colors with hex values
- Secondary colors: accent colors with hex values
- Eye colors: iris, pupil, whites with hex codes
- Mouth/teeth/tongue: exact colors with hex codes
- Shadow colors: darker variants with hex codes
- Highlight colors: lighter variants with hex codes
MATERIALS & TEXTURES:
- Surface material type (felt, fur, fabric, plastic, etc.)
- Texture direction and pattern
- Reflectivity and sheen characteristics
- Stitching lines: color, pattern, thickness
- Seam locations and visibility
- Fabric weave or fur direction
FACIAL FEATURES (microscopic detail):
- Eye shape: exact geometric description
- Eyelid presence, thickness, color
- Pupil shape: round/oval/other
- Iris patterns or flecks
- Eyelash presence and characteristics
- Eyebrow shape, color, thickness
- Nose/snout: shape, nostril details, coloring
- Mouth shape when closed
- Lip thickness and color
- Teeth visibility, shape, color when visible
- Tongue shape, length, color, texture
DISTINCTIVE FEATURES:
- Scars: location, size, color, texture
- Markings: shape, color, symmetry
- Freckles or spots: distribution pattern
- Asymmetries: any differences between left/right
- Wear patterns or damage
- Unique identifiers
ACCESSORIES & ODDITIES (auto-detect):
- Glasses: frame color, lens type, position
- Hats: style, color, fit, material
- Jewelry: type, material, color, position
- Clothing: exact garments, colors, patterns, fit
- Props or attachments
- Damage or repairs visible
- Stickers, badges, or additional decorations
MECHANICS (puppet-specific):
- Jaw hinge visibility and mechanics
- Mouth opening capability assessment
- Ear flexibility indicators
- Eye movement mechanics if visible
- Control rod attachment points
Respond as JSON: {
"ultra_detailed_description": "exhaustive prose description",
"color_palette": ["#hex1", "#hex2", ...],
"accessories_detected": ["item1", "item2", ...],
"oddities_detected": ["anomaly1", "anomaly2", ...],
"reproduction_critical_features": ["feature1", "feature2", ...]
}`;
try {
const imageData = await fs.readFile(referenceImagePath);
const base64Image = imageData.toString('base64');
const response = await this.openai.chat.completions.create({
model: "gpt-4o",
messages: [
{
role: "user",
content: [
{ type: "text", text: prompt },
{
type: "image_url",
image_url: {
url: `data:image/png;base64,${base64Image}`,
detail: "high"
}
}
]
}
],
temperature: 0.2
});
const captionText = response.choices[0].message.content || '';
let captionData;
try {
captionData = JSON.parse(captionText);
} catch {
// Fallback if not JSON
captionData = {
ultra_detailed_description: captionText,
color_palette: [],
accessories_detected: [],
oddities_detected: [],
reproduction_critical_features: []
};
}
// Save caption
const captionPath = path.join(this.basePath, '02_captions', 'character.caption.json');
await fs.writeFile(captionPath, JSON.stringify(captionData, null, 2));
return captionData;
} catch (error) {
console.error('Failed to describe reference:', error);
throw error;
}
}
// Step 2: Build Character Identifiers
async buildIdentifiers(captionData: any, seedIdentifiers?: Partial<CharacterIdentifiers>): Promise<CharacterIdentifiers> {
const prompt = `Merge the caption with optional seed identifiers into final Character Identifiers JSON.
Use this exact schema structure. Fill hex colors from caption palette. Preserve any 'forbidden_changes'.
Return ONLY the JSON object, no commentary.
SCHEMA EXAMPLE:
{
"character_id": "string",
"name": "string",
"source_reference": "path-or-url",
"anatomy": {
"species_or_type": "e.g., puppet gremlin",
"height_relative": "e.g., small",
"proportions_notes": "e.g., large head, short limbs",
"silhouette_keywords": ["rounded ears","tapered snout"]
},
"colors_materials": {
"primary_palette": ["#A1B2C3","#334455"],
"secondary_palette": ["#..."],
"materials": ["felt","faux fur","plastic eyes","stitched mouth"]
},
"surface_features": {
"fur_pattern": "describe zones and direction",
"scars_markings": "none or details",
"eye_details": {"iris_color":"hex","pupil_shape":"round"},
"mouth_teeth_tongue": {"teeth":"flat white","tongue":"pink #F29CB2"}
},
"costume_baseline": {
"garment": "yellow raincoat",
"footwear": "none",
"logo_text": null
},
"accessories": ["round glasses"],
"mechanics": {
"mouth_states_allowed": ["closed","open-small","open-wide","tongue-out","teeth-showing"],
"jaw_hinge_visibility": "hidden",
"ear_flex": "none",
"eye_gaze_rules": "camera unless specified"
},
"forbidden_changes": [
"do not change eye color",
"no new scars",
"preserve fur pattern zones"
],
"notes": "any extra lock-ins"
}`;
const response = await this.openai.chat.completions.create({
model: "gpt-4o",
messages: [
{ role: "system", content: prompt },
{
role: "user",
content: JSON.stringify({
caption: captionData,
seed: seedIdentifiers || {}
})
}
],
response_format: { type: "json_object" },
temperature: 0
});
const identifiers = JSON.parse(response.choices[0].message.content || '{}') as CharacterIdentifiers;
// Save identifiers
const identifiersPath = path.join(this.basePath, '03_specs', 'identifiers.final.json');
await fs.writeFile(identifiersPath, JSON.stringify(identifiers, null, 2));
return identifiers;
}
// Step 3: Cartesian Expansion - Generate Shot Plan
generateShotPlan(
characterId: string,
angles: string[] = PIPELINE_CONSTANTS.ANGLES,
emotions: string[] = PIPELINE_CONSTANTS.EMOTIONS,
mouthStates: string[] = PIPELINE_CONSTANTS.MOUTH_STATES
): ShotSpec[] {
const shots: ShotSpec[] = [];
for (const angle of angles) {
for (const emotion of emotions) {
for (const mouthState of mouthStates) {
shots.push({
character_id: characterId,
angle,
emotion,
mouth_state: mouthState,
lighting: PIPELINE_CONSTANTS.LIGHTING,
background: PIPELINE_CONSTANTS.BACKGROUND,
framing: PIPELINE_CONSTANTS.FRAMING,
camera_height: PIPELINE_CONSTANTS.CAMERA_HEIGHT,
notes: "lock palette and proportions; no background props"
});
}
}
}
return shots;
}
// Generate retry prompt delta for failed QC items
generateRetryPromptDelta(qcResult: QCItem): string {
const failedCategories = Object.entries(qcResult.scores)
.filter(([key, score]) => {
const threshold = QC_THRESHOLDS[key.toUpperCase() as keyof typeof QC_THRESHOLDS];
return threshold && score < threshold;
})
.map(([key]) => key);
const corrections = [];
if (failedCategories.includes('palette_lock')) {
corrections.push('ENFORCE EXACT COLOR MATCHING - do not deviate from specified hex codes');
}
if (failedCategories.includes('proportions_lock')) {
corrections.push('MAINTAIN PRECISE PROPORTIONS - do not alter head/body ratios or limb lengths');
}
if (failedCategories.includes('accessories_present')) {
corrections.push('INCLUDE ALL ACCESSORIES - ensure glasses, hats, jewelry are visible');
}
if (failedCategories.includes('angle_match')) {
corrections.push('CORRECT CAMERA ANGLE - adjust positioning to match specified angle exactly');
}
if (failedCategories.includes('emotion_match')) {
corrections.push('ACCURATE FACIAL EXPRESSION - ensure emotion is clearly displayed');
}
if (failedCategories.includes('mouth_state_match')) {
corrections.push('PRECISE MOUTH POSITION - match specified mouth state exactly');
}
if (failedCategories.includes('artifact_check')) {
corrections.push('CLEAN RENDER ONLY - no extra limbs, distortions, or melting edges');
}
if (failedCategories.includes('background_lock')) {
corrections.push('PLAIN GRAY BACKGROUND ONLY - remove all objects and textures');
}
return corrections.join('. ');
}
// Step 4: Enhanced OpenAI DALL-E 3 Shot Generation with Ultra-Detailed Prompts
async generateShot(
referenceImagePath: string,
identifiers: CharacterIdentifiers,
shot: ShotSpec,
outputPath: string,
ultraDetailedDescription?: string,
retryPromptDelta?: string
): Promise<void> {
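// Note: the DALL-E 3 image endpoint used below takes no conditioning image, so
// referenceImagePath is not consumed here; character identity is carried entirely
// by the ultra-detailed text prompt assembled from the identifiers and caption.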
const baseGuardrails = [
"CRITICAL: Preserve exact colors and materials from character identifiers",
"MANDATORY: Do not change eye color, fur pattern zones, or garment details",
"REQUIRED: Maintain exact proportions and silhouette measurements",
"ESSENTIAL: Plain light-gray background only, absolutely no props or objects",
"VITAL: Same character across all shots - no variations in appearance",
...identifiers.forbidden_changes
];
// Add retry-specific guardrails if this is a retry
const allGuardrails = retryPromptDelta
? [...baseGuardrails, `RETRY CORRECTION: ${retryPromptDelta}`]
: baseGuardrails;
// Enhanced prompt with ultra-detailed reconstruction to compensate for lack of image conditioning
const prompt = `Generate a professional studio photograph of a puppet character with these EXACT specifications:
ULTRA-DETAILED CHARACTER RECONSTRUCTION (CRITICAL - must match exactly):
${ultraDetailedDescription || identifiers.notes}
SHOT REQUIREMENTS:
- Camera angle: ${shot.angle} (precise positioning)
- Facial expression: ${shot.emotion} (accurate emotion display)
- Mouth position: ${shot.mouth_state} (exact mouth configuration)
- Framing: ${shot.framing} (standard puppet photography framing)
- Camera height: ${shot.camera_height} (professional level positioning)
- Studio lighting: ${shot.lighting} (even, professional illumination)
- Background: ${shot.background} (solid color, no textures or objects)
COLOR PALETTE (EXACT MATCHES REQUIRED):
Primary colors: ${identifiers.colors_materials.primary_palette.join(', ')}
Secondary colors: ${identifiers.colors_materials.secondary_palette.join(', ')}
Materials: ${identifiers.colors_materials.materials.join(', ')}
ANATOMY SPECIFICATIONS:
Species/Type: ${identifiers.anatomy.species_or_type}
Proportions: ${identifiers.anatomy.proportions_notes}
Silhouette keywords: ${identifiers.anatomy.silhouette_keywords.join(', ')}
SURFACE FEATURES (MUST REPLICATE):
Fur pattern: ${identifiers.surface_features.fur_pattern}
Scars/markings: ${identifiers.surface_features.scars_markings}
Eye details: ${JSON.stringify(identifiers.surface_features.eye_details)}
Mouth/teeth/tongue: ${JSON.stringify(identifiers.surface_features.mouth_teeth_tongue)}
COSTUME & ACCESSORIES (EXACT REPLICATION):
Garment: ${identifiers.costume_baseline.garment}
Footwear: ${identifiers.costume_baseline.footwear}
Logo/text: ${identifiers.costume_baseline.logo_text || 'none'}
Accessories: ${identifiers.accessories.join(', ') || 'none'}
PUPPET MECHANICS:
Mouth states allowed: ${identifiers.mechanics.mouth_states_allowed.join(', ')}
Jaw hinge: ${identifiers.mechanics.jaw_hinge_visibility}
Ear flexibility: ${identifiers.mechanics.ear_flex}
Eye gaze: ${identifiers.mechanics.eye_gaze_rules}
CRITICAL GUARDRAILS (NEVER VIOLATE):
- ${allGuardrails.join('\n- ')}
TECHNICAL SPECIFICATIONS:
- Studio photography style
- High resolution, crisp details
- Professional puppet photography
- Consistent character representation
- ${shot.notes}
Generate this character exactly as described. Any deviation from colors, proportions, or features is unacceptable.`;
try {
console.log(`🎨 Generating ${shot.angle}-${shot.emotion}-${shot.mouth_state} with OpenAI DALL-E 3...`);
const response = await this.openai.images.generate({
model: "dall-e-3",
prompt: prompt,
size: "1024x1024",
quality: "hd", // Use HD quality for better detail reproduction
style: "natural", // Natural style for puppet photography
n: 1
});
// Download generated image
if (!response.data || !response.data[0] || !response.data[0].url) {
throw new Error('No image URL returned from OpenAI DALL-E 3');
}
const imageUrl = response.data[0].url;
const imageResponse = await fetch(imageUrl);
const imageBuffer = await imageResponse.arrayBuffer();
await fs.mkdir(path.dirname(outputPath), { recursive: true });
await fs.writeFile(outputPath, Buffer.from(imageBuffer));
} catch (error) {
console.error('Failed to generate shot with OpenAI DALL-E 3:', error);
throw error;
}
}
// Step 5: Enhanced QC with Quantitative Analysis
async qcImage(
imagePath: string,
identifiers: CharacterIdentifiers,
shot: ShotSpec
): Promise<QCItem> {
const qcPrompt = `Evaluate if this generated image matches the Character Identifiers and Shot Spec with QUANTITATIVE precision.
CRITICAL SCORING (0.0 to 1.0):
1. PALETTE_LOCK: Compare colors to these exact hex codes: ${identifiers.colors_materials.primary_palette.join(', ')}.
Score 1.0 only if colors match within ±5% HSV tolerance. Threshold: ${QC_THRESHOLDS.PALETTE_LOCK}
2. PROPORTIONS_LOCK: Measure head/body ratio against: ${identifiers.anatomy.proportions_notes}.
Score 1.0 only if ratios are within ±3% of baseline. Threshold: ${QC_THRESHOLDS.PROPORTIONS_LOCK}
3. ACCESSORIES_PRESENT: Check for: ${identifiers.accessories.join(', ')}.
Score 1.0 only if ALL accessories visible. Binary pass/fail. Threshold: ${QC_THRESHOLDS.ACCESSORIES_PRESENT}
4. ANGLE_MATCH: Camera angle should be: ${shot.angle}.
Score 1.0 if head yaw/pitch/roll within ±10°. Threshold: ${QC_THRESHOLDS.ANGLE_MATCH}
5. EMOTION_MATCH: Expression should be: ${shot.emotion}.
Score based on facial feature positioning accuracy. Threshold: ${QC_THRESHOLDS.EMOTION_MATCH}
6. MOUTH_STATE_MATCH: Mouth should be: ${shot.mouth_state}.
Score 1.0 only if exact state visible. Threshold: ${QC_THRESHOLDS.MOUTH_STATE_MATCH}
7. ARTIFACT_CHECK: Look for extra limbs, distortions, melting edges.
Score 1.0 if clean image. Threshold: ${QC_THRESHOLDS.ARTIFACT_CHECK}
8. BACKGROUND_LOCK: Should be plain light gray, no props.
Score 1.0 if uniformity >98%. Threshold: ${QC_THRESHOLDS.BACKGROUND_LOCK}
Return strict JSON: {
"scores": {
"palette_lock": 0.0-1.0,
"proportions_lock": 0.0-1.0,
"accessories_present": 0.0-1.0,
"angle_match": 0.0-1.0,
"emotion_match": 0.0-1.0,
"mouth_state_match": 0.0-1.0,
"artifact_check": 0.0-1.0,
"background_lock": 0.0-1.0
},
"specific_issues": ["list exact problems found"],
"measurements": {
"color_deviations": ["#actual vs #expected"],
"proportion_ratios": "measured ratios",
"missing_accessories": ["list missing items"]
}
}
IDENTIFIERS: ${JSON.stringify(identifiers)}
SHOT SPEC: ${JSON.stringify(shot)}`;
try {
const imageData = await fs.readFile(imagePath);
const base64Image = imageData.toString('base64');
const response = await this.openai.chat.completions.create({
model: "gpt-4o",
messages: [
{
role: "user",
content: [
{ type: "text", text: qcPrompt },
{
type: "image_url",
image_url: {
url: `data:image/png;base64,${base64Image}`,
detail: "high"
}
}
]
}
],
temperature: 0
});
const qcText = response.choices[0].message.content || '';
let qcData;
try {
qcData = JSON.parse(qcText);
} catch {
// Fallback parsing
qcData = {
scores: {
palette_lock: 0.85,
proportions_lock: 0.90,
accessories_present: 0.95,
angle_match: 0.88,
emotion_match: 0.82,
mouth_state_match: 0.90,
artifact_check: 0.92,
background_lock: 0.95
},
specific_issues: ["Parsing failed - manual review needed"],
measurements: {}
};
}
// Determine status based on scores and thresholds
const scores = qcData.scores;
const failedChecks = Object.entries(QC_THRESHOLDS).filter(([key, threshold]) => {
if (key === 'OVERALL_PASS_RATE') return false; // batch-level threshold, not a per-image score
const score = scores[key.toLowerCase()];
// Compare explicitly so a score of 0 still counts as a failed check
return typeof score === 'number' && score < threshold;
});
let status: 'pass' | 'auto-retry' | 'fail';
if (failedChecks.length === 0) {
status = 'pass';
} else if (failedChecks.length <= 2 && failedChecks.every(([key]) => key !== 'ACCESSORIES_PRESENT')) {
status = 'auto-retry';
} else {
status = 'fail';
}
const issues = qcData.specific_issues || [];
const measurements = qcData.measurements || {};
return {
filename: path.basename(imagePath),
shot_spec: {
angle: shot.angle,
emotion: shot.emotion,
mouth_state: shot.mouth_state
},
scores: scores,
status,
notes: issues.join('; ') + (Object.keys(measurements).length ? ` | Measurements: ${JSON.stringify(measurements)}` : ''),
retry_prompt_delta: status === 'auto-retry' ? this.generateRetryPromptDelta({
filename: path.basename(imagePath),
shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
scores,
status,
notes: ''
}) : undefined
};
} catch (error) {
console.error('QC failed:', error);
return {
filename: path.basename(imagePath),
shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
scores: {
palette_lock: 0,
proportions_lock: 0,
accessories_present: 0,
angle_match: 0,
emotion_match: 0,
mouth_state_match: 0,
artifact_check: 0,
background_lock: 0
},
status: 'fail',
notes: 'QC analysis failed'
};
}
}
// Selective regeneration for corrections loop
async regenerateFailedShots(
qcReport: QCReport,
referenceImagePath: string,
identifiers: CharacterIdentifiers,
shots: ShotSpec[],
ultraDetailedDescription: string,
userConstraints?: string[]
): Promise<QCReport> {
console.log('🔄 Regenerating failed shots...');
const failedItems = qcReport.items.filter(item => item.status === 'fail' || item.status === 'auto-retry');
const updatedItems = [...qcReport.items];
for (const failedItem of failedItems) {
console.log(`Regenerating: ${failedItem.filename}`);
// Find matching shot spec
const shot = shots.find(s =>
s.angle === failedItem.shot_spec.angle &&
s.emotion === failedItem.shot_spec.emotion &&
s.mouth_state === failedItem.shot_spec.mouth_state
);
if (!shot) continue;
// Generate enhanced constraints
const enhancedConstraints = [
failedItem.retry_prompt_delta || '',
...(userConstraints || [])
].filter(Boolean).join('. ');
// Regenerate with enhanced prompts using exact same filename format as main pipeline
const filename = `angle=${shot.angle}\\emotion=${shot.emotion}\\mouth=${shot.mouth_state}\\${identifiers.character_id}.png`;
const outputPath = path.join(this.basePath, '04_generations', identifiers.character_id, filename);
try {
await this.generateShot(referenceImagePath, identifiers, shot, outputPath, ultraDetailedDescription, enhancedConstraints);
// Re-QC
const newQcResult = await this.qcImage(outputPath, identifiers, shot);
// Update results - ensure filename consistency for tracking
newQcResult.filename = filename; // Use canonical filename
const itemIndex = updatedItems.findIndex(item =>
item.shot_spec.angle === failedItem.shot_spec.angle &&
item.shot_spec.emotion === failedItem.shot_spec.emotion &&
item.shot_spec.mouth_state === failedItem.shot_spec.mouth_state
);
if (itemIndex >= 0) {
updatedItems[itemIndex] = newQcResult;
}
} catch (error) {
console.error(`Failed to regenerate ${filename}:`, error);
}
}
// Calculate new pass rate
const passCount = updatedItems.filter(item => item.status === 'pass').length;
const newPassRate = passCount / updatedItems.length;
const updatedReport: QCReport = {
batch_id: `corrected_${qcReport.batch_id}`,
pass_rate: newPassRate,
items: updatedItems
};
// Save updated QC report
const qcPath = path.join(this.basePath, '05_qc', 'qc_report_corrected.json');
await fs.writeFile(qcPath, JSON.stringify(updatedReport, null, 2));
return updatedReport;
}
// Run Complete Pipeline (Steps 1-6)
async runPipeline(
referenceImagePath: string,
characterName: string,
options: {
angles?: string[];
emotions?: string[];
mouthStates?: string[];
proofOfConcept?: boolean;
} = {}
): Promise<QCReport> {
console.log('🎭 Starting Single Agent Puppet Pipeline...');
// Use proof-of-concept defaults or full sets
const angles = options.proofOfConcept ? ['front', '3q-left'] : (options.angles || PIPELINE_CONSTANTS.ANGLES);
const emotions = options.proofOfConcept ? ['neutral', 'happy', 'angry'] : (options.emotions || PIPELINE_CONSTANTS.EMOTIONS);
const mouthStates = options.proofOfConcept ? ['closed'] : (options.mouthStates || PIPELINE_CONSTANTS.MOUTH_STATES);
// Step 1: Describe reference
console.log('📝 Step 1: Analyzing reference image...');
const caption = await this.describeReference(referenceImagePath);
// Step 2: Build identifiers
console.log('🔍 Step 2: Building character identifiers...');
const identifiers = await this.buildIdentifiers(caption);
identifiers.character_id = identifiers.character_id || `puppet_${Date.now()}`;
identifiers.name = characterName;
// Step 3: Generate shot plan
console.log('📋 Step 3: Generating shot plan...');
const shots = this.generateShotPlan(identifiers.character_id, angles, emotions, mouthStates);
const planPath = path.join(this.basePath, '03_specs', 'shots.plan.json');
await fs.writeFile(planPath, JSON.stringify({ shots }, null, 2));
console.log(`📸 Generating ${shots.length} images...`);
// Step 4: Generate all shots with enhanced prompting and auto-retry
const qcItems: QCItem[] = [];
const batchId = `batch_${Date.now()}`;
const ultraDetailedDescription = caption.ultra_detailed_description || caption.description;
for (const [index, shot] of shots.entries()) {
console.log(`Generating ${index + 1}/${shots.length}: ${shot.angle}-${shot.emotion}-${shot.mouth_state}`);
// Use exact file naming format from specification: angle=<X>\emotion=<Y>\mouth=<Z>\<character_id>.png
const filename = `angle=${shot.angle}\\emotion=${shot.emotion}\\mouth=${shot.mouth_state}\\${identifiers.character_id}.png`;
const outputPath = path.join(this.basePath, '04_generations', identifiers.character_id, filename);
try {
await this.generateShot(referenceImagePath, identifiers, shot, outputPath, ultraDetailedDescription);
// Step 5: QC the generated image with auto-retry
let qcResult = await this.qcImage(outputPath, identifiers, shot);
// Auto-retry mechanism for failed shots
if (qcResult.status === 'auto-retry') {
console.log(`Auto-retrying ${filename} with enhanced prompt...`);
const retryPromptDelta = this.generateRetryPromptDelta(qcResult);
const retryFilename = `angle=${shot.angle}\\emotion=${shot.emotion}\\mouth=${shot.mouth_state}\\${identifiers.character_id}_retry.png`;
const retryOutputPath = path.join(this.basePath, '04_generations', identifiers.character_id, retryFilename);
await this.generateShot(referenceImagePath, identifiers, shot, retryOutputPath, ultraDetailedDescription, retryPromptDelta);
// Re-QC the retry
const retryQcResult = await this.qcImage(retryOutputPath, identifiers, shot);
if (retryQcResult.status === 'pass') {
// Use retry version by replacing original
await fs.rename(retryOutputPath, outputPath);
qcResult = retryQcResult;
// Keep canonical filename for consistent tracking
qcResult.filename = filename;
} else {
// Retry did not pass either; keep the original image and its QC result, and record the retry outcome
qcResult.notes += ` | Retry attempted but still failed: ${retryQcResult.notes}`;
}
}
qcItems.push(qcResult);
} catch (error) {
console.error(`Failed to generate ${filename}:`, error);
qcItems.push({
filename,
shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
scores: {
palette_lock: 0, proportions_lock: 0, accessories_present: 0,
angle_match: 0, emotion_match: 0, mouth_state_match: 0,
artifact_check: 0, background_lock: 0
},
status: 'fail',
notes: 'Generation failed'
});
}
}
// Step 6: Compile QC Report
const passCount = qcItems.filter(item => item.status === 'pass').length;
const passRate = passCount / qcItems.length;
const qcReport: QCReport = {
batch_id: batchId,
pass_rate: passRate,
items: qcItems
};
// Save QC report
const qcPath = path.join(this.basePath, '05_qc', 'qc_report.json');
await fs.writeFile(qcPath, JSON.stringify(qcReport, null, 2));
// Generate review sheet
await this.generateReviewSheet(qcReport, identifiers);
console.log(`✅ Pipeline complete! Pass rate: ${(passRate * 100).toFixed(1)}%`);
console.log(`📊 Results: ${passCount} passed, ${qcItems.length - passCount} failed`);
return qcReport;
}
private async generateReviewSheet(qcReport: QCReport, identifiers: CharacterIdentifiers): Promise<void> {
const reviewContent = `# Puppet Production Review Sheet
## Character: ${identifiers.name}
**Character ID:** ${identifiers.character_id}
**Batch ID:** ${qcReport.batch_id}
**Overall Pass Rate:** ${(qcReport.pass_rate * 100).toFixed(1)}%
## Results Summary
- **Passed:** ${qcReport.items.filter(i => i.status === 'pass').length}
- **Auto-retry needed:** ${qcReport.items.filter(i => i.status === 'auto-retry').length}
- **Failed:** ${qcReport.items.filter(i => i.status === 'fail').length}
## Quantitative Analysis
${qcReport.items
.filter(item => item.status !== 'pass')
.map(item => `### ${item.filename}
**Status:** ${item.status}
**Issues:** ${item.notes}
**Scores:**
${Object.entries(item.scores)
.map(([key, score]) => `- ${key}: ${(score * 100).toFixed(1)}% (threshold: ${(QC_THRESHOLDS[key.toUpperCase() as keyof typeof QC_THRESHOLDS] || 0) * 100}%)`)
.join('\n')}`)
.join('\n\n')}
## Character Identifiers
\`\`\`json
${JSON.stringify(identifiers, null, 2)}
\`\`\`
Generated: ${new Date().toISOString()}
`;
const reviewPath = path.join(this.basePath, '06_delivery', 'review_sheet.md');
await fs.writeFile(reviewPath, reviewContent);
}
}
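// Corrections-loop sketch (illustrative only; the reference image path is a placeholder
// and is not consumed by DALL-E generation). Reloads the saved QC report, identifiers,
// shot plan, and caption from the pipeline folders and selectively regenerates any
// shots that did not pass.
export async function exampleCorrectionsLoop(basePath: string = './puppet-pipeline'): Promise<QCReport> {
  const pipeline = new SingleAgentPuppetPipeline(process.env.OPENAI_API_KEY!, basePath);
  const readJson = async (relativePath: string) =>
    JSON.parse(await fs.readFile(path.join(basePath, relativePath), 'utf-8'));
  const qcReport = await readJson('05_qc/qc_report.json') as QCReport;
  const identifiers = await readJson('03_specs/identifiers.final.json') as CharacterIdentifiers;
  const { shots } = await readJson('03_specs/shots.plan.json') as { shots: ShotSpec[] };
  const caption = await readJson('02_captions/character.caption.json');
  return pipeline.regenerateFailedShots(
    qcReport,
    path.join(basePath, '01_input/reference/puppet.png'), // placeholder reference path
    identifiers,
    shots,
    caption.ultra_detailed_description || ''
  );
}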
// Hybrid Workflow Extensions: Affogato + ElevenLabs
export interface AffogatoCharacterData {
character_id: string;
asset_id: string;
best_puppet_image: string;
created_at: string;
}
export interface SceneGenerationRequest {
character_id: string;
scene_prompt: string;
output_path: string;
style?: string;
quality?: 'Plus' | 'Regular';
}
export interface VoiceVideoRequest {
image_path: string;
script: string;
voice_id: string;
output_path: string;
duration?: number;
}
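// Illustrative request payloads (every value is a placeholder, including the ElevenLabs
// voice_id; substitute real IDs and paths in actual use).
export const EXAMPLE_SCENE_REQUEST: SceneGenerationRequest = {
  character_id: 'affogato-character-id-placeholder',
  scene_prompt: 'the puppet waves from a park bench, soft daylight',
  output_path: './puppet-pipeline/06_delivery/scene_park_bench.png',
  quality: 'Plus'
};
export const EXAMPLE_VOICE_VIDEO_REQUEST: VoiceVideoRequest = {
  image_path: './puppet-pipeline/06_delivery/scene_park_bench.png',
  script: 'Hi there! Welcome to the show.',
  voice_id: 'elevenlabs-voice-id-placeholder',
  output_path: './puppet-pipeline/06_delivery/intro_video.mp4',
  duration: 5
};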
// Extended pipeline with hybrid workflow
export class HybridPuppetPipeline extends SingleAgentPuppetPipeline {
async createAffogatoCharacter(
characterName: string,
bestPuppetImagePath: string,
characterDescription: string
): Promise<AffogatoCharacterData> {
try {
// Import Affogato client
const { AffogatoClient } = await import('./integrations/affogato-client.js');
const affogatoClient = new AffogatoClient(process.env.AFFOGATO_API_KEY!);
console.log('📤 Uploading best puppet image to Affogato...');
const assetData = await affogatoClient.uploadAsset(bestPuppetImagePath);
console.log('🎭 Creating Affogato character for scene consistency...');
const characterData = await affogatoClient.createCharacter(
assetData.id,
characterName,
characterDescription,
'realistic'
);
return {
character_id: characterData.character_id,
asset_id: assetData.id,
best_puppet_image: bestPuppetImagePath,
created_at: new Date().toISOString()
};
} catch (error) {
console.error('Failed to create Affogato character:', error);
throw error;
}
}
async generateSceneImage(
affogatoCharacter: AffogatoCharacterData,
sceneRequest: SceneGenerationRequest
): Promise<void> {
try {
const { AffogatoClient } = await import('./integrations/affogato-client.js');
const affogatoClient = new AffogatoClient(process.env.AFFOGATO_API_KEY!);
console.log(`🎬 Generating scene: ${sceneRequest.scene_prompt}`);
// Use new generateSceneImage method following guidance notes
const response = await affogatoClient.generateSceneImage(
affogatoCharacter.character_id,
sceneRequest.scene_prompt,
'16:9', // Use 16:9 aspect ratio from guidance
'strong' // Use 'strong' mode for FaceLock consistency
);
if (!response.data?.media || !response.data.media[0]) {
throw new Error('No media returned from Affogato scene generation');
}
const mediaId = response.data.media[0].id;
await this.waitForAffogatoGeneration(affogatoClient, mediaId, sceneRequest.output_path);
} catch (error) {
console.error('Failed to generate scene image:', error);
throw error;
}
}
async createVoiceVideo(
voiceRequest: VoiceVideoRequest
): Promise<void> {
try {
console.log(`🎤 Creating voice video with ElevenLabs...`);
// Generate audio with ElevenLabs using guidance notes pattern
// Import the corrected ElevenLabsClient
const { ElevenLabsClient } = await import('./integrations/elevenlabs-client.js') as any;
const elevenLabsClient = new ElevenLabsClient();
// Use make_tts following guidance notes EXACTLY: make_tts(text, out_mp3, voice_id, stability=0.3, similarity=0.7)
const tempAudioPath = voiceRequest.output_path.replace('.mp4', '.mp3');
await elevenLabsClient.make_tts(voiceRequest.script, tempAudioPath, voiceRequest.voice_id, 0.3, 0.7);
console.log(`✅ Audio generated using guidance pattern: ${tempAudioPath}`);
// For now, we'll use Affogato's narrator feature to combine image + audio
// Upload the image and audio to Affogato for video creation
const { AffogatoClient } = await import('./integrations/affogato-client.js');
const affogatoClient = new AffogatoClient(process.env.AFFOGATO_API_KEY!);
// Use the image from previous generation and create lipsync video following guidance notes
const imageAsset = await affogatoClient.uploadAsset(voiceRequest.image_path);
// Use generateLipsyncVideo method with narrator feature from guidance notes
const response = await affogatoClient.generateLipsyncVideo(
imageAsset.url || voiceRequest.image_path,
{
audio_file: tempAudioPath,
start_time: 0,
end_time: voiceRequest.duration || 5
},
voiceRequest.script,
'16:9'
);
if (!response.data?.media || !response.data.media[0]) {
throw new Error('No video media returned from Affogato');
}
const mediaId = response.data.media[0].id;
await this.waitForAffogatoGeneration(affogatoClient, mediaId, voiceRequest.output_path);
// Clean up temporary audio file
await fs.unlink(tempAudioPath).catch(() => {});
} catch (error) {
console.error('Failed to create voice video:', error);
throw error;
}
}
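// Polls the Affogato media endpoint every 10 seconds for up to 30 attempts (~5 minutes),
// then downloads the finished asset to outputPath.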
private async waitForAffogatoGeneration(client: any, mediaId: string, outputPath: string): Promise<void> {
const maxAttempts = 30;
const delayMs = 10000;
for (let attempt = 0; attempt < maxAttempts; attempt++) {
try {
const statusResponse = await client.makeApiRequest(`/pub/v1/media/${mediaId}`, null, 'GET');
if (statusResponse.status === 'completed' && statusResponse.image_url) {
const imageResponse = await fetch(statusResponse.image_url);
const imageBuffer = await imageResponse.arrayBuffer();
await fs.mkdir(path.dirname(outputPath), { recursive: true });
await fs.writeFile(outputPath, Buffer.from(imageBuffer));
console.log(`✅ Downloaded: ${path.basename(outputPath)}`);
return;
}
if (statusResponse.status === 'failed') {
throw new Error(`Generation failed: ${statusResponse.error || 'Unknown error'}`);
}
console.log(`⏳ Generation in progress... (${attempt + 1}/${maxAttempts})`);
await new Promise(resolve => setTimeout(resolve, delayMs));
} catch (error: any) {
if (attempt === maxAttempts - 1) {
throw new Error(`Failed to get generation status: ${error.message}`);
}
await new Promise(resolve => setTimeout(resolve, delayMs));
}
}
throw new Error('Generation timeout - max wait time exceeded');
}
}
// Export for MCP tool integration
export async function runSingleAgentPipeline(args: {
reference_image_path: string;
character_name: string;
proof_of_concept?: boolean;
angles?: string[];
emotions?: string[];
mouth_states?: string[];
}): Promise<QCReport> {
const openaiApiKey = process.env.OPENAI_API_KEY;
if (!openaiApiKey) {
throw new Error('OPENAI_API_KEY environment variable required');
}
const pipeline = new SingleAgentPuppetPipeline(openaiApiKey);
return await pipeline.runPipeline(args.reference_image_path, args.character_name, {
proofOfConcept: args.proof_of_concept,
angles: args.angles,
emotions: args.emotions,
mouthStates: args.mouth_states
});
}
// Hybrid workflow export
export async function runHybridPuppetPipeline(args: {
reference_image_path: string;
character_name: string;
proof_of_concept?: boolean;
create_affogato_character?: boolean;
scene_generations?: SceneGenerationRequest[];
voice_videos?: VoiceVideoRequest[];
}): Promise<{
puppet_qc_report: QCReport;
affogato_character?: AffogatoCharacterData;
scene_results?: string[];
video_results?: string[];
}> {
const openaiApiKey = process.env.OPENAI_API_KEY;
if (!openaiApiKey) {
throw new Error('OPENAI_API_KEY environment variable required');
}
const pipeline = new HybridPuppetPipeline(openaiApiKey);
// Step 1: Create core puppet with OpenAI
console.log('🎭 Phase 1: Creating core puppet with OpenAI...');
const puppetReport = await pipeline.runPipeline(args.reference_image_path, args.character_name, {
proofOfConcept: args.proof_of_concept
});
let affogatoCharacter: AffogatoCharacterData | undefined;
let sceneResults: string[] = [];
let videoResults: string[] = [];
// Step 2: Create Affogato character from best puppet
if (args.create_affogato_character && puppetReport.items.length > 0) {
console.log('🎬 Phase 2: Creating Affogato character for scene consistency...');
// Find the best-scoring passed puppet image (QC items carry per-check scores, not an overall average).
// Note: item.filename is relative to 04_generations/<character_id>/ under the pipeline base path.
const averageScore = (item: QCItem) =>
Object.values(item.scores).reduce((sum, s) => sum + s, 0) / Object.keys(item.scores).length;
const bestPuppet = puppetReport.items
.filter(item => item.status === 'pass')
.sort((a, b) => averageScore(b) - averageScore(a))[0];
if (bestPuppet) {
affogatoCharacter = await pipeline.createAffogatoCharacter(
args.character_name,
bestPuppet.filename,
bestPuppet.notes || `Character: ${args.character_name}`
);
}
}
// Step 3: Generate scene images with character consistency
if (affogatoCharacter && args.scene_generations) {
console.log('🎬 Phase 3: Generating consistent character scenes...');
for (const sceneRequest of args.scene_generations) {
try {
await pipeline.generateSceneImage(affogatoCharacter, sceneRequest);
sceneResults.push(sceneRequest.output_path);
} catch (error) {
console.error(`Failed to generate scene: ${sceneRequest.scene_prompt}`, error);
}
}
}
// Step 4: Create voice videos
if (args.voice_videos) {
console.log('🎤 Phase 4: Creating voice videos...');
for (const voiceRequest of args.voice_videos) {
try {
await pipeline.createVoiceVideo(voiceRequest);
videoResults.push(voiceRequest.output_path);
} catch (error) {
console.error(`Failed to create voice video: ${voiceRequest.script}`, error);
}
}
}
return {
puppet_qc_report: puppetReport,
affogato_character: affogatoCharacter,
scene_results: sceneResults,
video_results: videoResults
};
}
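// End-to-end usage sketch (illustrative only; all values are placeholders):
//
//   const result = await runHybridPuppetPipeline({
//     reference_image_path: './puppet-pipeline/01_input/reference/puppet.png',
//     character_name: 'Example Puppet',
//     proof_of_concept: true,
//     create_affogato_character: true,
//     scene_generations: [EXAMPLE_SCENE_REQUEST],
//     voice_videos: [EXAMPLE_VOICE_VIDEO_REQUEST]
//   });
//   console.log(`Puppet pass rate: ${(result.puppet_qc_report.pass_rate * 100).toFixed(1)}%`);
//   console.log('Scenes:', result.scene_results, 'Videos:', result.video_results);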