// single-agent-pipeline.ts
// Single Agent Puppet Production Pipeline
// Implements the detailed specification from the attached document

import OpenAI from 'openai';
import fs from 'fs/promises';
import path from 'path';

// Constants (locked specifications)
export const PIPELINE_CONSTANTS = {
  ANGLES: ['front', 'left', 'right', 'back', '3q-left', '3q-right'],
  EMOTIONS: ['neutral', 'happy', 'sad', 'angry', 'surprised', 'disgust', 'fear', 'smirk'],
  MOUTH_STATES: ['closed', 'open-small', 'open-wide', 'tongue-out', 'teeth-showing'],
  LIGHTING: 'soft even studio',
  BACKGROUND: 'plain light gray',
  OUTPUT_SIZE: '1024x1024',
  STYLE_LOCK: 'same character proportions, color palette, and materials across all shots',
  FRAMING: 'bust',
  CAMERA_HEIGHT: 'eye-level'
};

// QC Thresholds
export const QC_THRESHOLDS = {
  PALETTE_LOCK: 0.95,
  PROPORTIONS_LOCK: 0.97,
  ACCESSORIES_PRESENT: 1.0,
  ANGLE_MATCH: 0.92,
  EMOTION_MATCH: 0.80,
  MOUTH_STATE_MATCH: 0.95,
  ARTIFACT_CHECK: 0.90,
  BACKGROUND_LOCK: 0.98,
  OVERALL_PASS_RATE: 0.80
};

// Data Contracts
export interface CharacterIdentifiers {
  character_id: string;
  name: string;
  source_reference: string;
  anatomy: {
    species_or_type: string;
    height_relative: string;
    proportions_notes: string;
    silhouette_keywords: string[];
  };
  colors_materials: {
    primary_palette: string[];
    secondary_palette: string[];
    materials: string[];
  };
  surface_features: {
    fur_pattern: string;
    scars_markings: string;
    eye_details: { iris_color: string; pupil_shape: string };
    mouth_teeth_tongue: { teeth: string; tongue: string };
  };
  costume_baseline: {
    garment: string;
    footwear: string;
    logo_text: string | null;
  };
  accessories: string[];
  mechanics: {
    mouth_states_allowed: string[];
    jaw_hinge_visibility: string;
    ear_flex: string;
    eye_gaze_rules: string;
  };
  forbidden_changes: string[];
  notes: string;
}

export interface ShotSpec {
  character_id: string;
  angle: string;
  emotion: string;
  mouth_state: string;
  lighting: string;
  background: string;
  framing: string;
  camera_height: string;
  notes: string;
}

export interface QCItem {
  filename: string;
  shot_spec: Partial<ShotSpec>;
  scores: {
    palette_lock: number;
    proportions_lock: number;
    accessories_present: number;
    angle_match: number;
    emotion_match: number;
    mouth_state_match: number;
    artifact_check: number;
    background_lock: number;
  };
  status: 'pass' | 'auto-retry' | 'fail';
  notes: string;
  retry_prompt_delta?: string;
}

export interface QCReport {
  batch_id: string;
  pass_rate: number;
  items: QCItem[];
}
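// Hedged helper sketch (not part of the original pipeline): shows how a
// QCItem's scores map onto QC_THRESHOLDS to produce a pass/auto-retry/fail
// status, mirroring the logic used in qcImage further down. OVERALL_PASS_RATE
// is a batch-level metric, so iterating over the per-image score keys
// naturally excludes it.
export function evaluateScores(scores: QCItem['scores']): QCItem['status'] {
  const failed = Object.entries(scores).filter(([key, score]) => {
    const threshold = QC_THRESHOLDS[key.toUpperCase() as keyof typeof QC_THRESHOLDS];
    // Explicit comparison so a score of 0 still counts as a failure.
    return typeof threshold === 'number' && typeof score === 'number' && score < threshold;
  });

  if (failed.length === 0) return 'pass';
  // Missing accessories are hard failures; other small misses are retryable.
  const accessoriesFailed = failed.some(([key]) => key === 'accessories_present');
  return failed.length <= 2 && !accessoriesFailed ? 'auto-retry' : 'fail';
}

// Example: a render with a slightly-off palette but everything else in spec
// lands in 'auto-retry':
// evaluateScores({ palette_lock: 0.90, proportions_lock: 0.98, accessories_present: 1,
//   angle_match: 0.95, emotion_match: 0.85, mouth_state_match: 0.97,
//   artifact_check: 0.95, background_lock: 0.99 }) === 'auto-retry'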
export class SingleAgentPuppetPipeline {
  private openai: OpenAI;
  private basePath: string;
  // Directory setup runs asynchronously; awaited in runPipeline so no file is
  // written before its directory exists.
  private ready: Promise<void>;

  constructor(openaiApiKey: string, basePath: string = './puppet-pipeline') {
    this.openai = new OpenAI({ apiKey: openaiApiKey });
    this.basePath = basePath;
    this.ready = this.initializeDirectories();
  }

  private async initializeDirectories(): Promise<void> {
    const dirs = [
      '01_input/reference',
      '02_captions',
      '03_specs',
      '04_generations',
      '05_qc',
      '06_delivery'
    ];
    for (const dir of dirs) {
      await fs.mkdir(path.join(this.basePath, dir), { recursive: true });
    }
  }

  // Step 1: Enhanced vision analysis for image-conditioning compensation
  async describeReference(referenceImagePath: string): Promise<any> {
    const prompt = `You are a forensic character analyst for precise puppet reproduction. Since this description will replace image conditioning, be EXTREMELY detailed.

MANDATORY ANALYSIS (pixel-level precision):

ANATOMY & PROPORTIONS:
- Head-to-body ratio (exact measurements in pixels if visible)
- Limb proportions relative to torso
- Ear shape, size, position, angle, texture
- Tail presence, length, thickness, curve direction
- Overall silhouette outline description
- Posture and stance characteristics

COLOR PALETTE (CRITICAL - provide exact hex codes):
- Primary colors: dominant surface colors with hex values
- Secondary colors: accent colors with hex values
- Eye colors: iris, pupil, whites with hex codes
- Mouth/teeth/tongue: exact colors with hex codes
- Shadow colors: darker variants with hex codes
- Highlight colors: lighter variants with hex codes

MATERIALS & TEXTURES:
- Surface material type (felt, fur, fabric, plastic, etc.)
- Texture direction and pattern
- Reflectivity and sheen characteristics
- Stitching lines: color, pattern, thickness
- Seam locations and visibility
- Fabric weave or fur direction

FACIAL FEATURES (microscopic detail):
- Eye shape: exact geometric description
- Eyelid presence, thickness, color
- Pupil shape: round/oval/other
- Iris patterns or flecks
- Eyelash presence and characteristics
- Eyebrow shape, color, thickness
- Nose/snout: shape, nostril details, coloring
- Mouth shape when closed
- Lip thickness and color
- Teeth visibility, shape, color when visible
- Tongue shape, length, color, texture

DISTINCTIVE FEATURES:
- Scars: location, size, color, texture
- Markings: shape, color, symmetry
- Freckles or spots: distribution pattern
- Asymmetries: any differences between left/right
- Wear patterns or damage
- Unique identifiers

ACCESSORIES & ODDITIES (auto-detect):
- Glasses: frame color, lens type, position
- Hats: style, color, fit, material
- Jewelry: type, material, color, position
- Clothing: exact garments, colors, patterns, fit
- Props or attachments
- Damage or repairs visible
- Stickers, badges, or additional decorations

MECHANICS (puppet-specific):
- Jaw hinge visibility and mechanics
- Mouth opening capability assessment
- Ear flexibility indicators
- Eye movement mechanics if visible
- Control rod attachment points

Respond as JSON:
{
  "ultra_detailed_description": "exhaustive prose description",
  "color_palette": ["#hex1", "#hex2", ...],
  "accessories_detected": ["item1", "item2", ...],
  "oddities_detected": ["anomaly1", "anomaly2", ...],
  "reproduction_critical_features": ["feature1", "feature2", ...]
}`;

    try {
      const imageData = await fs.readFile(referenceImagePath);
      const base64Image = imageData.toString('base64');

      const response = await this.openai.chat.completions.create({
        model: 'gpt-4o',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              {
                type: 'image_url',
                image_url: { url: `data:image/png;base64,${base64Image}`, detail: 'high' }
              }
            ]
          }
        ],
        temperature: 0.2
      });

      const captionText = response.choices[0].message.content || '';
      let captionData;
      try {
        captionData = JSON.parse(captionText);
      } catch {
        // Fallback if the model did not return valid JSON
        captionData = {
          ultra_detailed_description: captionText,
          color_palette: [],
          accessories_detected: [],
          oddities_detected: [],
          reproduction_critical_features: []
        };
      }

      // Save caption
      const captionPath = path.join(this.basePath, '02_captions', 'character.caption.json');
      await fs.writeFile(captionPath, JSON.stringify(captionData, null, 2));

      return captionData;
    } catch (error) {
      console.error('Failed to describe reference:', error);
      throw error;
    }
  }

  // Step 2: Build Character Identifiers
  async buildIdentifiers(
    captionData: any,
    seedIdentifiers?: Partial<CharacterIdentifiers>
  ): Promise<CharacterIdentifiers> {
    const prompt = `Merge the caption with optional seed identifiers into final Character Identifiers JSON. Use this exact schema structure. Fill hex colors from caption palette. Preserve any 'forbidden_changes'. Return ONLY the JSON object, no commentary.

SCHEMA EXAMPLE:
{
  "character_id": "string",
  "name": "string",
  "source_reference": "path-or-url",
  "anatomy": {
    "species_or_type": "e.g., puppet gremlin",
    "height_relative": "e.g., small",
    "proportions_notes": "e.g., large head, short limbs",
    "silhouette_keywords": ["rounded ears","tapered snout"]
  },
  "colors_materials": {
    "primary_palette": ["#A1B2C3","#334455"],
    "secondary_palette": ["#..."],
    "materials": ["felt","faux fur","plastic eyes","stitched mouth"]
  },
  "surface_features": {
    "fur_pattern": "describe zones and direction",
    "scars_markings": "none or details",
    "eye_details": {"iris_color":"hex","pupil_shape":"round"},
    "mouth_teeth_tongue": {"teeth":"flat white","tongue":"pink #F29CB2"}
  },
  "costume_baseline": {
    "garment": "yellow raincoat",
    "footwear": "none",
    "logo_text": null
  },
  "accessories": ["round glasses"],
  "mechanics": {
    "mouth_states_allowed": ["closed","open-small","open-wide","tongue-out","teeth-showing"],
    "jaw_hinge_visibility": "hidden",
    "ear_flex": "none",
    "eye_gaze_rules": "camera unless specified"
  },
  "forbidden_changes": [
    "do not change eye color",
    "no new scars",
    "preserve fur pattern zones"
  ],
  "notes": "any extra lock-ins"
}`;

    const response = await this.openai.chat.completions.create({
      model: 'gpt-4o',
      messages: [
        { role: 'system', content: prompt },
        { role: 'user', content: JSON.stringify({ caption: captionData, seed: seedIdentifiers || {} }) }
      ],
      temperature: 0
    });

    // Strip markdown code fences in case the model wraps its JSON output
    const raw = (response.choices[0].message.content || '{}')
      .replace(/^```(?:json)?\s*/, '')
      .replace(/```\s*$/, '');
    const identifiers = JSON.parse(raw) as CharacterIdentifiers;

    // Save identifiers
    const identifiersPath = path.join(this.basePath, '03_specs', 'identifiers.final.json');
    await fs.writeFile(identifiersPath, JSON.stringify(identifiers, null, 2));

    return identifiers;
  }

  // Step 3: Cartesian Expansion - Generate Shot Plan
  generateShotPlan(
    characterId: string,
    angles: string[] = PIPELINE_CONSTANTS.ANGLES,
    emotions: string[] = PIPELINE_CONSTANTS.EMOTIONS,
    mouthStates: string[] = PIPELINE_CONSTANTS.MOUTH_STATES
  ): ShotSpec[] {
    const shots: ShotSpec[] = [];
    for (const angle of angles) {
      for (const emotion of emotions) {
        for (const mouthState of mouthStates) {
          shots.push({
            character_id: characterId,
            angle,
            emotion,
            mouth_state: mouthState,
            lighting: PIPELINE_CONSTANTS.LIGHTING,
            background: PIPELINE_CONSTANTS.BACKGROUND,
            framing: PIPELINE_CONSTANTS.FRAMING,
            camera_height: PIPELINE_CONSTANTS.CAMERA_HEIGHT,
            notes: 'lock palette and proportions; no background props'
          });
        }
      }
    }
    return shots;
  }

  // Generate retry prompt delta for failed QC items
  generateRetryPromptDelta(qcResult: QCItem): string {
    const failedCategories = Object.entries(qcResult.scores)
      .filter(([key, score]) => {
        const threshold = QC_THRESHOLDS[key.toUpperCase() as keyof typeof QC_THRESHOLDS];
        return typeof threshold === 'number' && score < threshold;
      })
      .map(([key]) => key);

    const corrections: string[] = [];
    if (failedCategories.includes('palette_lock')) {
      corrections.push('ENFORCE EXACT COLOR MATCHING - do not deviate from specified hex codes');
    }
    if (failedCategories.includes('proportions_lock')) {
      corrections.push('MAINTAIN PRECISE PROPORTIONS - do not alter head/body ratios or limb lengths');
    }
    if (failedCategories.includes('accessories_present')) {
      corrections.push('INCLUDE ALL ACCESSORIES - ensure glasses, hats, jewelry are visible');
    }
    if (failedCategories.includes('angle_match')) {
      corrections.push('CORRECT CAMERA ANGLE - adjust positioning to match specified angle exactly');
    }
    if (failedCategories.includes('emotion_match')) {
      corrections.push('ACCURATE FACIAL EXPRESSION - ensure emotion is clearly displayed');
    }
    if (failedCategories.includes('mouth_state_match')) {
      corrections.push('PRECISE MOUTH POSITION - match specified mouth state exactly');
    }
    if (failedCategories.includes('background_lock')) {
      corrections.push('PLAIN GRAY BACKGROUND ONLY - remove all objects and textures');
    }
    return corrections.join('. ');
  }

  // Step 4: DALL-E 3 shot generation with ultra-detailed prompts.
  // referenceImagePath is kept for interface symmetry; DALL-E 3 does not
  // accept image conditioning, so the ultra-detailed description stands in for it.
  async generateShot(
    referenceImagePath: string,
    identifiers: CharacterIdentifiers,
    shot: ShotSpec,
    outputPath: string,
    ultraDetailedDescription?: string,
    retryPromptDelta?: string
  ): Promise<void> {
    const baseGuardrails = [
      'CRITICAL: Preserve exact colors and materials from character identifiers',
      'MANDATORY: Do not change eye color, fur pattern zones, or garment details',
      'REQUIRED: Maintain exact proportions and silhouette measurements',
      'ESSENTIAL: Plain light-gray background only, absolutely no props or objects',
      'VITAL: Same character across all shots - no variations in appearance',
      ...identifiers.forbidden_changes
    ];

    // Add retry-specific guardrails if this is a retry
    const allGuardrails = retryPromptDelta
      ? [...baseGuardrails, `RETRY CORRECTION: ${retryPromptDelta}`]
      : baseGuardrails;

    // Enhanced prompt with ultra-detailed reconstruction to compensate for the
    // lack of image conditioning
    const prompt = `Generate a professional studio photograph of a puppet character with these EXACT specifications:

ULTRA-DETAILED CHARACTER RECONSTRUCTION (CRITICAL - must match exactly):
${ultraDetailedDescription || identifiers.notes}

SHOT REQUIREMENTS:
- Camera angle: ${shot.angle} (precise positioning)
- Facial expression: ${shot.emotion} (accurate emotion display)
- Mouth position: ${shot.mouth_state} (exact mouth configuration)
- Framing: ${shot.framing} (standard puppet photography framing)
- Camera height: ${shot.camera_height} (professional level positioning)
- Studio lighting: ${shot.lighting} (even, professional illumination)
- Background: ${shot.background} (solid color, no textures or objects)

COLOR PALETTE (EXACT MATCHES REQUIRED):
Primary colors: ${identifiers.colors_materials.primary_palette.join(', ')}
Secondary colors: ${identifiers.colors_materials.secondary_palette.join(', ')}
Materials: ${identifiers.colors_materials.materials.join(', ')}

ANATOMY SPECIFICATIONS:
Species/Type: ${identifiers.anatomy.species_or_type}
Proportions: ${identifiers.anatomy.proportions_notes}
Silhouette keywords: ${identifiers.anatomy.silhouette_keywords.join(', ')}

SURFACE FEATURES (MUST REPLICATE):
Fur pattern: ${identifiers.surface_features.fur_pattern}
Scars/markings: ${identifiers.surface_features.scars_markings}
Eye details: ${JSON.stringify(identifiers.surface_features.eye_details)}
Mouth/teeth/tongue: ${JSON.stringify(identifiers.surface_features.mouth_teeth_tongue)}

COSTUME & ACCESSORIES (EXACT REPLICATION):
Garment: ${identifiers.costume_baseline.garment}
Footwear: ${identifiers.costume_baseline.footwear}
Logo/text: ${identifiers.costume_baseline.logo_text || 'none'}
Accessories: ${identifiers.accessories.join(', ') || 'none'}

PUPPET MECHANICS:
Mouth states allowed: ${identifiers.mechanics.mouth_states_allowed.join(', ')}
Jaw hinge: ${identifiers.mechanics.jaw_hinge_visibility}
Ear flexibility: ${identifiers.mechanics.ear_flex}
Eye gaze: ${identifiers.mechanics.eye_gaze_rules}

CRITICAL GUARDRAILS (NEVER VIOLATE):
- ${allGuardrails.join('\n- ')}

TECHNICAL SPECIFICATIONS:
- Studio photography style
- High resolution, crisp details
- Professional puppet photography
- Consistent character representation
- ${shot.notes}

Generate this character exactly as described. Any deviation from colors, proportions, or features is unacceptable.`;

    try {
      console.log(`🎨 Generating ${shot.angle}-${shot.emotion}-${shot.mouth_state} with OpenAI DALL-E 3...`);

      const response = await this.openai.images.generate({
        model: 'dall-e-3',
        prompt: prompt,
        size: '1024x1024',
        quality: 'hd', // HD quality for better detail reproduction
        style: 'natural', // Natural style for puppet photography
        n: 1
      });

      // Download generated image
      if (!response.data || !response.data[0] || !response.data[0].url) {
        throw new Error('No image URL returned from OpenAI DALL-E 3');
      }
      const imageUrl = response.data[0].url;
      const imageResponse = await fetch(imageUrl);
      const imageBuffer = await imageResponse.arrayBuffer();

      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, Buffer.from(imageBuffer));
    } catch (error) {
      console.error('Failed to generate shot with OpenAI DALL-E 3:', error);
      throw error;
    }
  }
  // Step 5: Enhanced QC with quantitative analysis
  async qcImage(
    imagePath: string,
    identifiers: CharacterIdentifiers,
    shot: ShotSpec
  ): Promise<QCItem> {
    const qcPrompt = `Evaluate if this generated image matches the Character Identifiers and Shot Spec with QUANTITATIVE precision.

CRITICAL SCORING (0.0 to 1.0):
1. PALETTE_LOCK: Compare colors to these exact hex codes: ${identifiers.colors_materials.primary_palette.join(', ')}. Score 1.0 only if colors match within ±5% HSV tolerance. Threshold: ${QC_THRESHOLDS.PALETTE_LOCK}
2. PROPORTIONS_LOCK: Measure head/body ratio against: ${identifiers.anatomy.proportions_notes}. Score 1.0 only if ratios are within ±3% of baseline. Threshold: ${QC_THRESHOLDS.PROPORTIONS_LOCK}
3. ACCESSORIES_PRESENT: Check for: ${identifiers.accessories.join(', ')}. Score 1.0 only if ALL accessories visible. Binary pass/fail. Threshold: ${QC_THRESHOLDS.ACCESSORIES_PRESENT}
4. ANGLE_MATCH: Camera angle should be: ${shot.angle}. Score 1.0 if head yaw/pitch/roll within ±10°. Threshold: ${QC_THRESHOLDS.ANGLE_MATCH}
5. EMOTION_MATCH: Expression should be: ${shot.emotion}. Score based on facial feature positioning accuracy. Threshold: ${QC_THRESHOLDS.EMOTION_MATCH}
6. MOUTH_STATE_MATCH: Mouth should be: ${shot.mouth_state}. Score 1.0 only if exact state visible. Threshold: ${QC_THRESHOLDS.MOUTH_STATE_MATCH}
7. ARTIFACT_CHECK: Look for extra limbs, distortions, melting edges. Score 1.0 if clean image. Threshold: ${QC_THRESHOLDS.ARTIFACT_CHECK}
8. BACKGROUND_LOCK: Should be plain light gray, no props. Score 1.0 if uniformity >98%. Threshold: ${QC_THRESHOLDS.BACKGROUND_LOCK}

Return strict JSON:
{
  "scores": {
    "palette_lock": 0.0-1.0,
    "proportions_lock": 0.0-1.0,
    "accessories_present": 0.0-1.0,
    "angle_match": 0.0-1.0,
    "emotion_match": 0.0-1.0,
    "mouth_state_match": 0.0-1.0,
    "artifact_check": 0.0-1.0,
    "background_lock": 0.0-1.0
  },
  "specific_issues": ["list exact problems found"],
  "measurements": {
    "color_deviations": ["#actual vs #expected"],
    "proportion_ratios": "measured ratios",
    "missing_accessories": ["list missing items"]
  }
}

IDENTIFIERS: ${JSON.stringify(identifiers)}
SHOT SPEC: ${JSON.stringify(shot)}`;

    try {
      const imageData = await fs.readFile(imagePath);
      const base64Image = imageData.toString('base64');

      const response = await this.openai.chat.completions.create({
        model: 'gpt-4o',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: qcPrompt },
              {
                type: 'image_url',
                image_url: { url: `data:image/png;base64,${base64Image}`, detail: 'high' }
              }
            ]
          }
        ],
        temperature: 0
      });

      const qcText = response.choices[0].message.content || '';
      let qcData;
      try {
        qcData = JSON.parse(qcText);
      } catch {
        // Fallback scores if the model response is not valid JSON
        qcData = {
          scores: {
            palette_lock: 0.85,
            proportions_lock: 0.90,
            accessories_present: 0.95,
            angle_match: 0.88,
            emotion_match: 0.82,
            mouth_state_match: 0.90,
            artifact_check: 0.92,
            background_lock: 0.95
          },
          specific_issues: ['Parsing failed - manual review needed'],
          measurements: {}
        };
      }

      // Determine status based on scores and thresholds
      const scores = qcData.scores;
      const failedChecks = Object.entries(QC_THRESHOLDS).filter(([key, threshold]) => {
        const scoreKey = key.toLowerCase();
        // Explicit type check so a score of 0 still registers as a failure
        return typeof scores[scoreKey] === 'number' && scores[scoreKey] < threshold;
      });

      let status: 'pass' | 'auto-retry' | 'fail';
      if (failedChecks.length === 0) {
        status = 'pass';
      } else if (failedChecks.length <= 2 && failedChecks.every(([key]) => key !== 'ACCESSORIES_PRESENT')) {
        status = 'auto-retry';
      } else {
        status = 'fail';
      }

      const issues = qcData.specific_issues || [];
      const measurements = qcData.measurements || {};

      return {
        filename: path.basename(imagePath),
        shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
        scores: scores,
        status,
        notes:
          issues.join('; ') +
          (Object.keys(measurements).length ? ` | Measurements: ${JSON.stringify(measurements)}` : ''),
        retry_prompt_delta:
          status === 'auto-retry'
            ? this.generateRetryPromptDelta({
                filename: path.basename(imagePath),
                shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
                scores,
                status,
                notes: ''
              })
            : undefined
      };
    } catch (error) {
      console.error('QC failed:', error);
      return {
        filename: path.basename(imagePath),
        shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
        scores: {
          palette_lock: 0,
          proportions_lock: 0,
          accessories_present: 0,
          angle_match: 0,
          emotion_match: 0,
          mouth_state_match: 0,
          artifact_check: 0,
          background_lock: 0
        },
        status: 'fail',
        notes: 'QC analysis failed'
      };
    }
  }

  // Spec file-naming format: angle=<X>\emotion=<Y>\mouth=<Z>\<character_id>.png,
  // realized as nested directories via path.join so the layout is portable
  // (literal backslashes in a filename would break on POSIX systems)
  private shotFilename(shot: ShotSpec, characterId: string, suffix = ''): string {
    return path.join(
      `angle=${shot.angle}`,
      `emotion=${shot.emotion}`,
      `mouth=${shot.mouth_state}`,
      `${characterId}${suffix}.png`
    );
  }

  // Selective regeneration for corrections loop
  async regenerateFailedShots(
    qcReport: QCReport,
    referenceImagePath: string,
    identifiers: CharacterIdentifiers,
    shots: ShotSpec[],
    ultraDetailedDescription: string,
    userConstraints?: string[]
  ): Promise<QCReport> {
    console.log('🔄 Regenerating failed shots...');

    const failedItems = qcReport.items.filter(
      item => item.status === 'fail' || item.status === 'auto-retry'
    );
    const updatedItems = [...qcReport.items];

    for (const failedItem of failedItems) {
      console.log(`Regenerating: ${failedItem.filename}`);

      // Find matching shot spec
      const shot = shots.find(
        s =>
          s.angle === failedItem.shot_spec.angle &&
          s.emotion === failedItem.shot_spec.emotion &&
          s.mouth_state === failedItem.shot_spec.mouth_state
      );
      if (!shot) continue;

      // Generate enhanced constraints
      const enhancedConstraints = [failedItem.retry_prompt_delta || '', ...(userConstraints || [])]
        .filter(Boolean)
        .join('. ');

      // Regenerate using the exact same filename scheme as the main pipeline
      const filename = this.shotFilename(shot, identifiers.character_id);
      const outputPath = path.join(this.basePath, '04_generations', identifiers.character_id, filename);

      try {
        await this.generateShot(
          referenceImagePath,
          identifiers,
          shot,
          outputPath,
          ultraDetailedDescription,
          enhancedConstraints
        );

        // Re-QC
        const newQcResult = await this.qcImage(outputPath, identifiers, shot);
        // Keep the canonical filename for consistent tracking
        newQcResult.filename = filename;

        const itemIndex = updatedItems.findIndex(
          item =>
            item.shot_spec.angle === failedItem.shot_spec.angle &&
            item.shot_spec.emotion === failedItem.shot_spec.emotion &&
            item.shot_spec.mouth_state === failedItem.shot_spec.mouth_state
        );
        if (itemIndex >= 0) {
          updatedItems[itemIndex] = newQcResult;
        }
      } catch (error) {
        console.error(`Failed to regenerate ${filename}:`, error);
      }
    }

    // Calculate new pass rate
    const passCount = updatedItems.filter(item => item.status === 'pass').length;
    const newPassRate = passCount / updatedItems.length;

    const updatedReport: QCReport = {
      batch_id: `corrected_${qcReport.batch_id}`,
      pass_rate: newPassRate,
      items: updatedItems
    };

    // Save updated QC report
    const qcPath = path.join(this.basePath, '05_qc', 'qc_report_corrected.json');
    await fs.writeFile(qcPath, JSON.stringify(updatedReport, null, 2));

    return updatedReport;
  }

  // Step 6: Run Complete Pipeline
  async runPipeline(
    referenceImagePath: string,
    characterName: string,
    options: {
      angles?: string[];
      emotions?: string[];
      mouthStates?: string[];
      proofOfConcept?: boolean;
    } = {}
  ): Promise<QCReport> {
    console.log('🎭 Starting Single Agent Puppet Pipeline...');
    await this.ready; // ensure output directories exist before any writes

    // Use proof-of-concept defaults or full sets
    const angles = options.proofOfConcept
      ? ['front', '3q-left']
      : options.angles || PIPELINE_CONSTANTS.ANGLES;
    const emotions = options.proofOfConcept
      ? ['neutral', 'happy', 'angry']
      : options.emotions || PIPELINE_CONSTANTS.EMOTIONS;
    const mouthStates = options.proofOfConcept
      ? ['closed']
      : options.mouthStates || PIPELINE_CONSTANTS.MOUTH_STATES;

    // Step 1: Describe reference
    console.log('📝 Step 1: Analyzing reference image...');
    const caption = await this.describeReference(referenceImagePath);

    // Step 2: Build identifiers
    console.log('🔍 Step 2: Building character identifiers...');
    const identifiers = await this.buildIdentifiers(caption);
    identifiers.character_id = identifiers.character_id || `puppet_${Date.now()}`;
    identifiers.name = characterName;

    // Step 3: Generate shot plan
    console.log('📋 Step 3: Generating shot plan...');
    const shots = this.generateShotPlan(identifiers.character_id, angles, emotions, mouthStates);
    const planPath = path.join(this.basePath, '03_specs', 'shots.plan.json');
    await fs.writeFile(planPath, JSON.stringify({ shots }, null, 2));

    console.log(`📸 Generating ${shots.length} images...`);

    // Step 4: Generate all shots with enhanced prompting and auto-retry
    const qcItems: QCItem[] = [];
    const batchId = `batch_${Date.now()}`;
    const ultraDetailedDescription = caption.ultra_detailed_description || caption.description;

    for (const [index, shot] of shots.entries()) {
      console.log(`Generating ${index + 1}/${shots.length}: ${shot.angle}-${shot.emotion}-${shot.mouth_state}`);

      // File naming from the specification: angle=<X>\emotion=<Y>\mouth=<Z>\<character_id>.png
      const filename = this.shotFilename(shot, identifiers.character_id);
      const outputPath = path.join(this.basePath, '04_generations', identifiers.character_id, filename);

      try {
        await this.generateShot(referenceImagePath, identifiers, shot, outputPath, ultraDetailedDescription);

        // Step 5: QC the generated image with auto-retry
        let qcResult = await this.qcImage(outputPath, identifiers, shot);

        // Auto-retry mechanism for failed shots
        if (qcResult.status === 'auto-retry') {
          console.log(`Auto-retrying ${filename} with enhanced prompt...`);
          const retryPromptDelta = this.generateRetryPromptDelta(qcResult);

          const retryFilename = this.shotFilename(shot, identifiers.character_id, '_retry');
          const retryOutputPath = path.join(this.basePath, '04_generations', identifiers.character_id, retryFilename);

          await this.generateShot(referenceImagePath, identifiers, shot, retryOutputPath, ultraDetailedDescription, retryPromptDelta);

          // Re-QC the retry
          const retryQcResult = await this.qcImage(retryOutputPath, identifiers, shot);
          if (retryQcResult.status === 'pass') {
            // Use the retry version by replacing the original
            await fs.rename(retryOutputPath, outputPath);
            qcResult = retryQcResult;
            // Keep the canonical filename for consistent tracking
            qcResult.filename = filename;
          } else {
            // Keep the original, but point the QC record at the file actually reviewed
            qcResult.filename = retryFilename;
            qcResult.notes += ` | Retry attempted but still failed: ${retryQcResult.notes}`;
          }
        }

        qcItems.push(qcResult);
      } catch (error) {
        console.error(`Failed to generate ${filename}:`, error);
        qcItems.push({
          filename,
          shot_spec: { angle: shot.angle, emotion: shot.emotion, mouth_state: shot.mouth_state },
          scores: {
            palette_lock: 0,
            proportions_lock: 0,
            accessories_present: 0,
            angle_match: 0,
            emotion_match: 0,
            mouth_state_match: 0,
            artifact_check: 0,
            background_lock: 0
          },
          status: 'fail',
          notes: 'Generation failed'
        });
      }
    }

    // Step 6: Compile QC Report
    const passCount = qcItems.filter(item => item.status === 'pass').length;
    const passRate = passCount / qcItems.length;

    const qcReport: QCReport = {
      batch_id: batchId,
      pass_rate: passRate,
      items: qcItems
    };

    // Save QC report
    const qcPath = path.join(this.basePath, '05_qc', 'qc_report.json');
    await fs.writeFile(qcPath, JSON.stringify(qcReport, null, 2));

    // Generate review sheet
    await this.generateReviewSheet(qcReport, identifiers);

    console.log(`✅ Pipeline complete! Pass rate: ${(passRate * 100).toFixed(1)}%`);
    console.log(`📊 Results: ${passCount} passed, ${qcItems.length - passCount} failed`);

    return qcReport;
  }

  private async generateReviewSheet(qcReport: QCReport, identifiers: CharacterIdentifiers): Promise<void> {
    const reviewContent = `# Puppet Production Review Sheet

## Character: ${identifiers.name}
**Character ID:** ${identifiers.character_id}
**Batch ID:** ${qcReport.batch_id}
**Overall Pass Rate:** ${(qcReport.pass_rate * 100).toFixed(1)}%

## Results Summary
- **Passed:** ${qcReport.items.filter(i => i.status === 'pass').length}
- **Auto-retry needed:** ${qcReport.items.filter(i => i.status === 'auto-retry').length}
- **Failed:** ${qcReport.items.filter(i => i.status === 'fail').length}

## Quantitative Analysis
${qcReport.items
  .filter(item => item.status !== 'pass')
  .map(
    item => `### ${item.filename}
**Status:** ${item.status}
**Issues:** ${item.notes}
**Scores:**
${Object.entries(item.scores)
  .map(
    ([key, score]) =>
      `- ${key}: ${(score * 100).toFixed(1)}% (threshold: ${(QC_THRESHOLDS[key.toUpperCase() as keyof typeof QC_THRESHOLDS] || 0) * 100}%)`
  )
  .join('\n')}`
  )
  .join('\n\n')}

## Character Identifiers
\`\`\`json
${JSON.stringify(identifiers, null, 2)}
\`\`\`

Generated: ${new Date().toISOString()}
`;

    const reviewPath = path.join(this.basePath, '06_delivery', 'review_sheet.md');
    await fs.writeFile(reviewPath, reviewContent);
  }
}
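// Hedged usage sketch (not part of the original file's API surface): shows how
// the full Cartesian shot matrix sizes out (6 angles x 8 emotions x 5 mouth
// states = 240 images) and how a proof-of-concept run narrows that to
// 2 x 3 x 1 = 6 images. The reference path and character name are placeholder
// assumptions, not values from the source.
export async function exampleSingleAgentRun(): Promise<void> {
  const pipeline = new SingleAgentPuppetPipeline(process.env.OPENAI_API_KEY!);

  const fullMatrix =
    PIPELINE_CONSTANTS.ANGLES.length *
    PIPELINE_CONSTANTS.EMOTIONS.length *
    PIPELINE_CONSTANTS.MOUTH_STATES.length; // 6 * 8 * 5 = 240 shots
  console.log(`Full matrix would generate ${fullMatrix} images; using proof-of-concept subset.`);

  // proofOfConcept restricts to ['front', '3q-left'] x ['neutral', 'happy', 'angry'] x ['closed']
  const report = await pipeline.runPipeline('./01_input/reference/puppet.png', 'Demo Puppet', {
    proofOfConcept: true
  });

  if (report.pass_rate < QC_THRESHOLDS.OVERALL_PASS_RATE) {
    console.warn(
      `Pass rate ${(report.pass_rate * 100).toFixed(1)}% below target; consider regenerateFailedShots().`
    );
  }
}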
// Hybrid Workflow Extensions: Affogato + ElevenLabs

export interface AffogatoCharacterData {
  character_id: string;
  asset_id: string;
  best_puppet_image: string;
  created_at: string;
}

export interface SceneGenerationRequest {
  character_id: string;
  scene_prompt: string;
  output_path: string;
  style?: string;
  quality?: 'Plus' | 'Regular';
}

export interface VoiceVideoRequest {
  image_path: string;
  script: string;
  voice_id: string;
  output_path: string;
  duration?: number;
}

// Extended pipeline with hybrid workflow
export class HybridPuppetPipeline extends SingleAgentPuppetPipeline {
  async createAffogatoCharacter(
    characterName: string,
    bestPuppetImagePath: string,
    characterDescription: string
  ): Promise<AffogatoCharacterData> {
    try {
      // Import Affogato client
      const { AffogatoClient } = await import('./integrations/affogato-client.js');
      const affogatoClient = new AffogatoClient(process.env.AFFOGATO_API_KEY!);

      console.log('📤 Uploading best puppet image to Affogato...');
      const assetData = await affogatoClient.uploadAsset(bestPuppetImagePath);

      console.log('🎭 Creating Affogato character for scene consistency...');
      const characterData = await affogatoClient.createCharacter(
        assetData.id,
        characterName,
        characterDescription,
        'realistic'
      );

      return {
        character_id: characterData.character_id,
        asset_id: assetData.id,
        best_puppet_image: bestPuppetImagePath,
        created_at: new Date().toISOString()
      };
    } catch (error) {
      console.error('Failed to create Affogato character:', error);
      throw error;
    }
  }

  async generateSceneImage(
    affogatoCharacter: AffogatoCharacterData,
    sceneRequest: SceneGenerationRequest
  ): Promise<void> {
    try {
      const { AffogatoClient } = await import('./integrations/affogato-client.js');
      const affogatoClient = new AffogatoClient(process.env.AFFOGATO_API_KEY!);

      console.log(`🎬 Generating scene: ${sceneRequest.scene_prompt}`);

      // Use the generateSceneImage method following the guidance notes
      const response = await affogatoClient.generateSceneImage(
        affogatoCharacter.character_id,
        sceneRequest.scene_prompt,
        '16:9', // 16:9 aspect ratio from guidance
        'strong' // 'strong' mode for FaceLock consistency
      );

      if (!response.data?.media || !response.data.media[0]) {
        throw new Error('No media returned from Affogato scene generation');
      }

      const mediaId = response.data.media[0].id;
      await this.waitForAffogatoGeneration(affogatoClient, mediaId, sceneRequest.output_path);
    } catch (error) {
      console.error('Failed to generate scene image:', error);
      throw error;
    }
  }

  async createVoiceVideo(voiceRequest: VoiceVideoRequest): Promise<void> {
    try {
      console.log('🎤 Creating voice video with ElevenLabs...');

      // Generate audio with ElevenLabs using the guidance-notes pattern
      const { ElevenLabsClient } = (await import('./integrations/elevenlabs-client.js')) as any;
      const elevenLabsClient = new ElevenLabsClient();

      // Guidance-notes signature: make_tts(text, out_mp3, voice_id, stability=0.3, similarity=0.7)
      const tempAudioPath = voiceRequest.output_path.replace('.mp4', '.mp3');
      await elevenLabsClient.make_tts(voiceRequest.script, tempAudioPath, voiceRequest.voice_id, 0.3, 0.7);
      console.log(`✅ Audio generated using guidance pattern: ${tempAudioPath}`);

      // Use Affogato's narrator feature to combine image + audio into a lipsync video
      const { AffogatoClient } = await import('./integrations/affogato-client.js');
      const affogatoClient = new AffogatoClient(process.env.AFFOGATO_API_KEY!);

      // Upload the source image, then create the lipsync video per the guidance notes
      const imageAsset = await affogatoClient.uploadAsset(voiceRequest.image_path);

      const response = await affogatoClient.generateLipsyncVideo(
        imageAsset.url || voiceRequest.image_path,
        {
          audio_file: tempAudioPath,
          start_time: 0,
          end_time: voiceRequest.duration || 5
        },
        voiceRequest.script,
        '16:9'
      );

      if (!response.data?.media || !response.data.media[0]) {
        throw new Error('No video media returned from Affogato');
      }

      const mediaId = response.data.media[0].id;
      await this.waitForAffogatoGeneration(affogatoClient, mediaId, voiceRequest.output_path);

      // Clean up the temporary audio file
      await fs.unlink(tempAudioPath).catch(() => {});
    } catch (error) {
      console.error('Failed to create voice video:', error);
      throw error;
    }
  }

  private async waitForAffogatoGeneration(client: any, mediaId: string, outputPath: string): Promise<void> {
    const maxAttempts = 30;
    const delayMs = 10000;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        const statusResponse = await client.makeApiRequest(`/pub/v1/media/${mediaId}`, null, 'GET');

        if (statusResponse.status === 'completed' && statusResponse.image_url) {
          const mediaResponse = await fetch(statusResponse.image_url);
          const mediaBuffer = await mediaResponse.arrayBuffer();
          await fs.mkdir(path.dirname(outputPath), { recursive: true });
          await fs.writeFile(outputPath, Buffer.from(mediaBuffer));
          console.log(`✅ Downloaded: ${path.basename(outputPath)}`);
          return;
        }

        if (statusResponse.status === 'failed') {
          throw new Error(`Generation failed: ${statusResponse.error || 'Unknown error'}`);
        }

        console.log(`⏳ Generation in progress... (${attempt + 1}/${maxAttempts})`);
        await new Promise(resolve => setTimeout(resolve, delayMs));
      } catch (error: any) {
        if (attempt === maxAttempts - 1) {
          throw new Error(`Failed to get generation status: ${error.message}`);
        }
        await new Promise(resolve => setTimeout(resolve, delayMs));
      }
    }

    throw new Error('Generation timeout - max wait time exceeded');
  }
}
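// Hedged sketch of the hybrid flow, assuming AFFOGATO_API_KEY and the
// integration clients imported above are configured. The paths, prompt,
// description, and voice_id below are illustrative placeholders, not values
// from the source; only methods defined in this file are used.
export async function exampleHybridFlow(): Promise<void> {
  const hybrid = new HybridPuppetPipeline(process.env.OPENAI_API_KEY!);

  // Register a QC-passing render as an Affogato character for scene consistency.
  const character = await hybrid.createAffogatoCharacter(
    'Demo Puppet',
    './puppet-pipeline/04_generations/demo/best.png', // placeholder path
    'Felt puppet with round glasses' // placeholder description
  );

  // One consistent scene render (16:9, FaceLock 'strong' per generateSceneImage).
  await hybrid.generateSceneImage(character, {
    character_id: character.character_id,
    scene_prompt: 'puppet reading a newspaper in a cozy kitchen',
    output_path: './puppet-pipeline/06_delivery/scene-kitchen.png'
  });

  // One short lip-synced clip via ElevenLabs TTS + the Affogato narrator feature.
  await hybrid.createVoiceVideo({
    image_path: './puppet-pipeline/06_delivery/scene-kitchen.png',
    script: 'Hello! Welcome to the show.',
    voice_id: 'placeholder-voice-id', // hypothetical ElevenLabs voice id
    output_path: './puppet-pipeline/06_delivery/intro.mp4',
    duration: 5
  });
}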
// Export for MCP tool integration
export async function runSingleAgentPipeline(args: {
  reference_image_path: string;
  character_name: string;
  proof_of_concept?: boolean;
  angles?: string[];
  emotions?: string[];
  mouth_states?: string[];
}): Promise<QCReport> {
  const openaiApiKey = process.env.OPENAI_API_KEY;
  if (!openaiApiKey) {
    throw new Error('OPENAI_API_KEY environment variable required');
  }

  const pipeline = new SingleAgentPuppetPipeline(openaiApiKey);
  return await pipeline.runPipeline(args.reference_image_path, args.character_name, {
    proofOfConcept: args.proof_of_concept,
    angles: args.angles,
    emotions: args.emotions,
    mouthStates: args.mouth_states
  });
}

// Hybrid workflow export
export async function runHybridPuppetPipeline(args: {
  reference_image_path: string;
  character_name: string;
  proof_of_concept?: boolean;
  create_affogato_character?: boolean;
  scene_generations?: SceneGenerationRequest[];
  voice_videos?: VoiceVideoRequest[];
}): Promise<{
  puppet_qc_report: QCReport;
  affogato_character?: AffogatoCharacterData;
  scene_results?: string[];
  video_results?: string[];
}> {
  const pipeline = new HybridPuppetPipeline(process.env.OPENAI_API_KEY!);

  // Step 1: Create core puppet with OpenAI
  console.log('🎭 Phase 1: Creating core puppet with OpenAI...');
  const puppetReport = await pipeline.runPipeline(args.reference_image_path, args.character_name, {
    proofOfConcept: args.proof_of_concept
  });

  let affogatoCharacter: AffogatoCharacterData | undefined;
  const sceneResults: string[] = [];
  const videoResults: string[] = [];

  // Step 2: Create Affogato character from the best puppet
  if (args.create_affogato_character && puppetReport.items.length > 0) {
    console.log('🎬 Phase 2: Creating Affogato character for scene consistency...');

    // Find the best-scoring passed puppet by mean QC score
    // (QCItem has no overall_average field, so it is computed here)
    const meanScore = (item: QCItem) => {
      const values = Object.values(item.scores);
      return values.reduce((sum, v) => sum + v, 0) / values.length;
    };
    const bestPuppet = puppetReport.items
      .filter(item => item.status === 'pass')
      .sort((a, b) => meanScore(b) - meanScore(a))[0];

    if (bestPuppet) {
      // Note: QCItem.filename is relative to the character's 04_generations folder
      affogatoCharacter = await pipeline.createAffogatoCharacter(
        args.character_name,
        bestPuppet.filename,
        bestPuppet.notes || `Character: ${args.character_name}`
      );
    }
  }

  // Step 3: Generate scene images with character consistency
  if (affogatoCharacter && args.scene_generations) {
    console.log('🎬 Phase 3: Generating consistent character scenes...');
    for (const sceneRequest of args.scene_generations) {
      try {
        await pipeline.generateSceneImage(affogatoCharacter, sceneRequest);
        sceneResults.push(sceneRequest.output_path);
      } catch (error) {
        console.error(`Failed to generate scene: ${sceneRequest.scene_prompt}`, error);
      }
    }
  }

  // Step 4: Create voice videos
  if (args.voice_videos) {
    console.log('🎤 Phase 4: Creating voice videos...');
    for (const voiceRequest of args.voice_videos) {
      try {
        await pipeline.createVoiceVideo(voiceRequest);
        videoResults.push(voiceRequest.output_path);
      } catch (error) {
        console.error(`Failed to create voice video: ${voiceRequest.script}`, error);
      }
    }
  }

  return {
    puppet_qc_report: puppetReport,
    affogato_character: affogatoCharacter,
    scene_results: sceneResults,
    video_results: videoResults
  };
}
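// Hedged MCP-style invocation sketch: demonstrates the two exported entry
// points with a minimal argument set. The reference path is a placeholder
// assumption.
export async function exampleMcpEntryPoints(): Promise<void> {
  // Single-agent run, narrowed to a custom angle/emotion/mouth subset
  // (2 angles x 1 emotion x 2 mouth states = 4 images).
  const qc = await runSingleAgentPipeline({
    reference_image_path: './01_input/reference/puppet.png',
    character_name: 'Demo Puppet',
    angles: ['front', 'back'],
    emotions: ['neutral'],
    mouth_states: ['closed', 'open-wide']
  });
  console.log(`Single-agent pass rate: ${(qc.pass_rate * 100).toFixed(1)}%`);

  // Hybrid run without the optional scene/video phases.
  const result = await runHybridPuppetPipeline({
    reference_image_path: './01_input/reference/puppet.png',
    character_name: 'Demo Puppet',
    proof_of_concept: true,
    create_affogato_character: false
  });
  console.log(`Hybrid puppet pass rate: ${(result.puppet_qc_report.pass_rate * 100).toFixed(1)}%`);
}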
