// ElevenLabs Voice Generation Client - Rebuilt to follow user guidance notes exactly
// Based on pipeline_poc.py patterns from uploaded guidance documentation
import axios from 'axios';
import fs from 'fs/promises';
import path from 'path';
/**
 * Client for the ElevenLabs text-to-speech REST API (v1).
 *
 * Wraps voice discovery, heuristic voice-metadata enrichment (accent /
 * country / gender / age guessed from names, descriptions and labels),
 * and speech generation following the guidance-notes pattern
 * (eleven_multilingual_v2, stability=0.3, similarity_boost=0.7).
 * When no API key is configured, voice listing falls back to canned
 * demo voices so UI dropdowns still have data to render.
 */
export class ElevenLabsClient {
  /**
   * @param {string|null} apiKey - ElevenLabs API key; falls back to the
   *   ELEVENLABS_API_KEY environment variable when omitted.
   */
  constructor(apiKey = null) {
    this.apiKey = apiKey || process.env.ELEVENLABS_API_KEY;
    this.baseURL = 'https://api.elevenlabs.io/v1';
    // Headers following guidance notes pattern
    this.headers = {
      'xi-api-key': this.apiKey,
      'Content-Type': 'application/json'
    };
  }

  /**
   * Create a new character voice from a free-text description by picking
   * the best-matching pre-existing ElevenLabs voice.
   *
   * @param {string} characterName - Character the voice is for (echoed back).
   * @param {string} voiceDescription - Free-text description to match against.
   * @param {string|null} referenceAudioPath - Unused today; reserved for
   *   future voice cloning from reference audio.
   * @returns {Promise<object>} `{success: true, voice_id, voice_name,
   *   character_name, description, preview_url}` or `{success: false, error}`.
   */
  async createCharacterVoice(characterName, voiceDescription, referenceAudioPath = null) {
    console.log(`🎤 Creating voice for ${characterName}...`);
    try {
      // For now, use a pre-existing voice that matches the description.
      // In production, you could use voice cloning with reference audio.
      const voices = await this.getAvailableVoices();
      // Select best matching voice based on description
      const selectedVoice = this.selectVoiceFromDescription(voices, voiceDescription);
      console.log(`✅ Selected voice: ${selectedVoice.name} for ${characterName}`);
      return {
        success: true,
        voice_id: selectedVoice.voice_id,
        voice_name: selectedVoice.name,
        character_name: characterName,
        description: voiceDescription,
        preview_url: selectedVoice.preview_url
      };
    } catch (error) {
      console.error('❌ Voice creation failed:', error.message);
      return {
        success: false,
        error: error.message
      };
    }
  }

  /**
   * Fetch the raw voice list from the ElevenLabs /voices endpoint.
   * @returns {Promise<object[]>} Array of voice records as returned by the API.
   * @throws {Error} When the request fails (wraps the API detail message).
   */
  async getAvailableVoices() {
    try {
      const response = await axios.get(`${this.baseURL}/voices`, {
        headers: this.headers
      });
      return response.data.voices;
    } catch (error) {
      throw new Error(`Failed to get voices: ${error.response?.data?.detail || error.message}`);
    }
  }

  /**
   * Get all voices with enhanced metadata for a form dropdown.
   * Falls back to demo voices when no API key is set or the API errors,
   * so this method never throws.
   * @returns {Promise<object[]>} Voices enriched with accent/country/gender/age.
   */
  async getVoices() {
    console.log('🎤 Fetching ElevenLabs voices for form dropdown...');
    try {
      if (!this.apiKey) {
        console.log('⚠️ ElevenLabs API key not found, returning demo voices');
        return this.getDemoVoices();
      }
      const voices = await this.getAvailableVoices();
      const enhancedVoices = voices.map((voice) => ({
        voice_id: voice.voice_id,
        name: voice.name,
        category: voice.category,
        description: voice.description || '',
        preview_url: voice.preview_url,
        labels: voice.labels || {},
        accent: this.detectAccent(voice),
        country: this.detectCountry(voice),
        gender: this.detectGender(voice),
        age: this.detectAge(voice)
      }));
      console.log(`✅ Retrieved ${enhancedVoices.length} ElevenLabs voices`);
      return enhancedVoices;
    } catch (error) {
      console.error('❌ ElevenLabs API error:', error.message);
      console.log('⚠️ Falling back to demo voices');
      return this.getDemoVoices();
    }
  }

  /**
   * Generate a short voice sample (guidance-notes defaults: stability=0.3,
   * similarity=0.7) and return it base64-encoded for web playback.
   * @param {string} voiceId - ElevenLabs voice ID to sample.
   * @param {string} text - Sample text to speak.
   * @returns {Promise<{audio_base64: string, content_type: string}>}
   * @throws {Error} When no API key is configured or the request fails.
   */
  async generateVoiceSample(voiceId, text = "Hello, this is a voice sample for character selection.") {
    console.log(`🎤 Generating voice sample for voice ID: ${voiceId}`);
    try {
      if (!this.apiKey) {
        throw new Error('ElevenLabs API key required for voice sampling');
      }
      const response = await axios.post(
        `${this.baseURL}/text-to-speech/${voiceId}`,
        {
          text: text,
          model_id: "eleven_multilingual_v2",
          voice_settings: {
            stability: 0.3,
            similarity_boost: 0.7
          }
        },
        {
          headers: this.headers,
          responseType: 'arraybuffer',
          timeout: 120000
        }
      );
      // Convert audio buffer to base64 for web playback
      const audioBuffer = Buffer.from(response.data);
      const base64Audio = audioBuffer.toString('base64');
      console.log(`✅ Voice sample generated for ${voiceId}`);
      return {
        audio_base64: base64Audio,
        content_type: 'audio/mpeg'
      };
    } catch (error) {
      console.error(`❌ Voice sample generation failed for ${voiceId}:`, error.message);
      throw new Error(`Voice sampling failed: ${error.message}`);
    }
  }

  /**
   * Canned demo voices used when the API key is missing or the API fails.
   * @returns {object[]} Two static voice records shaped like getVoices() output.
   */
  getDemoVoices() {
    return [
      {
        voice_id: 'demo_voice_1',
        name: 'Demo Voice 1',
        category: 'premade',
        description: 'Sample voice for testing',
        preview_url: null,
        accent: 'American',
        country: 'United States',
        gender: 'Male',
        age: 'Young Adult'
      },
      {
        voice_id: 'demo_voice_2',
        name: 'Demo Voice 2',
        category: 'premade',
        description: 'Sample voice for testing',
        preview_url: null,
        accent: 'British',
        country: 'United Kingdom',
        gender: 'Female',
        age: 'Adult'
      }
    ];
  }

  // Build the lower-cased haystack (description + name + label values)
  // shared by the detect* heuristics below.
  #voiceSearchText(voice) {
    const description = (voice.description || '').toLowerCase();
    const name = (voice.name || '').toLowerCase();
    const labels = Object.values(voice.labels || {}).join(' ').toLowerCase();
    return `${description} ${name} ${labels}`;
  }

  /**
   * Heuristically detect an accent from a voice's name, description and labels.
   * Short ambiguous tokens ('us', 'uk', 'english') are matched on word
   * boundaries: previously `includes('us')` matched inside 'australian'/'aussie',
   * misclassifying Australian voices as American.
   * @param {object} voice - Voice record from the ElevenLabs API.
   * @returns {string} Accent label, or 'Neutral' when nothing matches.
   */
  detectAccent(voice) {
    const allText = this.#voiceSearchText(voice);
    const hasWord = (word) => new RegExp(`\\b${word}\\b`).test(allText);
    if (allText.includes('british') || hasWord('uk') || hasWord('english')) return 'British';
    // Check the more specific 'Southern American' before plain 'American'
    // so 'southern american' doesn't stop at the generic branch.
    if (allText.includes('southern') || allText.includes('texas')) return 'Southern American';
    if (allText.includes('american') || hasWord('us') || allText.includes('california')) return 'American';
    if (allText.includes('australian') || allText.includes('aussie')) return 'Australian';
    if (allText.includes('irish') || allText.includes('ireland')) return 'Irish';
    if (allText.includes('scottish') || allText.includes('scotland')) return 'Scottish';
    if (allText.includes('canadian')) return 'Canadian';
    if (allText.includes('french')) return 'French';
    if (allText.includes('german')) return 'German';
    if (allText.includes('italian')) return 'Italian';
    if (allText.includes('spanish')) return 'Spanish';
    if (allText.includes('indian')) return 'Indian';
    if (allText.includes('japanese')) return 'Japanese';
    if (allText.includes('chinese')) return 'Chinese';
    return 'Neutral';
  }

  /**
   * Map the detected accent to a country name for display.
   * @param {object} voice - Voice record from the ElevenLabs API.
   * @returns {string} Country name, or 'International' for unknown accents.
   */
  detectCountry(voice) {
    const accent = this.detectAccent(voice);
    const accentToCountry = {
      'British': 'United Kingdom',
      'American': 'United States',
      'Australian': 'Australia',
      'Irish': 'Ireland',
      'Scottish': 'Scotland',
      'Southern American': 'United States',
      'Canadian': 'Canada',
      'French': 'France',
      'German': 'Germany',
      'Italian': 'Italy',
      'Spanish': 'Spain',
      'Indian': 'India',
      'Japanese': 'Japan',
      'Chinese': 'China'
    };
    return accentToCountry[accent] || 'International';
  }

  /**
   * Heuristically detect gender from voice metadata.
   * Word-boundary matching: `\bmale\b` cannot match inside 'female'
   * (so correctness no longer depends on check order), and 'man' no
   * longer matches inside words like 'german' or 'human'.
   * @param {object} voice - Voice record from the ElevenLabs API.
   * @returns {string} 'Female', 'Male', or 'Neutral'.
   */
  detectGender(voice) {
    const allText = this.#voiceSearchText(voice);
    if (/\b(female|woman|girl)\b/.test(allText)) return 'Female';
    if (/\b(male|man|boy)\b/.test(allText)) return 'Male';
    return 'Neutral';
  }

  /**
   * Heuristically detect an age category from voice metadata.
   * Fixes over-broad substring matches: ElevenLabs labels young-adult
   * voices 'young' (previously misread as 'Child'), and `\bold\b` avoids
   * matching inside 'bold'/'gold'. 'Elderly' is checked before the generic
   * 'adult'/'mature' bucket so the more specific category wins.
   * @param {object} voice - Voice record from the ElevenLabs API.
   * @returns {string} One of 'Child', 'Teenager', 'Elderly', 'Adult', 'Young Adult'.
   */
  detectAge(voice) {
    const allText = this.#voiceSearchText(voice);
    if (/\b(child|kid)\b/.test(allText)) return 'Child';
    if (allText.includes('teen')) return 'Teenager'; // also covers 'teenager'
    if (allText.includes('elderly') || /\bold\b/.test(allText) || allText.includes('senior')) return 'Elderly';
    if (allText.includes('young')) return 'Young Adult';
    if (allText.includes('adult') || allText.includes('mature')) return 'Adult';
    return 'Young Adult';
  }

  /**
   * Generate speech and save it as `<characterName>_<timestamp>.mp3` under
   * outputPath, using the exact guidance-notes pattern:
   * make_tts(text, out_mp3, voice_id, stability=0.3, similarity=0.7).
   *
   * @param {string} text - Text to synthesize.
   * @param {string} voiceId - ElevenLabs voice ID.
   * @param {string} outputPath - Directory to write the mp3 into (must exist).
   * @param {string} characterName - Used in the output filename and result.
   * @param {number} stability - Voice stability setting.
   * @param {number} similarity - similarity_boost setting.
   * @returns {Promise<object>} `{success: true, audio_path, text, voice_id,
   *   character_name}` or `{success: false, error}` — never throws.
   */
  async generateSpeech(text, voiceId, outputPath, characterName, stability = 0.3, similarity = 0.7) {
    console.log(`🗣️ Generating speech for ${characterName}...`);
    try {
      // Following guidance notes exactly: eleven_multilingual_v2 model
      // with the specific voice_settings structure.
      const response = await axios.post(
        `${this.baseURL}/text-to-speech/${voiceId}`,
        {
          text: text,
          model_id: "eleven_multilingual_v2",
          voice_settings: {
            stability: stability,
            similarity_boost: similarity
          }
        },
        {
          headers: this.headers,
          responseType: 'arraybuffer',
          timeout: 120000
        }
      );
      // Timestamp in the filename keeps repeated generations from clobbering
      // each other.
      const audioPath = path.join(outputPath, `${characterName}_${Date.now()}.mp3`);
      await fs.writeFile(audioPath, response.data);
      console.log(`✅ Speech generated using guidance notes pattern: ${audioPath}`);
      return {
        success: true,
        audio_path: audioPath,
        text: text,
        voice_id: voiceId,
        character_name: characterName
      };
    } catch (error) {
      console.error('❌ Speech generation failed:', error.message);
      return {
        success: false,
        error: error.message
      };
    }
  }

  /**
   * Core TTS function following guidance notes EXACTLY:
   * make_tts(text, out_mp3, voice_id, stability=0.3, similarity=0.7).
   * Intentionally self-contained (builds its own URL and headers) to
   * mirror the pipeline_poc.py reference implementation.
   *
   * @returns {Promise<string>} The out_mp3 path that was written.
   * @throws Propagates axios/fs errors to the caller.
   */
  async make_tts(text, out_mp3, voice_id, stability = 0.3, similarity = 0.7) {
    const url = `https://api.elevenlabs.io/v1/text-to-speech/${voice_id}`;
    const payload = {
      text: text,
      model_id: "eleven_multilingual_v2",
      voice_settings: {
        stability: stability,
        similarity_boost: similarity
      }
    };
    const headers = {
      "xi-api-key": this.apiKey,
      "Content-Type": "application/json"
    };
    const response = await axios.post(url, payload, {
      headers: headers,
      responseType: 'arraybuffer',
      timeout: 120000
    });
    await fs.writeFile(out_mp3, response.data);
    return out_mp3;
  }

  /** Legacy camelCase wrapper around make_tts for backward compatibility. */
  async makeTTS(text, outputPath, voiceId, stability = 0.3, similarity = 0.7) {
    return await this.make_tts(text, outputPath, voiceId, stability, similarity);
  }

  /**
   * Generate one audio file per dialogue scene in a script breakdown.
   * Scenes without dialogue are skipped; a 1s pause between requests
   * avoids hammering the API.
   *
   * @param {Array<{dialogue?: string, emotion?: string}>} scriptBreakdown
   * @param {string} voiceId - ElevenLabs voice ID used for every scene.
   * @param {string} characterName - Prefix for output filenames.
   * @param {string} outputDir - Directory to write mp3 files into (must exist).
   * @returns {Promise<object>} `{success: true, audio_files, total_scenes}` or,
   *   on failure, `{success: false, error, partial_audio}` with the scenes
   *   completed before the error.
   */
  async generateScriptAudio(scriptBreakdown, voiceId, characterName, outputDir) {
    console.log(`🎬 Generating audio for all scenes using guidance notes pattern...`);
    const audioFiles = [];
    try {
      for (let i = 0; i < scriptBreakdown.length; i++) {
        const scene = scriptBreakdown[i];
        if (scene.dialogue && scene.dialogue.trim()) {
          console.log(`🎤 Scene ${i + 1}: "${scene.dialogue.substring(0, 50)}..."`);
          // Use makeTTS following guidance notes pattern
          const audioPath = path.join(outputDir, `${characterName}_scene_${i + 1}_${Date.now()}.mp3`);
          await this.makeTTS(scene.dialogue, audioPath, voiceId);
          audioFiles.push({
            scene_number: i + 1,
            dialogue: scene.dialogue,
            audio_path: audioPath,
            emotion: scene.emotion,
            duration_estimate: this.estimateAudioDuration(scene.dialogue)
          });
          // Brief pause between requests
          await new Promise((resolve) => setTimeout(resolve, 1000));
        }
      }
      console.log(`✅ Generated audio for ${audioFiles.length} scenes`);
      return {
        success: true,
        audio_files: audioFiles,
        total_scenes: audioFiles.length
      };
    } catch (error) {
      console.error('❌ Script audio generation failed:', error.message);
      return {
        success: false,
        error: error.message,
        partial_audio: audioFiles
      };
    }
  }

  /**
   * Select the best voice from the available list based on a free-text
   * character description. Matches age, energy, and gender keywords
   * against voice names and label values; falls back to the first voice.
   *
   * @param {object[]} voices - Voice records from the ElevenLabs API.
   * @param {string} description - Free-text character voice description.
   * @returns {object} The matched voice, the first voice, or a stub
   *   `{voice_id: 'default', name: 'Default Voice'}` when the list is empty.
   */
  selectVoiceFromDescription(voices, description) {
    if (!voices || voices.length === 0) {
      return { voice_id: 'default', name: 'Default Voice' };
    }
    const desc = (description || '').toLowerCase();
    for (const voice of voices) {
      const voiceName = (voice.name || '').toLowerCase();
      // FIX: the ElevenLabs API returns `labels` as an object map
      // (e.g. {gender: 'male', age: 'old'}), not an array — calling
      // .map() on the object threw a TypeError, so no voice ever matched.
      // Match against the label VALUES, consistent with the detect* helpers.
      const labels = Object.values(voice.labels || {}).map((l) => String(l).toLowerCase());
      // Match age characteristics
      if (desc.includes('elderly') || desc.includes('wise') || desc.includes('mature')) {
        if (labels.includes('middle aged') || labels.includes('old') || voiceName.includes('old')) {
          return voice;
        }
      }
      // Match energy level
      if (desc.includes('energetic') || desc.includes('upbeat')) {
        if (labels.includes('energetic') || labels.includes('cheerful')) {
          return voice;
        }
      }
      // Match gender if specified
      if (desc.includes('deep') || desc.includes('masculine')) {
        if (labels.includes('male') || labels.includes('deep')) {
          return voice;
        }
      }
      if (desc.includes('higher pitched') || desc.includes('feminine')) {
        if (labels.includes('female') || labels.includes('high pitched')) {
          return voice;
        }
      }
    }
    // Default to first available voice if no match
    return voices[0];
  }

  /**
   * Estimate spoken duration in whole seconds for a piece of text,
   * assuming ~150 words per minute of natural speech. Minimum 1 second.
   * @param {string} text - Text to estimate.
   * @returns {number} Estimated duration in seconds (>= 1).
   */
  estimateAudioDuration(text) {
    const words = text.split(/\s+/).length;
    const minutes = words / 150;
    return Math.max(1, Math.round(minutes * 60)); // seconds
  }

  /**
   * Fetch full details for a single voice.
   * @param {string} voiceId - ElevenLabs voice ID.
   * @returns {Promise<object>} `{success: true, voice}` or
   *   `{success: false, error}` — never throws.
   */
  async getVoiceDetails(voiceId) {
    try {
      const response = await axios.get(`${this.baseURL}/voices/${voiceId}`, {
        headers: this.headers
      });
      return {
        success: true,
        voice: response.data
      };
    } catch (error) {
      return {
        success: false,
        error: error.message
      };
    }
  }
}