elevenlabs-client.js
// ElevenLabs Voice Generation Client - Rebuilt to follow user guidance notes exactly
// Based on pipeline_poc.py patterns from uploaded guidance documentation
import axios from 'axios';
import fs from 'fs/promises';
import path from 'path';

export class ElevenLabsClient {
  constructor(apiKey = null) {
    this.apiKey = apiKey || process.env.ELEVENLABS_API_KEY;
    this.baseURL = 'https://api.elevenlabs.io/v1';
    // Headers following guidance notes pattern
    this.headers = {
      'xi-api-key': this.apiKey,
      'Content-Type': 'application/json'
    };
  }

  // Create a new character voice from description
  async createCharacterVoice(characterName, voiceDescription, referenceAudioPath = null) {
    console.log(`🎤 Creating voice for ${characterName}...`);
    try {
      // For now, use a pre-existing voice that matches the description.
      // In production, you could use voice cloning with reference audio.
      const voices = await this.getAvailableVoices();

      // Select best matching voice based on description
      const selectedVoice = this.selectVoiceFromDescription(voices, voiceDescription);
      console.log(`✅ Selected voice: ${selectedVoice.name} for ${characterName}`);

      return {
        success: true,
        voice_id: selectedVoice.voice_id,
        voice_name: selectedVoice.name,
        character_name: characterName,
        description: voiceDescription,
        preview_url: selectedVoice.preview_url
      };
    } catch (error) {
      console.error('❌ Voice creation failed:', error.message);
      return { success: false, error: error.message };
    }
  }

  // Get available voices from ElevenLabs
  async getAvailableVoices() {
    try {
      const response = await axios.get(`${this.baseURL}/voices`, { headers: this.headers });
      return response.data.voices;
    } catch (error) {
      throw new Error(`Failed to get voices: ${error.response?.data?.detail || error.message}`);
    }
  }

  // Get all voices with enhanced metadata for dropdown
  async getVoices() {
    console.log('🎤 Fetching ElevenLabs voices for form dropdown...');
    try {
      if (!this.apiKey) {
        console.log('⚠️ ElevenLabs API key not found, returning demo voices');
        return this.getDemoVoices();
      }

      const voices = await this.getAvailableVoices();
      const enhancedVoices = voices.map(voice => ({
        voice_id: voice.voice_id,
        name: voice.name,
        category: voice.category,
        description: voice.description || '',
        preview_url: voice.preview_url,
        labels: voice.labels || {},
        accent: this.detectAccent(voice),
        country: this.detectCountry(voice),
        gender: this.detectGender(voice),
        age: this.detectAge(voice)
      }));

      console.log(`✅ Retrieved ${enhancedVoices.length} ElevenLabs voices`);
      return enhancedVoices;
    } catch (error) {
      console.error('❌ ElevenLabs API error:', error.message);
      console.log('⚠️ Falling back to demo voices');
      return this.getDemoVoices();
    }
  }

  // Generate voice sample using guidance notes pattern with default stability=0.3, similarity=0.7
  async generateVoiceSample(voiceId, text = "Hello, this is a voice sample for character selection.") {
    console.log(`🎤 Generating voice sample for voice ID: ${voiceId}`);
    try {
      if (!this.apiKey) {
        throw new Error('ElevenLabs API key required for voice sampling');
      }

      const response = await axios.post(
        `${this.baseURL}/text-to-speech/${voiceId}`,
        {
          text: text,
          model_id: "eleven_multilingual_v2",
          voice_settings: { stability: 0.3, similarity_boost: 0.7 }
        },
        { headers: this.headers, responseType: 'arraybuffer', timeout: 120000 }
      );

      // Convert audio buffer to base64 for web playback
      const audioBuffer = Buffer.from(response.data);
      const base64Audio = audioBuffer.toString('base64');

      console.log(`✅ Voice sample generated for ${voiceId}`);
      return { audio_base64: base64Audio, content_type: 'audio/mpeg' };
    } catch (error) {
      console.error(`❌ Voice sample generation failed for ${voiceId}:`, error.message);
      throw new Error(`Voice sampling failed: ${error.message}`);
    }
  }

  // Get demo voices when API key is not available
  getDemoVoices() {
    return [
      {
        voice_id: 'demo_voice_1',
        name: 'Demo Voice 1',
        category: 'premade',
        description: 'Sample voice for testing',
        preview_url: null,
        accent: 'American',
        country: 'United States',
        gender: 'Male',
        age: 'Young Adult'
      },
      {
        voice_id: 'demo_voice_2',
        name: 'Demo Voice 2',
        category: 'premade',
        description: 'Sample voice for testing',
        preview_url: null,
        accent: 'British',
        country: 'United Kingdom',
        gender: 'Female',
        age: 'Adult'
      }
    ];
  }

  // Detect accent from voice metadata
  detectAccent(voice) {
    const description = (voice.description || '').toLowerCase();
    const name = (voice.name || '').toLowerCase();
    const labels = Object.values(voice.labels || {}).join(' ').toLowerCase();
    const allText = `${description} ${name} ${labels}`;

    // Common accent patterns
    if (allText.includes('british') || allText.includes('uk') || allText.includes('english')) return 'British';
    if (allText.includes('american') || allText.includes('us') || allText.includes('california')) return 'American';
    if (allText.includes('australian') || allText.includes('aussie')) return 'Australian';
    if (allText.includes('irish') || allText.includes('ireland')) return 'Irish';
    if (allText.includes('scottish') || allText.includes('scotland')) return 'Scottish';
    if (allText.includes('southern') || allText.includes('texas')) return 'Southern American';
    if (allText.includes('canadian')) return 'Canadian';
    if (allText.includes('french')) return 'French';
    if (allText.includes('german')) return 'German';
    if (allText.includes('italian')) return 'Italian';
    if (allText.includes('spanish')) return 'Spanish';
    if (allText.includes('indian')) return 'Indian';
    if (allText.includes('japanese')) return 'Japanese';
    if (allText.includes('chinese')) return 'Chinese';

    return 'Neutral';
  }

  // Detect country from voice metadata
  detectCountry(voice) {
    const accent = this.detectAccent(voice);
    const accentToCountry = {
      'British': 'United Kingdom',
      'American': 'United States',
      'Australian': 'Australia',
      'Irish': 'Ireland',
      'Scottish': 'Scotland',
      'Southern American': 'United States',
      'Canadian': 'Canada',
      'French': 'France',
      'German': 'Germany',
      'Italian': 'Italy',
      'Spanish': 'Spain',
      'Indian': 'India',
      'Japanese': 'Japan',
      'Chinese': 'China'
    };
    return accentToCountry[accent] || 'International';
  }

  // Detect gender from voice metadata
  detectGender(voice) {
    const description = (voice.description || '').toLowerCase();
    const name = (voice.name || '').toLowerCase();
    const labels = Object.values(voice.labels || {}).join(' ').toLowerCase();
    const allText = `${description} ${name} ${labels}`;

    if (allText.includes('female') || allText.includes('woman') || allText.includes('girl')) return 'Female';
    if (allText.includes('male') || allText.includes('man') || allText.includes('boy')) return 'Male';

    return 'Neutral';
  }

  // Detect age category from voice metadata
  detectAge(voice) {
    const description = (voice.description || '').toLowerCase();
    const name = (voice.name || '').toLowerCase();
    const labels = Object.values(voice.labels || {}).join(' ').toLowerCase();
    const allText = `${description} ${name} ${labels}`;

    if (allText.includes('child') || allText.includes('kid') || allText.includes('young')) return 'Child';
    if (allText.includes('teen') || allText.includes('teenager')) return 'Teenager';
    if (allText.includes('adult') || allText.includes('mature')) return 'Adult';
    if (allText.includes('elderly') || allText.includes('old') || allText.includes('senior')) return 'Elderly';

    return 'Young Adult';
  }

  // Generate speech using exact guidance notes pattern: make_tts(text, out_mp3, voice_id, stability=0.3, similarity=0.7)
  async generateSpeech(text, voiceId, outputPath, characterName, stability = 0.3, similarity = 0.7) {
    console.log(`🗣️ Generating speech for ${characterName}...`);
    try {
      // Following guidance notes exactly: eleven_multilingual_v2 model with specific voice_settings structure
      const response = await axios.post(
        `${this.baseURL}/text-to-speech/${voiceId}`,
        {
          text: text,
          model_id: "eleven_multilingual_v2",
          voice_settings: { stability: stability, similarity_boost: similarity }
        },
        { headers: this.headers, responseType: 'arraybuffer', timeout: 120000 }
      );

      // Save audio file using guidance notes pattern
      const audioPath = path.join(outputPath, `${characterName}_${Date.now()}.mp3`);
      await fs.writeFile(audioPath, response.data);

      console.log(`✅ Speech generated using guidance notes pattern: ${audioPath}`);
      return {
        success: true,
        audio_path: audioPath,
        text: text,
        voice_id: voiceId,
        character_name: characterName
      };
    } catch (error) {
      console.error('❌ Speech generation failed:', error.message);
      return { success: false, error: error.message };
    }
  }

  // Core TTS function following guidance notes EXACTLY: make_tts(text, out_mp3, voice_id, stability=0.3, similarity=0.7)
  async make_tts(text, out_mp3, voice_id, stability = 0.3, similarity = 0.7) {
    const url = `https://api.elevenlabs.io/v1/text-to-speech/${voice_id}`;
    const payload = {
      text: text,
      model_id: "eleven_multilingual_v2",
      voice_settings: { stability: stability, similarity_boost: similarity }
    };
    const headers = { "xi-api-key": this.apiKey, "Content-Type": "application/json" };

    const response = await axios.post(url, payload, {
      headers: headers,
      responseType: 'arraybuffer',
      timeout: 120000
    });

    await fs.writeFile(out_mp3, response.data);
    return out_mp3;
  }

  // Legacy wrapper for backward compatibility
  async makeTTS(text, outputPath, voiceId, stability = 0.3, similarity = 0.7) {
    return await this.make_tts(text, outputPath, voiceId, stability, similarity);
  }

  // Generate speech for each dialogue line in a script using guidance pattern
  async generateScriptAudio(scriptBreakdown, voiceId, characterName, outputDir) {
    console.log(`🎬 Generating audio for all scenes using guidance notes pattern...`);
    const audioFiles = [];

    try {
      for (let i = 0; i < scriptBreakdown.length; i++) {
        const scene = scriptBreakdown[i];
        if (scene.dialogue && scene.dialogue.trim()) {
          console.log(`🎤 Scene ${i + 1}: "${scene.dialogue.substring(0, 50)}..."`);

          // Use makeTTS following guidance notes pattern
          const audioPath = path.join(outputDir, `${characterName}_scene_${i + 1}_${Date.now()}.mp3`);
          await this.makeTTS(scene.dialogue, audioPath, voiceId);

          const audioResult = {
            success: true,
            audio_path: audioPath,
            text: scene.dialogue,
            voice_id: voiceId,
            character_name: `${characterName}_scene_${i + 1}`
          };

          if (audioResult.success) {
            audioFiles.push({
              scene_number: i + 1,
              dialogue: scene.dialogue,
              audio_path: audioResult.audio_path,
              emotion: scene.emotion,
              duration_estimate: this.estimateAudioDuration(scene.dialogue)
            });
          }

          // Brief pause between requests
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }

      console.log(`✅ Generated audio for ${audioFiles.length} scenes`);
      return { success: true, audio_files: audioFiles, total_scenes: audioFiles.length };
    } catch (error) {
      console.error('❌ Script audio generation failed:', error.message);
      return { success: false, error: error.message, partial_audio: audioFiles };
    }
  }

  // Select best voice from available voices based on description
  selectVoiceFromDescription(voices, description) {
    if (!voices || voices.length === 0) {
      return { voice_id: 'default', name: 'Default Voice' };
    }

    const desc = description.toLowerCase();

    // Voice matching logic based on characteristics
    for (const voice of voices) {
      const voiceName = voice.name.toLowerCase();
      // The API returns labels as an object, not an array, so take its values
      // (matching how the detect* helpers above handle it)
      const labels = Object.values(voice.labels || {}).map(l => String(l).toLowerCase());

      // Match age characteristics
      if (desc.includes('elderly') || desc.includes('wise') || desc.includes('mature')) {
        if (labels.includes('middle aged') || labels.includes('old') || voiceName.includes('old')) {
          return voice;
        }
      }

      // Match energy level
      if (desc.includes('energetic') || desc.includes('upbeat')) {
        if (labels.includes('energetic') || labels.includes('cheerful')) {
          return voice;
        }
      }

      // Match gender if specified
      if (desc.includes('deep') || desc.includes('masculine')) {
        if (labels.includes('male') || labels.includes('deep')) {
          return voice;
        }
      }
      if (desc.includes('higher pitched') || desc.includes('feminine')) {
        if (labels.includes('female') || labels.includes('high pitched')) {
          return voice;
        }
      }
    }

    // Default to first available voice if no match
    return voices[0];
  }

  // Estimate audio duration based on text length
  estimateAudioDuration(text) {
    // Rough estimate: ~150 words per minute for natural speech
    const words = text.split(/\s+/).length;
    const minutes = words / 150;
    return Math.max(1, Math.round(minutes * 60)); // seconds
  }

  // Get voice details
  async getVoiceDetails(voiceId) {
    try {
      const response = await axios.get(`${this.baseURL}/voices/${voiceId}`, { headers: this.headers });
      return { success: true, voice: response.data };
    } catch (error) {
      return { success: false, error: error.message };
    }
  }
}
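
Below is a minimal usage sketch of the client, not part of the original file: the character description, output path, and dialogue text are hypothetical, and it assumes ELEVENLABS_API_KEY is set in the environment and the script runs as an ES module (top-level await).

// usage-example.js — hypothetical sketch, not part of elevenlabs-client.js
import { ElevenLabsClient } from './elevenlabs-client.js';

const client = new ElevenLabsClient();

// List voices with the enhanced metadata (falls back to demo voices
// when no API key is configured).
const voices = await client.getVoices();
console.log(voices.map(v => `${v.name} (${v.accent}, ${v.gender}, ${v.age})`));

// Match a free-text character description to an available voice.
const narrator = await client.createCharacterVoice('Narrator', 'deep, mature male voice');

// Synthesize one line with the core guidance-notes TTS call
// (stability and similarity use the 0.3 / 0.7 defaults).
if (narrator.success) {
  await client.make_tts('Once upon a time...', './narrator_intro.mp3', narrator.voice_id);
}

For multi-scene scripts, generateScriptAudio() wraps the same TTS call in a loop, writing one MP3 per scene with a one-second pause between requests.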

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/bermingham85/mcp-puppet-pipeline'
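
The same lookup from Node.js, as a minimal sketch (assumes Node 18+ with the global fetch API and that the endpoint returns JSON):

const res = await fetch('https://glama.ai/api/mcp/v1/servers/bermingham85/mcp-puppet-pipeline');
const server = await res.json();
console.log(server);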

If you have feedback or need assistance with the MCP directory API, please join our Discord server.