// elevenlabs.provider.ts (7.54 kB)
/**
 * @fileoverview ElevenLabs text-to-speech provider implementation.
 * @module src/services/speech/providers/elevenlabs
 */
import { injectable } from 'tsyringe';
import { JsonRpcErrorCode, McpError } from '@/types-global/errors.js';
import {
  fetchWithTimeout,
  logger,
  requestContextService,
} from '@/utils/index.js';
import type { ISpeechProvider } from '../core/ISpeechProvider.js';
import type {
  SpeechProviderConfig,
  SpeechToTextOptions,
  SpeechToTextResult,
  TextToSpeechOptions,
  TextToSpeechResult,
  Voice,
} from '../types.js';
/**
 * A single voice entry in the ElevenLabs voice-list response.
 *
 * Field names are snake_case as read from the parsed JSON payload;
 * `getVoices` maps each entry onto the provider-agnostic `Voice` type.
 */
interface ElevenLabsVoice {
  /** Voice identifier; exposed as `Voice.id`. */
  voice_id: string;
  /** Voice name; exposed as `Voice.name`. */
  name: string;
  /** Optional description; copied to `Voice.description` when present. */
  description?: string;
  /** Optional category; copied to `Voice.category` when present. */
  category?: string;
  /**
   * Optional string labels. The `gender` key, when present, is used to
   * populate `Voice.gender`; the whole map is passed through in
   * `Voice.metadata.labels`.
   */
  labels?: Record<string, string>;
  /** Optional preview URL; copied to `Voice.previewUrl` when present. */
  preview_url?: string;
}
/**
 * Top-level shape expected of the parsed `/voices` response body.
 */
interface ElevenLabsVoicesResponse {
  /** Voice entries that `getVoices` converts into `Voice` objects. */
  voices: ElevenLabsVoice[];
}
/**
 * ElevenLabs TTS provider.
 *
 * Supports text-to-speech synthesis with customizable voices through the
 * ElevenLabs HTTP API. Speech-to-text is not supported (`supportsSTT` is
 * `false`), and `speechToText` always throws.
 */
@injectable()
export class ElevenLabsProvider implements ISpeechProvider {
  /** Maximum number of characters accepted by a single `textToSpeech` call. */
  private static readonly MAX_TEXT_LENGTH = 5000;

  public readonly name = 'elevenlabs';
  public readonly supportsTTS = true;
  public readonly supportsSTT = false;
  private readonly apiKey: string;
  private readonly baseUrl: string;
  private readonly defaultVoiceId: string;
  private readonly defaultModelId: string;
  private readonly timeout: number;

  /**
   * Creates the provider from the given configuration.
   *
   * Falsy `baseUrl`, `defaultVoiceId`, `defaultModelId` and `timeout` values
   * fall back to built-in defaults.
   *
   * @param config - Provider configuration; `apiKey` is mandatory.
   * @throws McpError with `InvalidParams` when `config.apiKey` is missing or empty.
   */
  constructor(config: SpeechProviderConfig) {
    if (!config.apiKey) {
      throw new McpError(
        JsonRpcErrorCode.InvalidParams,
        'ElevenLabs API key is required',
      );
    }
    this.apiKey = config.apiKey;
    // Strip trailing slashes so `${baseUrl}/path` never produces `//path`
    // when the configured URL already ends with a slash.
    this.baseUrl = (config.baseUrl || 'https://api.elevenlabs.io/v1').replace(
      /\/+$/,
      '',
    );
    this.defaultVoiceId = config.defaultVoiceId || 'EXAVITQu4vr4xnSDxMaL'; // Default: Bella
    this.defaultModelId = config.defaultModelId || 'eleven_monolingual_v1';
    // Passed straight to fetchWithTimeout; presumably milliseconds — TODO confirm.
    this.timeout = config.timeout || 30000;
    logger.info(
      `ElevenLabs TTS provider initialized: ${this.baseUrl}, voice=${this.defaultVoiceId}`,
    );
  }

  /**
   * Convert text to speech using ElevenLabs API.
   *
   * @param options - Text to synthesize plus optional voice, model and request
   *   context overrides. Missing voice/model values fall back to the provider
   *   defaults.
   * @returns The audio bytes as a `Buffer`, reported with format `'mp3'`, the
   *   input character count, and the voice/model/provider used.
   * @throws McpError with `InvalidParams` when the text is empty, whitespace
   *   only, or longer than 5000 characters.
   * @throws McpError with `InternalError` when the API responds with a non-OK
   *   status or the request fails for any other reason.
   */
  async textToSpeech(
    options: TextToSpeechOptions,
  ): Promise<TextToSpeechResult> {
    const context = requestContextService.createRequestContext({
      operation: 'elevenlabs-tts',
      ...(options.context || {}),
    });
    const voiceId = options.voice?.voiceId || this.defaultVoiceId;
    const modelId = options.modelId || this.defaultModelId;
    logger.debug('Converting text to speech with ElevenLabs', context);
    if (!options.text || options.text.trim().length === 0) {
      throw new McpError(
        JsonRpcErrorCode.InvalidParams,
        'Text cannot be empty',
        context,
      );
    }
    if (options.text.length > ElevenLabsProvider.MAX_TEXT_LENGTH) {
      throw new McpError(
        JsonRpcErrorCode.InvalidParams,
        `Text exceeds maximum length of ${ElevenLabsProvider.MAX_TEXT_LENGTH} characters`,
        context,
      );
    }
    // The voice id may originate from caller input; encode it so characters
    // such as `/`, `?` or `#` cannot alter the request path or query.
    const url = `${this.baseUrl}/text-to-speech/${encodeURIComponent(voiceId)}`;
    // Build voice settings
    const voiceSettings = {
      stability: options.voice?.stability ?? 0.5,
      similarity_boost: options.voice?.similarityBoost ?? 0.75,
      style: options.voice?.style ?? 0.0,
      use_speaker_boost: true,
    };
    const requestBody = {
      text: options.text,
      model_id: modelId,
      voice_settings: voiceSettings,
    };
    try {
      const response = await fetchWithTimeout(url, this.timeout, context, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'xi-api-key': this.apiKey,
        },
        body: JSON.stringify(requestBody),
      });
      if (!response.ok) {
        const errorText = await response.text();
        logger.error(`ElevenLabs API error: ${response.status}`, context);
        throw new McpError(
          JsonRpcErrorCode.InternalError,
          `ElevenLabs API error: ${response.status} - ${errorText}`,
          context,
        );
      }
      const audioBuffer = Buffer.from(await response.arrayBuffer());
      logger.info(
        `Text-to-speech conversion successful (voice=${voiceId}, ${audioBuffer.length} bytes)`,
        context,
      );
      return {
        audio: audioBuffer,
        format: 'mp3',
        characterCount: options.text.length,
        metadata: {
          voiceId,
          modelId,
          provider: this.name,
        },
      };
    } catch (error) {
      // McpErrors raised above already carry the right code and context.
      if (error instanceof McpError) {
        throw error;
      }
      logger.error(
        'Failed to convert text to speech',
        error instanceof Error ? error : new Error(String(error)),
        context,
      );
      throw new McpError(
        JsonRpcErrorCode.InternalError,
        `Failed to convert text to speech: ${error instanceof Error ? error.message : 'Unknown error'}`,
        context,
      );
    }
  }

  /**
   * Speech-to-text is not supported by ElevenLabs.
   *
   * NOTE(review): this method is not `async`, so the error is thrown
   * synchronously instead of being delivered as a rejected promise. Kept
   * as-is because callers may rely on it.
   *
   * @throws McpError with `MethodNotFound`, always.
   */
  speechToText(_options: SpeechToTextOptions): Promise<SpeechToTextResult> {
    throw new McpError(
      JsonRpcErrorCode.MethodNotFound,
      'Speech-to-text is not supported by ElevenLabs provider',
    );
  }

  /**
   * Get available voices from ElevenLabs.
   *
   * @returns One `Voice` per entry in the response. Optional fields are only
   *   set when present in the payload, and the raw labels are passed through
   *   in `metadata.labels`.
   * @throws McpError with `InternalError` when the API responds with a non-OK
   *   status, the body does not contain a `voices` array, or the request fails
   *   for any other reason.
   */
  async getVoices(): Promise<Voice[]> {
    const context = requestContextService.createRequestContext({
      operation: 'elevenlabs-getVoices',
    });
    logger.debug('Fetching available voices from ElevenLabs', context);
    const url = `${this.baseUrl}/voices`;
    try {
      const response = await fetchWithTimeout(url, this.timeout, context, {
        method: 'GET',
        headers: {
          'xi-api-key': this.apiKey,
        },
      });
      if (!response.ok) {
        const errorText = await response.text();
        logger.error(`Failed to fetch voices: ${response.status}`, context);
        throw new McpError(
          JsonRpcErrorCode.InternalError,
          `Failed to fetch voices: ${response.status} - ${errorText}`,
          context,
        );
      }
      // The body is untrusted JSON: check the shape before mapping so a
      // malformed payload gives a clear error instead of a TypeError message.
      const payload = (await response.json()) as
        | Partial<ElevenLabsVoicesResponse>
        | null;
      const rawVoices = payload?.voices;
      if (!Array.isArray(rawVoices)) {
        logger.error(
          'Failed to fetch voices: unexpected response format',
          context,
        );
        throw new McpError(
          JsonRpcErrorCode.InternalError,
          'Failed to fetch voices: unexpected response format',
          context,
        );
      }
      const voices: Voice[] = rawVoices.map((v) => ({
        id: v.voice_id,
        name: v.name,
        // Conditional spreads keep absent optional fields off the result
        // rather than setting them to `undefined`.
        ...(v.description !== undefined && { description: v.description }),
        ...(v.category !== undefined && { category: v.category }),
        ...(v.preview_url !== undefined && { previewUrl: v.preview_url }),
        // NOTE(review): the gender label is cast without validation; values
        // outside the union are passed through unchanged.
        ...(v.labels?.gender !== undefined && {
          gender: v.labels.gender as 'male' | 'female' | 'neutral',
        }),
        metadata: {
          labels: v.labels,
        },
      }));
      logger.info(`Successfully fetched ${voices.length} voices`, context);
      return voices;
    } catch (error) {
      // McpErrors raised above already carry the right code and context.
      if (error instanceof McpError) {
        throw error;
      }
      logger.error(
        'Failed to fetch voices',
        error instanceof Error ? error : new Error(String(error)),
        context,
      );
      throw new McpError(
        JsonRpcErrorCode.InternalError,
        `Failed to fetch voices: ${error instanceof Error ? error.message : 'Unknown error'}`,
        context,
      );
    }
  }

  /**
   * Health check for ElevenLabs API.
   *
   * Never throws: any failure from `getVoices` is logged and reported as
   * `false`.
   *
   * @returns `true` when `getVoices` completes without throwing, otherwise
   *   `false`.
   */
  async healthCheck(): Promise<boolean> {
    try {
      // Simple health check: try to fetch voices
      await this.getVoices();
      return true;
    } catch (error) {
      const context = requestContextService.createRequestContext({
        operation: 'elevenlabs-healthCheck',
      });
      logger.error(
        'ElevenLabs health check failed',
        error instanceof Error ? error : new Error(String(error)),
        context,
      );
      return false;
    }
  }
}