Speech MCP
by Kvadratni
- src
- speech_mcp
- ui
- components
"""
Text-to-speech adapter for the Speech UI.
This module provides a PyQt wrapper around the TTS adapters.
"""
import os
import time
import threading
import random
import math
from PyQt5.QtCore import QObject, pyqtSignal, QTimer
# Import the centralized logger
from speech_mcp.utils.logger import get_logger
# Get a logger for this module
logger = get_logger(__name__, component="tts")
# Import centralized constants
from speech_mcp.constants import ENV_TTS_VOICE
class TTSAdapter(QObject):
"""
Text-to-speech adapter for PyQt UI.
This class provides a Qt wrapper around the TTS adapters to integrate with PyQt signals.
"""
speaking_finished = pyqtSignal()
speaking_started = pyqtSignal()
speaking_progress = pyqtSignal(float) # Progress between 0.0 and 1.0
audio_level = pyqtSignal(float) # Audio level for visualization
def __init__(self):
super().__init__()
self.tts_engine = None
self.is_speaking = False
self._speaking_lock = threading.Lock() # Add a lock for thread safety
self.available_voices = []
self.current_voice = None
self.initialize_tts()
def initialize_tts(self):
"""Initialize the TTS engine using the adapter system"""
try:
# First try to import the Kokoro adapter
logger.info("Initializing TTS using adapter system")
# Try to import the TTS adapters
from speech_mcp.tts_adapters import KokoroTTS, Pyttsx3TTS
# First try Kokoro (our primary TTS engine)
try:
logger.info("Trying to initialize Kokoro TTS adapter")
self.tts_engine = KokoroTTS()
if self.tts_engine.is_initialized:
logger.info("Kokoro TTS adapter initialized successfully")
else:
logger.warning("Kokoro TTS adapter initialization failed")
raise ImportError("Kokoro initialization failed")
except ImportError as e:
logger.warning(f"Failed to initialize Kokoro TTS adapter: {e}")
# Fall back to pyttsx3
try:
logger.info("Falling back to pyttsx3 TTS adapter")
self.tts_engine = Pyttsx3TTS()
if self.tts_engine.is_initialized:
logger.info("pyttsx3 TTS adapter initialized successfully")
else:
logger.warning("pyttsx3 TTS adapter initialization failed")
raise ImportError("pyttsx3 initialization failed")
except ImportError as e:
logger.error(f"Failed to initialize pyttsx3 TTS adapter: {e}")
self.tts_engine = None
except Exception as e:
logger.error(f"Error initializing Kokoro: {e}")
# Fall back to pyttsx3
try:
logger.info("Falling back to pyttsx3 TTS adapter")
self.tts_engine = Pyttsx3TTS()
if self.tts_engine.is_initialized:
logger.info("pyttsx3 TTS adapter initialized successfully")
else:
logger.warning("pyttsx3 TTS adapter initialization failed")
raise ImportError("pyttsx3 initialization failed")
except ImportError as e:
logger.error(f"Failed to initialize pyttsx3 TTS adapter: {e}")
self.tts_engine = None
except Exception as e:
logger.error(f"Error initializing pyttsx3: {e}")
self.tts_engine = None
# If we have a TTS engine, get the available voices
if self.tts_engine:
self.available_voices = self.tts_engine.get_available_voices()
self.current_voice = self.tts_engine.voice
logger.info(f"TTS initialized with {len(self.available_voices)} voices, current voice: {self.current_voice}")
return True
else:
logger.error("No TTS engine available")
return False
except ImportError as e:
logger.warning(f"Failed to import TTS adapters: {e}")
# Direct fallback to pyttsx3 if adapters are not available
try:
import pyttsx3
self.tts_engine = pyttsx3.init()
logger.info("pyttsx3 text-to-speech engine initialized directly")
# Get available voices
voices = self.tts_engine.getProperty('voices')
self.available_voices = [f"pyttsx3:{voice.id}" for voice in voices]
if self.available_voices:
self.current_voice = self.available_voices[0]
logger.debug(f"Available pyttsx3 voices: {len(voices)}")
for i, voice in enumerate(voices):
logger.debug(f"Voice {i}: {voice.id} - {voice.name}")
return True
except ImportError as e:
logger.error(f"pyttsx3 not available: {e}")
except Exception as e:
logger.error(f"Error initializing pyttsx3: {e}")
logger.error("No TTS engine available")
return False
except Exception as e:
logger.error(f"Error initializing TTS: {e}")
return False
def speak(self, text):
"""Speak the given text"""
if not text:
logger.warning("Empty text provided to speak")
return False
if not self.tts_engine:
logger.warning("No TTS engine available")
return False
# Use a lock to safely check and update speaking state
with self._speaking_lock:
if self.is_speaking:
logger.warning("Already speaking, ignoring new request")
return False
# Set speaking state before starting thread
self.is_speaking = True
logger.info(f"TTSAdapter.speak called with text: {text[:50]}{'...' if len(text) > 50 else ''}")
# Emit speaking started signal on the main thread
self.speaking_started.emit()
# Start speaking in a separate thread
speak_thread = threading.Thread(target=self._speak_thread, args=(text,), daemon=True)
speak_thread.start()
logger.debug("Started _speak_thread")
return True
def emit_audio_level(self):
"""Emit audio level signal for visualization"""
# Use the lock to safely check the speaking state
with self._speaking_lock:
is_speaking = self.is_speaking
if not is_speaking:
if hasattr(self, 'audio_level_timer') and self.audio_level_timer.isActive():
self.audio_level_timer.stop()
self.audio_level.emit(0.0) # Reset to zero when not speaking
return
# When speaking, we don't need to emit actual levels since we're using pre-recorded patterns
# Just emit a dummy signal to trigger visualization updates
self.audio_level.emit(0.5)
def _speak_thread(self, text):
"""Thread function for speaking text"""
try:
logger.info(f"_speak_thread started for text: {text[:50]}{'...' if len(text) > 50 else ''}")
# Use the TTS engine's speak method
if hasattr(self.tts_engine, 'speak'):
# This is one of our adapters
logger.info("Using TTS adapter speak method")
try:
result = self.tts_engine.speak(text)
logger.info(f"TTS speak result: {result}")
if not result:
logger.error("TTS failed")
except Exception as e:
logger.error(f"Exception in TTS speak: {e}", exc_info=True)
result = False
elif hasattr(self.tts_engine, 'say'):
# This is direct pyttsx3
logger.info("Using direct pyttsx3 say method")
self.tts_engine.say(text)
self.tts_engine.runAndWait()
logger.info("pyttsx3 speech completed")
else:
logger.error("TTS engine does not have speak or say method")
logger.info("Speech completed")
except Exception as e:
logger.error(f"Error during text-to-speech: {e}", exc_info=True)
finally:
# Use the lock to safely update the speaking state
with self._speaking_lock:
self.is_speaking = False
# Emit the signal after releasing the lock
self.speaking_finished.emit()
logger.info("Speaking finished signal emitted")
def set_voice(self, voice_id):
"""Set the voice to use for TTS"""
if not self.tts_engine:
logger.warning("No TTS engine available")
return False
try:
if hasattr(self.tts_engine, 'set_voice'):
# This is one of our adapters
result = self.tts_engine.set_voice(voice_id)
if result:
self.current_voice = voice_id
logger.info(f"Voice set to: {voice_id}")
return True
else:
logger.error(f"Failed to set voice to: {voice_id}")
return False
elif hasattr(self.tts_engine, 'setProperty'):
# This is direct pyttsx3
# Extract the voice ID from the format "pyttsx3:voice_id"
if voice_id.startswith("pyttsx3:"):
voice_id = voice_id.split(":", 1)[1]
# Find the voice object
for voice in self.tts_engine.getProperty('voices'):
if voice.id == voice_id:
self.tts_engine.setProperty('voice', voice.id)
self.current_voice = f"pyttsx3:{voice.id}"
logger.info(f"Voice set to: {voice.name}")
return True
logger.error(f"Voice not found: {voice_id}")
return False
logger.warning("TTS engine does not support voice selection")
return False
except Exception as e:
logger.error(f"Error setting voice: {e}")
return False
def get_available_voices(self):
"""Get a list of available voices"""
return self.available_voices
def get_current_voice(self):
"""Get the current voice"""
return self.current_voice