"""Test ML models directly."""
import sys
import os
# Setup NVIDIA DLLs
# On Windows, add every ``<prefix>/Lib/site-packages/nvidia/<package>/bin``
# directory to the DLL search path. This runs at import time, before any of
# the test functions below import their model libraries (presumably so the
# GPU runtimes those libraries use can locate the NVIDIA DLLs -- confirm).
if sys.platform == "win32" and hasattr(os, "add_dll_directory"):
    from pathlib import Path

    venv_nvidia = Path(sys.prefix) / "Lib" / "site-packages" / "nvidia"
    # is_dir() rather than exists(): a stray *file* named "nvidia" would make
    # iterdir() raise and abort the whole script.
    if venv_nvidia.is_dir():
        for pkg_dir in venv_nvidia.iterdir():
            bin_dir = pkg_dir / "bin"
            # bin_dir can only be a directory when pkg_dir is one as well, so
            # this single check covers both levels.
            if not bin_dir.is_dir():
                continue
            try:
                os.add_dll_directory(str(bin_dir.absolute()))
            except OSError:
                # Best effort: a directory the OS refuses to register is
                # skipped so the remaining ones are still added. Only
                # OS-level failures are tolerated; any other exception is a
                # programming error and should surface.
                continue
def test_tts():
    """Smoke-test Pocket TTS: load the model and a voice, then synthesize.

    Returns:
        True when every step completes, False when any step raises. On
        failure the error message and full traceback are printed.
    """
    import traceback

    print("\n=== Testing Pocket TTS ===")
    passed = False
    try:
        from pocket_tts import TTSModel

        print("Loading model...")
        tts = TTSModel.load_model()
        print(f"Sample rate: {tts.sample_rate}")

        # Build the voice state from a reference audio prompt.
        print("Loading voice...")
        prompt_uri = "hf://kyutai/tts-voices/voice-donations/Selfie.wav"
        voice = tts.get_state_for_audio_prompt(prompt_uri)
        print("Voice loaded!")

        # Generate speech for one short, fixed sentence.
        print("Synthesizing speech...")
        waveform = tts.generate_audio(voice, "Hello, this is a test.")
        sample_count = len(waveform.numpy())
        print(f"Generated {sample_count} samples")

        print("✓ TTS OK")
        passed = True
    except Exception as exc:
        # Report the failure instead of propagating it, so the caller can
        # continue with the remaining checks.
        print(f"✗ TTS FAILED: {exc}")
        traceback.print_exc()
    return passed
def test_asr():
    """Smoke-test Parakeet ASR by transcribing one second of silence.

    Returns:
        True when model loading and recognition complete, False when any
        step raises. On failure the error message and traceback are printed.
    """
    import traceback

    print("\n=== Testing Parakeet ASR ===")
    passed = False
    try:
        import onnx_asr
        import numpy as np

        # Request error-level ORT logging unless a level is already set in
        # the environment (setdefault never overrides an existing value).
        # NOTE(review): confirm the runtime actually honours this variable.
        os.environ.setdefault("ORT_LOGGING_LEVEL", "ERROR")

        print("Loading model...")
        recognizer = onnx_asr.load_model("nemo-parakeet-tdt-0.6b-v3")
        print("Model loaded!")

        # 16000 zero samples at a declared 16000 Hz rate: 1 second of silence.
        sample_rate = 16000
        silence = np.zeros(sample_rate, dtype=np.float32)
        print("Transcribing silence...")
        transcript = recognizer.recognize(silence, sample_rate=sample_rate)
        print(f"Result: {transcript}")

        print("✓ ASR OK")
        passed = True
    except Exception as exc:
        # Report the failure instead of propagating it, so the caller can
        # continue with the remaining checks.
        print(f"✗ ASR FAILED: {exc}")
        traceback.print_exc()
    return passed
def test_ser():
    """Smoke-test SenseVoice SER by running it on a buffer of silence.

    Returns:
        True when model loading and generation complete, False when any
        step raises. On failure the error message and traceback are printed.
    """
    import traceback

    print("\n=== Testing SenseVoice SER ===")
    passed = False
    try:
        from funasr import AutoModel
        import numpy as np

        print("Loading model...")
        # NOTE(review): trust_remote_code=True presumably allows code shipped
        # with the model repository to run locally -- confirm this is intended.
        classifier = AutoModel(
            model="FunAudioLLM/SenseVoiceSmall", trust_remote_code=True
        )
        print("Model loaded!")

        # 16000 zero samples (one second, assuming the model expects 16 kHz
        # input -- no sample rate is passed explicitly here).
        silence = np.zeros(16000, dtype=np.float32)
        print("Detecting emotion...")
        outcome = classifier.generate(
            input=silence, language="auto", use_itn=False
        )
        print(f"Result: {outcome}")

        print("✓ SER OK")
        passed = True
    except Exception as exc:
        # Report the failure instead of propagating it, so the caller can
        # continue with the remaining checks.
        print(f"✗ SER FAILED: {exc}")
        traceback.print_exc()
    return passed
if __name__ == "__main__":
    # The status lines contain non-ASCII glyphs (✓ / ✗). When stdout is
    # redirected to a file or pipe whose encoding lacks them, print() raises
    # UnicodeEncodeError -- including from inside the test functions' except
    # handlers -- which would abort the run before the summary. Degrade
    # unencodable characters to "?" instead. Guarded because stdout may have
    # been replaced by an object without reconfigure().
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(errors="replace")

    # Test each model. Dict literals evaluate in order, so the checks still
    # run TTS -> ASR -> SER and every one runs even if an earlier one fails.
    results = {
        "TTS": test_tts(),
        "ASR": test_asr(),
        "SER": test_ser(),
    }

    # Summary
    print("\n" + "=" * 40)
    print("SUMMARY")
    print("=" * 40)
    for name, ok in results.items():
        status = "✓ PASS" if ok else "✗ FAIL"
        print(f" {name}: {status}")

    # Exit with error if any failed
    if not all(results.values()):
        sys.exit(1)