#!/usr/bin/env python3
"""
🎤 Vocal Range Separation Demo
=============================
This demo shows the new vocal range separation feature that can split
vocals into Soprano, Alto, Tenor, and Bass parts.
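
Quick-start sketch (an illustrative snippet, assuming the stem_mcp package is
importable and that AudioProcessor.separate_vocal_ranges accepts the arguments
used later in this demo; the input path, output directory, and helper name are
placeholders):

    import asyncio
    from stem_mcp.audio_processor import AudioProcessor

    async def split_vocals(path: str):
        # Split one vocal recording into per-range stems.
        processor = AudioProcessor()
        return await processor.separate_vocal_ranges(
            audio_path=path,
            output_dir="vocal_parts",
            ranges=["soprano", "alto", "tenor", "bass"],
            method="harmonic_analysis",
            enhance_separation=True,
        )

    asyncio.run(split_vocals("vocals.wav"))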
"""
import asyncio
import sys
import numpy as np
import soundfile as sf
from pathlib import Path
# Add the src directory to Python path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from stem_mcp.audio_processor import AudioProcessor
def create_vocal_test_audio():
"""Create test audio with multiple vocal ranges"""
print("🎵 Creating multi-vocal test audio...")
duration = 8.0 # seconds
sample_rate = 44100
t = np.linspace(0, duration, int(sample_rate * duration))
# Create different vocal ranges with harmonic content
# Soprano range (C4-C6): 261.63 - 1046.50 Hz
soprano_fundamental = 440 # A4
soprano = 0.3 * (np.sin(2 * np.pi * soprano_fundamental * t) +
0.5 * np.sin(2 * np.pi * soprano_fundamental * 2 * t) + # 2nd harmonic
0.3 * np.sin(2 * np.pi * soprano_fundamental * 3 * t)) # 3rd harmonic
# Alto range (G3-G5): 196.00 - 783.99 Hz
alto_fundamental = 329.63 # E4
alto = 0.25 * (np.sin(2 * np.pi * alto_fundamental * t) +
0.4 * np.sin(2 * np.pi * alto_fundamental * 2 * t) +
0.2 * np.sin(2 * np.pi * alto_fundamental * 3 * t))
# Tenor range (C3-C5): 130.81 - 523.25 Hz
tenor_fundamental = 220 # A3
tenor = 0.35 * (np.sin(2 * np.pi * tenor_fundamental * t) +
0.6 * np.sin(2 * np.pi * tenor_fundamental * 2 * t) +
0.25 * np.sin(2 * np.pi * tenor_fundamental * 3 * t))
# Bass range (E2-E4): 82.41 - 329.63 Hz
bass_fundamental = 110 # A2
bass = 0.4 * (np.sin(2 * np.pi * bass_fundamental * t) +
0.7 * np.sin(2 * np.pi * bass_fundamental * 2 * t) +
0.3 * np.sin(2 * np.pi * bass_fundamental * 3 * t))
# Create envelopes for more realistic vocals
# Different timing for each voice
soprano_envelope = np.exp(-0.3 * (t - 1)) * (t > 1) * (t < 7)
alto_envelope = np.exp(-0.25 * (t - 0.5)) * (t > 0.5) * (t < 7.5)
tenor_envelope = np.exp(-0.2 * (t - 0)) * (t > 0) * (t < 8)
bass_envelope = np.exp(-0.15 * (t - 0.2)) * (t > 0.2) * (t < 7.8)
# Apply envelopes
soprano *= soprano_envelope
alto *= alto_envelope
tenor *= tenor_envelope
bass *= bass_envelope
# Mix all voices together
mixed_vocals = soprano + alto + tenor + bass
# Add some formant-like resonance
# Apply subtle filtering to simulate vocal tract
from scipy import signal
# Simple formant simulation
b, a = signal.butter(2, [200, 3000], btype='band', fs=sample_rate)
mixed_vocals = signal.filtfilt(b, a, mixed_vocals)
# Normalize
mixed_vocals = mixed_vocals / np.max(np.abs(mixed_vocals)) * 0.8
# Make stereo
stereo_vocals = np.column_stack([mixed_vocals, mixed_vocals])
# Save the test file
test_file = "examples/multi_vocal_test.wav"
Path("examples").mkdir(exist_ok=True)
sf.write(test_file, stereo_vocals, sample_rate)
print(f"✅ Created multi-vocal test audio: {test_file}")
print(f" Contains: Soprano ({soprano_fundamental}Hz), Alto ({alto_fundamental}Hz)")
print(f" Tenor ({tenor_fundamental}Hz), Bass ({bass_fundamental}Hz)")
print(f" Duration: {duration} seconds")
return test_file
async def demo_vocal_separation():
"""Demo the vocal range separation feature"""
print("\n" + "="*60)
print("🎤 VOCAL RANGE SEPARATION DEMO")
print("="*60)
# Create test audio with multiple vocal ranges
test_file = create_vocal_test_audio()
# Initialize processor
print(f"\n⚡ Initializing audio processor...")
processor = AudioProcessor()
print(f"✅ Processor ready (device: {processor.device})")
# Test different separation methods
methods = ["harmonic_analysis", "frequency_bands", "spectral_filtering"]
for method in methods:
print(f"\n🔬 Testing method: {method}")
print("-" * 40)
try:
result = await processor.separate_vocal_ranges(
audio_path=test_file,
output_dir=f"examples/vocal_ranges_{method}",
ranges=["soprano", "alto", "tenor", "bass"],
method=method,
enhance_separation=True
)
print("✅ Separation complete!")
print(result)
except Exception as e:
print(f"❌ Method {method} failed: {e}")
# Test with custom ranges
print(f"\n🎯 Testing custom range selection (Soprano + Tenor only)")
print("-" * 50)
try:
result = await processor.separate_vocal_ranges(
audio_path=test_file,
output_dir="examples/vocal_ranges_custom",
ranges=["soprano", "tenor"], # Only these two
method="harmonic_analysis",
enhance_separation=True
)
print("✅ Custom range separation complete!")
print(result)
except Exception as e:
print(f"❌ Custom range separation failed: {e}")
# Show all created files
print(f"\n📁 Created Files:")
examples_path = Path("examples")
vocal_files = list(examples_path.rglob("*vocal*"))
for file_path in sorted(vocal_files):
if file_path.is_file():
size_mb = file_path.stat().st_size / (1024 * 1024)
print(f" 📄 {file_path} ({size_mb:.2f} MB)")
# Show example MCP usage
print(f"\n🛠️ Example MCP Usage:")
print("="*40)
print('> "I have a vocal track at /path/to/vocals.wav. Please separate it into')
print(' soprano, alto, tenor, and bass parts using harmonic analysis."')
print()
print("MCP Call:")
print('{')
print(' "tool": "separate_vocal_ranges",')
print(' "arguments": {')
print(' "audio_path": "/path/to/vocals.wav",')
print(' "output_dir": "vocal_parts",')
print(' "ranges": ["soprano", "alto", "tenor", "bass"],')
print(' "method": "harmonic_analysis",')
print(' "enhance_separation": true')
print(' }')
print('}')
print(f"\n🎉 Vocal Range Separation Demo Complete!")
print(f"Your MCP server now has advanced vocal processing capabilities!")
if __name__ == "__main__":
try:
import scipy
asyncio.run(demo_vocal_separation())
except ImportError:
print("❌ This demo requires scipy. Install it with:")
print(" pip install scipy")
print("\nOr run the basic version without vocal synthesis...")