#!/usr/bin/env python3
"""
🎤 Vocal Range Separation Demo
=============================
This demo shows the new vocal range separation feature that can split
vocals into Soprano, Alto, Tenor, and Bass parts.
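
Quick-start sketch (an illustrative snippet, assuming the stem_mcp package is
importable and that AudioProcessor.separate_vocal_ranges accepts the arguments
used later in this demo; the input path, output directory, and helper name are
placeholders):

    import asyncio
    from stem_mcp.audio_processor import AudioProcessor

    async def split_vocals(path: str):
        # Split one vocal recording into per-range stems.
        processor = AudioProcessor()
        return await processor.separate_vocal_ranges(
            audio_path=path,
            output_dir="vocal_parts",
            ranges=["soprano", "alto", "tenor", "bass"],
            method="harmonic_analysis",
            enhance_separation=True,
        )

    asyncio.run(split_vocals("vocals.wav"))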
"""
import asyncio
import sys
import numpy as np
import soundfile as sf
from pathlib import Path
# Add the src directory to Python path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from stem_mcp.audio_processor import AudioProcessor
def create_vocal_test_audio():
"""Create test audio with multiple vocal ranges"""
print("🎵 Creating multi-vocal test audio...")
duration = 8.0 # seconds
sample_rate = 44100
t = np.linspace(0, duration, int(sample_rate * duration))
# Create different vocal ranges with harmonic content
# Soprano range (C4-C6): 261.63 - 1046.50 Hz
soprano_fundamental = 440 # A4
soprano = 0.3 * (np.sin(2 * np.pi * soprano_fundamental * t) +
0.5 * np.sin(2 * np.pi * soprano_fundamental * 2 * t) + # 2nd harmonic
0.3 * np.sin(2 * np.pi * soprano_fundamental * 3 * t)) # 3rd harmonic
# Alto range (G3-G5): 196.00 - 783.99 Hz
alto_fundamental = 329.63 # E4
alto = 0.25 * (np.sin(2 * np.pi * alto_fundamental * t) +
0.4 * np.sin(2 * np.pi * alto_fundamental * 2 * t) +
0.2 * np.sin(2 * np.pi * alto_fundamental * 3 * t))
# Tenor range (C3-C5): 130.81 - 523.25 Hz
tenor_fundamental = 220 # A3
tenor = 0.35 * (np.sin(2 * np.pi * tenor_fundamental * t) +
0.6 * np.sin(2 * np.pi * tenor_fundamental * 2 * t) +
0.25 * np.sin(2 * np.pi * tenor_fundamental * 3 * t))
# Bass range (E2-E4): 82.41 - 329.63 Hz
bass_fundamental = 110 # A2
bass = 0.4 * (np.sin(2 * np.pi * bass_fundamental * t) +
0.7 * np.sin(2 * np.pi * bass_fundamental * 2 * t) +
0.3 * np.sin(2 * np.pi * bass_fundamental * 3 * t))
# Create envelopes for more realistic vocals
# Different timing for each voice
soprano_envelope = np.exp(-0.3 * (t - 1)) * (t > 1) * (t < 7)
alto_envelope = np.exp(-0.25 * (t - 0.5)) * (t > 0.5) * (t < 7.5)
tenor_envelope = np.exp(-0.2 * (t - 0)) * (t > 0) * (t < 8)
bass_envelope = np.exp(-0.15 * (t - 0.2)) * (t > 0.2) * (t < 7.8)
# Apply envelopes
soprano *= soprano_envelope
alto *= alto_envelope
tenor *= tenor_envelope
bass *= bass_envelope
# Mix all voices together
mixed_vocals = soprano + alto + tenor + bass
# Add some formant-like resonance
# Apply subtle filtering to simulate vocal tract
from scipy import signal
# Simple formant simulation
b, a = signal.butter(2, [200, 3000], btype='band', fs=sample_rate)
mixed_vocals = signal.filtfilt(b, a, mixed_vocals)
# Normalize
mixed_vocals = mixed_vocals / np.max(np.abs(mixed_vocals)) * 0.8
# Make stereo
stereo_vocals = np.column_stack([mixed_vocals, mixed_vocals])
# Save the test file
test_file = "examples/multi_vocal_test.wav"
Path("examples").mkdir(exist_ok=True)
sf.write(test_file, stereo_vocals, sample_rate)
print(f"✅ Created multi-vocal test audio: {test_file}")
print(f" Contains: Soprano ({soprano_fundamental}Hz), Alto ({alto_fundamental}Hz)")
print(f" Tenor ({tenor_fundamental}Hz), Bass ({bass_fundamental}Hz)")
print(f" Duration: {duration} seconds")
return test_file
async def demo_vocal_separation():
"""Demo the vocal range separation feature"""
print("\n" + "="*60)
print("🎤 VOCAL RANGE SEPARATION DEMO")
print("="*60)
# Create test audio with multiple vocal ranges
test_file = create_vocal_test_audio()
# Initialize processor
print(f"\n⚡ Initializing audio processor...")
processor = AudioProcessor()
print(f"✅ Processor ready (device: {processor.device})")
# Test different separation methods
methods = ["harmonic_analysis", "frequency_bands", "spectral_filtering"]
for method in methods:
print(f"\n🔬 Testing method: {method}")
print("-" * 40)
try:
result = await processor.separate_vocal_ranges(
audio_path=test_file,
output_dir=f"examples/vocal_ranges_{method}",
ranges=["soprano", "alto", "tenor", "bass"],
method=method,
enhance_separation=True
)
print("✅ Separation complete!")
print(result)
except Exception as e:
print(f"❌ Method {method} failed: {e}")
# Test with custom ranges
print(f"\n🎯 Testing custom range selection (Soprano + Tenor only)")
print("-" * 50)
try:
result = await processor.separate_vocal_ranges(
audio_path=test_file,
output_dir="examples/vocal_ranges_custom",
ranges=["soprano", "tenor"], # Only these two
method="harmonic_analysis",
enhance_separation=True
)
print("✅ Custom range separation complete!")
print(result)
except Exception as e:
print(f"❌ Custom range separation failed: {e}")
# Show all created files
print(f"\n📁 Created Files:")
examples_path = Path("examples")
vocal_files = list(examples_path.rglob("*vocal*"))
for file_path in sorted(vocal_files):
if file_path.is_file():
size_mb = file_path.stat().st_size / (1024 * 1024)
print(f" 📄 {file_path} ({size_mb:.2f} MB)")
# Show example MCP usage
print(f"\n🛠️ Example MCP Usage:")
print("="*40)
print('> "I have a vocal track at /path/to/vocals.wav. Please separate it into')
print(' soprano, alto, tenor, and bass parts using harmonic analysis."')
print()
print("MCP Call:")
print('{')
print(' "tool": "separate_vocal_ranges",')
print(' "arguments": {')
print(' "audio_path": "/path/to/vocals.wav",')
print(' "output_dir": "vocal_parts",')
print(' "ranges": ["soprano", "alto", "tenor", "bass"],')
print(' "method": "harmonic_analysis",')
print(' "enhance_separation": true')
print(' }')
print('}')
print(f"\n🎉 Vocal Range Separation Demo Complete!")
print(f"Your MCP server now has advanced vocal processing capabilities!")
if __name__ == "__main__":
try:
import scipy
asyncio.run(demo_vocal_separation())
except ImportError:
print("❌ This demo requires scipy. Install it with:")
print(" pip install scipy")
print("\nOr run the basic version without vocal synthesis...")