"""
Tool schemas for the Stem MCP Server.
Defines the name, description, and input schema (JSON Schema) of each available audio processing tool.
"""
# Tool catalogue: one dict per tool, each carrying a "name", a human-readable
# "description", and an "inputSchema" written as a JSON Schema object.
# Every tool below declares exactly one required argument; all other
# properties are optional and most carry a "default".
TOOLS_SCHEMA = [
    # --- generate_stems: requires "audio_path" ---------------------------
    {
        "name": "generate_stems",
        "description": "Generate stems from an audio file using AI source separation. Supports multiple models including Demucs for high-quality stem separation.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "audio_path": {
                    "type": "string",
                    "description": "Path to the input audio file (WAV, MP3, FLAC, etc.)",
                },
                "output_dir": {
                    "type": "string",
                    "description": "Directory to save the generated stems (default: current directory)",
                    "default": ".",
                },
                "model_type": {
                    "type": "string",
                    "description": "AI model to use for stem separation",
                    "enum": ["htdemucs", "htdemucs_ft", "htdemucs_6s", "mdx", "mdx_extra"],
                    "default": "htdemucs",
                },
                "num_stems": {
                    "type": "integer",
                    "description": "Number of stems to generate (typically 4: vocals, drums, bass, other)",
                    "minimum": 2,
                    "maximum": 6,
                    "default": 4,
                },
            },
            "required": ["audio_path"],
        },
    },
    # --- split_stems: requires "stem_path" -------------------------------
    {
        "name": "split_stems",
        "description": "Split existing stems into smaller segments for easier handling or loop creation.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "stem_path": {
                    "type": "string",
                    "description": "Path to the stem file to split",
                },
                "output_dir": {
                    "type": "string",
                    "description": "Directory to save the split segments (default: current directory)",
                    "default": ".",
                },
                # Both durations below are expressed in seconds.
                "segment_length": {
                    "type": "number",
                    "description": "Length of each segment in seconds",
                    "minimum": 1.0,
                    "maximum": 300.0,
                    "default": 30.0,
                },
                "overlap": {
                    "type": "number",
                    "description": "Overlap between segments in seconds",
                    "minimum": 0.0,
                    "maximum": 10.0,
                    "default": 0.0,
                },
            },
            "required": ["stem_path"],
        },
    },
    # --- create_loop: requires "audio_path" ------------------------------
    {
        "name": "create_loop",
        "description": "Create seamless loops from audio segments with crossfading and tempo matching.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "audio_path": {
                    "type": "string",
                    "description": "Path to the input audio file",
                },
                # No "default" is declared for output_path or bpm.
                "output_path": {
                    "type": "string",
                    "description": "Path for the output loop file (optional)",
                },
                "loop_duration": {
                    "type": "number",
                    "description": "Duration of the loop in seconds",
                    "minimum": 0.5,
                    "maximum": 60.0,
                    "default": 4.0,
                },
                "bpm": {
                    "type": "number",
                    "description": "Target BPM for the loop (optional, auto-detected if not provided)",
                    "minimum": 60.0,
                    "maximum": 200.0,
                },
                "crossfade_duration": {
                    "type": "number",
                    "description": "Crossfade duration in seconds for seamless looping",
                    "minimum": 0.0,
                    "maximum": 2.0,
                    "default": 0.1,
                },
            },
            "required": ["audio_path"],
        },
    },
    # --- analyze_audio: requires "audio_path" (its only property) --------
    {
        "name": "analyze_audio",
        "description": "Analyze audio file for musical features including tempo, key, dynamics, and spectral characteristics.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "audio_path": {
                    "type": "string",
                    "description": "Path to the audio file to analyze",
                },
            },
            "required": ["audio_path"],
        },
    },
    # --- extract_vocal: requires "audio_path" ----------------------------
    {
        "name": "extract_vocal",
        "description": "Extract the vocal track from a mixed audio file.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "audio_path": {
                    "type": "string",
                    "description": "Path to the input audio file",
                },
                "output_path": {
                    "type": "string",
                    "description": "Path for the output vocal file (optional)",
                },
                "method": {
                    "type": "string",
                    "description": "Method to use for vocal extraction",
                    "enum": ["demucs", "librosa", "spectral"],
                    "default": "demucs",
                },
            },
            "required": ["audio_path"],
        },
    },
    # --- isolate_instrument: requires "audio_path" -----------------------
    {
        "name": "isolate_instrument",
        "description": "Isolate a specific instrument from a mixed audio file.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "audio_path": {
                    "type": "string",
                    "description": "Path to the input audio file",
                },
                "instrument": {
                    "type": "string",
                    "description": "Instrument to isolate",
                    "enum": ["drums", "bass", "guitar", "piano", "vocals", "other"],
                    "default": "drums",
                },
                "output_path": {
                    "type": "string",
                    "description": "Path for the output instrument file (optional)",
                },
                # Same method enum and default as extract_vocal.
                "method": {
                    "type": "string",
                    "description": "Method to use for instrument isolation",
                    "enum": ["demucs", "librosa", "spectral"],
                    "default": "demucs",
                },
            },
            "required": ["audio_path"],
        },
    },
    # --- separate_vocal_ranges: requires "audio_path" --------------------
    {
        "name": "separate_vocal_ranges",
        "description": "Separate vocal track into different vocal ranges: Soprano (C4-C6), Alto (G3-G5), Tenor (C3-C5), and Bass (E2-E4). Uses advanced frequency filtering and harmonic analysis.",
        "inputSchema": {
            "type": "object",
            "properties": {
                "audio_path": {
                    "type": "string",
                    "description": "Path to the vocal audio file (preferably isolated vocals)",
                },
                "output_dir": {
                    "type": "string",
                    "description": "Directory to save the separated vocal ranges (default: current directory)",
                    "default": ".",
                },
                # Array of range names; the default lists all four enum values.
                "ranges": {
                    "type": "array",
                    "description": "Vocal ranges to extract",
                    "items": {
                        "type": "string",
                        "enum": ["soprano", "alto", "tenor", "bass"],
                    },
                    "default": ["soprano", "alto", "tenor", "bass"],
                },
                "method": {
                    "type": "string",
                    "description": "Method for vocal range separation",
                    "enum": ["frequency_bands", "harmonic_analysis", "spectral_filtering"],
                    "default": "harmonic_analysis",
                },
                "enhance_separation": {
                    "type": "boolean",
                    "description": "Apply additional processing to enhance vocal range separation",
                    "default": True,
                },
            },
            "required": ["audio_path"],
        },
    },
]