#!/usr/bin/env python3
"""
GPT-SoVITS MCP Server
Provides MCP tools for interacting with GPT-SoVITS API endpoints
"""
import asyncio
import httpx
from typing import List, Optional, Dict, Any
from fastapi_mcp import FastAPIClient, Tool
import json
class GPTSoVITSMCPServer:
    """MCP server exposing the GPT-SoVITS HTTP API as MCP tools.

    Registers four tools — TTS inference, model-weight swapping, reference
    audio selection, and server control — each backed by an async handler
    that forwards the request to the GPT-SoVITS API over a shared
    ``httpx.AsyncClient``.
    """

    def __init__(self, base_url: str = "http://localhost:9880"):
        """Initialize the HTTP client and register the MCP tools.

        Args:
            base_url: Root URL of a running GPT-SoVITS API server.
        """
        # Normalize: a trailing slash would otherwise yield "…//set_model"
        # when endpoint paths are appended in _post().
        self.base_url = base_url.rstrip("/")
        self.client = httpx.AsyncClient()
        self.tools: List[Tool] = []
        self._setup_tools()

    def _setup_tools(self):
        """Build the Tool definitions mirroring the GPT-SoVITS API endpoints."""
        # TTS inference tool (POST to the API root).
        tts_tool = Tool(
            name="gpt_sovits_tts",
            description="Generate speech from text using GPT-SoVITS",
            parameters={
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "Text to synthesize into speech"
                    },
                    "text_lang": {
                        "type": "string",
                        "enum": ["zh", "en", "ja", "ko", "yue"],
                        "description": "Language of the input text",
                        "default": "zh"
                    },
                    "refer_wav_path": {
                        "type": "string",
                        "description": "Path to reference audio file for voice cloning",
                        "default": ""
                    },
                    "prompt_text": {
                        "type": "string",
                        "description": "Prompt text matching the reference audio",
                        "default": ""
                    },
                    "top_k": {
                        "type": "integer",
                        "description": "Top-k sampling parameter",
                        "default": 5
                    },
                    "top_p": {
                        "type": "number",
                        "description": "Top-p sampling parameter",
                        "default": 1.0
                    },
                    "temperature": {
                        "type": "number",
                        "description": "Temperature for sampling",
                        "default": 1.0
                    },
                    "format": {
                        "type": "string",
                        "enum": ["wav", "ogg", "aac"],
                        "description": "Output audio format",
                        "default": "wav"
                    },
                    "streaming": {
                        "type": "boolean",
                        "description": "Enable streaming audio output",
                        "default": False
                    }
                },
                "required": ["text"]
            },
            handler=self._handle_tts
        )

        # Model management tool (POST /set_model).
        model_tool = Tool(
            name="gpt_sovits_set_model",
            description="Set GPT and SoVITS model weights",
            parameters={
                "type": "object",
                "properties": {
                    "gpt_model_path": {
                        "type": "string",
                        "description": "Path to GPT model file"
                    },
                    "sovits_model_path": {
                        "type": "string",
                        "description": "Path to SoVITS model file"
                    }
                },
                "required": ["gpt_model_path", "sovits_model_path"]
            },
            handler=self._handle_set_model
        )

        # Reference audio tool (POST /change_refer).
        refer_tool = Tool(
            name="gpt_sovits_set_reference",
            description="Set default reference audio and prompt text",
            parameters={
                "type": "object",
                "properties": {
                    "refer_wav_path": {
                        "type": "string",
                        "description": "Path to reference audio file"
                    },
                    "prompt_text": {
                        "type": "string",
                        "description": "Prompt text matching the reference audio"
                    },
                    "prompt_lang": {
                        "type": "string",
                        "enum": ["zh", "en", "ja", "ko", "yue"],
                        "description": "Language of the prompt text",
                        "default": "zh"
                    }
                },
                "required": ["refer_wav_path", "prompt_text"]
            },
            handler=self._handle_set_reference
        )

        # Server control tool (POST /control).
        control_tool = Tool(
            name="gpt_sovits_control",
            description="Control GPT-SoVITS server (restart/shutdown)",
            parameters={
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "enum": ["restart", "shutdown"],
                        "description": "Control command to execute"
                    }
                },
                "required": ["command"]
            },
            handler=self._handle_control
        )

        self.tools = [tts_tool, model_tool, refer_tool, control_tool]

    async def _post(self, path: str, params: Dict[str, Any]) -> httpx.Response:
        """POST *params* as JSON to ``base_url + path``; raise on HTTP error status.

        Shared by all four handlers so request/error-status handling lives
        in one place.
        """
        response = await self.client.post(f"{self.base_url}{path}", json=params)
        response.raise_for_status()
        return response

    async def _handle_tts(self, **kwargs) -> Dict[str, Any]:
        """Handle a TTS inference request.

        Returns:
            On success, a dict with hex-encoded audio (or stream info when
            ``streaming`` was requested); on failure, ``{"status": "error", ...}``.
        """
        try:
            # Defaults here mirror the JSON-schema defaults declared in
            # _setup_tools for the gpt_sovits_tts tool.
            params = {
                "text": kwargs.get("text"),
                "text_lang": kwargs.get("text_lang", "zh"),
                "refer_wav_path": kwargs.get("refer_wav_path", ""),
                "prompt_text": kwargs.get("prompt_text", ""),
                "top_k": kwargs.get("top_k", 5),
                "top_p": kwargs.get("top_p", 1.0),
                "temperature": kwargs.get("temperature", 1.0),
                "format": kwargs.get("format", "wav"),
                "streaming": kwargs.get("streaming", False)
            }
            response = await self._post("/", params)

            if params["streaming"]:
                # NOTE(review): the POST above has already completed and its
                # body been read, so no live stream exists at this point; the
                # caller is expected to re-request stream_url — confirm this
                # matches the intended streaming contract.
                return {
                    "status": "success",
                    "message": "Streaming audio response initiated",
                    "stream_url": f"{self.base_url}/",
                    "content_type": f"audio/{params['format']}"
                }
            return {
                "status": "success",
                "message": "Audio generated successfully",
                # Hex-encode so the binary audio survives JSON transport.
                "audio_data": response.content.hex(),
                "content_type": f"audio/{params['format']}"
            }
        except Exception as e:
            # Tool boundary: convert any failure into an error payload
            # rather than letting it propagate to the MCP runtime.
            return {
                "status": "error",
                "message": f"TTS generation failed: {str(e)}"
            }

    async def _handle_set_model(self, **kwargs) -> Dict[str, Any]:
        """Handle a model-weight swap request (POST /set_model)."""
        try:
            params = {
                "gpt_model_path": kwargs.get("gpt_model_path"),
                "sovits_model_path": kwargs.get("sovits_model_path")
            }
            await self._post("/set_model", params)
            return {
                "status": "success",
                "message": "Model weights updated successfully"
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Model setting failed: {str(e)}"
            }

    async def _handle_set_reference(self, **kwargs) -> Dict[str, Any]:
        """Handle a reference-audio change request (POST /change_refer)."""
        try:
            params = {
                "refer_wav_path": kwargs.get("refer_wav_path"),
                "prompt_text": kwargs.get("prompt_text"),
                "prompt_lang": kwargs.get("prompt_lang", "zh")
            }
            await self._post("/change_refer", params)
            return {
                "status": "success",
                "message": "Reference audio updated successfully"
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Reference setting failed: {str(e)}"
            }

    async def _handle_control(self, **kwargs) -> Dict[str, Any]:
        """Handle a server control request (POST /control, restart/shutdown)."""
        try:
            command = kwargs.get("command")
            await self._post("/control", {"command": command})
            return {
                "status": "success",
                "message": f"Server {command} command executed successfully"
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Control command failed: {str(e)}"
            }

    def get_tools(self) -> List[Tool]:
        """Return the list of registered MCP tools."""
        return self.tools

    async def close(self):
        """Release the underlying HTTP client's connections."""
        await self.client.aclose()
def main():
    """CLI entry point: parse arguments, build the server, and list its tools.

    Flags:
        --base-url  Base URL of the GPT-SoVITS API server.
        --port      Port the MCP server should listen on.
    """
    import argparse

    parser = argparse.ArgumentParser(description="GPT-SoVITS MCP Server")
    parser.add_argument("--base-url", default="http://localhost:9880",
                        help="Base URL for GPT-SoVITS API server")
    parser.add_argument("--port", type=int, default=8000,
                        help="Port to run MCP server on")
    args = parser.parse_args()

    # Create MCP server instance.
    mcp_server = GPTSoVITSMCPServer(base_url=args.base_url)

    # Plain strings where no interpolation occurs (were no-op f-strings).
    print("GPT-SoVITS MCP Server starting...")
    print(f"Connecting to GPT-SoVITS API at: {args.base_url}")
    print(f"Available tools: {len(mcp_server.get_tools())}")
    for tool in mcp_server.get_tools():
        print(f"  - {tool.name}: {tool.description}")

    # Start the server (this would typically integrate with an MCP runtime).
    print(f"MCP Server ready on port {args.port}")
# Run the CLI entry point only when executed as a script (not on import).
if __name__ == "__main__":
    main()