#!/usr/bin/env python3
"""
GPT-SoVITS FastAPI-MCP Server
Uses FastAPI-MCP to automatically generate MCP tools from GPT-SoVITS API
"""
from fastapi import FastAPI, HTTPException
from fastapi_mcp import FastApiMCP
import httpx
import uvicorn
from typing import Dict, Any, Optional
from pydantic import BaseModel
import json
import os
import platform
from pathlib import Path
# Pydantic models for API requests
class TTSRequest(BaseModel):
    """Request body for the ``/tts`` endpoint.

    The handlers in this file serialize the model with ``.dict()`` and post
    it unchanged to the upstream GPT-SoVITS root endpoint (``/``).
    """

    text: str  # Text to synthesize; the only required field.
    text_lang: str = "zh"  # Language code for ``text``.
    refer_wav_path: str = ""  # Reference audio path; empty by default.
    prompt_text: str = ""  # Prompt text sent with the reference audio.
    # The remaining fields are passed through as-is; their exact meaning is
    # defined by the upstream API (NOTE(review): confirm against its docs).
    top_k: int = 5
    top_p: float = 1.0
    temperature: float = 1.0
    format: str = "wav"
    streaming: bool = False
class ModelRequest(BaseModel):
    """Request body for ``/set_model``; forwarded to upstream ``/set_model``."""

    gpt_model_path: str  # Path of the GPT weights, as seen by the upstream server (presumably).
    sovits_model_path: str  # Path of the SoVITS weights, as seen by the upstream server (presumably).
class ReferenceRequest(BaseModel):
    """Request body for ``/set_reference``; forwarded to upstream ``/change_refer``."""

    refer_wav_path: str  # Reference audio path.
    prompt_text: str  # Prompt text sent with the reference audio.
    prompt_lang: str = "zh"  # Language code for ``prompt_text``.
class ControlRequest(BaseModel):
    """Request body for ``/control``."""

    # The ``/control`` handler accepts only "restart" or "shutdown" and
    # rejects anything else with HTTP 400 before forwarding.
    command: str
class SimpleTTSRequest(BaseModel):
    """Request body for ``/tts_simple``: text plus a language code."""

    text: str  # Text to synthesize.
    lang: str = "ja"  # Sent upstream as ``text_lang``.
class PresetTTSRequest(BaseModel):
    """Request body for ``/tts_preset``."""

    text: str  # Text to synthesize.
    voice_preset: str = "default"  # Must be a key of CHARACTER_PRESETS.
    lang: str = "ja"  # Sent upstream as ``text_lang``.
class CharacterTTSRequest(BaseModel):
    """Request body for ``/tts_character``."""

    text: str  # Text to synthesize.
    character: str = "default"  # Must be a key of CHARACTER_PRESETS.
    lang: str = "ja"  # Sent upstream as ``text_lang``.
    emotion: Optional[str] = None  # Reserved for future emotion control; not read by the handler.
class GPTSoVITSProxy:
    """Relay JSON requests to a GPT-SoVITS HTTP API.

    Attributes:
        base_url: Prefix prepended verbatim to every endpoint path.
        client: Shared ``httpx.AsyncClient`` created with a 30 second
            timeout; the owner is responsible for closing it.
    """

    def __init__(self, base_url: str = "http://localhost:9880"):
        """Store the upstream base URL and create the shared HTTP client."""
        self.base_url = base_url
        self.client = httpx.AsyncClient(timeout=30.0)

    async def forward_request(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``data`` as JSON to ``base_url + endpoint`` and summarize the reply.

        Args:
            endpoint: Path appended to ``base_url`` (e.g. ``"/"``).
            data: JSON-serializable request body.

        Returns:
            For a reply whose content type contains ``audio/``: a summary dict
            with the content type and byte size (the audio itself is not
            returned). Otherwise the decoded JSON body, or a generic success
            dict when the body is not valid JSON.

        Raises:
            HTTPException: Status 500 for any transport error, non-2xx
                upstream status, or other unexpected failure.
        """
        try:
            url = f"{self.base_url}{endpoint}"
            response = await self.client.post(url, json=data)
            response.raise_for_status()

            content_type = response.headers.get("content-type", "")
            if "audio/" in content_type:
                return {
                    "status": "success",
                    "message": "Audio generated successfully",
                    "content_type": content_type,
                    "audio_size": len(response.content)
                }

            try:
                return response.json()
            except ValueError:
                # JSON and text decoding errors are ValueError subclasses.
                # A 2xx reply without a JSON body still counts as success;
                # anything else falls through to the handlers below instead
                # of being silently swallowed.
                return {
                    "status": "success",
                    "message": "Request completed successfully"
                }
        except httpx.HTTPError as e:
            raise HTTPException(status_code=500, detail=f"API request failed: {str(e)}") from e
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}") from e
# FastAPI application object. The route decorators below register on it, and
# it is also the app handed to FastApiMCP further down.
app = FastAPI(
    title="GPT-SoVITS MCP API",
    description="MCP-enabled API proxy for GPT-SoVITS text-to-speech synthesis",
    version="1.0.0"
)
# Cross-platform path configuration
def get_voices_base_path() -> Path:
    """Return the directory that holds the voice reference files.

    Returns:
        ``Path`` built from the ``SOVITS_VOICES_PATH`` environment variable
        when it is set to a non-empty value; otherwise the relative path
        ``voices/characters``.
    """
    voices_path = os.getenv("SOVITS_VOICES_PATH")
    if voices_path:
        return Path(voices_path)
    # The default is the same on every platform: pathlib renders it with the
    # native separator, so no per-OS branch is needed.
    return Path("voices/characters")
def get_voice_file_path(filename: str) -> str:
    """Resolve a voice file name against the voices base directory.

    Returns the joined path as a string, or an empty string when
    ``filename`` is empty.
    """
    return str(get_voices_base_path() / filename) if filename else ""
# Character voice presets, keyed by the identifier clients pass as
# ``voice_preset`` / ``character``. Each entry carries a display name, a
# description, the reference audio path, the prompt text, the prompt
# language and a personality tag. The ``refer_wav_path`` values are resolved
# once, when this module is imported.
CHARACTER_PRESETS = {
    # "Default": system default voice; no reference audio or prompt text.
    "default": {
        "name": "デフォルト",
        "description": "システムデフォルトの音声",
        "refer_wav_path": "",
        "prompt_text": "",
        "prompt_lang": "ja",
        "personality": "neutral"
    },
    # "Yukina": bright, energetic female voice in an anime-character style.
    "yuki": {
        "name": "雪菜(ゆきな)",
        "description": "明るく元気な女性の声。アニメキャラクター風",
        "refer_wav_path": get_voice_file_path("yuki.wav"),
        "prompt_text": "おはよう!今日も一緒に頑張ろうね!",
        "prompt_lang": "ja",
        "personality": "cheerful"
    },
    # "Hiroshi": calm adult male voice, aimed at business use.
    "hiroshi": {
        "name": "博(ひろし)",
        "description": "落ち着いた大人の男性の声。ビジネス向け",
        "refer_wav_path": get_voice_file_path("hiroshi.wav"),
        "prompt_text": "お疲れ様です。会議の資料を確認いたします",
        "prompt_lang": "ja",
        "personality": "professional"
    },
    # "Akane": gentle, polite female voice for customer service and guidance.
    "akane": {
        "name": "茜(あかね)",
        "description": "優しく丁寧な女性の声。接客・案内向け",
        "refer_wav_path": get_voice_file_path("akane.wav"),
        "prompt_text": "いらっしゃいませ。何かお手伝いできることはございますか",
        "prompt_lang": "ja",
        "personality": "polite"
    },
    # "Takeshi": strong, dependable male voice for announcements.
    "takeshi": {
        "name": "武(たけし)",
        "description": "力強く頼もしい男性の声。アナウンス向け",
        "refer_wav_path": get_voice_file_path("takeshi.wav"),
        "prompt_text": "こちらは緊急放送です。落ち着いて行動してください",
        "prompt_lang": "ja",
        "personality": "authoritative"
    },
    # "Miku": cute young girl's voice for entertainment.
    "miku": {
        "name": "美紅(みく)",
        "description": "可愛らしい少女の声。エンターテイメント向け",
        "refer_wav_path": get_voice_file_path("miku.wav"),
        "prompt_text": "みんなー!一緒に歌おうよ〜♪",
        "prompt_lang": "ja",
        "personality": "cute"
    },
    # "Sensei" (teacher): intellectual, gender-neutral voice for teaching.
    "sensei": {
        "name": "先生",
        "description": "知的で教育的な中性的な声。学習・説明向け",
        "refer_wav_path": get_voice_file_path("sensei.wav"),
        "prompt_text": "それでは今日の授業を始めましょう。よろしくお願いします",
        "prompt_lang": "ja",
        "personality": "educational"
    }
}
# Module-level proxy shared by every route handler. It starts with the
# default upstream URL; main() overwrites ``proxy.base_url`` from the CLI.
proxy = GPTSoVITSProxy()
# FastAPI-MCP wrapper around the app; it is mounted after the routes below
# have been declared.
mcp = FastApiMCP(app)
@app.post("/tts", summary="Text-to-Speech Synthesis")
async def text_to_speech(request: TTSRequest) -> Dict[str, Any]:
    """
    Synthesize speech for the given text with GPT-SoVITS.

    The complete request body, including the optional reference audio path
    and prompt text used for voice cloning, is relayed to the upstream root
    endpoint.
    """
    payload = request.dict()
    return await proxy.forward_request("/", payload)
@app.post("/set_model", summary="Set Model Weights")
async def set_model(request: ModelRequest) -> Dict[str, Any]:
    """
    Switch the GPT and SoVITS model weights.

    Relays both weight paths to the upstream ``/set_model`` endpoint.
    """
    payload = request.dict()
    return await proxy.forward_request("/set_model", payload)
@app.post("/set_reference", summary="Set Reference Audio")
async def set_reference(request: ReferenceRequest) -> Dict[str, Any]:
    """
    Set the default reference audio and prompt text for voice cloning.

    Relays the request to the upstream ``/change_refer`` endpoint.
    """
    payload = request.dict()
    return await proxy.forward_request("/change_refer", payload)
@app.post("/control", summary="Server Control")
async def control_server(request: ControlRequest) -> Dict[str, Any]:
    """
    Restart or shut down the GPT-SoVITS server.

    Only the commands ``restart`` and ``shutdown`` are relayed to the
    upstream ``/control`` endpoint; any other value is rejected with
    HTTP 400.
    """
    allowed_commands = ("restart", "shutdown")
    if request.command in allowed_commands:
        return await proxy.forward_request("/control", request.dict())
    raise HTTPException(status_code=400, detail="Invalid command. Use 'restart' or 'shutdown'")
@app.post("/tts_simple", summary="Simple Text-to-Speech")
async def text_to_speech_simple(request: SimpleTTSRequest) -> Dict[str, Any]:
    """
    Synthesize speech from just a text and a language code.

    No reference audio or prompt text is sent, and the sampling and output
    settings are fixed to the values below.
    """
    synthesis_args = {
        "text": request.text,
        "text_lang": request.lang,
        "refer_wav_path": "",
        "prompt_text": "",
        "top_k": 5,
        "top_p": 1.0,
        "temperature": 1.0,
        "format": "wav",
        "streaming": False,
    }
    upstream_body = TTSRequest(**synthesis_args).dict()
    return await proxy.forward_request("/", upstream_body)
@app.post("/tts_preset", summary="Text-to-Speech with Voice Presets")
async def text_to_speech_preset(request: PresetTTSRequest) -> Dict[str, Any]:
    """
    Generate speech using a predefined voice preset.

    Available presets (the keys of CHARACTER_PRESETS): default, yuki,
    hiroshi, akane, takeshi, miku, sensei.

    The preset supplies the reference audio path and prompt text; sampling
    and output settings are fixed. An unknown preset is rejected with
    HTTP 400 and the list of valid names.
    """
    if request.voice_preset not in CHARACTER_PRESETS:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice preset: {request.voice_preset}. Available: {list(CHARACTER_PRESETS.keys())}"
        )

    preset = CHARACTER_PRESETS[request.voice_preset]
    full_request = TTSRequest(
        text=request.text,
        text_lang=request.lang,
        refer_wav_path=preset["refer_wav_path"],
        prompt_text=preset["prompt_text"],
        top_k=5,
        top_p=1.0,
        temperature=1.0,
        format="wav",
        streaming=False
    )
    return await proxy.forward_request("/", full_request.dict())
@app.post("/tts_character", summary="Text-to-Speech with Character Voices")
async def text_to_speech_character(request: CharacterTTSRequest) -> Dict[str, Any]:
    """
    Generate speech in the voice of a predefined character.

    Available characters: yuki (cheerful), hiroshi (business), akane (polite),
    takeshi (powerful), miku (cute), sensei (educational)

    When the upstream result is a dict, the character's name and personality
    are attached under ``character_info``. The request's ``emotion`` field is
    not used here.
    """
    character = CHARACTER_PRESETS.get(request.character)
    if character is None:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown character: {request.character}. Available: {list(CHARACTER_PRESETS.keys())}"
        )

    upstream_request = TTSRequest(
        text=request.text,
        text_lang=request.lang,
        refer_wav_path=character["refer_wav_path"],
        prompt_text=character["prompt_text"],
        top_k=5,
        top_p=1.0,
        temperature=1.0,
        format="wav",
        streaming=False
    )
    result = await proxy.forward_request("/", upstream_request.dict())

    if not isinstance(result, dict):
        return result

    # Attach the character details to the result.
    result["character_info"] = {
        field: character[field] for field in ("name", "personality")
    }
    return result
@app.get("/characters", summary="List Available Characters")
async def list_characters():
    """Return each character's name, description and personality, plus a count."""
    summary_fields = ("name", "description", "personality")
    characters = {
        key: {field: preset[field] for field in summary_fields}
        for key, preset in CHARACTER_PRESETS.items()
    }
    return {"characters": characters, "total_count": len(characters)}
@app.get("/presets", summary="List Available Voice Presets")
async def list_voice_presets():
    """Return the preset names together with the full preset configurations."""
    preset_names = list(CHARACTER_PRESETS)
    return {"presets": preset_names, "configurations": CHARACTER_PRESETS}
@app.get("/health", summary="Health Check")
async def health_check():
    """Report whether the GPT-SoVITS API server answers a GET on its /docs URL."""
    try:
        probe = await proxy.client.get(f"{proxy.base_url}/docs")
        report = {
            "status": "healthy",
            "gpt_sovits_api": "accessible",
            "response_code": probe.status_code
        }
    except Exception as exc:
        # Any failure to get a response is reported instead of raised.
        report = {
            "status": "unhealthy",
            "gpt_sovits_api": "inaccessible",
            "error": str(exc)
        }
    return report
@app.on_event("shutdown")
async def shutdown_event():
    """Close the proxy's shared HTTP client when the application shuts down."""
    http_client = proxy.client
    await http_client.aclose()
# Mount the MCP server on the app; this creates the /mcp SSE endpoint.
# The call sits after all the route definitions above.
mcp.mount()
def main():
    """Parse command-line options and run the server with uvicorn.

    Options: ``--host``, ``--port``, ``--gpt-sovits-url`` (upstream API base
    URL) and ``--reload`` (auto-reload).

    Without ``--reload`` the already-constructed ``app`` object is served
    directly, so the upstream URL applied to ``proxy`` here is the one the
    handlers use. With ``--reload`` uvicorn needs an import string and serves
    a freshly imported copy of the module, so the override does not reach the
    served app and a warning is printed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="GPT-SoVITS FastAPI-MCP Server")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
    parser.add_argument("--port", type=int, default=8000, help="Port to bind to")
    parser.add_argument("--gpt-sovits-url", default="http://localhost:9880",
                        help="GPT-SoVITS API base URL")
    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")
    args = parser.parse_args()

    # Point the module-level proxy at the requested upstream.
    proxy.base_url = args.gpt_sovits_url

    print("Starting GPT-SoVITS FastAPI-MCP Server...")
    print(f"FastAPI server: http://{args.host}:{args.port}")
    print(f"API docs: http://{args.host}:{args.port}/docs")
    print(f"MCP endpoint: http://{args.host}:{args.port}/mcp")
    print(f"GPT-SoVITS API: {args.gpt_sovits_url}")

    if args.reload:
        # Reload mode only works with an import string. The module is then
        # imported again and builds its own proxy with the default URL.
        if args.gpt_sovits_url != parser.get_default("gpt_sovits_url"):
            print("Warning: --gpt-sovits-url is not applied when --reload is enabled; "
                  "the reloaded app uses the default GPT-SoVITS URL")
        uvicorn.run(
            "fastapi_mcp_server:app",
            host=args.host,
            port=args.port,
            reload=True
        )
    else:
        # Serve this module's own app so the proxy configured above is used.
        uvicorn.run(app, host=args.host, port=args.port)
# Run the CLI entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()