GPT-SoVITS MCP Server

fastapi_mcp_server.py•12.9 KiB

#!/usr/bin/env python3
"""
GPT-SoVITS FastAPI-MCP Server

Uses FastAPI-MCP to automatically generate MCP tools from GPT-SoVITS API.

Configuration (environment variables):
    GPT_SOVITS_URL      Base URL of the upstream GPT-SoVITS API
                        (default: http://localhost:9880). The
                        ``--gpt-sovits-url`` command-line flag sets this.
    SOVITS_VOICES_PATH  Directory holding the character reference WAV files
                        (default: voices/characters, relative to the CWD).
"""

import json
import os
import platform
from pathlib import Path
from typing import Any, Dict, Optional

import httpx
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi_mcp import FastApiMCP
from pydantic import BaseModel

# Name of the environment variable that carries the upstream API URL from
# main() to the module instance that uvicorn actually serves.
GPT_SOVITS_URL_ENV = "GPT_SOVITS_URL"
DEFAULT_GPT_SOVITS_URL = "http://localhost:9880"


# ---------------------------------------------------------------------------
# Pydantic models for API requests
#
# NOTE: documentation for the models is kept in `#` comments on purpose;
# class docstrings on Pydantic models are emitted into the OpenAPI schema.
# ---------------------------------------------------------------------------

# Full synthesis request, forwarded field-for-field to the upstream "/" route.
class TTSRequest(BaseModel):
    text: str
    text_lang: str = "zh"
    refer_wav_path: str = ""
    prompt_text: str = ""
    top_k: int = 5
    top_p: float = 1.0
    temperature: float = 1.0
    format: str = "wav"
    streaming: bool = False


# Payload for switching the GPT / SoVITS weight files.
class ModelRequest(BaseModel):
    gpt_model_path: str
    sovits_model_path: str


# Payload for changing the default reference audio used for voice cloning.
class ReferenceRequest(BaseModel):
    refer_wav_path: str
    prompt_text: str
    prompt_lang: str = "zh"


# Server control command; the /control endpoint accepts "restart"/"shutdown".
class ControlRequest(BaseModel):
    command: str


# Minimal request: text plus language only.
class SimpleTTSRequest(BaseModel):
    text: str
    lang: str = "ja"


# Request that selects a voice by preset key (a key of CHARACTER_PRESETS).
class PresetTTSRequest(BaseModel):
    text: str
    voice_preset: str = "default"
    lang: str = "ja"


# Request that selects a voice by character key (a key of CHARACTER_PRESETS).
class CharacterTTSRequest(BaseModel):
    text: str
    character: str = "default"
    lang: str = "ja"
    emotion: Optional[str] = None  # reserved for future emotion control


class GPTSoVITSProxy:
    """Proxy class to handle GPT-SoVITS API communication"""

    def __init__(self, base_url: str = "http://localhost:9880"):
        self.base_url = base_url
        # One shared client for the process; closed in the shutdown handler.
        self.client = httpx.AsyncClient(timeout=30.0)

    async def forward_request(self, endpoint: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Forward request to GPT-SoVITS API.

        Args:
            endpoint: Path appended to ``base_url`` (e.g. ``"/"``).
            data: JSON-serialisable payload sent as the POST body.

        Returns:
            For ``audio/*`` responses, a summary dict (status, message,
            content type and byte size) rather than the audio itself.
            Otherwise the decoded JSON body, or a generic success dict when
            the body is not valid JSON.

        Raises:
            HTTPException: status 500 when the upstream request fails or any
                other unexpected error occurs.
        """
        try:
            url = f"{self.base_url}{endpoint}"
            response = await self.client.post(url, json=data)
            response.raise_for_status()

            # Handle different response types
            content_type = response.headers.get("content-type", "")
            if "audio/" in content_type:
                return {
                    "status": "success",
                    "message": "Audio generated successfully",
                    "content_type": content_type,
                    "audio_size": len(response.content),
                }

            try:
                return response.json()
            except ValueError:
                # Body is not JSON (JSONDecodeError / UnicodeDecodeError are
                # both ValueError subclasses). A bare `except:` here would
                # also swallow task cancellation and KeyboardInterrupt.
                return {
                    "status": "success",
                    "message": "Request completed successfully",
                }
        except httpx.HTTPError as e:
            raise HTTPException(status_code=500, detail=f"API request failed: {str(e)}") from e
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}") from e


# Create FastAPI app
app = FastAPI(
    title="GPT-SoVITS MCP API",
    description="MCP-enabled API proxy for GPT-SoVITS text-to-speech synthesis",
    version="1.0.0",
)


# Cross-platform path configuration
def get_voices_base_path() -> Path:
    """Get the base path for voice files.

    Returns:
        ``Path($SOVITS_VOICES_PATH)`` when that environment variable is set
        and non-empty, otherwise the relative path ``voices/characters``.
    """
    voices_path = os.getenv("SOVITS_VOICES_PATH")
    if voices_path:
        return Path(voices_path)

    # The default is identical on every platform; pathlib renders the
    # separator appropriately, so no per-OS branch is needed.
    return Path("voices/characters")


def get_voice_file_path(filename: str) -> str:
    """Get cross-platform voice file path.

    Args:
        filename: File name relative to the voices base directory.

    Returns:
        The joined path as a string, or ``""`` when ``filename`` is empty.
    """
    if not filename:
        return ""
    return str(get_voices_base_path() / filename)


# Character voice presets configuration.
# The paths are resolved once, at import time, so SOVITS_VOICES_PATH must be
# set before this module is imported.
CHARACTER_PRESETS = {
    "default": {
        "name": "デフォルト",
        "description": "システムデフォルトの音声",
        "refer_wav_path": "",
        "prompt_text": "",
        "prompt_lang": "ja",
        "personality": "neutral",
    },
    "yuki": {
        "name": "雪菜（ゆきな）",
        "description": "明るく元気な女性の声。アニメキャラクター風",
        "refer_wav_path": get_voice_file_path("yuki.wav"),
        "prompt_text": "おはよう！今日も一緒に頑張ろうね！",
        "prompt_lang": "ja",
        "personality": "cheerful",
    },
    "hiroshi": {
        "name": "博（ひろし）",
        "description": "落ち着いた大人の男性の声。ビジネス向け",
        "refer_wav_path": get_voice_file_path("hiroshi.wav"),
        "prompt_text": "お疲れ様です。会議の資料を確認いたします",
        "prompt_lang": "ja",
        "personality": "professional",
    },
    "akane": {
        "name": "茜（あかね）",
        "description": "優しく丁寧な女性の声。接客・案内向け",
        "refer_wav_path": get_voice_file_path("akane.wav"),
        "prompt_text": "いらっしゃいませ。何かお手伝いできることはございますか",
        "prompt_lang": "ja",
        "personality": "polite",
    },
    "takeshi": {
        "name": "武（たけし）",
        "description": "力強く頼もしい男性の声。アナウンス向け",
        "refer_wav_path": get_voice_file_path("takeshi.wav"),
        "prompt_text": "こちらは緊急放送です。落ち着いて行動してください",
        "prompt_lang": "ja",
        "personality": "authoritative",
    },
    "miku": {
        "name": "美紅（みく）",
        "description": "可愛らしい少女の声。エンターテイメント向け",
        "refer_wav_path": get_voice_file_path("miku.wav"),
        "prompt_text": "みんなー！一緒に歌おうよ〜♪",
        "prompt_lang": "ja",
        "personality": "cute",
    },
    "sensei": {
        "name": "先生",
        "description": "知的で教育的な中性的な声。学習・説明向け",
        "refer_wav_path": get_voice_file_path("sensei.wav"),
        "prompt_text": "それでは今日の授業を始めましょう。よろしくお願いします",
        "prompt_lang": "ja",
        "personality": "educational",
    },
}

# Initialize proxy.
# The URL comes from the environment because uvicorn serves the module
# imported as "fastapi_mcp_server", which is a different module object from
# the "__main__" that main() runs in (and a different process under
# --reload); the environment is the one channel that reaches both.
proxy = GPTSoVITSProxy(os.getenv(GPT_SOVITS_URL_ENV, DEFAULT_GPT_SOVITS_URL))


def _build_tts_request(
    text: str,
    lang: str,
    refer_wav_path: str = "",
    prompt_text: str = "",
) -> TTSRequest:
    """Build the fixed-parameter TTSRequest shared by the simplified endpoints."""
    return TTSRequest(
        text=text,
        text_lang=lang,
        refer_wav_path=refer_wav_path,
        prompt_text=prompt_text,
        top_k=5,
        top_p=1.0,
        temperature=1.0,
        format="wav",
        streaming=False,
    )


@app.post("/tts", summary="Text-to-Speech Synthesis")
async def text_to_speech(request: TTSRequest) -> Dict[str, Any]:
    """
    Generate speech from text using GPT-SoVITS.

    This endpoint synthesizes speech from input text with optional voice cloning
    using reference audio and prompt text.
    """
    return await proxy.forward_request("/", request.dict())


@app.post("/set_model", summary="Set Model Weights")
async def set_model(request: ModelRequest) -> Dict[str, Any]:
    """
    Set GPT and SoVITS model weights.

    Updates the model weights used for text-to-speech synthesis.
    """
    return await proxy.forward_request("/set_model", request.dict())


@app.post("/set_reference", summary="Set Reference Audio")
async def set_reference(request: ReferenceRequest) -> Dict[str, Any]:
    """
    Set default reference audio and prompt text for voice cloning.

    This sets the default voice characteristics that will be used for synthesis.
    """
    # The upstream route is named /change_refer, not /set_reference.
    return await proxy.forward_request("/change_refer", request.dict())


@app.post("/control", summary="Server Control")
async def control_server(request: ControlRequest) -> Dict[str, Any]:
    """
    Control GPT-SoVITS server operations.

    Allows restarting or shutting down the GPT-SoVITS server.
    """
    if request.command not in ["restart", "shutdown"]:
        raise HTTPException(
            status_code=400,
            detail="Invalid command. Use 'restart' or 'shutdown'",
        )
    return await proxy.forward_request("/control", request.dict())


@app.post("/tts_simple", summary="Simple Text-to-Speech")
async def text_to_speech_simple(request: SimpleTTSRequest) -> Dict[str, Any]:
    """
    Simple text-to-speech synthesis with minimal parameters.

    Uses default settings for quick voice generation.
    """
    full_request = _build_tts_request(request.text, request.lang)
    return await proxy.forward_request("/", full_request.dict())


@app.post("/tts_preset", summary="Text-to-Speech with Voice Presets")
async def text_to_speech_preset(request: PresetTTSRequest) -> Dict[str, Any]:
    """
    Generate speech using predefined voice presets.

    Available presets: default, yuki, hiroshi, akane, takeshi, miku, sensei
    """
    if request.voice_preset not in CHARACTER_PRESETS:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice preset: {request.voice_preset}. Available: {list(CHARACTER_PRESETS.keys())}",
        )

    preset = CHARACTER_PRESETS[request.voice_preset]
    full_request = _build_tts_request(
        request.text,
        request.lang,
        refer_wav_path=preset["refer_wav_path"],
        prompt_text=preset["prompt_text"],
    )
    return await proxy.forward_request("/", full_request.dict())


@app.post("/tts_character", summary="Text-to-Speech with Character Voices")
async def text_to_speech_character(request: CharacterTTSRequest) -> Dict[str, Any]:
    """
    Generate speech using predefined character voices.

    Available characters: yuki (cheerful), hiroshi (business), akane (polite),
    takeshi (powerful), miku (cute), sensei (educational)
    """
    if request.character not in CHARACTER_PRESETS:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown character: {request.character}. Available: {list(CHARACTER_PRESETS.keys())}",
        )

    character = CHARACTER_PRESETS[request.character]
    full_request = _build_tts_request(
        request.text,
        request.lang,
        refer_wav_path=character["refer_wav_path"],
        prompt_text=character["prompt_text"],
    )
    result = await proxy.forward_request("/", full_request.dict())

    # Add character info to the result. The upstream JSON body is not
    # guaranteed to be an object, hence the type check.
    if isinstance(result, dict):
        result["character_info"] = {
            "name": character["name"],
            "personality": character["personality"],
        }

    return result


@app.get("/characters", summary="List Available Characters")
async def list_characters():
    """List all available character voices with descriptions."""
    # Only the descriptive fields are exposed here; file paths and prompt
    # text are left out.
    characters = {
        key: {
            "name": char["name"],
            "description": char["description"],
            "personality": char["personality"],
        }
        for key, char in CHARACTER_PRESETS.items()
    }
    return {
        "characters": characters,
        "total_count": len(characters),
    }


@app.get("/presets", summary="List Available Voice Presets")
async def list_voice_presets():
    """List all available voice presets with their configurations."""
    return {
        "presets": list(CHARACTER_PRESETS.keys()),
        "configurations": CHARACTER_PRESETS,
    }


@app.get("/health", summary="Health Check")
async def health_check():
    """Check if the GPT-SoVITS API server is accessible"""
    try:
        response = await proxy.client.get(f"{proxy.base_url}/docs")
        return {
            "status": "healthy",
            "gpt_sovits_api": "accessible",
            "response_code": response.status_code,
        }
    except Exception as e:
        # Deliberate best-effort: a health probe reports the failure instead
        # of raising.
        return {
            "status": "unhealthy",
            "gpt_sovits_api": "inaccessible",
            "error": str(e),
        }


@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Clean up resources on shutdown"""
    await proxy.client.aclose()


# Create FastAPI-MCP instance.
# This must happen AFTER every route above is registered: FastApiMCP builds
# its tool list from the app's routes when it is constructed, so creating it
# earlier leaves later routes out of the MCP tool list.
mcp = FastApiMCP(app)

# Mount MCP server - this creates the /mcp SSE endpoint
mcp.mount()


def main() -> None:
    """Main entry point: parse CLI flags and run the server with uvicorn."""
    import argparse

    parser = argparse.ArgumentParser(description="GPT-SoVITS FastAPI-MCP Server")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
    parser.add_argument("--port", type=int, default=8000, help="Port to bind to")
    parser.add_argument(
        "--gpt-sovits-url",
        # Honour a pre-set environment variable when the flag is omitted.
        default=os.getenv(GPT_SOVITS_URL_ENV, DEFAULT_GPT_SOVITS_URL),
        help="GPT-SoVITS API base URL",
    )
    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")

    args = parser.parse_args()

    # Update proxy base URL.
    # Setting it only on this module's `proxy` is not enough: uvicorn imports
    # "fastapi_mcp_server:app" as its own module (in a child process under
    # --reload), which builds a fresh proxy. Exporting the URL through the
    # environment makes that fresh proxy pick it up.
    os.environ[GPT_SOVITS_URL_ENV] = args.gpt_sovits_url
    proxy.base_url = args.gpt_sovits_url

    print("Starting GPT-SoVITS FastAPI-MCP Server...")
    print(f"FastAPI server: http://{args.host}:{args.port}")
    print(f"API docs: http://{args.host}:{args.port}/docs")
    print(f"MCP endpoint: http://{args.host}:{args.port}/mcp")
    print(f"GPT-SoVITS API: {args.gpt_sovits_url}")

    uvicorn.run(
        "fastapi_mcp_server:app",
        host=args.host,
        port=args.port,
        reload=args.reload,
    )


if __name__ == "__main__":
    main()

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ganpare/gpt-sovits-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

fastapi_mcp_server.py•12.9 KiB