Doubao TTS MCP

doubao-tts-mcp
volcengine_mcp

server.py•6.85 KiB

# server.py
import sys
#print("当前Python路径：", sys.executable)
import os
from mcp.server.fastmcp import FastMCP
from volcengine_mcp.volc_tts import VolcTTS
from dotenv import load_dotenv
from mcp.types import TextContent
from volcengine_mcp.config import get_voice_type, VOICE_LIST
from typing import Optional
import uuid

load_dotenv()

VOLC_APPID = os.getenv("VOLC_APPID")
VOLC_TOKEN = os.getenv("VOLC_TOKEN")
#print("VOLC_APPID:", VOLC_APPID)
#print("VOLC_TOKEN:", VOLC_TOKEN)
tts_client = VolcTTS(VOLC_APPID, VOLC_TOKEN)

mcp = FastMCP("VolcEngineTTS")

#print("准备注册 volcengine_tts 工具")
@mcp.tool(
    description="""
    将文本转为语音并保存为音频文件，支持所有火山引擎TTS参数。
    支持自然语言描述音色，如"青年男声""甜美女声"等。

    参数说明：
        text (str): 要合成的文本内容。
        voice_type (str, 可选): 使用的音色名称，默认为官方女声。
        encoding (str, 可选): 音频编码格式，默认为mp3。
        speed_ratio (float, 可选): 语速，1.0为正常语速。
        uid (str, 可选): 用户ID，默认uid123。
        cluster (str, 可选): 集群，默认volcano_tts。
        emotion (str, 可选): 情感标签。
        enable_emotion (bool, 可选): 是否启用情感。
        emotion_scale (float, 可选): 情感强度。
        rate (int, 可选): 采样率，默认24000。
        bitrate (int, 可选): 比特率，默认160。
        loudness_ratio (float, 可选): 响度，默认1.0。
        explicit_language (str, 可选): 明确语言。
        context_language (str, 可选): 上下文语言。
        silence_duration (float, 可选): 静音时长。
        with_timestamp (bool, 可选): 是否返回时间戳。
        extra_param (dict, 可选): 额外参数。
        output_dir (str, 可选): 音频文件保存目录，优先级高于环境变量 OUTPUT_DIR，默认当前目录。
        output_filename (str, 可选): 自定义音频文件名（无需扩展名），如不指定则自动生成唯一文件名。

    返回：
        TextContent，包含合成成功提示和音频文件路径。

    English:
    Convert text to speech with a given voice and save the output audio file to a given directory.
    Directory is optional, if not provided, the output file will be saved to the current working directory.

    Args:
        text (str): The text to convert to speech.
        voice_type (str, optional): The name of the voice to use.
        encoding (str, optional): Output audio encoding, default is mp3.
        speed_ratio (float, optional): Speed of the generated audio. 1.0 is normal speed.
        uid (str, optional): User ID, default is uid123.
        cluster (str, optional): Cluster, default is volcano_tts.
        emotion (str, optional): Emotion tag.
        enable_emotion (bool, optional): Whether to enable emotion.
        emotion_scale (float, optional): Emotion intensity.
        rate (int, optional): Sampling rate, default is 24000.
        bitrate (int, optional): Bitrate, default is 160.
        loudness_ratio (float, optional): Loudness, default is 1.0.
        explicit_language (str, optional): Explicit language.
        context_language (str, optional): Context language.
        silence_duration (float, optional): Silence duration.
        with_timestamp (bool, optional): Whether to return timestamp.
        extra_param (dict, optional): Extra parameters.
        output_dir (str, optional): Directory to save the output audio file. Priority: argument > env OUTPUT_DIR > current dir.
        output_filename (str, optional): Custom audio file name (no extension needed). If not specified, a unique file name will be generated.

    Returns:
        Text content with the path to the output file and name of the voice used.
    """
)
def volcengine_tts(
    text: str,
    voice_type: Optional[str] = "zh_female_wanqudashu_moon_bigtts",
    encoding: Optional[str] = "mp3",
    speed_ratio: Optional[float] = 1.0,
    uid: Optional[str] = None,
    cluster: Optional[str] = "volcano_tts",
    emotion: Optional[str] = None,
    enable_emotion: Optional[bool] = None,
    emotion_scale: Optional[float] = None,
    rate: Optional[int] = 24000,
    bitrate: Optional[int] = 160,
    loudness_ratio: Optional[float] = 1.0,
    explicit_language: Optional[str] = None,
    context_language: Optional[str] = None,
    silence_duration: Optional[float] = None,
    with_timestamp: Optional[bool] = None,
    extra_param: Optional[dict] = None,
    output_dir: Optional[str] = None,
    output_filename: Optional[str] = None
) -> TextContent:
    """调用火山引擎语音合成，将文本转为语音mp3，返回音频文件路径"""
    # 类型安全：所有 int 类型参数都强制转为 int
    if rate is not None:
        rate = int(rate)
    if bitrate is not None:
        bitrate = int(bitrate)
    # 使用config.py的get_voice_type进行音色解析
    #print("volcengine_tts 被调用")
    #print(f"调用 synthesize, text={text}, voice_type={voice_type}")
    voice_type = get_voice_type(voice_type)
    #print('映射后voice_type:', voice_type)
    if not uid or uid is None:
        uid = str(uuid.uuid4())
    audio_bytes = tts_client.synthesize(
        text,
        voice_type=voice_type,
        encoding=encoding,
        speed_ratio=speed_ratio,
        uid=uid,
        cluster=cluster,
        emotion=emotion,
        enable_emotion=enable_emotion,
        emotion_scale=emotion_scale,
        rate=rate,
        bitrate=bitrate,
        loudness_ratio=loudness_ratio,
        explicit_language=explicit_language,
        context_language=context_language,
        silence_duration=silence_duration,
        with_timestamp=with_timestamp,
        extra_param=extra_param
    )
    # 优先用参数，其次用环境变量，最后用当前目录
    if not output_dir:
        output_dir = os.getenv("OUTPUT_DIR", os.getcwd())
    os.makedirs(output_dir, exist_ok=True)
    import time
    if output_filename:
        filename = output_filename
        if not filename.endswith(f".{encoding}"):
            filename += f".{encoding}"
    else:
        filename = f"volc_tts_{int(time.time())}_{uuid.uuid4().hex[:8]}.{encoding}"
    out_path = os.path.join(output_dir, filename)
    with open(out_path, "wb") as f:
        f.write(audio_bytes)
    return TextContent(
        type="text",
        text=f"合成成功，音频文件路径：{out_path}"
    )

@mcp.tool(
    description="""
    获取所有可用的火山引擎TTS音色列表。
    返回：
        List[dict]，每个元素包含音色的scene、desc、voice_type、lang、emotions、keywords等信息。
    """
)
def list_voice() -> list:
    """返回所有可用音色信息列表"""
    return VOICE_LIST

def main():
    mcp.run()
#print("准备启动 MCP 服务")
if __name__ == "__main__":
    #mcp.run(transport="streamable-http")
    main()
    #print("MCP 服务已启动")

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/lxy2109/doubao-tts-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

server.py•6.85 KiB

# server.py
import sys
#print("当前Python路径：", sys.executable)
import os
from mcp.server.fastmcp import FastMCP
from volcengine_mcp.volc_tts import VolcTTS
from dotenv import load_dotenv
from mcp.types import TextContent
from volcengine_mcp.config import get_voice_type, VOICE_LIST
from typing import Optional
import uuid

load_dotenv()

VOLC_APPID = os.getenv("VOLC_APPID")
VOLC_TOKEN = os.getenv("VOLC_TOKEN")
#print("VOLC_APPID:", VOLC_APPID)
#print("VOLC_TOKEN:", VOLC_TOKEN)
tts_client = VolcTTS(VOLC_APPID, VOLC_TOKEN)

mcp = FastMCP("VolcEngineTTS")

#print("准备注册 volcengine_tts 工具")
@mcp.tool(
    description="""
    将文本转为语音并保存为音频文件，支持所有火山引擎TTS参数。
    支持自然语言描述音色，如"青年男声""甜美女声"等。

    参数说明：
        text (str): 要合成的文本内容。
        voice_type (str, 可选): 使用的音色名称，默认为官方女声。
        encoding (str, 可选): 音频编码格式，默认为mp3。
        speed_ratio (float, 可选): 语速，1.0为正常语速。
        uid (str, 可选): 用户ID，默认uid123。
        cluster (str, 可选): 集群，默认volcano_tts。
        emotion (str, 可选): 情感标签。
        enable_emotion (bool, 可选): 是否启用情感。
        emotion_scale (float, 可选): 情感强度。
        rate (int, 可选): 采样率，默认24000。
        bitrate (int, 可选): 比特率，默认160。
        loudness_ratio (float, 可选): 响度，默认1.0。
        explicit_language (str, 可选): 明确语言。
        context_language (str, 可选): 上下文语言。
        silence_duration (float, 可选): 静音时长。
        with_timestamp (bool, 可选): 是否返回时间戳。
        extra_param (dict, 可选): 额外参数。
        output_dir (str, 可选): 音频文件保存目录，优先级高于环境变量 OUTPUT_DIR，默认当前目录。
        output_filename (str, 可选): 自定义音频文件名（无需扩展名），如不指定则自动生成唯一文件名。

    返回：
        TextContent，包含合成成功提示和音频文件路径。

    English:
    Convert text to speech with a given voice and save the output audio file to a given directory.
    Directory is optional, if not provided, the output file will be saved to the current working directory.

    Args:
        text (str): The text to convert to speech.
        voice_type (str, optional): The name of the voice to use.
        encoding (str, optional): Output audio encoding, default is mp3.
        speed_ratio (float, optional): Speed of the generated audio. 1.0 is normal speed.
        uid (str, optional): User ID, default is uid123.
        cluster (str, optional): Cluster, default is volcano_tts.
        emotion (str, optional): Emotion tag.
        enable_emotion (bool, optional): Whether to enable emotion.
        emotion_scale (float, optional): Emotion intensity.
        rate (int, optional): Sampling rate, default is 24000.
        bitrate (int, optional): Bitrate, default is 160.
        loudness_ratio (float, optional): Loudness, default is 1.0.
        explicit_language (str, optional): Explicit language.
        context_language (str, optional): Context language.
        silence_duration (float, optional): Silence duration.
        with_timestamp (bool, optional): Whether to return timestamp.
        extra_param (dict, optional): Extra parameters.
        output_dir (str, optional): Directory to save the output audio file. Priority: argument > env OUTPUT_DIR > current dir.
        output_filename (str, optional): Custom audio file name (no extension needed). If not specified, a unique file name will be generated.

    Returns:
        Text content with the path to the output file and name of the voice used.
    """
)
def volcengine_tts(
    text: str,
    voice_type: Optional[str] = "zh_female_wanqudashu_moon_bigtts",
    encoding: Optional[str] = "mp3",
    speed_ratio: Optional[float] = 1.0,
    uid: Optional[str] = None,
    cluster: Optional[str] = "volcano_tts",
    emotion: Optional[str] = None,
    enable_emotion: Optional[bool] = None,
    emotion_scale: Optional[float] = None,
    rate: Optional[int] = 24000,
    bitrate: Optional[int] = 160,
    loudness_ratio: Optional[float] = 1.0,
    explicit_language: Optional[str] = None,
    context_language: Optional[str] = None,
    silence_duration: Optional[float] = None,
    with_timestamp: Optional[bool] = None,
    extra_param: Optional[dict] = None,
    output_dir: Optional[str] = None,
    output_filename: Optional[str] = None
) -> TextContent:
    """调用火山引擎语音合成，将文本转为语音mp3，返回音频文件路径"""
    # 类型安全：所有 int 类型参数都强制转为 int
    if rate is not None:
        rate = int(rate)
    if bitrate is not None:
        bitrate = int(bitrate)
    # 使用config.py的get_voice_type进行音色解析
    #print("volcengine_tts 被调用")
    #print(f"调用 synthesize, text={text}, voice_type={voice_type}")
    voice_type = get_voice_type(voice_type)
    #print('映射后voice_type:', voice_type)
    if not uid or uid is None:
        uid = str(uuid.uuid4())
    audio_bytes = tts_client.synthesize(
        text,
        voice_type=voice_type,
        encoding=encoding,
        speed_ratio=speed_ratio,
        uid=uid,
        cluster=cluster,
        emotion=emotion,
        enable_emotion=enable_emotion,
        emotion_scale=emotion_scale,
        rate=rate,
        bitrate=bitrate,
        loudness_ratio=loudness_ratio,
        explicit_language=explicit_language,
        context_language=context_language,
        silence_duration=silence_duration,
        with_timestamp=with_timestamp,
        extra_param=extra_param
    )
    # 优先用参数，其次用环境变量，最后用当前目录
    if not output_dir:
        output_dir = os.getenv("OUTPUT_DIR", os.getcwd())
    os.makedirs(output_dir, exist_ok=True)
    import time
    if output_filename:
        filename = output_filename
        if not filename.endswith(f".{encoding}"):
            filename += f".{encoding}"
    else:
        filename = f"volc_tts_{int(time.time())}_{uuid.uuid4().hex[:8]}.{encoding}"
    out_path = os.path.join(output_dir, filename)
    with open(out_path, "wb") as f:
        f.write(audio_bytes)
    return TextContent(
        type="text",
        text=f"合成成功，音频文件路径：{out_path}"
    )

@mcp.tool(
    description="""
    获取所有可用的火山引擎TTS音色列表。
    返回：
        List[dict]，每个元素包含音色的scene、desc、voice_type、lang、emotions、keywords等信息。
    """
)
def list_voice() -> list:
    """返回所有可用音色信息列表"""
    return VOICE_LIST

def main():
    mcp.run()
#print("准备启动 MCP 服务")
if __name__ == "__main__":
    #mcp.run(transport="streamable-http")
    main()
    #print("MCP 服务已启动")