speak
Convert text to speech using Windows' built-in Speech API. Control playback, adjust speed, and manage volume for clear audio output directly from your desktop.
Instructions
텍스트를 음성으로 읽어줍니다
Input Schema
Table | JSON Schema
| Name | Required | Description | Default |
|---|---|---|---|
| text | Yes | Text to be read aloud | |
Implementation Reference
# windows_tts_mcp/main.py:170-202 (handler)
# The handler function for the 'speak' tool. Decorated with @mcp.tool(), which
# registers it in the MCP server. Splits long text into chunks and spawns a
# background thread to speak each chunk using the powershell_tts helper.
#
# Parameters:
#     text: The text to read aloud.
# Returns:
#     A status string: "[START] ..." once playback has been scheduled, or
#     "[ERROR] ..." when there is nothing to speak or scheduling failed.
#     The call returns immediately; playback continues in the background.
@mcp.tool()
def speak(text: str) -> str:
    """텍스트를 음성으로 읽어줍니다"""
    # NOTE(review): the docstring above is kept verbatim on purpose. It appears
    # to be published as the tool's user-visible "Instructions" text, so
    # changing it would change runtime behavior.
    try:
        # Nothing to read: do not spawn a thread and a PowerShell process
        # just to synthesize silence.
        if not text or not text.strip():
            return "[ERROR] 음성 재생 오류: 읽을 텍스트가 없습니다"

        # Long input is spoken as a sequence of chunks of at most 500 chars.
        text_chunks = split_text_for_tts(text, 500)
        total_chunks = len(text_chunks)

        def _speak_thread():
            """Speak every chunk in order, retrying a failed chunk once."""
            for i, chunk in enumerate(text_chunks, 1):
                safe_print(f"[TTS] {i}/{total_chunks} 부분 재생 중: {chunk[:50]}...")

                if not powershell_tts(chunk):
                    safe_print(f"[RETRY] TTS 재시도: {chunk[:30]}...")
                    # One more attempt; report a second failure instead of
                    # dropping it silently, then move on to the next chunk.
                    if not powershell_tts(chunk):
                        safe_print(f"[ERROR] TTS 재시도 실패: {chunk[:30]}...")

                # Short pause between chunks (not after the last one).
                if i < total_chunks:
                    time.sleep(0.5)

        # Run in the background so the tool call is not blocked by playback.
        thread = threading.Thread(target=_speak_thread, daemon=True)
        thread.start()

        if total_chunks > 1:
            return f"[START] 음성 재생 시작 ({total_chunks}개 부분으로 분할): '{text[:50]}...'"
        else:
            return f"[START] 음성 재생 시작: '{text[:50]}...'"

    except Exception as e:
        # Tool boundary: report the failure to the caller as text.
        return f"[ERROR] 음성 재생 오류: {str(e)}"
# windows_tts_mcp/main.py:67-130 (helper)
# Core helper function that runs the actual TTS via PowerShell, invoking
# System.Speech.Synthesis.SpeechSynthesizer.Speak(). Handles process
# management, safe text transport, timeouts, and errors.
def powershell_tts(text: str, rate: int = 0, volume: int = 100) -> bool:
    """Speak *text* synchronously by running a PowerShell TTS script.

    The spawned process is added to ``running_processes`` (under
    ``process_lock``) while it runs and is always removed again before this
    function returns.

    Args:
        text: Text to speak.
        rate: Speaking rate; clamped to the synthesizer's -10..10 range.
        volume: Output volume; clamped to the synthesizer's 0..100 range.

    Returns:
        True when PowerShell exits with code 0; False on a non-Windows
        platform, a non-zero exit code, a 180-second timeout, or any
        other error.
    """
    import base64  # local import: only needed to transport the text safely

    process = None
    try:
        if platform.system() != "Windows":
            safe_print("[ERROR] Windows가 아닙니다")
            return False

        # Coerce to int and clamp so nothing but a valid number can end up
        # interpolated into the script.
        rate = max(-10, min(10, int(rate)))
        volume = max(0, min(100, int(volume)))

        # Never splice the raw text into the script: doubling "'" is not
        # enough, because PowerShell also accepts typographic single quotes
        # as string delimiters, so crafted text could escape the literal and
        # run arbitrary commands. Base64 uses only [A-Za-z0-9+/=], which is
        # inert inside a single-quoted string. UTF-16LE matches what
        # [System.Text.Encoding]::Unicode decodes.
        encoded_text = base64.b64encode(text.encode("utf-16-le")).decode("ascii")

        cmd = [
            "powershell",
            "-Command",
            "Add-Type -AssemblyName System.Speech; "
            "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer; "
            f"$synth.Rate = {rate}; "
            f"$synth.Volume = {volume}; "
            "$text = [System.Text.Encoding]::Unicode.GetString("
            f"[System.Convert]::FromBase64String('{encoded_text}')); "
            "$synth.Speak($text); "
            "$synth.Dispose()",
        ]

        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        # Track the process while it is running.
        with process_lock:
            running_processes.append(process)

        # Blocks until speech has finished (or the timeout expires).
        _stdout, stderr = process.communicate(timeout=180)

        if process.returncode == 0:
            safe_print(f"[SUCCESS] TTS 완료: {text[:30]}...")
            return True

        safe_print(f"[ERROR] TTS 오류: {stderr}")
        return False

    except subprocess.TimeoutExpired:
        safe_print("[WARNING] TTS 시간 초과")
        return False
    except Exception as e:
        safe_print(f"[ERROR] TTS 예외: {e}")
        return False
    finally:
        # Single cleanup path for success, timeout and error.
        if process is not None:
            if process.poll() is None:
                # Still running: stop it and reap it so the pipes are closed
                # and no zombie/handle is left behind.
                try:
                    process.kill()
                    process.communicate(timeout=5)
                except (OSError, subprocess.SubprocessError):
                    pass  # best effort; the process is being abandoned anyway
            with process_lock:
                if process in running_processes:
                    running_processes.remove(process)
# windows_tts_mcp/main.py:131-168 (helper)
# Helper function to intelligently split long text into smaller chunks
# suitable for TTS, prioritizing sentence boundaries.
def split_text_for_tts(text: str, max_length: int = 500) -> list:
    """Split *text* into chunks of at most *max_length* characters for TTS.

    Splitting prefers, in order: sentence boundaries, then clause/word
    boundaries inside an over-long sentence, then a hard cut inside an
    over-long word. Original punctuation is kept; pieces packed into the
    same chunk are joined with a single space.

    Args:
        text: The text to split.
        max_length: Maximum number of characters per chunk (>= 1).

    Returns:
        A list of strings. Text that already fits is returned unchanged as
        a single-element list; otherwise every chunk is non-empty, stripped
        and no longer than *max_length*.

    Raises:
        ValueError: If *max_length* is smaller than 1.
    """
    import re

    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    if len(text) <= max_length:
        return [text]

    # Break the text into pieces that each fit into one chunk.
    pieces = []
    # Split *after* a sentence terminator so the terminator stays attached.
    # ASCII terminators need following whitespace (keeps "3.14" intact);
    # full-width terminators are usually not followed by a space at all.
    for sentence in re.split(r'(?<=[.!?])\s+|(?<=[。!?])\s*', text):
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_length:
            pieces.append(sentence)
            continue
        # Over-long sentence: split after clause punctuation or at
        # whitespace, and hard-cut any part that is still too long.
        for part in re.split(r'(?<=[,;:])\s*|\s+', sentence):
            pieces.extend(
                part[start:start + max_length]
                for start in range(0, len(part), max_length)
            )

    # Greedily pack consecutive pieces into chunks.
    chunks = []
    current = ""
    for piece in pieces:
        candidate = f"{current} {piece}" if current else piece
        if len(candidate) <= max_length:
            current = candidate
        else:
            # `current` is non-empty here: a lone piece always fits.
            chunks.append(current)
            current = piece

    if current:
        chunks.append(current)

    return chunks