Voice Mode

MIT License

401

Overview InspectNew Endpoints Schema Related Servers Reviews Score

voicemode
voice_mode
tools
transcription

formats.py•4.49 kB

"""Format converters for transcription output.""" import csv import io from typing import Dict, Any, List from .types import TranscriptionResult, OutputFormat def format_timestamp_srt(seconds: float) -> str: """Format timestamp for SRT (HH:MM:SS,mmm)""" hours = int(seconds // 3600) minutes = int((seconds % 3600) // 60) secs = seconds % 60 return f"{hours:02d}:{minutes:02d}:{secs:06.3f}".replace(".", ",") def format_timestamp_vtt(seconds: float) -> str: """Format timestamp for WebVTT (HH:MM:SS.mmm)""" hours = int(seconds // 3600) minutes = int((seconds % 3600) // 60) secs = seconds % 60 return f"{hours:02d}:{minutes:02d}:{secs:06.3f}" def convert_to_srt(transcription: Dict[str, Any]) -> str: """ Convert transcription to SRT subtitle format. """ srt_lines = [] for i, segment in enumerate(transcription.get("segments", []), 1): start = format_timestamp_srt(segment.get("start", 0)) end = format_timestamp_srt(segment.get("end", 0)) text = segment.get("text", "").strip() # Add speaker if available if "speaker" in segment: text = f"[{segment['speaker']}] {text}" srt_lines.append(str(i)) srt_lines.append(f"{start} --> {end}") srt_lines.append(text) srt_lines.append("") return "\n".join(srt_lines) def convert_to_vtt(transcription: Dict[str, Any]) -> str: """ Convert transcription to WebVTT format. """ vtt_lines = ["WEBVTT", ""] for segment in transcription.get("segments", []): start = format_timestamp_vtt(segment.get("start", 0)) end = format_timestamp_vtt(segment.get("end", 0)) text = segment.get("text", "").strip() # Add speaker if available if "speaker" in segment: text = f"<v {segment['speaker']}>{text}" vtt_lines.append(f"{start} --> {end}") vtt_lines.append(text) vtt_lines.append("") return "\n".join(vtt_lines) def convert_to_csv(transcription: Dict[str, Any]) -> str: """ Convert transcription to CSV format with word-level data. 
""" output = io.StringIO() # Determine columns based on available data has_words = "words" in transcription and transcription["words"] has_speakers = any("speaker" in w for w in transcription.get("words", [])) has_probability = any("probability" in w for w in transcription.get("words", [])) # Write header if has_words: headers = ["word", "start", "end"] if has_speakers: headers.append("speaker") if has_probability: headers.append("probability") else: headers = ["text", "start", "end"] if has_speakers: headers.append("speaker") writer = csv.DictWriter(output, fieldnames=headers) writer.writeheader() # Write data if has_words: for word in transcription.get("words", []): row = { "word": word.get("word", ""), "start": word.get("start", 0), "end": word.get("end", 0) } if has_speakers: row["speaker"] = word.get("speaker", "") if has_probability: row["probability"] = word.get("probability", "") writer.writerow(row) else: for segment in transcription.get("segments", []): row = { "text": segment.get("text", "").strip(), "start": segment.get("start", 0), "end": segment.get("end", 0) } if has_speakers: row["speaker"] = segment.get("speaker", "") writer.writerow(row) return output.getvalue() def convert_to_format(transcription: TranscriptionResult, format: OutputFormat) -> str: """ Convert transcription to specified format. Args: transcription: The transcription result format: Target output format Returns: Formatted string representation """ if format == OutputFormat.SRT: return convert_to_srt(transcription) elif format == OutputFormat.VTT: return convert_to_vtt(transcription) elif format == OutputFormat.CSV: return convert_to_csv(transcription) else: # Default to JSON (handled elsewhere) import json return json.dumps(transcription, indent=2)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/mbailey/voicemode'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.