Skip to main content
Glama
transcription_tools.py (5.41 kB)
"""MCP tools for transcription operations."""

from typing import Literal, Optional

from openai.types import AudioModel, AudioResponseFormat

from ..config import check_and_get_audio_path
from ..constants import ENHANCEMENT_PROMPTS, AudioChatModel, EnhancementType
from ..infrastructure import FileSystemRepository, MCPServer, OpenAIClientWrapper, SecurePathResolver
from ..models import ChatResult, TranscriptionResult
from ..services import TranscriptionService


def create_transcription_tools(mcp: MCPServer) -> None:
    """Wire up the transcription service and register its tools on ``mcp``.

    Three tools are registered: ``transcribe_audio``, ``chat_with_audio`` and
    ``transcribe_with_enhancement``. Each one forwards its arguments to a
    single shared ``TranscriptionService`` built here.

    Args:
    ----
        mcp: Server instance whose ``tool`` decorator is used for registration.

    """
    # One service instance is shared by every tool defined below. The
    # collaborators are constructed in the order repository -> client ->
    # path resolver, all rooted at the same audio path.
    audio_root = check_and_get_audio_path()
    service = TranscriptionService(
        FileSystemRepository(audio_root),
        OpenAIClientWrapper(),
        SecurePathResolver(audio_root),
    )

    # Tool descriptions are passed to the server verbatim; do not reword
    # them casually.
    transcribe_description = (
        "A tool used to transcribe audio files. "
        "It is recommended to use `gpt-4o-mini-transcribe` by default. "
        "If the user wants maximum performance, use `gpt-4o-transcribe`. "
        "Rarely should you use `whisper-1` as it is least performant, but it is available if needed. "
        "You can use prompts to guide the transcription process based on the users preference."
    )
    chat_description = (
        "A tool used to chat with audio files. The response will be a response to the audio file sent. "
        "It is recommended to use `gpt-4o-audio-preview` by default for best results. "
        "Note: `gpt-4o-mini-audio-preview` has limitations with audio chat and may not process audio correctly."
    )

    @mcp.tool(description=transcribe_description)
    async def transcribe_audio(
        input_file_name: str,
        model: AudioModel = "gpt-4o-mini-transcribe",
        response_format: AudioResponseFormat = "text",
        prompt: Optional[str] = None,
        timestamp_granularities: Optional[list[Literal["word", "segment"]]] = None,
    ) -> TranscriptionResult:
        """Forward a transcription request to the shared transcription service.

        Args:
        ----
            input_file_name: Name of the audio file to transcribe.
            model: Transcription model to request.
            response_format: Desired response format.
            prompt: Optional text used to guide the transcription.
            timestamp_granularities: Optional timestamp granularities
                ("word" and/or "segment").

        Returns:
        -------
            The ``TranscriptionResult`` produced by the service.

        """
        transcription = await service.transcribe_audio(
            filename=input_file_name,
            model=model,
            response_format=response_format,
            prompt=prompt,
            timestamp_granularities=timestamp_granularities,
        )
        return transcription

    @mcp.tool(description=chat_description)
    async def chat_with_audio(
        input_file_name: str,
        model: AudioChatModel = "gpt-4o-audio-preview",
        system_prompt: Optional[str] = None,
        user_prompt: Optional[str] = None,
    ) -> ChatResult:
        """Forward an audio chat request to the shared transcription service.

        Args:
        ----
            input_file_name: Name of the audio file to send.
            model: Audio chat model to request.
            system_prompt: Optional system prompt.
            user_prompt: Optional user prompt.

        Returns:
        -------
            The ``ChatResult`` produced by the service.

        """
        reply = await service.chat_with_audio(
            filename=input_file_name,
            model=model,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
        )
        return reply

    # NOTE(review): no explicit description is passed to ``mcp.tool()`` here.
    # If the server falls back to the function docstring for the tool
    # description, the docstring below is user-visible text -- confirm before
    # rewording it.
    @mcp.tool()
    async def transcribe_with_enhancement(
        input_file_name: str,
        enhancement_type: EnhancementType = "detailed",
        model: AudioModel = "gpt-4o-mini-transcribe",
        response_format: AudioResponseFormat = "text",
        timestamp_granularities: Optional[list[Literal["word", "segment"]]] = None,
    ) -> TranscriptionResult:
        """Transcribe audio with GPT-4 using specific enhancement prompts.

        Enhancement types:
        - detailed: Provides detailed description including tone, emotion, and background
        - storytelling: Transforms the transcription into a narrative
        - professional: Formats the transcription in a formal, business-appropriate way
        - analytical: Includes analysis of speech patterns, key points, and structure

        Args:
            input_file_name: Name of the input audio file to process
            enhancement_type: Type of enhancement to apply to the transcription
            model: The transcription model to use
            response_format: The response format
            timestamp_granularities: Optional timestamp granularities

        Returns:
        -------
            TranscriptionResult with enhanced transcription

        """
        # The enhancement is applied purely by selecting a canned prompt; the
        # request itself goes through the same service call as a plain
        # transcription.
        enhancement_prompt = ENHANCEMENT_PROMPTS[enhancement_type]
        enhanced = await service.transcribe_audio(
            filename=input_file_name,
            model=model,
            response_format=response_format,
            prompt=enhancement_prompt,
            timestamp_granularities=timestamp_granularities,
        )
        return enhanced

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arcaputo3/mcp-server-whisper'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.