MCP Server Whisper

transcription_tools.py•5.29 KiB

"""MCP tools for transcription operations."""

from typing import Literal, Optional

from openai.types import AudioModel, AudioResponseFormat

from ..config import check_and_get_audio_path
from ..constants import ENHANCEMENT_PROMPTS, AudioChatModel, EnhancementType
from ..infrastructure import FileSystemRepository, MCPServer, OpenAIClientWrapper, SecurePathResolver
from ..models import ChatResult, TranscriptionResult
from ..services import TranscriptionService


def create_transcription_tools(mcp: MCPServer) -> None:
    """Attach the transcription-related tools to an MCP server.

    One ``TranscriptionService`` is built here and captured by the three tool
    coroutines defined below (``transcribe_audio``, ``chat_with_audio`` and
    ``transcribe_with_enhancement``), each of which is registered through
    ``mcp.tool``.

    Args:
    ----
        mcp: Server instance the tools are registered on.

    """
    # The repository and the path resolver are both built from the same audio
    # path; the OpenAI client wrapper takes no arguments.
    audio_root = check_and_get_audio_path()
    service = TranscriptionService(
        FileSystemRepository(audio_root),
        OpenAIClientWrapper(),
        SecurePathResolver(audio_root),
    )

    # Descriptions handed to ``mcp.tool``; kept as named locals so the
    # decorators stay short.
    transcribe_description = (
        "A tool used to transcribe audio files. "
        "It is recommended to use `gpt-4o-mini-transcribe` by default. "
        "If the user wants maximum performance, use `gpt-4o-transcribe`. "
        "Rarely should you use `whisper-1` as it is least performant, but it is available if needed. "
        "You can use prompts to guide the transcription process based on the users preference."
    )
    chat_description = (
        "A tool used to chat with audio files. The response will be a response to the audio file sent. "
        "It is recommended to use `gpt-4o-audio-preview` by default for best results. "
        "Note: `gpt-4o-mini-audio-preview` has limitations with audio chat and may not process audio correctly."
    )

    @mcp.tool(description=transcribe_description)
    async def transcribe_audio(
        input_file_name: str,
        model: AudioModel = "gpt-4o-mini-transcribe",
        response_format: AudioResponseFormat = "text",
        prompt: Optional[str] = None,
        timestamp_granularities: Optional[list[Literal["word", "segment"]]] = None,
    ) -> TranscriptionResult:
        """Forward a transcription request to the transcription service.

        Args:
            input_file_name: Name of the audio file to transcribe
            model: Transcription model to use
            response_format: Format requested for the response
            prompt: Optional text used to guide the transcription
            timestamp_granularities: Optional list of "word" and/or "segment"

        Returns:
        -------
            The TranscriptionResult produced by the service

        """
        result = await service.transcribe_audio(
            filename=input_file_name,
            model=model,
            response_format=response_format,
            prompt=prompt,
            timestamp_granularities=timestamp_granularities,
        )
        return result

    @mcp.tool(description=chat_description)
    async def chat_with_audio(
        input_file_name: str,
        model: AudioChatModel = "gpt-4o-audio-preview",
        system_prompt: Optional[str] = None,
        user_prompt: Optional[str] = None,
    ) -> ChatResult:
        """Forward an audio chat request to the transcription service.

        Args:
            input_file_name: Name of the audio file to send
            model: Audio chat model to use
            system_prompt: Optional system prompt
            user_prompt: Optional user prompt

        Returns:
        -------
            The ChatResult produced by the service

        """
        reply = await service.chat_with_audio(
            filename=input_file_name,
            model=model,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
        )
        return reply

    # NOTE(review): no explicit description is passed to ``mcp.tool`` here, so
    # the framework may fall back to this function's docstring as the tool
    # description. The docstring text is therefore left unchanged - confirm
    # before rewording it.
    @mcp.tool()
    async def transcribe_with_enhancement(
        input_file_name: str,
        enhancement_type: EnhancementType = "detailed",
        model: AudioModel = "gpt-4o-mini-transcribe",
        response_format: AudioResponseFormat = "text",
        timestamp_granularities: Optional[list[Literal["word", "segment"]]] = None,
    ) -> TranscriptionResult:
        """Transcribe audio with GPT-4 using specific enhancement prompts.

        Enhancement types:
        - detailed: Provides detailed description including tone, emotion, and background
        - storytelling: Transforms the transcription into a narrative
        - professional: Formats the transcription in a formal, business-appropriate way
        - analytical: Includes analysis of speech patterns, key points, and structure

        Args:
            input_file_name: Name of the input audio file to process
            enhancement_type: Type of enhancement to apply to the transcription
            model: The transcription model to use
            response_format: The response format
            timestamp_granularities: Optional timestamp granularities

        Returns:
        -------
            TranscriptionResult with enhanced transcription

        """
        # The enhancement type selects the prompt; everything else is passed
        # through to the same service call that ``transcribe_audio`` uses.
        return await service.transcribe_audio(
            filename=input_file_name,
            model=model,
            response_format=response_format,
            prompt=ENHANCEMENT_PROMPTS[enhancement_type],
            timestamp_granularities=timestamp_granularities,
        )

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/arcaputo3/mcp-server-whisper'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

transcription_tools.py•5.29 KiB