Whissle MCP Server

Overview Schema Related Servers Score Discussions

whissle-mcp
whissle_mcp

server.py•23 KiB

"""
Whissle MCP Server

⚠️ IMPORTANT: This server provides access to Whissle API endpoints which may incur costs.
Each tool that makes an API call is marked with a cost warning. Please follow these guidelines:

1. Only use tools when explicitly requested by the user
2. For tools that process audio, consider the length of the audio as it affects costs
3. Some operations like translation or summarization may have higher costs

Tools without cost warnings in their description are free to use as they only read existing data.
"""

import os
import time
import logging
from pathlib import Path
from typing import List, Optional, Dict
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from mcp.types import TextContent
from whissle import WhissleClient
from whissle_mcp.utils import (
    make_error,
    make_output_path,
    make_output_file,
    handle_input_file,
)

# Logging: one root configuration for the process, plus a named logger for this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("whissle_mcp")

# Pull settings from a .env file (if any) before reading the environment.
load_dotenv()
auth_token = os.environ.get("WHISSLE_AUTH_TOKEN")
base_path = os.environ.get("WHISSLE_MCP_BASE_PATH")

# The server cannot do anything useful without credentials, so fail at import time.
if not auth_token:
    raise ValueError("WHISSLE_AUTH_TOKEN environment variable is required")

# Build the synchronous Whissle client shared by every tool below.
try:
    client = WhissleClient(auth_token=auth_token).sync_client
except Exception as init_error:
    init_failure = f"Failed to initialize Whissle client: {init_error}"
    logger.error(init_failure)
    raise ValueError(init_failure)
else:
    logger.info("Whissle client initialized successfully")

# FastMCP application object; tools register themselves on it via @mcp.tool.
mcp = FastMCP("Whissle")


def handle_api_error(error_msg, operation_name, retry_count=0, max_retries=2):
    """Classify an API error message and decide between retrying and reporting.

    Args:
        error_msg: Text of the error raised by the API call.
        operation_name: Human-readable name of the operation, used in messages.
        retry_count: Zero-based number of retries already performed.
        max_retries: Maximum number of retries allowed.

    Returns:
        None when the caller should retry (only for messages containing
        "HTTP 500" while retries remain; this function sleeps before returning),
        otherwise the value produced by make_error for the matching error class.
    """
    logger.error(f"API error during {operation_name}: {error_msg}")

    # Everything that is not a server-side 500 is reported immediately.
    if "HTTP 500" not in error_msg:
        if "HTTP 413" in error_msg:
            return make_error(f"File too large. Please try a smaller file. Error: {error_msg}")
        if "HTTP 415" in error_msg:
            return make_error(f"Unsupported file format. Please use a supported format. Error: {error_msg}")
        if "HTTP 401" in error_msg or "HTTP 403" in error_msg:
            return make_error(f"Authentication error. Please check your API token. Error: {error_msg}")
        return make_error(f"API error during {operation_name}: {error_msg}")

    # HTTP 500 with retries left: back off (delay doubles each retry, starting at 2 s).
    if retry_count < max_retries:
        delay = 2 ** (retry_count + 1)
        logger.info(f"HTTP 500 error during {operation_name}. Retrying in {delay} seconds... (Attempt {retry_count+1}/{max_retries+1})")
        time.sleep(delay)
        return None  # tells the caller to try again

    # HTTP 500 with retries exhausted: upload failures get a more detailed hint.
    upload_failed = "uploading file" in error_msg.lower()
    if upload_failed:
        return make_error(
            f"Server error during {operation_name}. The file upload to the Whissle API failed. "
            f"This could be due to:\n"
            f"1. Temporary server issues\n"
            f"2. File format compatibility issues\n"
            f"3. Network connectivity problems\n"
            f"Please try again later or contact Whissle support. Error: {error_msg}"
        )
    return make_error(
        f"Server error during {operation_name}. This might be a temporary issue with the Whissle API. "
        f"Please try again later or contact Whissle support. Error: {error_msg}"
    )


@mcp.tool(
    description="""Convert speech to text with a given model and return the transcription.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        audio_file_path (str): Path to the audio file to transcribe
        model_name (str, optional): The name of the ASR model to use. Defaults to "en-NER"
        timestamps (bool, optional): Whether to include word timestamps
        boosted_lm_words (List[str], optional): Words to boost in recognition
        boosted_lm_score (int, optional): Score for boosted words (0-100)

    Returns:
        Dict with the transcript, duration and language code (plus timestamps and
        diarization output when the API provides them), or an "error" entry on failure.
    """
)
def speech_to_text(audio_file_path: str, model_name: str = "en-NER", timestamps: bool = True, boosted_lm_words: Optional[List[str]] = None, boosted_lm_score: int = 80) -> Dict:
    """Transcribe an audio file with the Whissle API.

    The file is validated locally (regular file, non-empty, supported
    extension, at most 25 MB) before any API call is made. API failures are
    passed to handle_api_error, which sleeps and signals a retry for messages
    containing "HTTP 500" while retries remain.

    Args:
        audio_file_path: Path to the audio file to transcribe.
        model_name: ASR model to use; an empty value falls back to "en-NER".
        timestamps: Forwarded to the API's ``timestamps`` argument.
        boosted_lm_words: Forwarded to the API's ``boosted_lm_words`` argument.
        boosted_lm_score: Forwarded to the API's ``boosted_lm_score`` argument.

    Returns:
        On success a dict with "transcript", "duration_seconds" and
        "language_code", plus "timestamps" / "diarize_output" when the
        response carries them. On failure a dict with a single "error" key.
    """
    try:
        # Reject missing paths and non-files (e.g. directories) before spending an API call.
        if not os.path.isfile(audio_file_path):
            logger.error(f"Audio file not found: {audio_file_path}")
            return {"error": f"Audio file not found: {audio_file_path}"}

        file_size = os.path.getsize(audio_file_path)
        if file_size == 0:
            logger.error(f"Audio file is empty: {audio_file_path}")
            return {"error": f"Audio file is empty: {audio_file_path}"}

        file_ext = os.path.splitext(audio_file_path)[1].lower()
        if file_ext not in ('.wav', '.mp3', '.ogg', '.flac', '.m4a'):
            logger.error(f"Unsupported audio format: {file_ext}")
            return {"error": f"Unsupported audio format: {file_ext}. Supported formats: wav, mp3, ogg, flac, m4a"}

        max_size_mb = 25
        size_mb = file_size / (1024 * 1024)
        if file_size > max_size_mb * 1024 * 1024:
            logger.error(f"File too large: {size_mb:.2f} MB")
            return {"error": f"File too large ({size_mb:.2f} MB). Maximum size is {max_size_mb} MB."}

        logger.info(f"Transcribing audio file: {audio_file_path}")
        logger.info(f"File size: {size_mb:.2f} MB")
        logger.info(f"File format: {file_ext}")

        # Honour the caller's model choice. The previous code hard-coded
        # "en-NER" here, silently ignoring the model_name argument.
        selected_model = model_name or "en-NER"
        last_error = None
        retry_count = 0
        max_retries = 2

        while retry_count <= max_retries:
            try:
                logger.info(f"Attempting transcription with model: {selected_model} (Attempt {retry_count+1}/{max_retries+1})")
                response = client.speech_to_text(
                    audio_file_path=audio_file_path,
                    model_name=selected_model,
                    timestamps=timestamps,
                    boosted_lm_words=boosted_lm_words,
                    boosted_lm_score=boosted_lm_score
                )

                if response and hasattr(response, 'transcript'):
                    logger.info(f"Transcription successful with model: {selected_model}")

                    result = {
                        "transcript": response.transcript,
                        "duration_seconds": getattr(response, 'duration_seconds', 0),
                        "language_code": getattr(response, 'language_code', 'en')
                    }

                    if hasattr(response, 'timestamps'):
                        result["timestamps"] = response.timestamps

                    if hasattr(response, 'diarize_output') and response.diarize_output:
                        result["diarize_output"] = response.diarize_output

                    return result

                # A response without a transcript is not retried.
                last_error = "No transcription was returned from the API"
                logger.error(f"No transcription returned from API with model {selected_model}")
                break
            except Exception as api_error:
                error_msg = str(api_error)
                logger.error(f"Error with model {selected_model}: {error_msg}")
                last_error = error_msg

                # None means "retry" (handle_api_error has already slept).
                error_result = handle_api_error(error_msg, "transcription", retry_count, max_retries)
                if error_result is not None:
                    if retry_count == max_retries:
                        break
                    # NOTE(review): error_result is whatever make_error returns
                    # (defined outside this file) — confirm it serializes well
                    # inside this dict.
                    return {"error": error_result}

                retry_count += 1

        if last_error and "HTTP 500" in last_error:
            logger.error(f"All transcription attempts failed with HTTP 500: {last_error}")
            return {"error": f"Server error during transcription. This might be a temporary issue with the Whissle API. Please try again later or contact Whissle support. Error: {last_error}"}

        logger.error(f"All transcription attempts failed: {last_error}")
        return {"error": f"Failed to transcribe audio: {last_error}"}

    except Exception as e:
        # Last-resort boundary so the tool always answers with a dict.
        logger.error(f"Unexpected error during transcription: {str(e)}")
        return {"error": f"Failed to transcribe audio: {str(e)}"}


@mcp.tool(
    description="""Convert speech to text with speaker diarization and return the diarized transcription.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        audio_file_path (str): Path to the audio file to transcribe
        model_name (str, optional): The name of the ASR model to use. Defaults to "en-NER"
        max_speakers (int, optional): Maximum number of speakers to identify
        boosted_lm_words (List[str], optional): Words to boost in recognition
        boosted_lm_score (int, optional): Score for boosted words (0-100)

    Returns:
        Dict with the transcript, duration, language code and diarization output
        (plus timestamps when the API provides them), or an "error" entry on failure.
    """
)
def diarize_speech(audio_file_path: str, model_name: str = "en-NER", max_speakers: int = 2, boosted_lm_words: Optional[List[str]] = None, boosted_lm_score: int = 80) -> Dict:
    """Transcribe an audio file with speaker diarization via the Whissle API.

    The file is validated locally (regular file, non-empty, supported
    extension, at most 25 MB) before any API call is made. API failures are
    passed to handle_api_error, which sleeps and signals a retry for messages
    containing "HTTP 500" while retries remain.

    Args:
        audio_file_path: Path to the audio file to diarize.
        model_name: ASR model to use; an empty value falls back to "en-NER".
        max_speakers: Forwarded to the API's ``max_speakers`` argument.
        boosted_lm_words: Forwarded to the API's ``boosted_lm_words`` argument.
        boosted_lm_score: Forwarded to the API's ``boosted_lm_score`` argument.

    Returns:
        On success a dict with "transcript", "duration_seconds",
        "language_code" and "diarize_output", plus "timestamps" when the
        response carries them. On failure a dict with a single "error" key.
    """
    try:
        # Reject missing paths and non-files (e.g. directories) before spending an API call.
        if not os.path.isfile(audio_file_path):
            logger.error(f"Audio file not found: {audio_file_path}")
            return {"error": f"Audio file not found: {audio_file_path}"}

        file_size = os.path.getsize(audio_file_path)
        if file_size == 0:
            logger.error(f"Audio file is empty: {audio_file_path}")
            return {"error": f"Audio file is empty: {audio_file_path}"}

        file_ext = os.path.splitext(audio_file_path)[1].lower()
        if file_ext not in ('.wav', '.mp3', '.ogg', '.flac', '.m4a'):
            logger.error(f"Unsupported audio format: {file_ext}")
            return {"error": f"Unsupported audio format: {file_ext}. Supported formats: wav, mp3, ogg, flac, m4a"}

        max_size_mb = 25
        size_mb = file_size / (1024 * 1024)
        if file_size > max_size_mb * 1024 * 1024:
            logger.error(f"File too large: {size_mb:.2f} MB")
            return {"error": f"File too large ({size_mb:.2f} MB). Maximum size is {max_size_mb} MB."}

        logger.info(f"Diarizing audio file: {audio_file_path}")
        logger.info(f"File size: {size_mb:.2f} MB")
        logger.info(f"File format: {file_ext}")

        # Honour the caller's model choice. The previous code hard-coded
        # "en-NER" here, silently ignoring the model_name argument.
        selected_model = model_name or "en-NER"
        last_error = None
        retry_count = 0
        max_retries = 2

        while retry_count <= max_retries:
            try:
                logger.info(f"Attempting diarization with model: {selected_model} (Attempt {retry_count+1}/{max_retries+1})")
                response = client.diarize_stt(
                    audio_file_path=audio_file_path,
                    model_name=selected_model,
                    max_speakers=max_speakers,
                    boosted_lm_words=boosted_lm_words,
                    boosted_lm_score=boosted_lm_score
                )

                if response and hasattr(response, 'diarize_output') and response.diarize_output:
                    logger.info(f"Diarization successful with model: {selected_model}")

                    result = {
                        "transcript": getattr(response, 'transcript', ''),
                        "duration_seconds": getattr(response, 'duration_seconds', 0),
                        "language_code": getattr(response, 'language_code', 'en'),
                        "diarize_output": response.diarize_output
                    }

                    if hasattr(response, 'timestamps'):
                        result["timestamps"] = response.timestamps

                    return result

                # A response without diarization output is not retried.
                last_error = "No diarized transcription was returned from the API"
                logger.error(f"No diarized transcription returned from API with model {selected_model}")
                break
            except Exception as api_error:
                error_msg = str(api_error)
                logger.error(f"Error with model {selected_model}: {error_msg}")
                last_error = error_msg

                # None means "retry" (handle_api_error has already slept).
                error_result = handle_api_error(error_msg, "diarization", retry_count, max_retries)
                if error_result is not None:
                    if retry_count == max_retries:
                        break
                    # NOTE(review): error_result is whatever make_error returns
                    # (defined outside this file) — confirm it serializes well
                    # inside this dict.
                    return {"error": error_result}

                retry_count += 1

        if last_error and "HTTP 500" in last_error:
            logger.error(f"All diarization attempts failed with HTTP 500: {last_error}")
            return {"error": f"Server error during diarization. This might be a temporary issue with the Whissle API. Please try again later or contact Whissle support. Error: {last_error}"}

        logger.error(f"All diarization attempts failed: {last_error}")
        return {"error": f"Failed to diarize speech: {last_error}"}

    except Exception as e:
        # Last-resort boundary so the tool always answers with a dict.
        logger.error(f"Unexpected error during diarization: {str(e)}")
        return {"error": f"Failed to diarize speech: {str(e)}"}


@mcp.tool(
    description="""Translate text from one language to another.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        text (str): The text to translate
        source_language (str): Source language code (e.g., "en" for English)
        target_language (str): Target language code (e.g., "es" for Spanish)

    Returns:
        TextContent with the translated text.
    """
)
def translate_text(
    text: str,
    source_language: str,
    target_language: str,
) -> TextContent:
    """Translate ``text`` through the Whissle machine-translation endpoint.

    Empty input is rejected up front. Each API failure is handed to
    handle_api_error, which either asks for another attempt (returns None)
    or supplies the error value that is returned to the caller.
    """
    try:
        # Nothing to translate: answer without calling the API.
        if not text:
            logger.error("Empty text provided for translation")
            return make_error("Text is required")

        logger.info(f"Translating text from {source_language} to {target_language}")
        logger.info(f"Text length: {len(text)} characters")

        max_retries = 2

        # One initial attempt plus up to max_retries retries.
        for attempt in range(max_retries + 1):
            try:
                logger.info(f"Attempting translation (Attempt {attempt+1}/{max_retries+1})")
                response = client.machine_translation(
                    text=text,
                    source_language=source_language,
                    target_language=target_language,
                )

                if not (response and response.translated_text):
                    logger.error("No translation was returned from the API")
                    return make_error("No translation was returned from the API")

                logger.info("Translation successful")
                return TextContent(
                    type="text",
                    text=f"Translation:\n{response.translated_text}",
                )
            except Exception as api_error:
                failure = str(api_error)
                logger.error(f"Translation error: {failure}")

                # A non-None outcome is final; None means go round again.
                outcome = handle_api_error(failure, "translation", attempt, max_retries)
                if outcome is not None:
                    return outcome

        # Fallback for the case where every attempt asked for a retry.
        logger.error(f"All translation attempts failed after {max_retries+1} attempts")
        return make_error(f"Failed to translate text after {max_retries+1} attempts")
    except Exception as e:
        logger.error(f"Unexpected error during translation: {str(e)}")
        return make_error(f"Failed to translate text: {str(e)}")


@mcp.tool(
    description="""Summarize text using an LLM model.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        content (str): The text to summarize
        model_name (str, optional): The LLM model to use. Defaults to "openai"
        instruction (str, optional): Specific instructions for summarization

    Returns:
        TextContent with the summary.
    """
)
def summarize_text(
    content: str,
    model_name: str = "openai",
    instruction: Optional[str] = None,
) -> TextContent:
    """Summarize ``content`` through the Whissle LLM summarizer endpoint.

    Empty input is rejected up front. Each API failure is handed to
    handle_api_error, which either asks for another attempt (returns None)
    or supplies the error value that is returned to the caller.
    """
    try:
        # Nothing to summarize: answer without calling the API.
        if not content:
            logger.error("Empty content provided for summarization")
            return make_error("Content is required")

        logger.info(f"Summarizing text using model: {model_name}")
        logger.info(f"Text length: {len(content)} characters")

        max_retries = 2

        # One initial attempt plus up to max_retries retries.
        for attempt in range(max_retries + 1):
            try:
                logger.info(f"Attempting summarization (Attempt {attempt+1}/{max_retries+1})")
                response = client.llm_text_summarizer(
                    content=content,
                    model_name=model_name,
                    instruction=instruction,
                )

                if not (response and response.response):
                    logger.error("No summary was returned from the API")
                    return make_error("No summary was returned from the API")

                logger.info("Summarization successful")
                return TextContent(
                    type="text",
                    text=f"Summary:\n{response.response}",
                )
            except Exception as api_error:
                failure = str(api_error)
                logger.error(f"Summarization error: {failure}")

                # A non-None outcome is final; None means go round again.
                outcome = handle_api_error(failure, "summarization", attempt, max_retries)
                if outcome is not None:
                    return outcome

        # Fallback for the case where every attempt asked for a retry.
        logger.error(f"All summarization attempts failed after {max_retries+1} attempts")
        return make_error(f"Failed to summarize text after {max_retries+1} attempts")
    except Exception as e:
        logger.error(f"Unexpected error during summarization: {str(e)}")
        return make_error(f"Failed to summarize text: {str(e)}")


@mcp.tool(
    description="List all available ASR models and their capabilities."
)
def list_asr_models() -> TextContent:
    """Fetch the ASR models offered by the API and format them as text.

    Returns:
        TextContent listing the models, or the error value produced by
        make_error / handle_api_error when the lookup fails.
    """
    try:
        logger.info("Fetching available ASR models...")

        max_retries = 2

        # One initial attempt plus up to max_retries retries.
        for attempt in range(max_retries + 1):
            try:
                logger.info(f"Attempting to list models (Attempt {attempt+1}/{max_retries+1})")
                models = client.list_asr_models()

                if not models:
                    logger.error("No models were returned from the API")
                    return make_error("No models were returned from the API")

                # Only list responses are understood.
                if not isinstance(models, list):
                    logger.error("Unexpected response format from API")
                    return make_error("Unexpected response format from API")

                if all(isinstance(model, str) for model in models):
                    # Plain model names.
                    entries = [f"Model: {model}" for model in models]
                else:
                    # Objects exposing .name and .description.
                    entries = [
                        f"Model: {model.name}\nDescription: {model.description}\n"
                        for model in models
                    ]
                model_list = "\n".join(entries)

                logger.info("Successfully retrieved ASR models")
                return TextContent(
                    type="text",
                    text=f"Available ASR Models:\n\n{model_list}",
                )
            except Exception as api_error:
                failure = str(api_error)
                logger.error(f"Error listing models: {failure}")

                # A non-None outcome is final; None means go round again.
                outcome = handle_api_error(failure, "listing models", attempt, max_retries)
                if outcome is not None:
                    return outcome

        # Fallback for the case where every attempt asked for a retry.
        logger.error(f"All attempts to list models failed after {max_retries+1} attempts")
        return make_error(f"Failed to list ASR models after {max_retries+1} attempts")
    except Exception as e:
        logger.error(f"Unexpected error listing ASR models: {str(e)}")
        return make_error(f"Failed to list ASR models: {str(e)}")


def main():
    """Run the MCP server."""
    # mcp.run() is called without a transport argument; per the MCP Python SDK
    # that means stdio, where stdout is reserved for protocol messages. The
    # startup notice therefore goes through the logger (stderr by default)
    # instead of print().
    logger.info("Starting Whissle MCP server")
    mcp.run()


if __name__ == "__main__":
    main()

Loading blob content...

Implementation Reference

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/WhissleAI/whissle-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

server.py•23 KiB

"""
Whissle MCP Server

⚠️ IMPORTANT: This server provides access to Whissle API endpoints which may incur costs.
Each tool that makes an API call is marked with a cost warning. Please follow these guidelines:

1. Only use tools when explicitly requested by the user
2. For tools that process audio, consider the length of the audio as it affects costs
3. Some operations like translation or summarization may have higher costs

Tools without cost warnings in their description are free to use as they only read existing data.
"""

import os
import time
import logging
from pathlib import Path
from typing import List, Optional, Dict
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP
from mcp.types import TextContent
from whissle import WhissleClient
from whissle_mcp.utils import (
    make_error,
    make_output_path,
    make_output_file,
    handle_input_file,
)

# Logging: one root configuration for the process, plus a named logger for this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("whissle_mcp")

# Pull settings from a .env file (if any) before reading the environment.
load_dotenv()
auth_token = os.environ.get("WHISSLE_AUTH_TOKEN")
base_path = os.environ.get("WHISSLE_MCP_BASE_PATH")

# The server cannot do anything useful without credentials, so fail at import time.
if not auth_token:
    raise ValueError("WHISSLE_AUTH_TOKEN environment variable is required")

# Build the synchronous Whissle client shared by every tool below.
try:
    client = WhissleClient(auth_token=auth_token).sync_client
except Exception as init_error:
    init_failure = f"Failed to initialize Whissle client: {init_error}"
    logger.error(init_failure)
    raise ValueError(init_failure)
else:
    logger.info("Whissle client initialized successfully")

# FastMCP application object; tools register themselves on it via @mcp.tool.
mcp = FastMCP("Whissle")


def handle_api_error(error_msg, operation_name, retry_count=0, max_retries=2):
    """Classify an API error message and decide between retrying and reporting.

    Args:
        error_msg: Text of the error raised by the API call.
        operation_name: Human-readable name of the operation, used in messages.
        retry_count: Zero-based number of retries already performed.
        max_retries: Maximum number of retries allowed.

    Returns:
        None when the caller should retry (only for messages containing
        "HTTP 500" while retries remain; this function sleeps before returning),
        otherwise the value produced by make_error for the matching error class.
    """
    logger.error(f"API error during {operation_name}: {error_msg}")

    # Everything that is not a server-side 500 is reported immediately.
    if "HTTP 500" not in error_msg:
        if "HTTP 413" in error_msg:
            return make_error(f"File too large. Please try a smaller file. Error: {error_msg}")
        if "HTTP 415" in error_msg:
            return make_error(f"Unsupported file format. Please use a supported format. Error: {error_msg}")
        if "HTTP 401" in error_msg or "HTTP 403" in error_msg:
            return make_error(f"Authentication error. Please check your API token. Error: {error_msg}")
        return make_error(f"API error during {operation_name}: {error_msg}")

    # HTTP 500 with retries left: back off (delay doubles each retry, starting at 2 s).
    if retry_count < max_retries:
        delay = 2 ** (retry_count + 1)
        logger.info(f"HTTP 500 error during {operation_name}. Retrying in {delay} seconds... (Attempt {retry_count+1}/{max_retries+1})")
        time.sleep(delay)
        return None  # tells the caller to try again

    # HTTP 500 with retries exhausted: upload failures get a more detailed hint.
    upload_failed = "uploading file" in error_msg.lower()
    if upload_failed:
        return make_error(
            f"Server error during {operation_name}. The file upload to the Whissle API failed. "
            f"This could be due to:\n"
            f"1. Temporary server issues\n"
            f"2. File format compatibility issues\n"
            f"3. Network connectivity problems\n"
            f"Please try again later or contact Whissle support. Error: {error_msg}"
        )
    return make_error(
        f"Server error during {operation_name}. This might be a temporary issue with the Whissle API. "
        f"Please try again later or contact Whissle support. Error: {error_msg}"
    )


@mcp.tool(
    description="""Convert speech to text with a given model and return the transcription.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        audio_file_path (str): Path to the audio file to transcribe
        model_name (str, optional): The name of the ASR model to use. Defaults to "en-NER"
        timestamps (bool, optional): Whether to include word timestamps
        boosted_lm_words (List[str], optional): Words to boost in recognition
        boosted_lm_score (int, optional): Score for boosted words (0-100)

    Returns:
        Dict with the transcript, duration and language code (plus timestamps and
        diarization output when the API provides them), or an "error" entry on failure.
    """
)
def speech_to_text(audio_file_path: str, model_name: str = "en-NER", timestamps: bool = True, boosted_lm_words: Optional[List[str]] = None, boosted_lm_score: int = 80) -> Dict:
    """Transcribe an audio file with the Whissle API.

    The file is validated locally (regular file, non-empty, supported
    extension, at most 25 MB) before any API call is made. API failures are
    passed to handle_api_error, which sleeps and signals a retry for messages
    containing "HTTP 500" while retries remain.

    Args:
        audio_file_path: Path to the audio file to transcribe.
        model_name: ASR model to use; an empty value falls back to "en-NER".
        timestamps: Forwarded to the API's ``timestamps`` argument.
        boosted_lm_words: Forwarded to the API's ``boosted_lm_words`` argument.
        boosted_lm_score: Forwarded to the API's ``boosted_lm_score`` argument.

    Returns:
        On success a dict with "transcript", "duration_seconds" and
        "language_code", plus "timestamps" / "diarize_output" when the
        response carries them. On failure a dict with a single "error" key.
    """
    try:
        # Reject missing paths and non-files (e.g. directories) before spending an API call.
        if not os.path.isfile(audio_file_path):
            logger.error(f"Audio file not found: {audio_file_path}")
            return {"error": f"Audio file not found: {audio_file_path}"}

        file_size = os.path.getsize(audio_file_path)
        if file_size == 0:
            logger.error(f"Audio file is empty: {audio_file_path}")
            return {"error": f"Audio file is empty: {audio_file_path}"}

        file_ext = os.path.splitext(audio_file_path)[1].lower()
        if file_ext not in ('.wav', '.mp3', '.ogg', '.flac', '.m4a'):
            logger.error(f"Unsupported audio format: {file_ext}")
            return {"error": f"Unsupported audio format: {file_ext}. Supported formats: wav, mp3, ogg, flac, m4a"}

        max_size_mb = 25
        size_mb = file_size / (1024 * 1024)
        if file_size > max_size_mb * 1024 * 1024:
            logger.error(f"File too large: {size_mb:.2f} MB")
            return {"error": f"File too large ({size_mb:.2f} MB). Maximum size is {max_size_mb} MB."}

        logger.info(f"Transcribing audio file: {audio_file_path}")
        logger.info(f"File size: {size_mb:.2f} MB")
        logger.info(f"File format: {file_ext}")

        # Honour the caller's model choice. The previous code hard-coded
        # "en-NER" here, silently ignoring the model_name argument.
        selected_model = model_name or "en-NER"
        last_error = None
        retry_count = 0
        max_retries = 2

        while retry_count <= max_retries:
            try:
                logger.info(f"Attempting transcription with model: {selected_model} (Attempt {retry_count+1}/{max_retries+1})")
                response = client.speech_to_text(
                    audio_file_path=audio_file_path,
                    model_name=selected_model,
                    timestamps=timestamps,
                    boosted_lm_words=boosted_lm_words,
                    boosted_lm_score=boosted_lm_score
                )

                if response and hasattr(response, 'transcript'):
                    logger.info(f"Transcription successful with model: {selected_model}")

                    result = {
                        "transcript": response.transcript,
                        "duration_seconds": getattr(response, 'duration_seconds', 0),
                        "language_code": getattr(response, 'language_code', 'en')
                    }

                    if hasattr(response, 'timestamps'):
                        result["timestamps"] = response.timestamps

                    if hasattr(response, 'diarize_output') and response.diarize_output:
                        result["diarize_output"] = response.diarize_output

                    return result

                # A response without a transcript is not retried.
                last_error = "No transcription was returned from the API"
                logger.error(f"No transcription returned from API with model {selected_model}")
                break
            except Exception as api_error:
                error_msg = str(api_error)
                logger.error(f"Error with model {selected_model}: {error_msg}")
                last_error = error_msg

                # None means "retry" (handle_api_error has already slept).
                error_result = handle_api_error(error_msg, "transcription", retry_count, max_retries)
                if error_result is not None:
                    if retry_count == max_retries:
                        break
                    # NOTE(review): error_result is whatever make_error returns
                    # (defined outside this file) — confirm it serializes well
                    # inside this dict.
                    return {"error": error_result}

                retry_count += 1

        if last_error and "HTTP 500" in last_error:
            logger.error(f"All transcription attempts failed with HTTP 500: {last_error}")
            return {"error": f"Server error during transcription. This might be a temporary issue with the Whissle API. Please try again later or contact Whissle support. Error: {last_error}"}

        logger.error(f"All transcription attempts failed: {last_error}")
        return {"error": f"Failed to transcribe audio: {last_error}"}

    except Exception as e:
        # Last-resort boundary so the tool always answers with a dict.
        logger.error(f"Unexpected error during transcription: {str(e)}")
        return {"error": f"Failed to transcribe audio: {str(e)}"}


@mcp.tool(
    description="""Convert speech to text with speaker diarization.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        audio_file_path (str): Path to the audio file to transcribe
        model_name (str, optional): The name of the ASR model to use. Defaults to "en-NER"
        max_speakers (int, optional): Maximum number of speakers to identify. Defaults to 2
        boosted_lm_words (List[str], optional): Words to boost in recognition
        boosted_lm_score (int, optional): Score for boosted words (0-100). Defaults to 80

    Returns:
        A dictionary with the transcript, duration, language code and diarized output
        (plus timestamps when available), or a dictionary with an "error" key on failure.
    """
)
def diarize_speech(
    audio_file_path: str,
    model_name: str = "en-NER",
    max_speakers: int = 2,
    boosted_lm_words: Optional[List[str]] = None,
    boosted_lm_score: int = 80,
) -> Dict:
    """Diarize speech using the Whissle API.

    The audio file is validated locally (existence, non-empty, extension,
    size) before any billable API call is made.

    Args:
        audio_file_path: Path to the audio file to diarize.
        model_name: ASR model to request. Falls back to "en-NER" when empty.
        max_speakers: Maximum number of speakers to identify.
        boosted_lm_words: Optional words to boost during recognition.
        boosted_lm_score: Score applied to the boosted words.

    Returns:
        On success, a dict with "transcript", "duration_seconds",
        "language_code" and "diarize_output" (and "timestamps" when the
        response carries them). On failure, a dict with a single "error" key.
    """
    try:
        # Check if file exists
        if not os.path.exists(audio_file_path):
            logger.error(f"Audio file not found: {audio_file_path}")
            return {"error": f"Audio file not found: {audio_file_path}"}

        # Check file size
        file_size = os.path.getsize(audio_file_path)
        if file_size == 0:
            logger.error(f"Audio file is empty: {audio_file_path}")
            return {"error": f"Audio file is empty: {audio_file_path}"}

        # Check file format
        file_ext = os.path.splitext(audio_file_path)[1].lower()
        if file_ext not in ['.wav', '.mp3', '.ogg', '.flac', '.m4a']:
            logger.error(f"Unsupported audio format: {file_ext}")
            return {"error": f"Unsupported audio format: {file_ext}. Supported formats: wav, mp3, ogg, flac, m4a"}

        # Check file size limits
        max_size_mb = 25
        if file_size > max_size_mb * 1024 * 1024:
            logger.error(f"File too large: {file_size / (1024*1024):.2f} MB")
            return {"error": f"File too large ({file_size / (1024*1024):.2f} MB). Maximum size is {max_size_mb} MB."}

        # Log the request details
        logger.info(f"Diarizing audio file: {audio_file_path}")
        logger.info(f"File size: {file_size / (1024*1024):.2f} MB")
        logger.info(f"File format: {file_ext}")

        # Honor the caller's model choice (previously the parameter was ignored
        # and "en-NER" was always used). Kept as a list so further fallback
        # models can be appended without restructuring the loop.
        models_to_try = [model_name or "en-NER"]
        last_error = None

        for try_model in models_to_try:
            retry_count = 0
            max_retries = 2

            while retry_count <= max_retries:
                try:
                    logger.info(f"Attempting diarization with model: {try_model} (Attempt {retry_count+1}/{max_retries+1})")
                    response = client.diarize_stt(
                        audio_file_path=audio_file_path,
                        model_name=try_model,
                        max_speakers=max_speakers,
                        boosted_lm_words=boosted_lm_words,
                        boosted_lm_score=boosted_lm_score
                    )

                    if response and hasattr(response, 'diarize_output') and response.diarize_output:
                        logger.info(f"Diarization successful with model: {try_model}")

                        result = {
                            "transcript": getattr(response, 'transcript', ''),
                            "duration_seconds": getattr(response, 'duration_seconds', 0),
                            "language_code": getattr(response, 'language_code', 'en'),
                            "diarize_output": response.diarize_output
                        }

                        if hasattr(response, 'timestamps'):
                            result["timestamps"] = response.timestamps

                        return result
                    else:
                        # An empty response is not retried: move on to the next model (if any).
                        last_error = "No diarized transcription was returned from the API"
                        logger.error(f"No diarized transcription returned from API with model {try_model}")
                        break
                except Exception as api_error:
                    error_msg = str(api_error)
                    logger.error(f"Error with model {try_model}: {error_msg}")
                    last_error = error_msg

                    # handle_api_error returns None when the call should be retried;
                    # any other value means "do not retry".
                    error_result = handle_api_error(error_msg, "diarization", retry_count, max_retries)
                    if error_result is not None:
                        if retry_count == max_retries:
                            # Retries exhausted: fall through to the summary error below.
                            break
                        else:
                            return {"error": error_result}

                    retry_count += 1

        # Guard against last_error being unset so the membership test cannot raise.
        if last_error and "HTTP 500" in last_error:
            logger.error(f"All diarization attempts failed with HTTP 500: {last_error}")
            return {"error": f"Server error during diarization. This might be a temporary issue with the Whissle API. Please try again later or contact Whissle support. Error: {last_error}"}
        else:
            logger.error(f"All diarization attempts failed: {last_error}")
            return {"error": f"Failed to diarize speech: {last_error}"}

    except Exception as e:
        logger.error(f"Unexpected error during diarization: {str(e)}")
        return {"error": f"Failed to diarize speech: {str(e)}"}


@mcp.tool(
    description="""Translate text from one language to another.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        text (str): The text to translate
        source_language (str): Source language code (e.g., "en" for English)
        target_language (str): Target language code (e.g., "es" for Spanish)

    Returns:
        TextContent with the translated text.
    """
)
def translate_text(
    text: str,
    source_language: str,
    target_language: str,
) -> TextContent:
    """Translate ``text`` through the Whissle machine-translation call.

    Makes up to ``max_retries + 1`` attempts. After each failed attempt,
    ``handle_api_error`` decides whether another attempt is made (it returns
    ``None``) or its result is returned to the caller as-is.
    """
    try:
        # Reject empty input before spending an API call.
        if not text:
            logger.error("Empty text provided for translation")
            return make_error("Text is required")

        logger.info(f"Translating text from {source_language} to {target_language}")
        logger.info(f"Text length: {len(text)} characters")

        max_retries = 2

        for attempt in range(max_retries + 1):
            try:
                logger.info(f"Attempting translation (Attempt {attempt+1}/{max_retries+1})")
                reply = client.machine_translation(
                    text=text,
                    source_language=source_language,
                    target_language=target_language,
                )

                # Missing or empty translation is reported as an error.
                if not reply or not reply.translated_text:
                    logger.error("No translation was returned from the API")
                    return make_error("No translation was returned from the API")

                logger.info("Translation successful")
                return TextContent(
                    type="text",
                    text=f"Translation:\n{reply.translated_text}",
                )
            except Exception as api_error:
                failure = str(api_error)
                logger.error(f"Translation error: {failure}")

                # A non-None verdict means "stop retrying and report this".
                verdict = handle_api_error(failure, "translation", attempt, max_retries)
                if verdict is not None:
                    return verdict

        # Every attempt was consumed without a result.
        logger.error(f"All translation attempts failed after {max_retries+1} attempts")
        return make_error(f"Failed to translate text after {max_retries+1} attempts")
    except Exception as e:
        logger.error(f"Unexpected error during translation: {str(e)}")
        return make_error(f"Failed to translate text: {str(e)}")


@mcp.tool(
    description="""Summarize text using an LLM model.

    ⚠️ COST WARNING: This tool makes an API call to Whissle which may incur costs. Only use when explicitly requested by the user.

    Args:
        content (str): The text to summarize
        model_name (str, optional): The LLM model to use. Defaults to "openai"
        instruction (str, optional): Specific instructions for summarization

    Returns:
        TextContent with the summary.
    """
)
def summarize_text(
    content: str,
    model_name: str = "openai",
    instruction: Optional[str] = None,
) -> TextContent:
    """Summarize ``content`` through the Whissle LLM summarizer call.

    Makes up to ``max_retries + 1`` attempts. After each failed attempt,
    ``handle_api_error`` decides whether another attempt is made (it returns
    ``None``) or its result is returned to the caller as-is.
    """
    try:
        # Reject empty input before spending an API call.
        if not content:
            logger.error("Empty content provided for summarization")
            return make_error("Content is required")

        logger.info(f"Summarizing text using model: {model_name}")
        logger.info(f"Text length: {len(content)} characters")

        max_retries = 2

        for attempt in range(max_retries + 1):
            try:
                logger.info(f"Attempting summarization (Attempt {attempt+1}/{max_retries+1})")
                reply = client.llm_text_summarizer(
                    content=content,
                    model_name=model_name,
                    instruction=instruction,
                )

                # Missing or empty summary is reported as an error.
                if not reply or not reply.response:
                    logger.error("No summary was returned from the API")
                    return make_error("No summary was returned from the API")

                logger.info("Summarization successful")
                return TextContent(
                    type="text",
                    text=f"Summary:\n{reply.response}",
                )
            except Exception as api_error:
                failure = str(api_error)
                logger.error(f"Summarization error: {failure}")

                # A non-None verdict means "stop retrying and report this".
                verdict = handle_api_error(failure, "summarization", attempt, max_retries)
                if verdict is not None:
                    return verdict

        # Every attempt was consumed without a result.
        logger.error(f"All summarization attempts failed after {max_retries+1} attempts")
        return make_error(f"Failed to summarize text after {max_retries+1} attempts")
    except Exception as e:
        logger.error(f"Unexpected error during summarization: {str(e)}")
        return make_error(f"Failed to summarize text: {str(e)}")


@mcp.tool(
    description="List all available ASR models and their capabilities."
)
def list_asr_models() -> TextContent:
    """Fetch the ASR models exposed by the Whissle client and format them as text.

    The client may return either a list of model-name strings or a list of
    objects exposing ``name`` and ``description``; both shapes are rendered.
    Any other non-empty response type is reported as an error.

    Returns:
        TextContent with a formatted list of available models
    """
    try:
        logger.info("Fetching available ASR models...")

        max_retries = 2

        for attempt in range(max_retries + 1):
            try:
                logger.info(f"Attempting to list models (Attempt {attempt+1}/{max_retries+1})")
                available = client.list_asr_models()

                if not available:
                    logger.error("No models were returned from the API")
                    return make_error("No models were returned from the API")

                # Only list responses are understood.
                if not isinstance(available, list):
                    logger.error("Unexpected response format from API")
                    return make_error("Unexpected response format from API")

                if all(isinstance(entry, str) for entry in available):
                    # Plain list of model names.
                    rendered = [f"Model: {entry}" for entry in available]
                else:
                    # Objects carrying a name and a description.
                    rendered = [
                        f"Model: {entry.name}\nDescription: {entry.description}\n"
                        for entry in available
                    ]
                model_list = "\n".join(rendered)

                logger.info("Successfully retrieved ASR models")
                return TextContent(
                    type="text",
                    text=f"Available ASR Models:\n\n{model_list}",
                )
            except Exception as api_error:
                failure = str(api_error)
                logger.error(f"Error listing models: {failure}")

                # A non-None verdict means "stop retrying and report this".
                verdict = handle_api_error(failure, "listing models", attempt, max_retries)
                if verdict is not None:
                    return verdict

        # Every attempt was consumed without a result.
        logger.error(f"All attempts to list models failed after {max_retries+1} attempts")
        return make_error(f"Failed to list ASR models after {max_retries+1} attempts")
    except Exception as e:
        logger.error(f"Unexpected error listing ASR models: {str(e)}")
        return make_error(f"Failed to list ASR models: {str(e)}")


def main():
    """Run the MCP server.

    The startup notice is emitted through the module logger instead of
    ``print``: when the server is run over the stdio transport, stdout is
    reserved for protocol messages, and stray text there can confuse clients.
    """
    logger.info("Starting Whissle MCP server")
    mcp.run()


# Start the server only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()