Voice Mode

by mbailey
model_benchmark.py (5.79 kB)
"""MCP tool for benchmarking Whisper models.""" from typing import Union, List, Dict, Any, Optional from voice_mode.tools.whisper.models import ( get_installed_whisper_models, benchmark_whisper_model, is_whisper_model_installed, WHISPER_MODEL_REGISTRY ) async def whisper_model_benchmark( models: Union[str, List[str]] = "installed", sample_file: Optional[str] = None, runs: int = 1 ) -> Dict[str, Any]: """Benchmark Whisper model performance. Args: models: 'installed' (default), 'all', specific model name, or list of models sample_file: Optional audio file for testing (uses default JFK sample if None) runs: Number of benchmark runs per model (default: 1) Returns: Dict with benchmark results and recommendations """ # Determine which models to benchmark if models == "installed": model_list = get_installed_whisper_models() if not model_list: return { "success": False, "error": "No Whisper models are installed. Install models first with whisper_model_install()" } elif models == "all": # Only benchmark installed models from the full list all_models = list(WHISPER_MODEL_REGISTRY.keys()) model_list = [m for m in all_models if is_whisper_model_installed(m)] if not model_list: return { "success": False, "error": "No Whisper models are installed" } elif isinstance(models, str): # Single model specified if not is_whisper_model_installed(models): return { "success": False, "error": f"Model {models} is not installed" } model_list = [models] elif isinstance(models, list): # List of models specified model_list = [] for model in models: if is_whisper_model_installed(model): model_list.append(model) else: # Model not installed, skip silently or could use logger.warning pass if not model_list: return { "success": False, "error": "None of the specified models are installed" } else: return { "success": False, "error": f"Invalid models parameter: {models}" } # Run benchmarks results = [] failed = [] for model in model_list: best_result = None for run_num in range(runs): result = benchmark_whisper_model(model, sample_file) if result.get("success"): # Keep the best (fastest) result from multiple runs if best_result is None or result["total_time_ms"] < best_result["total_time_ms"]: best_result = result else: # If any run fails, record the failure if model not in failed: failed.append(model) results.append({ "model": model, "success": False, "error": result.get("error", "Benchmark failed") }) break if best_result: results.append(best_result) if not results: return { "success": False, "error": "No benchmarks completed successfully" } # Find successful results for analysis successful_results = [r for r in results if r.get("success")] if successful_results: # Find fastest model fastest = min(successful_results, key=lambda x: x["total_time_ms"]) # Generate recommendations based on results recommendations = [] # Categorize by speed for result in successful_results: rtf = result.get("real_time_factor", 0) if rtf > 20: category = "Ultra-fast (good for real-time)" elif rtf > 5: category = "Fast (good for interactive use)" elif rtf > 1: category = "Moderate (good balance)" else: category = "Slow (best accuracy)" result["category"] = category # Generate specific recommendations if fastest["real_time_factor"] > 10: recommendations.append(f"Use {fastest['model']} for real-time applications") # Find best balance (medium or base if available) balance_models = [r for r in successful_results if r["model"] in ["base", "medium"]] if balance_models: best_balance = min(balance_models, key=lambda x: x["total_time_ms"]) 
recommendations.append(f"Use {best_balance['model']} for balanced speed/accuracy") # Recommend large models for accuracy large_models = [r for r in successful_results if "large" in r["model"]] if large_models: best_large = min(large_models, key=lambda x: x["total_time_ms"]) recommendations.append(f"Use {best_large['model']} for best accuracy") else: fastest = None recommendations = ["Unable to generate recommendations - no successful benchmarks"] return { "success": True, "benchmarks": results, "models_tested": len(model_list), "models_failed": len(failed), "fastest_model": fastest["model"] if fastest else None, "fastest_time_ms": fastest["total_time_ms"] if fastest else None, "recommendations": recommendations, "sample_file": sample_file or "default JFK sample", "runs_per_model": runs }
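
For reference, a minimal sketch of driving this tool directly from a script rather than through an MCP client. It assumes the file above is importable as voice_mode.tools.whisper.model_benchmark (inferred from the package imports at the top of the file); that module path and the printed summary are illustrative assumptions, not a documented Voice Mode entry point.

# Hypothetical driver script -- a sketch, not part of Voice Mode itself.
# Assumes the file above is importable as
# voice_mode.tools.whisper.model_benchmark (inferred from its imports).
import asyncio

from voice_mode.tools.whisper.model_benchmark import whisper_model_benchmark


async def main() -> None:
    # Benchmark every installed model three times; the tool keeps the
    # fastest run per model
    report = await whisper_model_benchmark(models="installed", runs=3)

    if not report["success"]:
        print(f"Benchmark failed: {report['error']}")
        return

    # fastest_model is None when every individual benchmark run failed
    if report["fastest_model"]:
        print(f"Fastest: {report['fastest_model']} ({report['fastest_time_ms']} ms)")
    for line in report["recommendations"]:
        print(f"  - {line}")


if __name__ == "__main__":
    asyncio.run(main())

Since whisper_model_benchmark is an async function, it has to be awaited; asyncio.run is the simplest way to do that from a standalone script.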
