Skip to main content
Glama

MCP Search Server

by Nghiauet
artificial_analysis_llm_benchmarks.json243 kB
[ { "name": "o4-mini-(high)", "description": "o4-mini (high)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 46790.0, "tokens_per_second": 130.0 }, "intelligence": { "quality_score": 70.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o4-mini-(high)", "description": "o4-mini (high)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 67780.0, "tokens_per_second": 88.1 }, "intelligence": { "quality_score": 70.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o3", "description": "o3", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 17.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 19940.0, "tokens_per_second": 148.9 }, "intelligence": { "quality_score": 69.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o3", "description": "o3", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 17.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 37170.0, "tokens_per_second": 86.3 }, "intelligence": { "quality_score": 69.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-pro", "description": "Gemini 2.5 Pro", "provider": "Google", "metrics": { "cost": { "blended_cost_per_1m": 3.44, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 44130.0, "tokens_per_second": 147.8 }, "intelligence": { "quality_score": 69.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "DeepSeek", "metrics": { "cost": { "blended_cost_per_1m": 0.96, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 3140.0, "tokens_per_second": 31.9 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "Parasail", "metrics": { "cost": { "blended_cost_per_1m": 2.71, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 120.2 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1050.0, "tokens_per_second": 36.6 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "Nebius", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 670.0, "tokens_per_second": 33.3 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 4.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 253.3 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.92, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 39.7 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 1.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 830.0, "tokens_per_second": 75.4 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(may-'25)", "description": "DeepSeek R1 (May '25)", "provider": "kluster.ai", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 530.0, "tokens_per_second": 36.3 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-pro-(may-'25)-(ai-studio)", "description": "Gemini 2.5 Pro (May '25) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 3.44, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 38340.0, "tokens_per_second": 147.7 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-pro-(may-'25)-vertex", "description": "Gemini 2.5 Pro (May '25) Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 3.44, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 31880.0, "tokens_per_second": 167.4 }, "intelligence": { "quality_score": 68.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-3-mini-reasoning-(high)", "description": "Grok 3 mini Reasoning (high)", "provider": "xAI", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 58.0 }, "intelligence": { "quality_score": 67.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-3-mini-reasoning-(high)-fast", "description": "Grok 3 mini Reasoning (high) Fast", "provider": "xAI Fast", "metrics": { "cost": { "blended_cost_per_1m": 1.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 211.4 }, "intelligence": { "quality_score": 67.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o3-mini-(high)", "description": "o3-mini (high)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 38970.0, "tokens_per_second": 179.0 }, "intelligence": { "quality_score": 66.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o3-mini-(high)", "description": "o3-mini (high)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 43540.0, "tokens_per_second": 160.1 }, "intelligence": { "quality_score": 66.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-flash-(reasoning)-(ai_studio)", "description": "Gemini 2.5 Flash (Reasoning) (AI_Studio)", "provider": "Google (AI_Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.99, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 15770.0, "tokens_per_second": 330.9 }, "intelligence": { "quality_score": 65.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-flash-(reasoning)-(vertex)", "description": "Gemini 2.5 Flash (Reasoning) (Vertex)", "provider": "Google (Vertex)", "metrics": { "cost": { "blended_cost_per_1m": 0.99, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 16120.000000000002, "tokens_per_second": 311.9 }, "intelligence": { "quality_score": 65.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o3-mini", "description": "o3-mini", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 13070.0, "tokens_per_second": 173.3 }, "intelligence": { "quality_score": 63.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o3-mini", "description": "o3-mini", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 13520.0, "tokens_per_second": 201.2 }, "intelligence": { "quality_score": 63.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)-(fp8)", "description": "Qwen3 235B (Reasoning) (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 51.1 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)-base", "description": "Qwen3 235B (Reasoning) Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 600.0, "tokens_per_second": 25.4 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)", "description": "Qwen3 235B (Reasoning)", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 97.5 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)-(fp8)", "description": "Qwen3 235B (Reasoning) (FP8)", "provider": "Deepinfra (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 630.0, "tokens_per_second": 20.4 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)-(fp8)", "description": "Qwen3 235B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 830.0, "tokens_per_second": 20.8 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)-(fp8)", "description": "Qwen3 235B (Reasoning) (FP8)", "provider": "Together.ai (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 26.7 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)-(fp8)", "description": "Qwen3 235B (Reasoning) (FP8)", "provider": "kluster.ai (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.61, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 770.0, "tokens_per_second": 34.0 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b-(reasoning)", "description": "Qwen3 235B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 2.63, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1180.0, "tokens_per_second": 70.2 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o1", "description": "o1", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 26.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 25860.0, "tokens_per_second": 124.1 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o1", "description": "o1", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 26.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 30340.0, "tokens_per_second": 110.4 }, "intelligence": { "quality_score": 62.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-nemotron-ultra-reasoning-base", "description": "Llama Nemotron Ultra Reasoning Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 640.0, "tokens_per_second": 41.8 }, "intelligence": { "quality_score": 61.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-sonnet-thinking", "description": "Claude 4 Sonnet Thinking", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1880.0, "tokens_per_second": 35.7 }, "intelligence": { "quality_score": 61.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-flash-(april-'25)-(reasoning)-(ai_studio)", "description": "Gemini 2.5 Flash (April '25) (Reasoning) (AI_Studio)", "provider": "Google (AI_Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.99, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 7790.0, "tokens_per_second": 380.1 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Lambda Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.95, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 38.9 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1030.0, "tokens_per_second": 97.6 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 2.36, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 360.0, "tokens_per_second": 227.2 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)-base", "description": "DeepSeek R1 (Jan '25) Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 27.8 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)-fast", "description": "DeepSeek R1 (Jan '25) Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 670.0, "tokens_per_second": 83.9 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "CentML", "metrics": { "cost": { "blended_cost_per_1m": 2.99, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 460.0, "tokens_per_second": 82.2 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 2.36, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 80.0 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)-(fast)", "description": "DeepSeek R1 (Jan '25) (Fast)", "provider": "Fireworks (Fast)", "metrics": { "cost": { "blended_cost_per_1m": 4.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 490.0, "tokens_per_second": 239.7 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)-(turbo,-fp4)", "description": "DeepSeek R1 (Jan '25) (Turbo, FP4)", "provider": "Deepinfra (Turbo, FP4)", "metrics": { "cost": { "blended_cost_per_1m": 1.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 181.1 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 115.9 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "FriendliAI", "metrics": { "cost": { "blended_cost_per_1m": 4.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 88.0 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)-turbo", "description": "DeepSeek R1 (Jan '25) Turbo", "provider": "Novita Turbo", "metrics": { "cost": { "blended_cost_per_1m": 1.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 780.0, "tokens_per_second": 31.0 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 4.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 830.0, "tokens_per_second": 32.1 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 5.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 2120.0, "tokens_per_second": 199.3 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 4.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 570.0, "tokens_per_second": 101.1 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-(jan-'25)", "description": "DeepSeek R1 (Jan '25)", "provider": "kluster.ai", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 770.0, "tokens_per_second": 38.3 }, "intelligence": { "quality_score": 60.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)-(fp8)", "description": "Qwen3 32B (Reasoning) (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 460.0, "tokens_per_second": 52.0 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)", "description": "Qwen3 32B (Reasoning)", "provider": "Cerebras", "metrics": { "cost": { "blended_cost_per_1m": 0.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 2341.3 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)-base", "description": "Qwen3 32B (Reasoning) Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 600.0, "tokens_per_second": 45.9 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)-(fp8)", "description": "Qwen3 32B (Reasoning) (FP8)", "provider": "Deepinfra (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 42.8 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)-(fp8)", "description": "Qwen3 32B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 930.0, "tokens_per_second": 39.6 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)", "description": "Qwen3 32B (Reasoning)", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 334.5 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b-(reasoning)", "description": "Qwen3 32B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 2.63, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1120.0, "tokens_per_second": 62.9 }, "intelligence": { "quality_score": 59.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1150.0, "tokens_per_second": 108.8 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b-base", "description": "QwQ-32B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.23, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 57.1 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "CentML", "metrics": { "cost": { "blended_cost_per_1m": 0.65, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 350.0, "tokens_per_second": 91.6 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 178.9 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.16, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 47.7 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.18, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 670.0, "tokens_per_second": 36.2 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.32, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 401.8 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.63, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 418.4 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b", "description": "QwQ-32B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 97.5 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-opus", "description": "Claude 4 Opus", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 3510.0, "tokens_per_second": 18.2 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-opus-vertex", "description": "Claude 4 Opus Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1700.0, "tokens_per_second": 91.7 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-opus", "description": "Claude 4 Opus", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 2540.0, "tokens_per_second": 54.5 }, "intelligence": { "quality_score": 58.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.7-sonnet-thinking", "description": "Claude 3.7 Sonnet Thinking", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1580.0, "tokens_per_second": 55.4 }, "intelligence": { "quality_score": 57.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.7-sonnet-thinking", "description": "Claude 3.7 Sonnet Thinking", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1730.0, "tokens_per_second": 88.3 }, "intelligence": { "quality_score": 57.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-14b-(reasoning)-base", "description": "Qwen3 14B (Reasoning) Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 71.3 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-14b-(reasoning)-(fp8)", "description": "Qwen3 14B (Reasoning) (FP8)", "provider": "Deepinfra (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 74.1 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-14b-(reasoning)-(fp8)", "description": "Qwen3 14B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 39810.0, "tokens_per_second": 56.2 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-14b-(reasoning)", "description": "Qwen3 14B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 1.31, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1040.0, "tokens_per_second": 63.5 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)-(fp8)", "description": "Qwen3 30B A3B (Reasoning) (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 157.2 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)-fast", "description": "Qwen3 30B A3B (Reasoning) Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 138.8 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)-base", "description": "Qwen3 30B A3B (Reasoning) Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 119.4 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)", "description": "Qwen3 30B A3B (Reasoning)", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 490.0, "tokens_per_second": 166.7 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)-(fp8)", "description": "Qwen3 30B A3B (Reasoning) (FP8)", "provider": "Deepinfra (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 90.4 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)-(fp8)", "description": "Qwen3 30B A3B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 610.0, "tokens_per_second": 177.0 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b-(reasoning)", "description": "Qwen3 30B A3B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1040.0, "tokens_per_second": 92.2 }, "intelligence": { "quality_score": 56.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o1-mini", "description": "o1-mini", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 9890.0, "tokens_per_second": 204.9 }, "intelligence": { "quality_score": 54.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o1-mini", "description": "o1-mini", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 1.93, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 9080.0, "tokens_per_second": 269.9 }, "intelligence": { "quality_score": 54.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-flash-(ai_studio)", "description": "Gemini 2.5 Flash (AI_Studio)", "provider": "Google (AI_Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 257.4 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-flash-(vertex)", "description": "Gemini 2.5 Flash (Vertex)", "provider": "Google (Vertex)", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 251.9 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "DeepSeek", "metrics": { "cost": { "blended_cost_per_1m": 0.48, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 3520.0, "tokens_per_second": 25.4 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Replicate", "metrics": { "cost": { "blended_cost_per_1m": 1.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 106.9 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 1.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1150.0, "tokens_per_second": 34.5 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-fast", "description": "DeepSeek V3 Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 670.0, "tokens_per_second": 91.1 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Nebius", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 650.0, "tokens_per_second": 18.2 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "CentML", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 490.0, "tokens_per_second": 28.3 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 73.2 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 570.0, "tokens_per_second": 266.8 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 33.0 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.57, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1010.0, "tokens_per_second": 29.0 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 3.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1790.0, "tokens_per_second": 167.4 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 1.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 98.3 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3", "description": "DeepSeek V3", "provider": "kluster.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 700.0, "tokens_per_second": 26.8 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-sonnet", "description": "Claude 4 Sonnet", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 2180.0, "tokens_per_second": 54.7 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-sonnet-vertex", "description": "Claude 4 Sonnet Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1610.0, "tokens_per_second": 94.2 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-sonnet", "description": "Claude 4 Sonnet", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1590.0, "tokens_per_second": 73.1 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.1-mini", "description": "GPT-4.1 mini", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 0.7, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 600.0, "tokens_per_second": 71.1 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.1-mini", "description": "GPT-4.1 mini", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.7, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 670.0, "tokens_per_second": 162.8 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.1", "description": "GPT-4.1", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 119.1 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.1", "description": "GPT-4.1", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 780.0, "tokens_per_second": 197.6 }, "intelligence": { "quality_score": 53.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-qwen-32b", "description": "DeepSeek R1 Distill Qwen 32B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.14, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 410.0, "tokens_per_second": 50.0 }, "intelligence": { "quality_score": 52.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-qwen-32b", "description": "DeepSeek R1 Distill Qwen 32B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1170.0, "tokens_per_second": 21.0 }, "intelligence": { "quality_score": 52.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-8b-(reasoning)-(fp8)", "description": "Qwen3 8B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 700.0, "tokens_per_second": 53.6 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-8b-(reasoning)", "description": "Qwen3 8B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.66, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1000.0, "tokens_per_second": 94.2 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-3", "description": "Grok 3", "provider": "xAI", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 83.3 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-3-fast", "description": "Grok 3 Fast", "provider": "xAI Fast", "metrics": { "cost": { "blended_cost_per_1m": 10.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 84.5 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "Lambda Labs (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.28, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 360.0, "tokens_per_second": 153.8 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 330.0, "tokens_per_second": 189.6 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick", "description": "Llama 4 Maverick", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.42, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 281.2 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-vertex", "description": "Llama 4 Maverick Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.55, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 330.0, "tokens_per_second": 125.7 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "CentML (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 122.9 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "Microsoft Azure (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.61, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 54.6 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick", "description": "Llama 4 Maverick", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.39, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 177.0 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "Deepinfra (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.27, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 280.0, "tokens_per_second": 105.6 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(turbo,-fp8)", "description": "Llama 4 Maverick (Turbo, FP8)", "provider": "Deepinfra (Turbo, FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 660.9 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.34, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 570.0, "tokens_per_second": 65.5 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick", "description": "Llama 4 Maverick", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 110.0, "tokens_per_second": 548.6 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick", "description": "Llama 4 Maverick", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.92, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 798.8 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "Together.ai (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.41, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 200.0, "tokens_per_second": 109.6 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-maverick-(fp8)", "description": "Llama 4 Maverick (FP8)", "provider": "kluster.ai (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 710.0, "tokens_per_second": 155.3 }, "intelligence": { "quality_score": 51.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(march-2025)", "description": "GPT-4o (March 2025)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 7.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 183.4 }, "intelligence": { "quality_score": 50.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.0-pro-experimental-(ai-studio)", "description": "Gemini 2.0 Pro Experimental (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 17240.0, "tokens_per_second": 68.5 }, "intelligence": { "quality_score": 49.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-qwen-14b", "description": "DeepSeek R1 Distill Qwen 14B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 800.0, "tokens_per_second": 43.9 }, "intelligence": { "quality_score": 49.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-qwen-14b", "description": "DeepSeek R1 Distill Qwen 14B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 1.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 169.4 }, "intelligence": { "quality_score": 49.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-medium-3", "description": "Mistral Medium 3", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 84.6 }, "intelligence": { "quality_score": 49.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-medium-3", "description": "Mistral Medium 3", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 55.4 }, "intelligence": { "quality_score": 49.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.5-flash-(ai_studio)", "description": "Gemini 2.5 Flash (AI_Studio)", "provider": "Google (AI_Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 360.0, "tokens_per_second": 300.2 }, "intelligence": { "quality_score": 49.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "Lambda Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 350.0, "tokens_per_second": 65.2 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "Cerebras", "metrics": { "cost": { "blended_cost_per_1m": 0.94, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 2107.9 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b-base", "description": "DeepSeek R1 Distill Llama 70B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 570.0, "tokens_per_second": 58.4 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 330.0, "tokens_per_second": 31.6 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 31.5 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.81, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 160.0, "tokens_per_second": 411.0 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1740.0, "tokens_per_second": 300.2 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-70b", "description": "DeepSeek R1 Distill Llama 70B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 119.5 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.7-sonnet", "description": "Claude 3.7 Sonnet", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1410.0, "tokens_per_second": 49.5 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.7-sonnet-vertex", "description": "Claude 3.7 Sonnet Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 890.0, "tokens_per_second": 78.8 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.7-sonnet", "description": "Claude 3.7 Sonnet", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1660.0, "tokens_per_second": 78.5 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.0-flash-vertex", "description": "Gemini 2.0 Flash Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 223.1 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.0-flash-(ai-studio)", "description": "Gemini 2.0 Flash (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 223.9 }, "intelligence": { "quality_score": 48.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-4b-(reasoning)-fast", "description": "Qwen3 4B (Reasoning) Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 158.4 }, "intelligence": { "quality_score": 47.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-4b-(reasoning)-(fp8)", "description": "Qwen3 4B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 640.0, "tokens_per_second": 51.4 }, "intelligence": { "quality_score": 47.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-4b-(reasoning)", "description": "Qwen3 4B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1030.0, "tokens_per_second": 100.2 }, "intelligence": { "quality_score": 47.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "reka-flash-3", "description": "Reka Flash 3", "provider": "Reka AI", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 940.0, "tokens_per_second": 56.2 }, "intelligence": { "quality_score": 47.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-235b", "description": "Qwen3 235B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 1.23, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1110.0, "tokens_per_second": 71.1 }, "intelligence": { "quality_score": 47.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.0-flash-(exp)-(ai-studio)", "description": "Gemini 2.0 Flash (exp) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 250.0, "tokens_per_second": 222.7 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)-(fp8)", "description": "DeepSeek V3 (Dec '24) (FP8)", "provider": "Hyperbolic (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1340.0, "tokens_per_second": 29.0 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)", "description": "DeepSeek V3 (Dec '24)", "provider": "Nebius", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 660.0, "tokens_per_second": 21.2 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)", "description": "DeepSeek V3 (Dec '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 77.6 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)", "description": "DeepSeek V3 (Dec '24)", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 1.31, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 850.0, "tokens_per_second": 57.2 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)", "description": "DeepSeek V3 (Dec '24)", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.51, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 410.0, "tokens_per_second": 28.8 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)-turbo", "description": "DeepSeek V3 (Dec '24) Turbo", "provider": "Novita Turbo", "metrics": { "cost": { "blended_cost_per_1m": 0.63, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1140.0, "tokens_per_second": 30.3 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)", "description": "DeepSeek V3 (Dec '24)", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.89, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 810.0, "tokens_per_second": 28.9 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-v3-(dec-'24)-(fp8)", "description": "DeepSeek V3 (Dec '24) (FP8)", "provider": "Together.ai (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 1.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 610.0, "tokens_per_second": 93.0 }, "intelligence": { "quality_score": 46.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-max", "description": "Qwen2.5 Max", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 2.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1250.0, "tokens_per_second": 42.7 }, "intelligence": { "quality_score": 45.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-pro-(sep)-(vertex)", "description": "Gemini 1.5 Pro (Sep) (Vertex)", "provider": "Google (Vertex)", "metrics": { "cost": { "blended_cost_per_1m": 2.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 92.6 }, "intelligence": { "quality_score": 45.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-pro-(sep)-(ai-studio)", "description": "Gemini 1.5 Pro (Sep) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 2.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 93.0 }, "intelligence": { "quality_score": 45.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-sonnet-(oct)", "description": "Claude 3.5 Sonnet (Oct)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 920.0, "tokens_per_second": 47.3 }, "intelligence": { "quality_score": 44.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-sonnet-(oct)-vertex", "description": "Claude 3.5 Sonnet (Oct) Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1300.0, "tokens_per_second": 80.2 }, "intelligence": { "quality_score": 44.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-sonnet-(oct)", "description": "Claude 3.5 Sonnet (Oct)", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 810.0, "tokens_per_second": 78.8 }, "intelligence": { "quality_score": 44.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-32b", "description": "Qwen3 32B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 1.23, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1030.0, "tokens_per_second": 63.1 }, "intelligence": { "quality_score": 44.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "sonar", "description": "Sonar", "provider": "Perplexity", "metrics": { "cost": { "blended_cost_per_1m": 1.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1810.0, "tokens_per_second": 107.5 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Lambda Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.14, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 120.7 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout-(fp8)", "description": "Llama 4 Scout (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 370.0, "tokens_per_second": 128.0 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Cerebras", "metrics": { "cost": { "blended_cost_per_1m": 0.7, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 2428.4 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.29, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 160.4 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout-vertex", "description": "Llama 4 Scout Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.36, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 130.1 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "CentML", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 250.0, "tokens_per_second": 115.4 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.34, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 32.4 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 164.2 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.14, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 34.6 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 720.0, "tokens_per_second": 55.5 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 170.0, "tokens_per_second": 601.8 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.47, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1690.0, "tokens_per_second": 786.2 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.28, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 190.0, "tokens_per_second": 122.9 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-4-scout", "description": "Llama 4 Scout", "provider": "kluster.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.71, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 640.0, "tokens_per_second": 98.6 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "sonar-pro", "description": "Sonar Pro", "provider": "Perplexity", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 2700.0, "tokens_per_second": 80.0 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b-preview", "description": "QwQ 32B-Preview", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.14, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 49.8 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwq-32b-preview", "description": "QwQ 32B-Preview", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 98.0 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "nova-premier", "description": "Nova Premier", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 5.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 830.0, "tokens_per_second": 61.8 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-30b-a3b", "description": "Qwen3 30B A3B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1100.0, "tokens_per_second": 92.6 }, "intelligence": { "quality_score": 43.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(nov-'24)", "description": "GPT-4o (Nov '24)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 4.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 600.0, "tokens_per_second": 110.4 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(nov-'24)", "description": "GPT-4o (Nov '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 4.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1220.0, "tokens_per_second": 122.0 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.0-flash-lite-(feb-'25)-(ai-studio)", "description": "Gemini 2.0 Flash-Lite (Feb '25) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 250.0, "tokens_per_second": 213.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-(fp8)", "description": "Llama 3.3 70B (FP8)", "provider": "Lambda Labs (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 350.0, "tokens_per_second": 55.9 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Parasail", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 435.7 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-(fp8)", "description": "Llama 3.3 70B (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.28, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 72.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Cerebras", "metrics": { "cost": { "blended_cost_per_1m": 0.94, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 190.0, "tokens_per_second": 2428.8 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1110.0, "tokens_per_second": 38.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.71, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 248.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-fast", "description": "Llama 3.3 70B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 138.5 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-base", "description": "Llama 3.3 70B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 630.0, "tokens_per_second": 40.0 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-vertex", "description": "Llama 3.3 70B Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.72, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 74.5 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-snowflake", "description": "Llama 3.3 70B Snowflake", "provider": "Snowflake Snowflake", "metrics": { "cost": { "blended_cost_per_1m": 0.58, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 570.0, "tokens_per_second": 39.0 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "CentML", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 129.4 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.71, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 49.1 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 3580.0, "tokens_per_second": 155.1 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-(turbo,-fp8)", "description": "Llama 3.3 70B (Turbo, FP8)", "provider": "Deepinfra (Turbo, FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 250.0, "tokens_per_second": 31.1 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.27, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 610.0, "tokens_per_second": 26.5 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "FriendliAI", "metrics": { "cost": { "blended_cost_per_1m": 0.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 182.4 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 680.0, "tokens_per_second": 55.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.64, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 220.0, "tokens_per_second": 444.0 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 443.7 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b-turbo", "description": "Llama 3.3 70B Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 151.4 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.3-70b", "description": "Llama 3.3 70B", "provider": "kluster.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.7, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 18.1 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.1-nano", "description": "GPT-4.1 nano", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 114.1 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.1-nano", "description": "GPT-4.1 nano", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 980.0, "tokens_per_second": 137.2 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-14b", "description": "Qwen3 14B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.61, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1050.0, "tokens_per_second": 64.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(may-'24)", "description": "GPT-4o (May '24)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 7.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 640.0, "tokens_per_second": 93.3 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(may-'24)", "description": "GPT-4o (May '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 7.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 720.0, "tokens_per_second": 120.9 }, "intelligence": { "quality_score": 41.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b-(fp8)", "description": "Llama 3.1 405B (FP8)", "provider": "Lambda Labs (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 32.9 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Parasail", "metrics": { "cost": { "blended_cost_per_1m": 7.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1680.0, "tokens_per_second": 170.8 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Replicate", "metrics": { "cost": { "blended_cost_per_1m": 9.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 990.0, "tokens_per_second": 19.2 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 4.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1220.0, "tokens_per_second": 87.5 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b-standard", "description": "Llama 3.1 405B Standard", "provider": "Amazon Bedrock Standard", "metrics": { "cost": { "blended_cost_per_1m": 2.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1830.0, "tokens_per_second": 30.0 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b-latency-optimized", "description": "Llama 3.1 405B Latency Optimized", "provider": "Amazon Bedrock Latency Optimized", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 89.3 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b-base", "description": "Llama 3.1 405B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 1.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 700.0, "tokens_per_second": 32.4 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b-vertex", "description": "Llama 3.1 405B Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 7.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 29.9 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 8.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 31.3 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 660.0, "tokens_per_second": 83.7 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 760.0, "tokens_per_second": 26.7 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 6.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1640.0, "tokens_per_second": 172.4 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b", "description": "Llama 3.1 405B", "provider": "Databricks", "metrics": { "cost": { "blended_cost_per_1m": 7.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1030.0, "tokens_per_second": 35.6 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-405b-turbo", "description": "Llama 3.1 405B Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 530.0, "tokens_per_second": 95.0 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b", "description": "Qwen2.5 72B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1250.0, "tokens_per_second": 29.3 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b", "description": "Qwen2.5 72B", "provider": "Nebius", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 690.0, "tokens_per_second": 23.1 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b-fast", "description": "Qwen2.5 72B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 69.2 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b", "description": "Qwen2.5 72B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 380.0, "tokens_per_second": 72.4 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b", "description": "Qwen2.5 72B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 550.0, "tokens_per_second": 36.8 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b-turbo", "description": "Qwen2.5 72B Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 113.9 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-72b", "description": "Qwen2.5 72B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1220.0, "tokens_per_second": 58.1 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "minimax-text-01", "description": "MiniMax-Text-01", "provider": "MiniMax", "metrics": { "cost": { "blended_cost_per_1m": 0.42, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 770.0, "tokens_per_second": 33.9 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-4", "description": "Phi-4", "provider": "Nebius", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 115.7 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-4", "description": "Phi-4", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.22, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 39.9 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-4", "description": "Phi-4", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.09, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 550.0, "tokens_per_second": 32.4 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-a", "description": "Command A", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 4.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 220.0, "tokens_per_second": 92.5 }, "intelligence": { "quality_score": 40.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-flash-(sep)-(vertex)", "description": "Gemini 1.5 Flash (Sep) (Vertex)", "provider": "Google (Vertex)", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 180.0, "tokens_per_second": 184.7 }, "intelligence": { "quality_score": 39.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-flash-(sep)-(ai-studio)", "description": "Gemini 1.5 Flash (Sep) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 191.7 }, "intelligence": { "quality_score": 39.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-large-2-(nov-'24)", "description": "Mistral Large 2 (Nov '24)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 72.7 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-large-2-(nov-'24)", "description": "Mistral Large 2 (Nov '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 36.4 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-1.7b-(reasoning)-(fp8)", "description": "Qwen3 1.7B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 720.0, "tokens_per_second": 48.2 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-1.7b-(reasoning)", "description": "Qwen3 1.7B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 960.0, "tokens_per_second": 130.2 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-3-27b", "description": "Gemma 3 27B", "provider": "Parasail", "metrics": { "cost": { "blended_cost_per_1m": 0.29, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 85.8 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-3-27b-(ai_studio)", "description": "Gemma 3 27B (AI_Studio)", "provider": "Google (AI_Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 48.2 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-3-27b", "description": "Gemma 3 27B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 630.0, "tokens_per_second": 33.8 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-beta", "description": "Grok Beta", "provider": "xAI", "metrics": { "cost": { "blended_cost_per_1m": 7.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 66.7 }, "intelligence": { "quality_score": 38.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "pixtral-large", "description": "Pixtral Large", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 380.0, "tokens_per_second": 79.0 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-instruct-32b-fast", "description": "Qwen2.5 Instruct 32B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 88.0 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-instruct-32b-base", "description": "Qwen2.5 Instruct 32B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 550.0, "tokens_per_second": 59.3 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-nemotron-70b-(fp8)", "description": "Llama 3.1 Nemotron 70B (FP8)", "provider": "Lambda Labs (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 50.6 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-nemotron-70b-base", "description": "Llama 3.1 Nemotron 70B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 39.1 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-nemotron-70b-fast", "description": "Llama 3.1 Nemotron 70B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 560.0, "tokens_per_second": 74.2 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-nemotron-70b", "description": "Llama 3.1 Nemotron 70B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 570.0, "tokens_per_second": 28.3 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "nova-pro", "description": "Nova Pro", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 1.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 167.2 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-8b", "description": "Qwen3 8B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.31, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 960.0, "tokens_per_second": 95.1 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-large-2-(jul-'24)", "description": "Mistral Large 2 (Jul '24)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 37.2 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-large-2-(jul-'24)", "description": "Mistral Large 2 (Jul '24)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 32.0 }, "intelligence": { "quality_score": 37.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-coder-32b", "description": "Qwen2.5 Coder 32B", "provider": "Lambda Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.09, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 43.1 }, "intelligence": { "quality_score": 36.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-coder-32b", "description": "Qwen2.5 Coder 32B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1130.0, "tokens_per_second": 54.0 }, "intelligence": { "quality_score": 36.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-coder-32b", "description": "Qwen2.5 Coder 32B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.08, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 50.6 }, "intelligence": { "quality_score": 36.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-coder-32b", "description": "Qwen2.5 Coder 32B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 74.7 }, "intelligence": { "quality_score": 36.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-mini", "description": "GPT-4o mini", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 550.0, "tokens_per_second": 73.5 }, "intelligence": { "quality_score": 36.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-mini", "description": "GPT-4o mini", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 870.0, "tokens_per_second": 144.7 }, "intelligence": { "quality_score": 36.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-(fp8)", "description": "Llama 3.1 70B (FP8)", "provider": "Lambda Labs (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.17, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 210.0, "tokens_per_second": 50.3 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b", "description": "Llama 3.1 70B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 940.0, "tokens_per_second": 175.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-standard", "description": "Llama 3.1 70B Standard", "provider": "Amazon Bedrock Standard", "metrics": { "cost": { "blended_cost_per_1m": 0.72, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 31.6 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-latency-optimized", "description": "Llama 3.1 70B Latency Optimized", "provider": "Amazon Bedrock Latency Optimized", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 143.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-base", "description": "Llama 3.1 70B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 650.0, "tokens_per_second": 33.1 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-fast", "description": "Llama 3.1 70B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 139.5 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-vertex", "description": "Llama 3.1 70B Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.72, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 280.0, "tokens_per_second": 72.9 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b", "description": "Llama 3.1 70B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 2.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 54.1 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b", "description": "Llama 3.1 70B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 380.0, "tokens_per_second": 172.8 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-(turbo,-fp8)", "description": "Llama 3.1 70B (Turbo, FP8)", "provider": "Deepinfra (Turbo, FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.14, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 250.0, "tokens_per_second": 38.9 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b", "description": "Llama 3.1 70B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.27, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 21.9 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b", "description": "Llama 3.1 70B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1330.0, "tokens_per_second": 47.5 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b-turbo", "description": "Llama 3.1 70B Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 350.0, "tokens_per_second": 155.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-70b", "description": "Llama 3.1 70B", "provider": "Simplismart", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 125.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-3.1", "description": "Mistral Small 3.1", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 123.5 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-3.1", "description": "Mistral Small 3.1", "provider": "Parasail", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 63.7 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-3.1-vertex", "description": "Mistral Small 3.1 Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 200.0, "tokens_per_second": 209.6 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-3", "description": "Mistral Small 3", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 152.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-3", "description": "Mistral Small 3", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.07, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 66.9 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-3", "description": "Mistral Small 3", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 200.0, "tokens_per_second": 97.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-4b", "description": "Qwen3 4B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 980.0, "tokens_per_second": 102.6 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-opus", "description": "Claude 3 Opus", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1200.0, "tokens_per_second": 26.4 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-opus-vertex", "description": "Claude 3 Opus Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 2690.0, "tokens_per_second": 22.4 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-opus", "description": "Claude 3 Opus", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1070.0, "tokens_per_second": 27.6 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-haiku-standard", "description": "Claude 3.5 Haiku Standard", "provider": "Amazon Bedrock Standard", "metrics": { "cost": { "blended_cost_per_1m": 1.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 940.0, "tokens_per_second": 57.6 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-haiku-latency-optimized", "description": "Claude 3.5 Haiku Latency Optimized", "provider": "Amazon Bedrock Latency Optimized", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 530.0, "tokens_per_second": 97.0 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-haiku-vertex", "description": "Claude 3.5 Haiku Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 1.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1710.0, "tokens_per_second": 67.1 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-haiku", "description": "Claude 3.5 Haiku", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 1.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 750.0, "tokens_per_second": 66.2 }, "intelligence": { "quality_score": 35.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "devstral", "description": "Devstral", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 130.0 }, "intelligence": { "quality_score": 34.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-llama-8b", "description": "DeepSeek R1 Distill Llama 8B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 690.0, "tokens_per_second": 52.8 }, "intelligence": { "quality_score": 34.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-3-12b", "description": "Gemma 3 12B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1010.0, "tokens_per_second": 19.9 }, "intelligence": { "quality_score": 34.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-pro-(may)-(vertex)", "description": "Gemini 1.5 Pro (May) (Vertex)", "provider": "Google (Vertex)", "metrics": { "cost": { "blended_cost_per_1m": 2.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 330.0, "tokens_per_second": 66.7 }, "intelligence": { "quality_score": 34.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-pro-(may)-(ai-studio)", "description": "Gemini 1.5 Pro (May) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 2.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 420.0, "tokens_per_second": 67.7 }, "intelligence": { "quality_score": 34.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-turbo", "description": "Qwen2.5 Turbo", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.09, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1010.0, "tokens_per_second": 108.2 }, "intelligence": { "quality_score": 34.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-90b-(vision)", "description": "Llama 3.2 90B (Vision)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.72, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 60.7 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-90b-(vision)-vertex", "description": "Llama 3.2 90B (Vision) Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 200.0, "tokens_per_second": 32.8 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-90b-(vision)", "description": "Llama 3.2 90B (Vision)", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.36, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 19.6 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-90b-(vision)-turbo", "description": "Llama 3.2 90B (Vision) Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 30.0 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2-72b", "description": "Qwen2 72B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 350.0, "tokens_per_second": 42.0 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2-72b", "description": "Qwen2 72B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1350.0, "tokens_per_second": 31.0 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "nova-lite", "description": "Nova Lite", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 274.3 }, "intelligence": { "quality_score": 33.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-flash-8b-ai-studio", "description": "Gemini 1.5 Flash-8B AI Studio", "provider": "Google AI Studio", "metrics": { "cost": { "blended_cost_per_1m": 0.07, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 200.0, "tokens_per_second": 280.5 }, "intelligence": { "quality_score": 31.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "jamba-1.5-large", "description": "Jamba 1.5 Large", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 680.0, "tokens_per_second": 50.6 }, "intelligence": { "quality_score": 29.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "jamba-1.6-large", "description": "Jamba 1.6 Large", "provider": "AI21 Labs", "metrics": { "cost": { "blended_cost_per_1m": 3.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 610.0, "tokens_per_second": 49.7 }, "intelligence": { "quality_score": 29.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-flash-(may)-(vertex)", "description": "Gemini 1.5 Flash (May) (Vertex)", "provider": "Google (Vertex)", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 333.1 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-1.5-flash-(may)-(ai-studio)", "description": "Gemini 1.5 Flash (May) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 210.0, "tokens_per_second": 320.8 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "nova-micro", "description": "Nova Micro", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 311.9 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "yi-large", "description": "Yi-Large", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 66.0 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-sonnet", "description": "Claude 3 Sonnet", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 850.0, "tokens_per_second": 32.7 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-sonnet", "description": "Claude 3 Sonnet", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 620.0, "tokens_per_second": 60.4 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "codestral-(jan-'25)", "description": "Codestral (Jan '25)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 118.5 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "codestral-(jan-'25)-vertex", "description": "Codestral (Jan '25) Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 150.0, "tokens_per_second": 150.0 }, "intelligence": { "quality_score": 28.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Replicate", "metrics": { "cost": { "blended_cost_per_1m": 1.18, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 49.6 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1570.0, "tokens_per_second": 15.6 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 2.86, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 47.3 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 2.9, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 740.0, "tokens_per_second": 18.6 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.33, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 43.0 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.57, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1070.0, "tokens_per_second": 15.8 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b", "description": "Llama 3 70B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.64, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 250.0, "tokens_per_second": 332.8 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b-(reference,-fp16)", "description": "Llama 3 70B (Reference, FP16)", "provider": "Together.ai (Reference, FP16)", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 690.0, "tokens_per_second": 130.7 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-70b-(turbo,-fp8)", "description": "Llama 3 70B (Turbo, FP8)", "provider": "Together.ai (Turbo, FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 136.3 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-(sep-'24)", "description": "Mistral Small (Sep '24)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 76.7 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-4-multimodal", "description": "Phi-4 Multimodal", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 350.0, "tokens_per_second": 17.4 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-coder-7b--fast", "description": "Qwen2.5 Coder 7B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 226.6 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen2.5-coder-7b--base", "description": "Qwen2.5 Coder 7B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.01, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 192.2 }, "intelligence": { "quality_score": 27.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-large-(feb-'24)", "description": "Mistral Large (Feb '24)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 590.0, "tokens_per_second": 28.9 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-large-(feb-'24)", "description": "Mistral Large (Feb '24)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 43.6 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x22b", "description": "Mixtral 8x22B", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 3.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 59.6 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x22b-base", "description": "Mixtral 8x22B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 550.0, "tokens_per_second": 82.2 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x22b-fast", "description": "Mixtral 8x22B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 1.05, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 107.4 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x22b", "description": "Mixtral 8x22B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 92.5 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-4-mini", "description": "Phi-4 Mini", "provider": "CentML", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 217.4 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-4-mini", "description": "Phi-4 Mini", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 56.8 }, "intelligence": { "quality_score": 26.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-1.7b", "description": "Qwen3 1.7B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1000.0, "tokens_per_second": 133.8 }, "intelligence": { "quality_score": 25.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "phi-3-medium-14b", "description": "Phi-3 Medium 14B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 400.0, "tokens_per_second": 52.9 }, "intelligence": { "quality_score": 25.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-3-4b", "description": "Gemma 3 4B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.03, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 90.0 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-2.1", "description": "Claude 2.1", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 12.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1580.0, "tokens_per_second": 29.1 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-2.1", "description": "Claude 2.1", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 12.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 890.0, "tokens_per_second": 13.9 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Lambda Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.03, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 134.7 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Parasail", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 1135.1 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Cerebras", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 2155.9 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 800.0, "tokens_per_second": 431.1 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.22, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 280.0, "tokens_per_second": 241.8 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b-fast", "description": "Llama 3.1 8B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 182.0 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b-base", "description": "Llama 3.1 8B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.03, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 560.0, "tokens_per_second": 58.7 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b-vertex", "description": "Llama 3.1 8B Vertex", "provider": "Google Vertex", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 170.0, "tokens_per_second": 119.1 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 215.0 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Fireworks", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 220.0, "tokens_per_second": 295.0 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 500.0, "tokens_per_second": 53.9 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "FriendliAI", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 280.0, "tokens_per_second": 428.0 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.03, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 670.0, "tokens_per_second": 73.8 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 170.0, "tokens_per_second": 853.9 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 1183.5 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b-turbo", "description": "Llama 3.1 8B Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 0.18, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 166.1 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "Simplismart", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 170.0, "tokens_per_second": 458.3 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.1-8b", "description": "Llama 3.1 8B", "provider": "kluster.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.18, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 62.1 }, "intelligence": { "quality_score": 24.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "pixtral-12b", "description": "Pixtral 12B", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 83.8 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "pixtral-12b", "description": "Pixtral 12B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 580.0, "tokens_per_second": 76.4 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-0.6b-(reasoning)-(fp8)", "description": "Qwen3 0.6B (Reasoning) (FP8)", "provider": "Novita (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 630.0, "tokens_per_second": 47.1 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-0.6b-(reasoning)", "description": "Qwen3 0.6B (Reasoning)", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.4, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 920.0, "tokens_per_second": 211.0 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-(feb-'24)", "description": "Mistral Small (Feb '24)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 1.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 280.0, "tokens_per_second": 144.1 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-small-(feb-'24)", "description": "Mistral Small (Feb '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 1.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 87.3 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-medium", "description": "Mistral Medium", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 4.09, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 450.0, "tokens_per_second": 70.4 }, "intelligence": { "quality_score": 23.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "ministral-8b", "description": "Ministral 8B", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 130.0 }, "intelligence": { "quality_score": 22.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-9b-fast", "description": "Gemma 2 9B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 166.2 }, "intelligence": { "quality_score": 22.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-9b-base", "description": "Gemma 2 9B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.03, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 151.9 }, "intelligence": { "quality_score": 22.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-9b", "description": "Gemma 2 9B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 710.0, "tokens_per_second": 18.0 }, "intelligence": { "quality_score": 22.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-9b", "description": "Gemma 2 9B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 709.1 }, "intelligence": { "quality_score": 22.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "lfm-40b", "description": "LFM 40B", "provider": "Lambda Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 170.0, "tokens_per_second": 160.8 }, "intelligence": { "quality_score": 22.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r+", "description": "Command-R+", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 47.7 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r+", "description": "Command-R+", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 4.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 280.0, "tokens_per_second": 47.9 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Replicate", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 390.0, "tokens_per_second": 79.4 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 104.0 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 340.0, "tokens_per_second": 73.8 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 113.4 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 740.0, "tokens_per_second": 58.2 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Groq", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 1348.7 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3-8b", "description": "Llama 3 8B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 164.7 }, "intelligence": { "quality_score": 21.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "codestral-(may-'24)", "description": "Codestral (May '24)", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 113.3 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "aya-expanse-32b", "description": "Aya Expanse 32B", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 180.0, "tokens_per_second": 121.2 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r+-(apr-'24)", "description": "Command-R+ (Apr '24)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 490.0, "tokens_per_second": 47.2 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r+-(apr-'24)", "description": "Command-R+ (Apr '24)", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 56.1 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r+-(apr-'24)", "description": "Command-R+ (Apr '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 650.0, "tokens_per_second": 28.7 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "ministral-3b", "description": "Ministral 3B", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 225.2 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-nemo", "description": "Mistral NeMo", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 139.1 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-nemo-(fp8)", "description": "Mistral NeMo (FP8)", "provider": "Parasail (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.11, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 520.0, "tokens_per_second": 99.8 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-nemo-fast", "description": "Mistral NeMo Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 490.0, "tokens_per_second": 154.7 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-nemo-base", "description": "Mistral NeMo Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 580.0, "tokens_per_second": 44.6 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-nemo", "description": "Mistral NeMo", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.03, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 52.4 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b-(fp8)", "description": "Llama 3.2 3B (FP8)", "provider": "Lambda Labs (FP8)", "metrics": { "cost": { "blended_cost_per_1m": 0.02, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 224.2 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b", "description": "Llama 3.2 3B", "provider": "Hyperbolic", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1030.0, "tokens_per_second": 98.4 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b", "description": "Llama 3.2 3B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 460.0, "tokens_per_second": 72.1 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b-base", "description": "Llama 3.2 3B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.01, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 490.0, "tokens_per_second": 122.1 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b", "description": "Llama 3.2 3B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.01, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 180.0, "tokens_per_second": 123.5 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b", "description": "Llama 3.2 3B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 610.0, "tokens_per_second": 88.8 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b", "description": "Llama 3.2 3B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 210.0, "tokens_per_second": 1586.3 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-3b-turbo", "description": "Llama 3.2 3B Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 162.3 }, "intelligence": { "quality_score": 20.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-r1-distill-qwen-1.5b", "description": "DeepSeek R1 Distill Qwen 1.5B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.18, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 240.0, "tokens_per_second": 384.6 }, "intelligence": { "quality_score": 19.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "jamba-1.5-mini", "description": "Jamba 1.5 Mini", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 82.7 }, "intelligence": { "quality_score": 18.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "jamba-1.6-mini", "description": "Jamba 1.6 Mini", "provider": "AI21 Labs", "metrics": { "cost": { "blended_cost_per_1m": 0.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 360.0, "tokens_per_second": 179.7 }, "intelligence": { "quality_score": 18.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x7b", "description": "Mixtral 8x7B", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.7, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 80.6 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x7b", "description": "Mixtral 8x7B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.51, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 83.9 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x7b-fast", "description": "Mixtral 8x7B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.23, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 125.9 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x7b-base", "description": "Mixtral 8x7B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 550.0, "tokens_per_second": 117.6 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x7b", "description": "Mixtral 8x7B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 93.0 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mixtral-8x7b", "description": "Mixtral 8x7B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.6, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 57.3 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen3-0.6b", "description": "Qwen3 0.6B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.19, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 980.0, "tokens_per_second": 215.9 }, "intelligence": { "quality_score": 17.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "aya-expanse-8b", "description": "Aya Expanse 8B", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 140.0, "tokens_per_second": 165.2 }, "intelligence": { "quality_score": 16.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r", "description": "Command-R", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 330.0, "tokens_per_second": 109.0 }, "intelligence": { "quality_score": 15.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r", "description": "Command-R", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 200.0, "tokens_per_second": 58.2 }, "intelligence": { "quality_score": 15.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r-(mar-'24)", "description": "Command-R (Mar '24)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 330.0, "tokens_per_second": 108.5 }, "intelligence": { "quality_score": 15.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r-(mar-'24)", "description": "Command-R (Mar '24)", "provider": "Cohere", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 160.0, "tokens_per_second": 160.8 }, "intelligence": { "quality_score": 15.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "command-r-(mar-'24)", "description": "Command-R (Mar '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 45.9 }, "intelligence": { "quality_score": 15.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "codestral-mamba", "description": "Codestral-Mamba", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 94.3 }, "intelligence": { "quality_score": 14.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-7b", "description": "Mistral 7B", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 290.0, "tokens_per_second": 104.3 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-7b", "description": "Mistral 7B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.16, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 93.4 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-7b", "description": "Mistral 7B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 210.0, "tokens_per_second": 94.8 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-7b", "description": "Mistral 7B", "provider": "Novita", "metrics": { "cost": { "blended_cost_per_1m": 0.04, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 800.0, "tokens_per_second": 126.2 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-7b", "description": "Mistral 7B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 150.0, "tokens_per_second": 180.6 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-1b", "description": "Llama 3.2 1B", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 119.1 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-1b-base", "description": "Llama 3.2 1B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.01, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 96.9 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-1b", "description": "Llama 3.2 1B", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.01, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 270.0, "tokens_per_second": 115.6 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-1b", "description": "Llama 3.2 1B", "provider": "SambaNova", "metrics": { "cost": { "blended_cost_per_1m": 0.05, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 190.0, "tokens_per_second": 2222.9 }, "intelligence": { "quality_score": 10.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-2-chat-7b", "description": "Llama 2 Chat 7B", "provider": "Replicate", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 430.0, "tokens_per_second": 132.6 }, "intelligence": { "quality_score": 8.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-11b-(vision)", "description": "Llama 3.2 11B (Vision)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.16, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 440.0, "tokens_per_second": 189.3 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-11b-(vision)", "description": "Llama 3.2 11B (Vision)", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.05, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 300.0, "tokens_per_second": 55.1 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "llama-3.2-11b-(vision)-turbo", "description": "Llama 3.2 11B (Vision) Turbo", "provider": "Together.ai Turbo", "metrics": { "cost": { "blended_cost_per_1m": 0.18, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 190.0, "tokens_per_second": 122.2 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-4-opus-thinking", "description": "Claude 4 Opus Thinking", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 30.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 3160.0, "tokens_per_second": 15.5 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "mistral-saba", "description": "Mistral Saba", "provider": "Mistral", "metrics": { "cost": { "blended_cost_per_1m": 0.3, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 91.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "sonar-reasoning", "description": "Sonar Reasoning", "provider": "Perplexity", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1760.0, "tokens_per_second": 73.6 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-3-mini-reasoning-(low)", "description": "Grok 3 mini Reasoning (low)", "provider": "xAI", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 310.0, "tokens_per_second": 120.2 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "grok-3-mini-reasoning-(low)-fast", "description": "Grok 3 mini Reasoning (low) Fast", "provider": "xAI Fast", "metrics": { "cost": { "blended_cost_per_1m": 1.45, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 320.0, "tokens_per_second": 203.6 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "reka-flash", "description": "Reka Flash", "provider": "Reka AI", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 890.0, "tokens_per_second": 46.2 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "reka-core", "description": "Reka Core", "provider": "Reka AI", "metrics": { "cost": { "blended_cost_per_1m": 2.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 860.0, "tokens_per_second": 27.3 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "reka-flash-(feb-'24)", "description": "Reka Flash (Feb '24)", "provider": "Reka AI", "metrics": { "cost": { "blended_cost_per_1m": 0.35, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 840.0, "tokens_per_second": 45.4 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "reka-edge", "description": "Reka Edge", "provider": "Reka AI", "metrics": { "cost": { "blended_cost_per_1m": 0.1, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 820.0, "tokens_per_second": 85.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o1-preview", "description": "o1-preview", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 26.25, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 19470.0, "tokens_per_second": 162.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "o1-preview", "description": "o1-preview", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 28.88, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 21280.0, "tokens_per_second": 157.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(aug-'24)", "description": "GPT-4o (Aug '24)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 4.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 710.0, "tokens_per_second": 82.3 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4o-(aug-'24)", "description": "GPT-4o (Aug '24)", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 4.38, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 700.0, "tokens_per_second": 125.6 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4-turbo", "description": "GPT-4 Turbo", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 15.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 840.0, "tokens_per_second": 46.8 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4-turbo", "description": "GPT-4 Turbo", "provider": "Microsoft Azure", "metrics": { "cost": { "blended_cost_per_1m": 15.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1640.0, "tokens_per_second": 40.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-3.5-turbo", "description": "GPT-3.5 Turbo", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 0.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 380.0, "tokens_per_second": 140.4 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4.5-(preview)", "description": "GPT-4.5 (Preview)", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 93.75, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1110.0, "tokens_per_second": 71.9 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gpt-4", "description": "GPT-4", "provider": "OpenAI", "metrics": { "cost": { "blended_cost_per_1m": 37.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 780.0, "tokens_per_second": 25.5 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemini-2.0-flash-lite-(preview)-(ai-studio)", "description": "Gemini 2.0 Flash-Lite (Preview) (AI Studio)", "provider": "Google (AI Studio)", "metrics": { "cost": { "blended_cost_per_1m": 0.13, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 260.0, "tokens_per_second": 215.5 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-27b-fast", "description": "Gemma 2 27B Fast", "provider": "Nebius Fast", "metrics": { "cost": { "blended_cost_per_1m": 0.26, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 510.0, "tokens_per_second": 88.1 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-27b-base", "description": "Gemma 2 27B Base", "provider": "Nebius Base", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 580.0, "tokens_per_second": 52.0 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "gemma-2-27b", "description": "Gemma 2 27B", "provider": "Together.ai", "metrics": { "cost": { "blended_cost_per_1m": 0.8, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 230.0, "tokens_per_second": 90.2 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-sonnet-(june)", "description": "Claude 3.5 Sonnet (June)", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 930.0, "tokens_per_second": 47.0 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3.5-sonnet-(june)", "description": "Claude 3.5 Sonnet (June)", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 6.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 940.0, "tokens_per_second": 79.9 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-haiku", "description": "Claude 3 Haiku", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 0.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1000.0, "tokens_per_second": 107.0 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-3-haiku", "description": "Claude 3 Haiku", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 0.5, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 470.0, "tokens_per_second": 144.9 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-instant", "description": "Claude Instant", "provider": "Amazon Bedrock", "metrics": { "cost": { "blended_cost_per_1m": 1.2, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 540.0, "tokens_per_second": 62.9 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "claude-2.0", "description": "Claude 2.0", "provider": "Anthropic", "metrics": { "cost": { "blended_cost_per_1m": 12.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 890.0, "tokens_per_second": 30.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-coder-v2-lite-fast,-fp8", "description": "DeepSeek Coder V2 Lite Fast, FP8", "provider": "Nebius Fast, FP8", "metrics": { "cost": { "blended_cost_per_1m": 0.12, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 480.0, "tokens_per_second": 98.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "deepseek-coder-v2-lite-base,-fp8", "description": "DeepSeek Coder V2 Lite Base, FP8", "provider": "Nebius Base, FP8", "metrics": { "cost": { "blended_cost_per_1m": 0.06, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 530.0, "tokens_per_second": 111.9 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "openchat-3.5", "description": "OpenChat 3.5", "provider": "Deepinfra", "metrics": { "cost": { "blended_cost_per_1m": 0.05, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 380.0, "tokens_per_second": 54.2 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "solar-mini", "description": "Solar Mini", "provider": "Upstage", "metrics": { "cost": { "blended_cost_per_1m": 0.15, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1010.0, "tokens_per_second": 82.8 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } }, { "name": "qwen1.5-chat-110b", "description": "Qwen1.5 Chat 110B", "provider": "Alibaba Cloud", "metrics": { "cost": { "blended_cost_per_1m": 0.0, "input_cost_per_1m": null, "output_cost_per_1m": null }, "speed": { "time_to_first_token_ms": 1630.0, "tokens_per_second": 23.7 }, "intelligence": { "quality_score": 0.0, "mmlu_score": null, "gsm8k_score": null, "bbh_score": null } } } ]

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Nghiauet/mcp-agent'

If you have feedback or need assistance with the MCP directory API, please join our Discord server