artificial_analysis_llm_benchmarks.json•243 kB
[
{
"name": "o4-mini-(high)",
"description": "o4-mini (high)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 46790.0,
"tokens_per_second": 130.0
},
"intelligence": {
"quality_score": 70.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o4-mini-(high)",
"description": "o4-mini (high)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 67780.0,
"tokens_per_second": 88.1
},
"intelligence": {
"quality_score": 70.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o3",
"description": "o3",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 17.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 19940.0,
"tokens_per_second": 148.9
},
"intelligence": {
"quality_score": 69.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o3",
"description": "o3",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 17.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 37170.0,
"tokens_per_second": 86.3
},
"intelligence": {
"quality_score": 69.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-pro",
"description": "Gemini 2.5 Pro",
"provider": "Google",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.44,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 44130.0,
"tokens_per_second": 147.8
},
"intelligence": {
"quality_score": 69.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "DeepSeek",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.96,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 3140.0,
"tokens_per_second": 31.9
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "Parasail",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.71,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 120.2
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1050.0,
"tokens_per_second": 36.6
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "Nebius",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 670.0,
"tokens_per_second": 33.3
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 253.3
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.92,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 39.7
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 830.0,
"tokens_per_second": 75.4
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(may-'25)",
"description": "DeepSeek R1 (May '25)",
"provider": "kluster.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 530.0,
"tokens_per_second": 36.3
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-pro-(may-'25)-(ai-studio)",
"description": "Gemini 2.5 Pro (May '25) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.44,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 38340.0,
"tokens_per_second": 147.7
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-pro-(may-'25)-vertex",
"description": "Gemini 2.5 Pro (May '25) Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.44,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 31880.0,
"tokens_per_second": 167.4
},
"intelligence": {
"quality_score": 68.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-3-mini-reasoning-(high)",
"description": "Grok 3 mini Reasoning (high)",
"provider": "xAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 58.0
},
"intelligence": {
"quality_score": 67.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-3-mini-reasoning-(high)-fast",
"description": "Grok 3 mini Reasoning (high) Fast",
"provider": "xAI Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 211.4
},
"intelligence": {
"quality_score": 67.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o3-mini-(high)",
"description": "o3-mini (high)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 38970.0,
"tokens_per_second": 179.0
},
"intelligence": {
"quality_score": 66.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o3-mini-(high)",
"description": "o3-mini (high)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 43540.0,
"tokens_per_second": 160.1
},
"intelligence": {
"quality_score": 66.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-flash-(reasoning)-(ai_studio)",
"description": "Gemini 2.5 Flash (Reasoning) (AI_Studio)",
"provider": "Google (AI_Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.99,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 15770.0,
"tokens_per_second": 330.9
},
"intelligence": {
"quality_score": 65.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-flash-(reasoning)-(vertex)",
"description": "Gemini 2.5 Flash (Reasoning) (Vertex)",
"provider": "Google (Vertex)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.99,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 16120.000000000002,
"tokens_per_second": 311.9
},
"intelligence": {
"quality_score": 65.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o3-mini",
"description": "o3-mini",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 13070.0,
"tokens_per_second": 173.3
},
"intelligence": {
"quality_score": 63.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o3-mini",
"description": "o3-mini",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 13520.0,
"tokens_per_second": 201.2
},
"intelligence": {
"quality_score": 63.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)-(fp8)",
"description": "Qwen3 235B (Reasoning) (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 51.1
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)-base",
"description": "Qwen3 235B (Reasoning) Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 600.0,
"tokens_per_second": 25.4
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)",
"description": "Qwen3 235B (Reasoning)",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 97.5
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)-(fp8)",
"description": "Qwen3 235B (Reasoning) (FP8)",
"provider": "Deepinfra (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 630.0,
"tokens_per_second": 20.4
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)-(fp8)",
"description": "Qwen3 235B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 830.0,
"tokens_per_second": 20.8
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)-(fp8)",
"description": "Qwen3 235B (Reasoning) (FP8)",
"provider": "Together.ai (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 26.7
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)-(fp8)",
"description": "Qwen3 235B (Reasoning) (FP8)",
"provider": "kluster.ai (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.61,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 770.0,
"tokens_per_second": 34.0
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b-(reasoning)",
"description": "Qwen3 235B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.63,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1180.0,
"tokens_per_second": 70.2
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o1",
"description": "o1",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 26.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 25860.0,
"tokens_per_second": 124.1
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o1",
"description": "o1",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 26.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 30340.0,
"tokens_per_second": 110.4
},
"intelligence": {
"quality_score": 62.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-nemotron-ultra-reasoning-base",
"description": "Llama Nemotron Ultra Reasoning Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 640.0,
"tokens_per_second": 41.8
},
"intelligence": {
"quality_score": 61.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-sonnet-thinking",
"description": "Claude 4 Sonnet Thinking",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1880.0,
"tokens_per_second": 35.7
},
"intelligence": {
"quality_score": 61.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-flash-(april-'25)-(reasoning)-(ai_studio)",
"description": "Gemini 2.5 Flash (April '25) (Reasoning) (AI_Studio)",
"provider": "Google (AI_Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.99,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 7790.0,
"tokens_per_second": 380.1
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Lambda Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.95,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 38.9
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1030.0,
"tokens_per_second": 97.6
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.36,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 360.0,
"tokens_per_second": 227.2
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)-base",
"description": "DeepSeek R1 (Jan '25) Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 27.8
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)-fast",
"description": "DeepSeek R1 (Jan '25) Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 670.0,
"tokens_per_second": 83.9
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "CentML",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.99,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 460.0,
"tokens_per_second": 82.2
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.36,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 80.0
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)-(fast)",
"description": "DeepSeek R1 (Jan '25) (Fast)",
"provider": "Fireworks (Fast)",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 490.0,
"tokens_per_second": 239.7
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)-(turbo,-fp4)",
"description": "DeepSeek R1 (Jan '25) (Turbo, FP4)",
"provider": "Deepinfra (Turbo, FP4)",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 181.1
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 115.9
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "FriendliAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 88.0
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)-turbo",
"description": "DeepSeek R1 (Jan '25) Turbo",
"provider": "Novita Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 780.0,
"tokens_per_second": 31.0
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 830.0,
"tokens_per_second": 32.1
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 5.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 2120.0,
"tokens_per_second": 199.3
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 570.0,
"tokens_per_second": 101.1
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-(jan-'25)",
"description": "DeepSeek R1 (Jan '25)",
"provider": "kluster.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 770.0,
"tokens_per_second": 38.3
},
"intelligence": {
"quality_score": 60.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)-(fp8)",
"description": "Qwen3 32B (Reasoning) (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 460.0,
"tokens_per_second": 52.0
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)",
"description": "Qwen3 32B (Reasoning)",
"provider": "Cerebras",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 2341.3
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)-base",
"description": "Qwen3 32B (Reasoning) Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 600.0,
"tokens_per_second": 45.9
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)-(fp8)",
"description": "Qwen3 32B (Reasoning) (FP8)",
"provider": "Deepinfra (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 42.8
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)-(fp8)",
"description": "Qwen3 32B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 930.0,
"tokens_per_second": 39.6
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)",
"description": "Qwen3 32B (Reasoning)",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 334.5
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b-(reasoning)",
"description": "Qwen3 32B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.63,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1120.0,
"tokens_per_second": 62.9
},
"intelligence": {
"quality_score": 59.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1150.0,
"tokens_per_second": 108.8
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b-base",
"description": "QwQ-32B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.23,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 57.1
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "CentML",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.65,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 350.0,
"tokens_per_second": 91.6
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 178.9
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.16,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 47.7
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.18,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 670.0,
"tokens_per_second": 36.2
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.32,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 401.8
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.63,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 418.4
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b",
"description": "QwQ-32B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 97.5
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-opus",
"description": "Claude 4 Opus",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 3510.0,
"tokens_per_second": 18.2
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-opus-vertex",
"description": "Claude 4 Opus Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1700.0,
"tokens_per_second": 91.7
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-opus",
"description": "Claude 4 Opus",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 2540.0,
"tokens_per_second": 54.5
},
"intelligence": {
"quality_score": 58.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.7-sonnet-thinking",
"description": "Claude 3.7 Sonnet Thinking",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1580.0,
"tokens_per_second": 55.4
},
"intelligence": {
"quality_score": 57.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.7-sonnet-thinking",
"description": "Claude 3.7 Sonnet Thinking",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1730.0,
"tokens_per_second": 88.3
},
"intelligence": {
"quality_score": 57.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-14b-(reasoning)-base",
"description": "Qwen3 14B (Reasoning) Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 71.3
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-14b-(reasoning)-(fp8)",
"description": "Qwen3 14B (Reasoning) (FP8)",
"provider": "Deepinfra (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 74.1
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-14b-(reasoning)-(fp8)",
"description": "Qwen3 14B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 39810.0,
"tokens_per_second": 56.2
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-14b-(reasoning)",
"description": "Qwen3 14B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.31,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1040.0,
"tokens_per_second": 63.5
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)-(fp8)",
"description": "Qwen3 30B A3B (Reasoning) (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 157.2
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)-fast",
"description": "Qwen3 30B A3B (Reasoning) Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 138.8
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)-base",
"description": "Qwen3 30B A3B (Reasoning) Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 119.4
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)",
"description": "Qwen3 30B A3B (Reasoning)",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 490.0,
"tokens_per_second": 166.7
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)-(fp8)",
"description": "Qwen3 30B A3B (Reasoning) (FP8)",
"provider": "Deepinfra (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 90.4
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)-(fp8)",
"description": "Qwen3 30B A3B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 610.0,
"tokens_per_second": 177.0
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b-(reasoning)",
"description": "Qwen3 30B A3B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1040.0,
"tokens_per_second": 92.2
},
"intelligence": {
"quality_score": 56.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o1-mini",
"description": "o1-mini",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 9890.0,
"tokens_per_second": 204.9
},
"intelligence": {
"quality_score": 54.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o1-mini",
"description": "o1-mini",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.93,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 9080.0,
"tokens_per_second": 269.9
},
"intelligence": {
"quality_score": 54.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-flash-(ai_studio)",
"description": "Gemini 2.5 Flash (AI_Studio)",
"provider": "Google (AI_Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 257.4
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-flash-(vertex)",
"description": "Gemini 2.5 Flash (Vertex)",
"provider": "Google (Vertex)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 251.9
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "DeepSeek",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.48,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 3520.0,
"tokens_per_second": 25.4
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Replicate",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 106.9
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1150.0,
"tokens_per_second": 34.5
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-fast",
"description": "DeepSeek V3 Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 670.0,
"tokens_per_second": 91.1
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Nebius",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 650.0,
"tokens_per_second": 18.2
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "CentML",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 490.0,
"tokens_per_second": 28.3
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 73.2
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 570.0,
"tokens_per_second": 266.8
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 33.0
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.57,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1010.0,
"tokens_per_second": 29.0
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1790.0,
"tokens_per_second": 167.4
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 98.3
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3",
"description": "DeepSeek V3",
"provider": "kluster.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 700.0,
"tokens_per_second": 26.8
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-sonnet",
"description": "Claude 4 Sonnet",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 2180.0,
"tokens_per_second": 54.7
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-sonnet-vertex",
"description": "Claude 4 Sonnet Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1610.0,
"tokens_per_second": 94.2
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-sonnet",
"description": "Claude 4 Sonnet",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1590.0,
"tokens_per_second": 73.1
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.1-mini",
"description": "GPT-4.1 mini",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.7,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 600.0,
"tokens_per_second": 71.1
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.1-mini",
"description": "GPT-4.1 mini",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.7,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 670.0,
"tokens_per_second": 162.8
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.1",
"description": "GPT-4.1",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 119.1
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.1",
"description": "GPT-4.1",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 780.0,
"tokens_per_second": 197.6
},
"intelligence": {
"quality_score": 53.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-qwen-32b",
"description": "DeepSeek R1 Distill Qwen 32B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.14,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 410.0,
"tokens_per_second": 50.0
},
"intelligence": {
"quality_score": 52.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-qwen-32b",
"description": "DeepSeek R1 Distill Qwen 32B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1170.0,
"tokens_per_second": 21.0
},
"intelligence": {
"quality_score": 52.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-8b-(reasoning)-(fp8)",
"description": "Qwen3 8B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 700.0,
"tokens_per_second": 53.6
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-8b-(reasoning)",
"description": "Qwen3 8B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.66,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1000.0,
"tokens_per_second": 94.2
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-3",
"description": "Grok 3",
"provider": "xAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 83.3
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-3-fast",
"description": "Grok 3 Fast",
"provider": "xAI Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 10.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 84.5
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "Lambda Labs (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.28,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 360.0,
"tokens_per_second": 153.8
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 330.0,
"tokens_per_second": 189.6
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick",
"description": "Llama 4 Maverick",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.42,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 281.2
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-vertex",
"description": "Llama 4 Maverick Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.55,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 330.0,
"tokens_per_second": 125.7
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "CentML (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 122.9
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "Microsoft Azure (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.61,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 54.6
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick",
"description": "Llama 4 Maverick",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.39,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 177.0
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "Deepinfra (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.27,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 280.0,
"tokens_per_second": 105.6
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(turbo,-fp8)",
"description": "Llama 4 Maverick (Turbo, FP8)",
"provider": "Deepinfra (Turbo, FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 660.9
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.34,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 570.0,
"tokens_per_second": 65.5
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick",
"description": "Llama 4 Maverick",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 110.0,
"tokens_per_second": 548.6
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick",
"description": "Llama 4 Maverick",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.92,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 798.8
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "Together.ai (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.41,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 200.0,
"tokens_per_second": 109.6
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-maverick-(fp8)",
"description": "Llama 4 Maverick (FP8)",
"provider": "kluster.ai (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 710.0,
"tokens_per_second": 155.3
},
"intelligence": {
"quality_score": 51.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(march-2025)",
"description": "GPT-4o (March 2025)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 183.4
},
"intelligence": {
"quality_score": 50.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.0-pro-experimental-(ai-studio)",
"description": "Gemini 2.0 Pro Experimental (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 17240.0,
"tokens_per_second": 68.5
},
"intelligence": {
"quality_score": 49.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-qwen-14b",
"description": "DeepSeek R1 Distill Qwen 14B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 800.0,
"tokens_per_second": 43.9
},
"intelligence": {
"quality_score": 49.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-qwen-14b",
"description": "DeepSeek R1 Distill Qwen 14B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 169.4
},
"intelligence": {
"quality_score": 49.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-medium-3",
"description": "Mistral Medium 3",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 84.6
},
"intelligence": {
"quality_score": 49.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-medium-3",
"description": "Mistral Medium 3",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 55.4
},
"intelligence": {
"quality_score": 49.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.5-flash-(ai_studio)",
"description": "Gemini 2.5 Flash (AI_Studio)",
"provider": "Google (AI_Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 360.0,
"tokens_per_second": 300.2
},
"intelligence": {
"quality_score": 49.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "Lambda Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 350.0,
"tokens_per_second": 65.2
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "Cerebras",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.94,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 2107.9
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b-base",
"description": "DeepSeek R1 Distill Llama 70B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 570.0,
"tokens_per_second": 58.4
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 330.0,
"tokens_per_second": 31.6
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 31.5
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.81,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 160.0,
"tokens_per_second": 411.0
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1740.0,
"tokens_per_second": 300.2
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-70b",
"description": "DeepSeek R1 Distill Llama 70B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 119.5
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.7-sonnet",
"description": "Claude 3.7 Sonnet",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1410.0,
"tokens_per_second": 49.5
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.7-sonnet-vertex",
"description": "Claude 3.7 Sonnet Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 890.0,
"tokens_per_second": 78.8
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.7-sonnet",
"description": "Claude 3.7 Sonnet",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1660.0,
"tokens_per_second": 78.5
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.0-flash-vertex",
"description": "Gemini 2.0 Flash Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 223.1
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.0-flash-(ai-studio)",
"description": "Gemini 2.0 Flash (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 223.9
},
"intelligence": {
"quality_score": 48.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-4b-(reasoning)-fast",
"description": "Qwen3 4B (Reasoning) Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 158.4
},
"intelligence": {
"quality_score": 47.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-4b-(reasoning)-(fp8)",
"description": "Qwen3 4B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 640.0,
"tokens_per_second": 51.4
},
"intelligence": {
"quality_score": 47.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-4b-(reasoning)",
"description": "Qwen3 4B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1030.0,
"tokens_per_second": 100.2
},
"intelligence": {
"quality_score": 47.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "reka-flash-3",
"description": "Reka Flash 3",
"provider": "Reka AI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 940.0,
"tokens_per_second": 56.2
},
"intelligence": {
"quality_score": 47.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-235b",
"description": "Qwen3 235B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.23,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1110.0,
"tokens_per_second": 71.1
},
"intelligence": {
"quality_score": 47.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.0-flash-(exp)-(ai-studio)",
"description": "Gemini 2.0 Flash (exp) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 250.0,
"tokens_per_second": 222.7
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)-(fp8)",
"description": "DeepSeek V3 (Dec '24) (FP8)",
"provider": "Hyperbolic (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1340.0,
"tokens_per_second": 29.0
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)",
"description": "DeepSeek V3 (Dec '24)",
"provider": "Nebius",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 660.0,
"tokens_per_second": 21.2
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)",
"description": "DeepSeek V3 (Dec '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 77.6
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)",
"description": "DeepSeek V3 (Dec '24)",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.31,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 850.0,
"tokens_per_second": 57.2
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)",
"description": "DeepSeek V3 (Dec '24)",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.51,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 410.0,
"tokens_per_second": 28.8
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)-turbo",
"description": "DeepSeek V3 (Dec '24) Turbo",
"provider": "Novita Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.63,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1140.0,
"tokens_per_second": 30.3
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)",
"description": "DeepSeek V3 (Dec '24)",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.89,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 810.0,
"tokens_per_second": 28.9
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-v3-(dec-'24)-(fp8)",
"description": "DeepSeek V3 (Dec '24) (FP8)",
"provider": "Together.ai (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 610.0,
"tokens_per_second": 93.0
},
"intelligence": {
"quality_score": 46.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-max",
"description": "Qwen2.5 Max",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1250.0,
"tokens_per_second": 42.7
},
"intelligence": {
"quality_score": 45.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-pro-(sep)-(vertex)",
"description": "Gemini 1.5 Pro (Sep) (Vertex)",
"provider": "Google (Vertex)",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 92.6
},
"intelligence": {
"quality_score": 45.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-pro-(sep)-(ai-studio)",
"description": "Gemini 1.5 Pro (Sep) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 93.0
},
"intelligence": {
"quality_score": 45.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-sonnet-(oct)",
"description": "Claude 3.5 Sonnet (Oct)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 920.0,
"tokens_per_second": 47.3
},
"intelligence": {
"quality_score": 44.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-sonnet-(oct)-vertex",
"description": "Claude 3.5 Sonnet (Oct) Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1300.0,
"tokens_per_second": 80.2
},
"intelligence": {
"quality_score": 44.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-sonnet-(oct)",
"description": "Claude 3.5 Sonnet (Oct)",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 810.0,
"tokens_per_second": 78.8
},
"intelligence": {
"quality_score": 44.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-32b",
"description": "Qwen3 32B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.23,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1030.0,
"tokens_per_second": 63.1
},
"intelligence": {
"quality_score": 44.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "sonar",
"description": "Sonar",
"provider": "Perplexity",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1810.0,
"tokens_per_second": 107.5
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Lambda Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.14,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 120.7
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout-(fp8)",
"description": "Llama 4 Scout (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 370.0,
"tokens_per_second": 128.0
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Cerebras",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.7,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 2428.4
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.29,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 160.4
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout-vertex",
"description": "Llama 4 Scout Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.36,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 130.1
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "CentML",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 250.0,
"tokens_per_second": 115.4
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.34,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 32.4
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 164.2
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.14,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 34.6
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 720.0,
"tokens_per_second": 55.5
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 170.0,
"tokens_per_second": 601.8
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.47,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1690.0,
"tokens_per_second": 786.2
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.28,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 190.0,
"tokens_per_second": 122.9
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-4-scout",
"description": "Llama 4 Scout",
"provider": "kluster.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.71,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 640.0,
"tokens_per_second": 98.6
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "sonar-pro",
"description": "Sonar Pro",
"provider": "Perplexity",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 2700.0,
"tokens_per_second": 80.0
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b-preview",
"description": "QwQ 32B-Preview",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.14,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 49.8
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwq-32b-preview",
"description": "QwQ 32B-Preview",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 98.0
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "nova-premier",
"description": "Nova Premier",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 5.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 830.0,
"tokens_per_second": 61.8
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-30b-a3b",
"description": "Qwen3 30B A3B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1100.0,
"tokens_per_second": 92.6
},
"intelligence": {
"quality_score": 43.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(nov-'24)",
"description": "GPT-4o (Nov '24)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 600.0,
"tokens_per_second": 110.4
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(nov-'24)",
"description": "GPT-4o (Nov '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1220.0,
"tokens_per_second": 122.0
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.0-flash-lite-(feb-'25)-(ai-studio)",
"description": "Gemini 2.0 Flash-Lite (Feb '25) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 250.0,
"tokens_per_second": 213.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-(fp8)",
"description": "Llama 3.3 70B (FP8)",
"provider": "Lambda Labs (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 350.0,
"tokens_per_second": 55.9
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Parasail",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 435.7
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-(fp8)",
"description": "Llama 3.3 70B (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.28,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 72.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Cerebras",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.94,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 190.0,
"tokens_per_second": 2428.8
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1110.0,
"tokens_per_second": 38.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.71,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 248.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-fast",
"description": "Llama 3.3 70B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 138.5
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-base",
"description": "Llama 3.3 70B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 630.0,
"tokens_per_second": 40.0
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-vertex",
"description": "Llama 3.3 70B Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.72,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 74.5
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-snowflake",
"description": "Llama 3.3 70B Snowflake",
"provider": "Snowflake Snowflake",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.58,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 570.0,
"tokens_per_second": 39.0
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "CentML",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 129.4
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.71,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 49.1
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 3580.0,
"tokens_per_second": 155.1
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-(turbo,-fp8)",
"description": "Llama 3.3 70B (Turbo, FP8)",
"provider": "Deepinfra (Turbo, FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 250.0,
"tokens_per_second": 31.1
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.27,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 610.0,
"tokens_per_second": 26.5
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "FriendliAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 182.4
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 680.0,
"tokens_per_second": 55.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.64,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 220.0,
"tokens_per_second": 444.0
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 443.7
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b-turbo",
"description": "Llama 3.3 70B Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 151.4
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.3-70b",
"description": "Llama 3.3 70B",
"provider": "kluster.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.7,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 18.1
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.1-nano",
"description": "GPT-4.1 nano",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 114.1
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.1-nano",
"description": "GPT-4.1 nano",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 980.0,
"tokens_per_second": 137.2
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-14b",
"description": "Qwen3 14B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.61,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1050.0,
"tokens_per_second": 64.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(may-'24)",
"description": "GPT-4o (May '24)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 640.0,
"tokens_per_second": 93.3
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(may-'24)",
"description": "GPT-4o (May '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 720.0,
"tokens_per_second": 120.9
},
"intelligence": {
"quality_score": 41.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b-(fp8)",
"description": "Llama 3.1 405B (FP8)",
"provider": "Lambda Labs (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 32.9
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Parasail",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1680.0,
"tokens_per_second": 170.8
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Replicate",
"metrics": {
"cost": {
"blended_cost_per_1m": 9.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 990.0,
"tokens_per_second": 19.2
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1220.0,
"tokens_per_second": 87.5
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b-standard",
"description": "Llama 3.1 405B Standard",
"provider": "Amazon Bedrock Standard",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1830.0,
"tokens_per_second": 30.0
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b-latency-optimized",
"description": "Llama 3.1 405B Latency Optimized",
"provider": "Amazon Bedrock Latency Optimized",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 89.3
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b-base",
"description": "Llama 3.1 405B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 700.0,
"tokens_per_second": 32.4
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b-vertex",
"description": "Llama 3.1 405B Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 29.9
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 8.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 31.3
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 660.0,
"tokens_per_second": 83.7
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 760.0,
"tokens_per_second": 26.7
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1640.0,
"tokens_per_second": 172.4
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b",
"description": "Llama 3.1 405B",
"provider": "Databricks",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1030.0,
"tokens_per_second": 35.6
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-405b-turbo",
"description": "Llama 3.1 405B Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 530.0,
"tokens_per_second": 95.0
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b",
"description": "Qwen2.5 72B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1250.0,
"tokens_per_second": 29.3
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b",
"description": "Qwen2.5 72B",
"provider": "Nebius",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 690.0,
"tokens_per_second": 23.1
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b-fast",
"description": "Qwen2.5 72B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 69.2
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b",
"description": "Qwen2.5 72B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 380.0,
"tokens_per_second": 72.4
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b",
"description": "Qwen2.5 72B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 550.0,
"tokens_per_second": 36.8
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b-turbo",
"description": "Qwen2.5 72B Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 113.9
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-72b",
"description": "Qwen2.5 72B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1220.0,
"tokens_per_second": 58.1
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "minimax-text-01",
"description": "MiniMax-Text-01",
"provider": "MiniMax",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.42,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 770.0,
"tokens_per_second": 33.9
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-4",
"description": "Phi-4",
"provider": "Nebius",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 115.7
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-4",
"description": "Phi-4",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.22,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 39.9
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-4",
"description": "Phi-4",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.09,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 550.0,
"tokens_per_second": 32.4
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-a",
"description": "Command A",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 220.0,
"tokens_per_second": 92.5
},
"intelligence": {
"quality_score": 40.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-flash-(sep)-(vertex)",
"description": "Gemini 1.5 Flash (Sep) (Vertex)",
"provider": "Google (Vertex)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 180.0,
"tokens_per_second": 184.7
},
"intelligence": {
"quality_score": 39.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-flash-(sep)-(ai-studio)",
"description": "Gemini 1.5 Flash (Sep) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 191.7
},
"intelligence": {
"quality_score": 39.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-large-2-(nov-'24)",
"description": "Mistral Large 2 (Nov '24)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 72.7
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-large-2-(nov-'24)",
"description": "Mistral Large 2 (Nov '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 36.4
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-1.7b-(reasoning)-(fp8)",
"description": "Qwen3 1.7B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 720.0,
"tokens_per_second": 48.2
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-1.7b-(reasoning)",
"description": "Qwen3 1.7B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 960.0,
"tokens_per_second": 130.2
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-3-27b",
"description": "Gemma 3 27B",
"provider": "Parasail",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.29,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 85.8
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-3-27b-(ai_studio)",
"description": "Gemma 3 27B (AI_Studio)",
"provider": "Google (AI_Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 48.2
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-3-27b",
"description": "Gemma 3 27B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 630.0,
"tokens_per_second": 33.8
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-beta",
"description": "Grok Beta",
"provider": "xAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 7.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 66.7
},
"intelligence": {
"quality_score": 38.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "pixtral-large",
"description": "Pixtral Large",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 380.0,
"tokens_per_second": 79.0
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-instruct-32b-fast",
"description": "Qwen2.5 Instruct 32B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 88.0
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-instruct-32b-base",
"description": "Qwen2.5 Instruct 32B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 550.0,
"tokens_per_second": 59.3
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-nemotron-70b-(fp8)",
"description": "Llama 3.1 Nemotron 70B (FP8)",
"provider": "Lambda Labs (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 50.6
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-nemotron-70b-base",
"description": "Llama 3.1 Nemotron 70B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 39.1
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-nemotron-70b-fast",
"description": "Llama 3.1 Nemotron 70B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 560.0,
"tokens_per_second": 74.2
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-nemotron-70b",
"description": "Llama 3.1 Nemotron 70B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 570.0,
"tokens_per_second": 28.3
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "nova-pro",
"description": "Nova Pro",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 167.2
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-8b",
"description": "Qwen3 8B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.31,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 960.0,
"tokens_per_second": 95.1
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-large-2-(jul-'24)",
"description": "Mistral Large 2 (Jul '24)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 37.2
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-large-2-(jul-'24)",
"description": "Mistral Large 2 (Jul '24)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 32.0
},
"intelligence": {
"quality_score": 37.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-coder-32b",
"description": "Qwen2.5 Coder 32B",
"provider": "Lambda Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.09,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 43.1
},
"intelligence": {
"quality_score": 36.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-coder-32b",
"description": "Qwen2.5 Coder 32B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1130.0,
"tokens_per_second": 54.0
},
"intelligence": {
"quality_score": 36.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-coder-32b",
"description": "Qwen2.5 Coder 32B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.08,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 50.6
},
"intelligence": {
"quality_score": 36.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-coder-32b",
"description": "Qwen2.5 Coder 32B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 74.7
},
"intelligence": {
"quality_score": 36.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-mini",
"description": "GPT-4o mini",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 550.0,
"tokens_per_second": 73.5
},
"intelligence": {
"quality_score": 36.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-mini",
"description": "GPT-4o mini",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 870.0,
"tokens_per_second": 144.7
},
"intelligence": {
"quality_score": 36.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-(fp8)",
"description": "Llama 3.1 70B (FP8)",
"provider": "Lambda Labs (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.17,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 210.0,
"tokens_per_second": 50.3
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b",
"description": "Llama 3.1 70B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 940.0,
"tokens_per_second": 175.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-standard",
"description": "Llama 3.1 70B Standard",
"provider": "Amazon Bedrock Standard",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.72,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 31.6
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-latency-optimized",
"description": "Llama 3.1 70B Latency Optimized",
"provider": "Amazon Bedrock Latency Optimized",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 143.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-base",
"description": "Llama 3.1 70B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 650.0,
"tokens_per_second": 33.1
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-fast",
"description": "Llama 3.1 70B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 139.5
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-vertex",
"description": "Llama 3.1 70B Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.72,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 280.0,
"tokens_per_second": 72.9
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b",
"description": "Llama 3.1 70B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 54.1
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b",
"description": "Llama 3.1 70B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 380.0,
"tokens_per_second": 172.8
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-(turbo,-fp8)",
"description": "Llama 3.1 70B (Turbo, FP8)",
"provider": "Deepinfra (Turbo, FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.14,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 250.0,
"tokens_per_second": 38.9
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b",
"description": "Llama 3.1 70B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.27,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 21.9
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b",
"description": "Llama 3.1 70B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1330.0,
"tokens_per_second": 47.5
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b-turbo",
"description": "Llama 3.1 70B Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 350.0,
"tokens_per_second": 155.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-70b",
"description": "Llama 3.1 70B",
"provider": "Simplismart",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 125.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-3.1",
"description": "Mistral Small 3.1",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 123.5
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-3.1",
"description": "Mistral Small 3.1",
"provider": "Parasail",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 63.7
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-3.1-vertex",
"description": "Mistral Small 3.1 Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 200.0,
"tokens_per_second": 209.6
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-3",
"description": "Mistral Small 3",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 152.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-3",
"description": "Mistral Small 3",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.07,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 66.9
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-3",
"description": "Mistral Small 3",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 200.0,
"tokens_per_second": 97.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-4b",
"description": "Qwen3 4B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 980.0,
"tokens_per_second": 102.6
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-opus",
"description": "Claude 3 Opus",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1200.0,
"tokens_per_second": 26.4
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-opus-vertex",
"description": "Claude 3 Opus Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 2690.0,
"tokens_per_second": 22.4
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-opus",
"description": "Claude 3 Opus",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1070.0,
"tokens_per_second": 27.6
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-haiku-standard",
"description": "Claude 3.5 Haiku Standard",
"provider": "Amazon Bedrock Standard",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 940.0,
"tokens_per_second": 57.6
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-haiku-latency-optimized",
"description": "Claude 3.5 Haiku Latency Optimized",
"provider": "Amazon Bedrock Latency Optimized",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 530.0,
"tokens_per_second": 97.0
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-haiku-vertex",
"description": "Claude 3.5 Haiku Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1710.0,
"tokens_per_second": 67.1
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-haiku",
"description": "Claude 3.5 Haiku",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 750.0,
"tokens_per_second": 66.2
},
"intelligence": {
"quality_score": 35.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "devstral",
"description": "Devstral",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 130.0
},
"intelligence": {
"quality_score": 34.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-llama-8b",
"description": "DeepSeek R1 Distill Llama 8B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 690.0,
"tokens_per_second": 52.8
},
"intelligence": {
"quality_score": 34.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-3-12b",
"description": "Gemma 3 12B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1010.0,
"tokens_per_second": 19.9
},
"intelligence": {
"quality_score": 34.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-pro-(may)-(vertex)",
"description": "Gemini 1.5 Pro (May) (Vertex)",
"provider": "Google (Vertex)",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 330.0,
"tokens_per_second": 66.7
},
"intelligence": {
"quality_score": 34.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-pro-(may)-(ai-studio)",
"description": "Gemini 1.5 Pro (May) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 420.0,
"tokens_per_second": 67.7
},
"intelligence": {
"quality_score": 34.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-turbo",
"description": "Qwen2.5 Turbo",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.09,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1010.0,
"tokens_per_second": 108.2
},
"intelligence": {
"quality_score": 34.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-90b-(vision)",
"description": "Llama 3.2 90B (Vision)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.72,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 60.7
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-90b-(vision)-vertex",
"description": "Llama 3.2 90B (Vision) Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 200.0,
"tokens_per_second": 32.8
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-90b-(vision)",
"description": "Llama 3.2 90B (Vision)",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.36,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 19.6
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-90b-(vision)-turbo",
"description": "Llama 3.2 90B (Vision) Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 30.0
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2-72b",
"description": "Qwen2 72B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 350.0,
"tokens_per_second": 42.0
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2-72b",
"description": "Qwen2 72B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1350.0,
"tokens_per_second": 31.0
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "nova-lite",
"description": "Nova Lite",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 274.3
},
"intelligence": {
"quality_score": 33.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-flash-8b-ai-studio",
"description": "Gemini 1.5 Flash-8B AI Studio",
"provider": "Google AI Studio",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.07,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 200.0,
"tokens_per_second": 280.5
},
"intelligence": {
"quality_score": 31.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "jamba-1.5-large",
"description": "Jamba 1.5 Large",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 680.0,
"tokens_per_second": 50.6
},
"intelligence": {
"quality_score": 29.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "jamba-1.6-large",
"description": "Jamba 1.6 Large",
"provider": "AI21 Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 610.0,
"tokens_per_second": 49.7
},
"intelligence": {
"quality_score": 29.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-flash-(may)-(vertex)",
"description": "Gemini 1.5 Flash (May) (Vertex)",
"provider": "Google (Vertex)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 333.1
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-1.5-flash-(may)-(ai-studio)",
"description": "Gemini 1.5 Flash (May) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 210.0,
"tokens_per_second": 320.8
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "nova-micro",
"description": "Nova Micro",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 311.9
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "yi-large",
"description": "Yi-Large",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 66.0
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-sonnet",
"description": "Claude 3 Sonnet",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 850.0,
"tokens_per_second": 32.7
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-sonnet",
"description": "Claude 3 Sonnet",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 620.0,
"tokens_per_second": 60.4
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "codestral-(jan-'25)",
"description": "Codestral (Jan '25)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 118.5
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "codestral-(jan-'25)-vertex",
"description": "Codestral (Jan '25) Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 150.0,
"tokens_per_second": 150.0
},
"intelligence": {
"quality_score": 28.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Replicate",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.18,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 49.6
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1570.0,
"tokens_per_second": 15.6
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.86,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 47.3
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.9,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 740.0,
"tokens_per_second": 18.6
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.33,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 43.0
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.57,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1070.0,
"tokens_per_second": 15.8
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b",
"description": "Llama 3 70B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.64,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 250.0,
"tokens_per_second": 332.8
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b-(reference,-fp16)",
"description": "Llama 3 70B (Reference, FP16)",
"provider": "Together.ai (Reference, FP16)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 690.0,
"tokens_per_second": 130.7
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-70b-(turbo,-fp8)",
"description": "Llama 3 70B (Turbo, FP8)",
"provider": "Together.ai (Turbo, FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 136.3
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-(sep-'24)",
"description": "Mistral Small (Sep '24)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 76.7
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-4-multimodal",
"description": "Phi-4 Multimodal",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 350.0,
"tokens_per_second": 17.4
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-coder-7b--fast",
"description": "Qwen2.5 Coder 7B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 226.6
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen2.5-coder-7b--base",
"description": "Qwen2.5 Coder 7B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.01,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 192.2
},
"intelligence": {
"quality_score": 27.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-large-(feb-'24)",
"description": "Mistral Large (Feb '24)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 590.0,
"tokens_per_second": 28.9
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-large-(feb-'24)",
"description": "Mistral Large (Feb '24)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 43.6
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x22b",
"description": "Mixtral 8x22B",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 3.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 59.6
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x22b-base",
"description": "Mixtral 8x22B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 550.0,
"tokens_per_second": 82.2
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x22b-fast",
"description": "Mixtral 8x22B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.05,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 107.4
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x22b",
"description": "Mixtral 8x22B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 92.5
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-4-mini",
"description": "Phi-4 Mini",
"provider": "CentML",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 217.4
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-4-mini",
"description": "Phi-4 Mini",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 56.8
},
"intelligence": {
"quality_score": 26.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-1.7b",
"description": "Qwen3 1.7B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1000.0,
"tokens_per_second": 133.8
},
"intelligence": {
"quality_score": 25.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "phi-3-medium-14b",
"description": "Phi-3 Medium 14B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 400.0,
"tokens_per_second": 52.9
},
"intelligence": {
"quality_score": 25.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-3-4b",
"description": "Gemma 3 4B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.03,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 90.0
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-2.1",
"description": "Claude 2.1",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 12.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1580.0,
"tokens_per_second": 29.1
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-2.1",
"description": "Claude 2.1",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 12.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 890.0,
"tokens_per_second": 13.9
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Lambda Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.03,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 134.7
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Parasail",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 1135.1
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Cerebras",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 2155.9
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 800.0,
"tokens_per_second": 431.1
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.22,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 280.0,
"tokens_per_second": 241.8
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b-fast",
"description": "Llama 3.1 8B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 182.0
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b-base",
"description": "Llama 3.1 8B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.03,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 560.0,
"tokens_per_second": 58.7
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b-vertex",
"description": "Llama 3.1 8B Vertex",
"provider": "Google Vertex",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 170.0,
"tokens_per_second": 119.1
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 215.0
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Fireworks",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 220.0,
"tokens_per_second": 295.0
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 500.0,
"tokens_per_second": 53.9
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "FriendliAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 280.0,
"tokens_per_second": 428.0
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.03,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 670.0,
"tokens_per_second": 73.8
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 170.0,
"tokens_per_second": 853.9
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 1183.5
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b-turbo",
"description": "Llama 3.1 8B Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.18,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 166.1
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "Simplismart",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 170.0,
"tokens_per_second": 458.3
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.1-8b",
"description": "Llama 3.1 8B",
"provider": "kluster.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.18,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 62.1
},
"intelligence": {
"quality_score": 24.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "pixtral-12b",
"description": "Pixtral 12B",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 83.8
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "pixtral-12b",
"description": "Pixtral 12B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 580.0,
"tokens_per_second": 76.4
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-0.6b-(reasoning)-(fp8)",
"description": "Qwen3 0.6B (Reasoning) (FP8)",
"provider": "Novita (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 630.0,
"tokens_per_second": 47.1
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-0.6b-(reasoning)",
"description": "Qwen3 0.6B (Reasoning)",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.4,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 920.0,
"tokens_per_second": 211.0
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-(feb-'24)",
"description": "Mistral Small (Feb '24)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 280.0,
"tokens_per_second": 144.1
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-small-(feb-'24)",
"description": "Mistral Small (Feb '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 87.3
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-medium",
"description": "Mistral Medium",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.09,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 450.0,
"tokens_per_second": 70.4
},
"intelligence": {
"quality_score": 23.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "ministral-8b",
"description": "Ministral 8B",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 130.0
},
"intelligence": {
"quality_score": 22.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-9b-fast",
"description": "Gemma 2 9B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 166.2
},
"intelligence": {
"quality_score": 22.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-9b-base",
"description": "Gemma 2 9B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.03,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 151.9
},
"intelligence": {
"quality_score": 22.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-9b",
"description": "Gemma 2 9B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 710.0,
"tokens_per_second": 18.0
},
"intelligence": {
"quality_score": 22.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-9b",
"description": "Gemma 2 9B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 709.1
},
"intelligence": {
"quality_score": 22.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "lfm-40b",
"description": "LFM 40B",
"provider": "Lambda Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 170.0,
"tokens_per_second": 160.8
},
"intelligence": {
"quality_score": 22.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r+",
"description": "Command-R+",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 47.7
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r+",
"description": "Command-R+",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 280.0,
"tokens_per_second": 47.9
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Replicate",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 390.0,
"tokens_per_second": 79.4
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 104.0
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 340.0,
"tokens_per_second": 73.8
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 113.4
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 740.0,
"tokens_per_second": 58.2
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Groq",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 1348.7
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3-8b",
"description": "Llama 3 8B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 164.7
},
"intelligence": {
"quality_score": 21.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "codestral-(may-'24)",
"description": "Codestral (May '24)",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 113.3
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "aya-expanse-32b",
"description": "Aya Expanse 32B",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 180.0,
"tokens_per_second": 121.2
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r+-(apr-'24)",
"description": "Command-R+ (Apr '24)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 490.0,
"tokens_per_second": 47.2
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r+-(apr-'24)",
"description": "Command-R+ (Apr '24)",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 56.1
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r+-(apr-'24)",
"description": "Command-R+ (Apr '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 650.0,
"tokens_per_second": 28.7
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "ministral-3b",
"description": "Ministral 3B",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 225.2
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-nemo",
"description": "Mistral NeMo",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 139.1
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-nemo-(fp8)",
"description": "Mistral NeMo (FP8)",
"provider": "Parasail (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.11,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 520.0,
"tokens_per_second": 99.8
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-nemo-fast",
"description": "Mistral NeMo Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 490.0,
"tokens_per_second": 154.7
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-nemo-base",
"description": "Mistral NeMo Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 580.0,
"tokens_per_second": 44.6
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-nemo",
"description": "Mistral NeMo",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.03,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 52.4
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b-(fp8)",
"description": "Llama 3.2 3B (FP8)",
"provider": "Lambda Labs (FP8)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.02,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 224.2
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b",
"description": "Llama 3.2 3B",
"provider": "Hyperbolic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1030.0,
"tokens_per_second": 98.4
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b",
"description": "Llama 3.2 3B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 460.0,
"tokens_per_second": 72.1
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b-base",
"description": "Llama 3.2 3B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.01,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 490.0,
"tokens_per_second": 122.1
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b",
"description": "Llama 3.2 3B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.01,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 180.0,
"tokens_per_second": 123.5
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b",
"description": "Llama 3.2 3B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 610.0,
"tokens_per_second": 88.8
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b",
"description": "Llama 3.2 3B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 210.0,
"tokens_per_second": 1586.3
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-3b-turbo",
"description": "Llama 3.2 3B Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 162.3
},
"intelligence": {
"quality_score": 20.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-r1-distill-qwen-1.5b",
"description": "DeepSeek R1 Distill Qwen 1.5B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.18,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 240.0,
"tokens_per_second": 384.6
},
"intelligence": {
"quality_score": 19.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "jamba-1.5-mini",
"description": "Jamba 1.5 Mini",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 82.7
},
"intelligence": {
"quality_score": 18.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "jamba-1.6-mini",
"description": "Jamba 1.6 Mini",
"provider": "AI21 Labs",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 360.0,
"tokens_per_second": 179.7
},
"intelligence": {
"quality_score": 18.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x7b",
"description": "Mixtral 8x7B",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.7,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 80.6
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x7b",
"description": "Mixtral 8x7B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.51,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 83.9
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x7b-fast",
"description": "Mixtral 8x7B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.23,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 125.9
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x7b-base",
"description": "Mixtral 8x7B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 550.0,
"tokens_per_second": 117.6
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x7b",
"description": "Mixtral 8x7B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 93.0
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mixtral-8x7b",
"description": "Mixtral 8x7B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.6,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 57.3
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen3-0.6b",
"description": "Qwen3 0.6B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.19,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 980.0,
"tokens_per_second": 215.9
},
"intelligence": {
"quality_score": 17.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "aya-expanse-8b",
"description": "Aya Expanse 8B",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 140.0,
"tokens_per_second": 165.2
},
"intelligence": {
"quality_score": 16.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r",
"description": "Command-R",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 330.0,
"tokens_per_second": 109.0
},
"intelligence": {
"quality_score": 15.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r",
"description": "Command-R",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 200.0,
"tokens_per_second": 58.2
},
"intelligence": {
"quality_score": 15.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r-(mar-'24)",
"description": "Command-R (Mar '24)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 330.0,
"tokens_per_second": 108.5
},
"intelligence": {
"quality_score": 15.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r-(mar-'24)",
"description": "Command-R (Mar '24)",
"provider": "Cohere",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 160.0,
"tokens_per_second": 160.8
},
"intelligence": {
"quality_score": 15.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "command-r-(mar-'24)",
"description": "Command-R (Mar '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 45.9
},
"intelligence": {
"quality_score": 15.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "codestral-mamba",
"description": "Codestral-Mamba",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 94.3
},
"intelligence": {
"quality_score": 14.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-7b",
"description": "Mistral 7B",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 290.0,
"tokens_per_second": 104.3
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-7b",
"description": "Mistral 7B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.16,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 93.4
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-7b",
"description": "Mistral 7B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 210.0,
"tokens_per_second": 94.8
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-7b",
"description": "Mistral 7B",
"provider": "Novita",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.04,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 800.0,
"tokens_per_second": 126.2
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-7b",
"description": "Mistral 7B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 150.0,
"tokens_per_second": 180.6
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-1b",
"description": "Llama 3.2 1B",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 119.1
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-1b-base",
"description": "Llama 3.2 1B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.01,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 96.9
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-1b",
"description": "Llama 3.2 1B",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.01,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 270.0,
"tokens_per_second": 115.6
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-1b",
"description": "Llama 3.2 1B",
"provider": "SambaNova",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.05,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 190.0,
"tokens_per_second": 2222.9
},
"intelligence": {
"quality_score": 10.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-2-chat-7b",
"description": "Llama 2 Chat 7B",
"provider": "Replicate",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 430.0,
"tokens_per_second": 132.6
},
"intelligence": {
"quality_score": 8.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-11b-(vision)",
"description": "Llama 3.2 11B (Vision)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.16,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 440.0,
"tokens_per_second": 189.3
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-11b-(vision)",
"description": "Llama 3.2 11B (Vision)",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.05,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 300.0,
"tokens_per_second": 55.1
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "llama-3.2-11b-(vision)-turbo",
"description": "Llama 3.2 11B (Vision) Turbo",
"provider": "Together.ai Turbo",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.18,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 190.0,
"tokens_per_second": 122.2
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-4-opus-thinking",
"description": "Claude 4 Opus Thinking",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 30.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 3160.0,
"tokens_per_second": 15.5
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "mistral-saba",
"description": "Mistral Saba",
"provider": "Mistral",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.3,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 91.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "sonar-reasoning",
"description": "Sonar Reasoning",
"provider": "Perplexity",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1760.0,
"tokens_per_second": 73.6
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-3-mini-reasoning-(low)",
"description": "Grok 3 mini Reasoning (low)",
"provider": "xAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 310.0,
"tokens_per_second": 120.2
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "grok-3-mini-reasoning-(low)-fast",
"description": "Grok 3 mini Reasoning (low) Fast",
"provider": "xAI Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.45,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 320.0,
"tokens_per_second": 203.6
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "reka-flash",
"description": "Reka Flash",
"provider": "Reka AI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 890.0,
"tokens_per_second": 46.2
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "reka-core",
"description": "Reka Core",
"provider": "Reka AI",
"metrics": {
"cost": {
"blended_cost_per_1m": 2.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 860.0,
"tokens_per_second": 27.3
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "reka-flash-(feb-'24)",
"description": "Reka Flash (Feb '24)",
"provider": "Reka AI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.35,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 840.0,
"tokens_per_second": 45.4
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "reka-edge",
"description": "Reka Edge",
"provider": "Reka AI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.1,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 820.0,
"tokens_per_second": 85.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o1-preview",
"description": "o1-preview",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 26.25,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 19470.0,
"tokens_per_second": 162.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "o1-preview",
"description": "o1-preview",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 28.88,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 21280.0,
"tokens_per_second": 157.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(aug-'24)",
"description": "GPT-4o (Aug '24)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 710.0,
"tokens_per_second": 82.3
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4o-(aug-'24)",
"description": "GPT-4o (Aug '24)",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 4.38,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 700.0,
"tokens_per_second": 125.6
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4-turbo",
"description": "GPT-4 Turbo",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 15.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 840.0,
"tokens_per_second": 46.8
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4-turbo",
"description": "GPT-4 Turbo",
"provider": "Microsoft Azure",
"metrics": {
"cost": {
"blended_cost_per_1m": 15.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1640.0,
"tokens_per_second": 40.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-3.5-turbo",
"description": "GPT-3.5 Turbo",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 380.0,
"tokens_per_second": 140.4
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4.5-(preview)",
"description": "GPT-4.5 (Preview)",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 93.75,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1110.0,
"tokens_per_second": 71.9
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gpt-4",
"description": "GPT-4",
"provider": "OpenAI",
"metrics": {
"cost": {
"blended_cost_per_1m": 37.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 780.0,
"tokens_per_second": 25.5
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemini-2.0-flash-lite-(preview)-(ai-studio)",
"description": "Gemini 2.0 Flash-Lite (Preview) (AI Studio)",
"provider": "Google (AI Studio)",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.13,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 260.0,
"tokens_per_second": 215.5
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-27b-fast",
"description": "Gemma 2 27B Fast",
"provider": "Nebius Fast",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.26,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 510.0,
"tokens_per_second": 88.1
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-27b-base",
"description": "Gemma 2 27B Base",
"provider": "Nebius Base",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 580.0,
"tokens_per_second": 52.0
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "gemma-2-27b",
"description": "Gemma 2 27B",
"provider": "Together.ai",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.8,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 230.0,
"tokens_per_second": 90.2
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-sonnet-(june)",
"description": "Claude 3.5 Sonnet (June)",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 930.0,
"tokens_per_second": 47.0
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3.5-sonnet-(june)",
"description": "Claude 3.5 Sonnet (June)",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 6.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 940.0,
"tokens_per_second": 79.9
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-haiku",
"description": "Claude 3 Haiku",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1000.0,
"tokens_per_second": 107.0
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-3-haiku",
"description": "Claude 3 Haiku",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.5,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 470.0,
"tokens_per_second": 144.9
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-instant",
"description": "Claude Instant",
"provider": "Amazon Bedrock",
"metrics": {
"cost": {
"blended_cost_per_1m": 1.2,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 540.0,
"tokens_per_second": 62.9
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "claude-2.0",
"description": "Claude 2.0",
"provider": "Anthropic",
"metrics": {
"cost": {
"blended_cost_per_1m": 12.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 890.0,
"tokens_per_second": 30.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-coder-v2-lite-fast,-fp8",
"description": "DeepSeek Coder V2 Lite Fast, FP8",
"provider": "Nebius Fast, FP8",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.12,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 480.0,
"tokens_per_second": 98.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "deepseek-coder-v2-lite-base,-fp8",
"description": "DeepSeek Coder V2 Lite Base, FP8",
"provider": "Nebius Base, FP8",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.06,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 530.0,
"tokens_per_second": 111.9
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "openchat-3.5",
"description": "OpenChat 3.5",
"provider": "Deepinfra",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.05,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 380.0,
"tokens_per_second": 54.2
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "solar-mini",
"description": "Solar Mini",
"provider": "Upstage",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.15,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1010.0,
"tokens_per_second": 82.8
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
},
{
"name": "qwen1.5-chat-110b",
"description": "Qwen1.5 Chat 110B",
"provider": "Alibaba Cloud",
"metrics": {
"cost": {
"blended_cost_per_1m": 0.0,
"input_cost_per_1m": null,
"output_cost_per_1m": null
},
"speed": {
"time_to_first_token_ms": 1630.0,
"tokens_per_second": 23.7
},
"intelligence": {
"quality_score": 0.0,
"mmlu_score": null,
"gsm8k_score": null,
"bbh_score": null
}
}
}
]