updated benchmarks, fix start-vllm
This commit is contained in:
+21
-9
@@ -469,6 +469,10 @@
|
||||
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||
<input type="checkbox" id="toggleTP2" checked> TP2
|
||||
</label>
|
||||
<label
|
||||
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||
<input type="checkbox" id="toggleTP2Eth" checked> TP2 (Eth)
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<!-- Attention Group -->
|
||||
@@ -544,6 +548,7 @@
|
||||
activeTab: "Throughput",
|
||||
showTP1: true,
|
||||
showTP2: true,
|
||||
showTP2Eth: true,
|
||||
showTriton: true,
|
||||
showRocm: false
|
||||
};
|
||||
@@ -615,6 +620,7 @@
|
||||
// Toggles
|
||||
$('toggleTP1').addEventListener('change', e => { state.showTP1 = e.target.checked; render(); });
|
||||
$('toggleTP2').addEventListener('change', e => { state.showTP2 = e.target.checked; render(); });
|
||||
$('toggleTP2Eth').addEventListener('change', e => { state.showTP2Eth = e.target.checked; render(); });
|
||||
$('toggleTriton').addEventListener('change', e => { state.showTriton = e.target.checked; render(); });
|
||||
$('toggleRocm').addEventListener('change', e => { state.showRocm = e.target.checked; render(); });
|
||||
}
|
||||
@@ -636,13 +642,17 @@
|
||||
params: run.params_b || run.name_params_b,
|
||||
results: {
|
||||
1: { triton: null, rocm: null },
|
||||
2: { triton: null, rocm: null }
|
||||
2: { triton: null, rocm: null },
|
||||
"2_eth": { triton: null, rocm: null }
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const m = testGroups[testName].models[modelName];
|
||||
const tp = run.tp || 1;
|
||||
let tp = run.tp || 1;
|
||||
if (tp === 2 && run.network === "Ethernet") {
|
||||
tp = "2_eth";
|
||||
}
|
||||
|
||||
if (!m.results[tp]) m.results[tp] = { triton: null, rocm: null };
|
||||
|
||||
@@ -749,8 +759,12 @@
|
||||
if (state.showRocm) cols.push({ id: "tp1_rocm", label: "TP1 ROCm" });
|
||||
}
|
||||
if (state.showTP2) {
|
||||
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 Triton" });
|
||||
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 ROCm" });
|
||||
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 RoCE Triton" });
|
||||
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 RoCE ROCm" });
|
||||
}
|
||||
if (state.showTP2Eth) {
|
||||
if (state.showTriton) cols.push({ id: "tp2_eth_triton", label: "TP2 Eth Triton" });
|
||||
if (state.showRocm) cols.push({ id: "tp2_eth_rocm", label: "TP2 Eth ROCm" });
|
||||
}
|
||||
|
||||
// Thead
|
||||
@@ -790,11 +804,7 @@
|
||||
|
||||
// Data Cells
|
||||
cols.forEach(c => {
|
||||
let val = null;
|
||||
if (c.id === "tp1_triton") val = m.results[1]?.triton;
|
||||
if (c.id === "tp1_rocm") val = m.results[1]?.rocm;
|
||||
if (c.id === "tp2_triton") val = m.results[2]?.triton;
|
||||
if (c.id === "tp2_rocm") val = m.results[2]?.rocm;
|
||||
let val = getVal(m, c.id);
|
||||
|
||||
const bg = c.id.startsWith("tp2") ? 'style="background:#fbfdff;"' : "";
|
||||
rowHtml += `<td class="col-data" ${bg}>${formatVal(val, unit)}</td>`;
|
||||
@@ -823,6 +833,8 @@
|
||||
if (colId === "tp1_rocm") return m.results[1]?.rocm;
|
||||
if (colId === "tp2_triton") return m.results[2]?.triton;
|
||||
if (colId === "tp2_rocm") return m.results[2]?.rocm;
|
||||
if (colId === "tp2_eth_triton") return m.results["2_eth"]?.triton;
|
||||
if (colId === "tp2_eth_rocm") return m.results["2_eth"]?.rocm;
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +66,11 @@ def parse_logs():
|
||||
if not tp_match: continue
|
||||
tp = int(tp_match.group(1))
|
||||
|
||||
# Network
|
||||
network = "RoCE"
|
||||
if "_eth" in rest:
|
||||
network = "Ethernet"
|
||||
|
||||
# Model Name
|
||||
if "_" in model_part:
|
||||
model_display = model_part.replace("_", "/", 1)
|
||||
@@ -87,6 +92,7 @@ def parse_logs():
|
||||
"params_b": params_b,
|
||||
"name_params_b": params_b,
|
||||
"backend": backend_name, # "Triton" or "ROCm"
|
||||
"network": network,
|
||||
"error": False
|
||||
}
|
||||
|
||||
|
||||
+505
-171
@@ -1,131 +1,5 @@
|
||||
{
|
||||
"runs": [
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 383.3285005130725
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
"model_clean": "google/gemma-3-12b-it",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 169.75141153501525
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "AWQ",
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 168.9724830093454
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
"model_clean": "openai/gpt-oss-20b",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 430.8537270233502
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
"model_clean": "openai/gpt-oss-120b",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 77.93077982357597
|
||||
},
|
||||
{
|
||||
"model": "zai-org/GLM-4.7-Flash",
|
||||
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 246.02994547299596
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 227.90361622402403
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 200.1426829468909
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "Triton",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 132.2599488751683
|
||||
},
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
@@ -135,10 +9,11 @@
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 596.5891209659404
|
||||
"tps_mean": 613.9321196754427
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
@@ -149,10 +24,11 @@
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 294.9472545848014
|
||||
"tps_mean": 291.5155379231269
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
@@ -163,10 +39,11 @@
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 348.0799308087054
|
||||
"tps_mean": 280.05330212131406
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
@@ -177,10 +54,11 @@
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 593.7843034224891
|
||||
"tps_mean": 602.6345456319963
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
@@ -191,10 +69,11 @@
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 112.4781801162827
|
||||
"tps_mean": 85.2809331488931
|
||||
},
|
||||
{
|
||||
"model": "zai-org/GLM-4.7-Flash",
|
||||
@@ -205,10 +84,11 @@
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 346.0061963818796
|
||||
"tps_mean": 300.773560320048
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
@@ -219,10 +99,11 @@
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 320.69249844623016
|
||||
"tps_mean": 321.88057686801585
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
@@ -233,10 +114,11 @@
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 274.7000183491961
|
||||
"tps_mean": 274.46004720922855
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
@@ -247,10 +129,176 @@
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 167.00232766189475
|
||||
"tps_mean": 182.16229690959702
|
||||
},
|
||||
{
|
||||
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "Triton",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 107.62460878889469
|
||||
},
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 590.0492703672895
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
"model_clean": "google/gemma-3-12b-it",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 285.8275921888489
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "AWQ",
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 346.2003835540928
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
"model_clean": "openai/gpt-oss-20b",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 607.216674264294
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
"model_clean": "openai/gpt-oss-120b",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 122.62029501860121
|
||||
},
|
||||
{
|
||||
"model": "zai-org/GLM-4.7-Flash",
|
||||
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 366.77052981888835
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 320.0197833991106
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 279.7240042842149
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 196.2262690032198
|
||||
},
|
||||
{
|
||||
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 98.85048345093716
|
||||
},
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
@@ -260,25 +308,12 @@
|
||||
"quant": "BF16",
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 452.7925628873698
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
"model_clean": "google/gemma-3-12b-it",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "ROCm",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 292.4074663029466
|
||||
"tps_mean": 369.23212230245684
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
@@ -288,11 +323,12 @@
|
||||
"quant": "AWQ",
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 190.86242019407229
|
||||
"tps_mean": 159.8857312165796
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
@@ -302,11 +338,12 @@
|
||||
"quant": "BF16",
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 440.75738846836555
|
||||
"tps_mean": 411.03557062490586
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
@@ -316,11 +353,27 @@
|
||||
"quant": "BF16",
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 76.44313314138553
|
||||
"tps_mean": 75.0407548829671
|
||||
},
|
||||
{
|
||||
"model": "zai-org/GLM-4.7-Flash",
|
||||
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 239.57478116575834
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
@@ -330,11 +383,12 @@
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 229.9835374194385
|
||||
"tps_mean": 213.74630950782364
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
@@ -344,11 +398,12 @@
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 203.38751203489863
|
||||
"tps_mean": 186.03115379827653
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
@@ -358,11 +413,27 @@
|
||||
"quant": "GPTQ",
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "ROCm",
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 135.3839809398758
|
||||
"tps_mean": 125.65027253668944
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
"model_clean": "google/gemma-3-12b-it",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "Triton",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 159.95620436815713
|
||||
},
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
@@ -373,10 +444,11 @@
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 649.02791593759
|
||||
"tps_mean": 682.4360360233941
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
@@ -387,10 +459,11 @@
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 403.09652364564084
|
||||
"tps_mean": 398.09474781142933
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
@@ -401,10 +474,11 @@
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 371.4058491591393
|
||||
"tps_mean": 295.31575874126105
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
@@ -415,10 +489,11 @@
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 597.5787987620997
|
||||
"tps_mean": 490.93757442090305
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
@@ -429,10 +504,11 @@
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 111.8113988472388
|
||||
"tps_mean": 86.0910643999307
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
@@ -443,10 +519,11 @@
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 315.906032423287
|
||||
"tps_mean": 321.6166453306162
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
@@ -457,10 +534,11 @@
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 292.0384117325289
|
||||
"tps_mean": 283.6309502128471
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
@@ -471,10 +549,266 @@
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 165.5348293928834
|
||||
"tps_mean": 182.9186467257061
|
||||
},
|
||||
{
|
||||
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": null,
|
||||
"name_params_b": null,
|
||||
"backend": "ROCm",
|
||||
"network": "Ethernet",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 98.67941666807306
|
||||
},
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 650.1471716939323
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
"model_clean": "google/gemma-3-12b-it",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 397.9710386242193
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "AWQ",
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 367.5704596781314
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
"model_clean": "openai/gpt-oss-20b",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 601.2567608739705
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
"model_clean": "openai/gpt-oss-120b",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "BF16",
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 118.34229353876268
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 333.147212194374
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 295.0301359026215
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"env": "TP2",
|
||||
"gpu_config": "dual",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 2,
|
||||
"tps_mean": 193.87438091607942
|
||||
},
|
||||
{
|
||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 8.0,
|
||||
"name_params_b": 8.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 433.5736973626181
|
||||
},
|
||||
{
|
||||
"model": "Qwen/Qwen3-14B-AWQ",
|
||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "AWQ",
|
||||
"params_b": 14.0,
|
||||
"name_params_b": 14.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 180.43566315423652
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-20b",
|
||||
"model_clean": "openai/gpt-oss-20b",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 20.0,
|
||||
"name_params_b": 20.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 405.0974544317216
|
||||
},
|
||||
{
|
||||
"model": "openai/gpt-oss-120b",
|
||||
"model_clean": "openai/gpt-oss-120b",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 120.0,
|
||||
"name_params_b": 120.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 74.75385852312364
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 214.65152188564062
|
||||
},
|
||||
{
|
||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 30.0,
|
||||
"name_params_b": 30.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 188.17083503449163
|
||||
},
|
||||
{
|
||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "GPTQ",
|
||||
"params_b": 80.0,
|
||||
"name_params_b": 80.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 128.30078036872973
|
||||
},
|
||||
{
|
||||
"model": "google/gemma-3-12b-it",
|
||||
"model_clean": "google/gemma-3-12b-it",
|
||||
"env": "TP1",
|
||||
"gpu_config": "single",
|
||||
"quant": "BF16",
|
||||
"params_b": 12.0,
|
||||
"name_params_b": 12.0,
|
||||
"backend": "ROCm",
|
||||
"network": "RoCE",
|
||||
"error": false,
|
||||
"test": "Throughput",
|
||||
"tp": 1,
|
||||
"tps_mean": 267.99881204205957
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user