updated benchmarks, fix start-vllm

This commit is contained in:
Donato Capitella
2026-02-23 19:39:19 +00:00
szülő e0fadf426b
commit e726d406fa
59 fájl változott, egészen pontosan 886 új sor hozzáadva és 309 régi sor törölve
+21 -9
Fájl megtekintése
@@ -469,6 +469,10 @@
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
<input type="checkbox" id="toggleTP2" checked> TP2
</label>
<label
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
<input type="checkbox" id="toggleTP2Eth" checked> TP2 (Eth)
</label>
</div>
<!-- Attention Group -->
@@ -544,6 +548,7 @@
activeTab: "Throughput",
showTP1: true,
showTP2: true,
showTP2Eth: true,
showTriton: true,
showRocm: false
};
@@ -615,6 +620,7 @@
// Toggles
$('toggleTP1').addEventListener('change', e => { state.showTP1 = e.target.checked; render(); });
$('toggleTP2').addEventListener('change', e => { state.showTP2 = e.target.checked; render(); });
$('toggleTP2Eth').addEventListener('change', e => { state.showTP2Eth = e.target.checked; render(); });
$('toggleTriton').addEventListener('change', e => { state.showTriton = e.target.checked; render(); });
$('toggleRocm').addEventListener('change', e => { state.showRocm = e.target.checked; render(); });
}
@@ -636,13 +642,17 @@
params: run.params_b || run.name_params_b,
results: {
1: { triton: null, rocm: null },
2: { triton: null, rocm: null }
2: { triton: null, rocm: null },
"2_eth": { triton: null, rocm: null }
}
};
}
const m = testGroups[testName].models[modelName];
const tp = run.tp || 1;
let tp = run.tp || 1;
if (tp === 2 && run.network === "Ethernet") {
tp = "2_eth";
}
if (!m.results[tp]) m.results[tp] = { triton: null, rocm: null };
@@ -749,8 +759,12 @@
if (state.showRocm) cols.push({ id: "tp1_rocm", label: "TP1 ROCm" });
}
if (state.showTP2) {
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 Triton" });
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 ROCm" });
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 RoCE Triton" });
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 RoCE ROCm" });
}
if (state.showTP2Eth) {
if (state.showTriton) cols.push({ id: "tp2_eth_triton", label: "TP2 Eth Triton" });
if (state.showRocm) cols.push({ id: "tp2_eth_rocm", label: "TP2 Eth ROCm" });
}
// Thead
@@ -790,11 +804,7 @@
// Data Cells
cols.forEach(c => {
let val = null;
if (c.id === "tp1_triton") val = m.results[1]?.triton;
if (c.id === "tp1_rocm") val = m.results[1]?.rocm;
if (c.id === "tp2_triton") val = m.results[2]?.triton;
if (c.id === "tp2_rocm") val = m.results[2]?.rocm;
let val = getVal(m, c.id);
const bg = c.id.startsWith("tp2") ? 'style="background:#fbfdff;"' : "";
rowHtml += `<td class="col-data" ${bg}>${formatVal(val, unit)}</td>`;
@@ -823,6 +833,8 @@
if (colId === "tp1_rocm") return m.results[1]?.rocm;
if (colId === "tp2_triton") return m.results[2]?.triton;
if (colId === "tp2_rocm") return m.results[2]?.rocm;
if (colId === "tp2_eth_triton") return m.results["2_eth"]?.triton;
if (colId === "tp2_eth_rocm") return m.results["2_eth"]?.rocm;
return null;
}
+6
Fájl megtekintése
@@ -66,6 +66,11 @@ def parse_logs():
if not tp_match: continue
tp = int(tp_match.group(1))
# Network
network = "RoCE"
if "_eth" in rest:
network = "Ethernet"
# Model Name
if "_" in model_part:
model_display = model_part.replace("_", "/", 1)
@@ -87,6 +92,7 @@ def parse_logs():
"params_b": params_b,
"name_params_b": params_b,
"backend": backend_name, # "Triton" or "ROCm"
"network": network,
"error": False
}
+505 -171
Fájl megtekintése
@@ -1,131 +1,5 @@
{
"runs": [
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 383.3285005130725
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 169.75141153501525
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 168.9724830093454
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 430.8537270233502
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 77.93077982357597
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 246.02994547299596
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 227.90361622402403
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 200.1426829468909
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 132.2599488751683
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -135,10 +9,11 @@
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 596.5891209659404
"tps_mean": 613.9321196754427
},
{
"model": "google/gemma-3-12b-it",
@@ -149,10 +24,11 @@
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 294.9472545848014
"tps_mean": 291.5155379231269
},
{
"model": "Qwen/Qwen3-14B-AWQ",
@@ -163,10 +39,11 @@
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 348.0799308087054
"tps_mean": 280.05330212131406
},
{
"model": "openai/gpt-oss-20b",
@@ -177,10 +54,11 @@
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 593.7843034224891
"tps_mean": 602.6345456319963
},
{
"model": "openai/gpt-oss-120b",
@@ -191,10 +69,11 @@
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 112.4781801162827
"tps_mean": 85.2809331488931
},
{
"model": "zai-org/GLM-4.7-Flash",
@@ -205,10 +84,11 @@
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 346.0061963818796
"tps_mean": 300.773560320048
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
@@ -219,10 +99,11 @@
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 320.69249844623016
"tps_mean": 321.88057686801585
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
@@ -233,10 +114,11 @@
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 274.7000183491961
"tps_mean": 274.46004720922855
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
@@ -247,10 +129,176 @@
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 167.00232766189475
"tps_mean": 182.16229690959702
},
{
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 107.62460878889469
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 590.0492703672895
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 285.8275921888489
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 346.2003835540928
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 607.216674264294
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 122.62029501860121
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 366.77052981888835
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 320.0197833991106
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 279.7240042842149
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 196.2262690032198
},
{
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 98.85048345093716
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -260,25 +308,12 @@
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 452.7925628873698
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 292.4074663029466
"tps_mean": 369.23212230245684
},
{
"model": "Qwen/Qwen3-14B-AWQ",
@@ -288,11 +323,12 @@
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 190.86242019407229
"tps_mean": 159.8857312165796
},
{
"model": "openai/gpt-oss-20b",
@@ -302,11 +338,12 @@
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 440.75738846836555
"tps_mean": 411.03557062490586
},
{
"model": "openai/gpt-oss-120b",
@@ -316,11 +353,27 @@
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 76.44313314138553
"tps_mean": 75.0407548829671
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 239.57478116575834
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
@@ -330,11 +383,12 @@
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 229.9835374194385
"tps_mean": 213.74630950782364
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
@@ -344,11 +398,12 @@
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 203.38751203489863
"tps_mean": 186.03115379827653
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
@@ -358,11 +413,27 @@
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 135.3839809398758
"tps_mean": 125.65027253668944
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 159.95620436815713
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -373,10 +444,11 @@
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 649.02791593759
"tps_mean": 682.4360360233941
},
{
"model": "google/gemma-3-12b-it",
@@ -387,10 +459,11 @@
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 403.09652364564084
"tps_mean": 398.09474781142933
},
{
"model": "Qwen/Qwen3-14B-AWQ",
@@ -401,10 +474,11 @@
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 371.4058491591393
"tps_mean": 295.31575874126105
},
{
"model": "openai/gpt-oss-20b",
@@ -415,10 +489,11 @@
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 597.5787987620997
"tps_mean": 490.93757442090305
},
{
"model": "openai/gpt-oss-120b",
@@ -429,10 +504,11 @@
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 111.8113988472388
"tps_mean": 86.0910643999307
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
@@ -443,10 +519,11 @@
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 315.906032423287
"tps_mean": 321.6166453306162
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
@@ -457,10 +534,11 @@
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 292.0384117325289
"tps_mean": 283.6309502128471
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
@@ -471,10 +549,266 @@
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 165.5348293928834
"tps_mean": 182.9186467257061
},
{
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "ROCm",
"network": "Ethernet",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 98.67941666807306
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 650.1471716939323
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 397.9710386242193
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 367.5704596781314
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 601.2567608739705
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 118.34229353876268
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 333.147212194374
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 295.0301359026215
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 193.87438091607942
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 433.5736973626181
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 180.43566315423652
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 405.0974544317216
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 74.75385852312364
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 214.65152188564062
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 188.17083503449163
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 128.30078036872973
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"network": "RoCE",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 267.99881204205957
}
]
}