1172 rader
30 KiB
JSON
1172 rader
30 KiB
JSON
{
|
|
"runs": [
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 613.9321196754427
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 291.5155379231269
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 280.05330212131406
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 602.6345456319963
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 85.2809331488931
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 300.773560320048
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 321.88057686801585
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 274.46004720922855
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 182.16229690959702
|
|
},
|
|
{
|
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 107.62460878889469
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 590.0492703672895
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 285.8275921888489
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 346.2003835540928
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 607.216674264294
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 122.62029501860121
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 366.77052981888835
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 320.0197833991106
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 279.7240042842149
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 196.2262690032198
|
|
},
|
|
{
|
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 98.85048345093716
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 369.23212230245684
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 159.8857312165796
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 411.03557062490586
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 75.0407548829671
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 239.57478116575834
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 213.74630950782364
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 186.03115379827653
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 125.65027253668944
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 159.95620436815713
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 302.43330149293365
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 291.6914862601304
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 256.91287782898553
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 178.05541683872298
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 260.10671430704866
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 513.235753981134
|
|
},
|
|
{
|
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 99.38780646163637
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 109.82173992256857
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 521.2677328949931
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 320.9476287228403
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 682.4360360233941
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 398.09474781142933
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 295.31575874126105
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 490.93757442090305
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 86.0910643999307
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 321.6166453306162
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 283.6309502128471
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 182.9186467257061
|
|
},
|
|
{
|
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 98.67941666807306
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 650.1471716939323
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 397.9710386242193
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 367.5704596781314
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 601.2567608739705
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 118.34229353876268
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 333.147212194374
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 295.0301359026215
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 193.87438091607942
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 433.5736973626181
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 180.43566315423652
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 405.0974544317216
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 74.75385852312364
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 214.65152188564062
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 188.17083503449163
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 128.30078036872973
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"network": "RoCE",
|
|
"tag": "",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 267.99881204205957
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 311.59947769256274
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 299.24102990342374
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 267.6893183038276
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 174.78952518165474
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 342.7783704164132
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 550.8856190566602
|
|
},
|
|
{
|
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 98.10548978313048
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 111.32822174858649
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"network": "Ethernet",
|
|
"tag": "usb",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 519.7896706376232
|
|
}
|
|
]
|
|
} |