{ "runs": [ { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 613.9321196754427 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 291.5155379231269 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 280.05330212131406 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 602.6345456319963 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 85.2809331488931 }, { "model": "zai-org/GLM-4.7-Flash", "model_clean": "zai-org/GLM-4.7-Flash", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 300.773560320048 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 321.88057686801585 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 274.46004720922855 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 182.16229690959702 }, { "model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 107.62460878889469 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 590.0492703672895 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 285.8275921888489 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 346.2003835540928 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 607.216674264294 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 122.62029501860121 }, { "model": "zai-org/GLM-4.7-Flash", "model_clean": "zai-org/GLM-4.7-Flash", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 366.77052981888835 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 320.0197833991106 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 279.7240042842149 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 196.2262690032198 }, { "model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 98.85048345093716 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 369.23212230245684 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP1", "gpu_config": "single", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 159.8857312165796 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 411.03557062490586 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 75.0407548829671 }, { "model": "zai-org/GLM-4.7-Flash", "model_clean": "zai-org/GLM-4.7-Flash", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 239.57478116575834 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 213.74630950782364 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 186.03115379827653 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 125.65027253668944 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "Triton", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 159.95620436815713 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 302.43330149293365 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 291.6914862601304 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 256.91287782898553 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 178.05541683872298 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 260.10671430704866 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 513.235753981134 }, { "model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 99.38780646163637 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 109.82173992256857 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 521.2677328949931 }, { "model": "zai-org/GLM-4.7-Flash", "model_clean": "zai-org/GLM-4.7-Flash", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "Triton", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 320.9476287228403 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 682.4360360233941 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 398.09474781142933 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 295.31575874126105 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 490.93757442090305 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 86.0910643999307 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 321.6166453306162 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 283.6309502128471 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 182.9186467257061 }, { "model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "ROCm", "network": "Ethernet", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 98.67941666807306 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 650.1471716939323 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 397.9710386242193 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 367.5704596781314 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 601.2567608739705 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 118.34229353876268 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 333.147212194374 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 295.0301359026215 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 193.87438091607942 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 433.5736973626181 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP1", "gpu_config": "single", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 180.43566315423652 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 405.0974544317216 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 74.75385852312364 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 214.65152188564062 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 188.17083503449163 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 128.30078036872973 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "ROCm", "network": "RoCE", "tag": "", "error": false, "test": "Throughput", "tp": 1, "tps_mean": 267.99881204205957 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 311.59947769256274 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 299.24102990342374 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 267.6893183038276 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP2", "gpu_config": "dual", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 174.78952518165474 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 342.7783704164132 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 550.8856190566602 }, { "model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": null, "name_params_b": null, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 98.10548978313048 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 111.32822174858649 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP2", "gpu_config": "dual", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "ROCm", "network": "Ethernet", "tag": "usb", "error": false, "test": "Throughput", "tp": 2, "tps_mean": 519.7896706376232 } ] }