480 Zeilen
12 KiB
JSON
480 Zeilen
12 KiB
JSON
{
|
|
"runs": [
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 238.92735772921657
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 138.28298716107304
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 83.68710295019198
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 185.3668705592303
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 70.98811455236003
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 133.7890668441555
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 117.75790760733192
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 86.97761646092924
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 71.52372211759099
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 385.96636603292677
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 244.97127724647316
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 328.49937859629955
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 276.48063742763867
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 117.11822522607781
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 200.2551557455423
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 170.28562676904787
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 130.40765123003763
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 98.44491320703105
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 241.1478963701438
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 180.0947121704009
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 86.05445772580717
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 185.01465145154089
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 67.92779311425312
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 121.19378360261106
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 89.76589360109976
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 70.75103526522766
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 381.7346838742502
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 263.9412467646666
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 321.2769089381971
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 287.95959208958226
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 120.90976614975652
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 171.7665512553609
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 131.2917933257513
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 100.8896816189698
|
|
}
|
|
]
|
|
} |