Files
amd-strix-halo-vllm-toolboxes/docs/results.json
T

480 строки
12 KiB
JSON

{
"runs": [
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 238.92735772921657
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 138.28298716107304
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 83.68710295019198
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 185.3668705592303
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 70.98811455236003
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 133.7890668441555
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 117.75790760733192
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 86.97761646092924
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 71.52372211759099
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 385.96636603292677
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 244.97127724647316
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 328.49937859629955
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 276.48063742763867
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 117.11822522607781
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 200.2551557455423
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 170.28562676904787
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 130.40765123003763
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 98.44491320703105
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 241.1478963701438
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 180.0947121704009
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 86.05445772580717
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 185.01465145154089
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 67.92779311425312
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 121.19378360261106
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 89.76589360109976
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 70.75103526522766
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 381.7346838742502
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 263.9412467646666
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 321.2769089381971
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 287.95959208958226
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 120.90976614975652
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 171.7665512553609
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 131.2917933257513
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 100.8896816189698
}
]
}