Files
amd-strix-halo-vllm-toolboxes/docs/results.json
T
2025-12-20 11:49:03 +00:00

160 wiersze
4.1 KiB
JSON

{
"runs": [
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 112.69232830266365
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 278.99494393048457
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 162.71078485804028
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 112.62418795067208
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 313.85817605876395
},
{
"model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
"model_clean": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 271.7264154071495
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tps_mean": 109.73523843987172
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tps_mean": 118.62544339374007
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tps_mean": 320.4458308584372
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tps_mean": 275.34859975563967
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tps_mean": 318.9683005103833
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tps_mean": 114.91339037290285
}
]
}