160 строки
4.1 KiB
JSON
160 строки
4.1 KiB
JSON
{
|
|
"runs": [
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 112.69232830266365
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 278.99494393048457
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 162.71078485804028
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 112.62418795067208
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 313.85817605876395
|
|
},
|
|
{
|
|
"model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
|
|
"model_clean": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 271.7264154071495
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 109.73523843987172
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 118.62544339374007
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 320.4458308584372
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 275.34859975563967
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 318.9683005103833
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tps_mean": 114.91339037290285
|
|
}
|
|
]
|
|
} |