Files
amd-strix-halo-vllm-toolboxes/docs/results.json
T

480 строки
12 KiB
JSON

{
"runs": [
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 383.3285005130725
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 169.75141153501525
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 168.9724830093454
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 430.8537270233502
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 77.93077982357597
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 246.02994547299596
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 227.90361622402403
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 200.1426829468909
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 132.2599488751683
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 596.5891209659404
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 294.9472545848014
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 348.0799308087054
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 593.7843034224891
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 112.4781801162827
},
{
"model": "zai-org/GLM-4.7-Flash",
"model_clean": "zai-org/GLM-4.7-Flash",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": null,
"name_params_b": null,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 346.0061963818796
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 320.69249844623016
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 274.7000183491961
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "Triton",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 167.00232766189475
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 452.7925628873698
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 292.4074663029466
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP1",
"gpu_config": "single",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 190.86242019407229
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 440.75738846836555
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP1",
"gpu_config": "single",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 76.44313314138553
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 229.9835374194385
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 203.38751203489863
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP1",
"gpu_config": "single",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 1,
"tps_mean": 135.3839809398758
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 8.0,
"name_params_b": 8.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 649.02791593759
},
{
"model": "google/gemma-3-12b-it",
"model_clean": "google/gemma-3-12b-it",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 12.0,
"name_params_b": 12.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 403.09652364564084
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"model_clean": "Qwen/Qwen3-14B-AWQ",
"env": "TP2",
"gpu_config": "dual",
"quant": "AWQ",
"params_b": 14.0,
"name_params_b": 14.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 371.4058491591393
},
{
"model": "openai/gpt-oss-20b",
"model_clean": "openai/gpt-oss-20b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 20.0,
"name_params_b": 20.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 597.5787987620997
},
{
"model": "openai/gpt-oss-120b",
"model_clean": "openai/gpt-oss-120b",
"env": "TP2",
"gpu_config": "dual",
"quant": "BF16",
"params_b": 120.0,
"name_params_b": 120.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 111.8113988472388
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 315.906032423287
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 30.0,
"name_params_b": 30.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 292.0384117325289
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"env": "TP2",
"gpu_config": "dual",
"quant": "GPTQ",
"params_b": 80.0,
"name_params_b": 80.0,
"backend": "ROCm",
"error": false,
"test": "Throughput",
"tp": 2,
"tps_mean": 165.5348293928834
}
]
}