480 satır
12 KiB
JSON
480 satır
12 KiB
JSON
{
|
|
"runs": [
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 383.3285005130725
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 169.75141153501525
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 168.9724830093454
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 430.8537270233502
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 77.93077982357597
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 246.02994547299596
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 227.90361622402403
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 200.1426829468909
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 132.2599488751683
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 596.5891209659404
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 294.9472545848014
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 348.0799308087054
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 593.7843034224891
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 112.4781801162827
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": null,
|
|
"name_params_b": null,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 346.0061963818796
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 320.69249844623016
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 274.7000183491961
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "Triton",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 167.00232766189475
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 452.7925628873698
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 292.4074663029466
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 190.86242019407229
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 440.75738846836555
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 76.44313314138553
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 229.9835374194385
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 203.38751203489863
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP1",
|
|
"gpu_config": "single",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 1,
|
|
"tps_mean": 135.3839809398758
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 8.0,
|
|
"name_params_b": 8.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 649.02791593759
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"model_clean": "google/gemma-3-12b-it",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 12.0,
|
|
"name_params_b": 12.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 403.09652364564084
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "AWQ",
|
|
"params_b": 14.0,
|
|
"name_params_b": 14.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 371.4058491591393
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"model_clean": "openai/gpt-oss-20b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 20.0,
|
|
"name_params_b": 20.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 597.5787987620997
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"model_clean": "openai/gpt-oss-120b",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "BF16",
|
|
"params_b": 120.0,
|
|
"name_params_b": 120.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 111.8113988472388
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 315.906032423287
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 30.0,
|
|
"name_params_b": 30.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 292.0384117325289
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"env": "TP2",
|
|
"gpu_config": "dual",
|
|
"quant": "GPTQ",
|
|
"params_b": 80.0,
|
|
"name_params_b": 80.0,
|
|
"backend": "ROCm",
|
|
"error": false,
|
|
"test": "Throughput",
|
|
"tp": 2,
|
|
"tps_mean": 165.5348293928834
|
|
}
|
|
]
|
|
} |