{ "runs": [ { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP1", "gpu_config": "single", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 112.69232830266365 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 278.99494393048457 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 162.71078485804028 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 80.0, "name_params_b": 80.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 112.62418795067208 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 313.85817605876395 }, { "model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit", "model_clean": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit", "env": "TP1", "gpu_config": "single", "quant": "GPTQ", "params_b": 30.0, "name_params_b": 30.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 271.7264154071495 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "Triton", "error": false, "test": "Throughput", "tps_mean": 109.73523843987172 }, { "model": "Qwen/Qwen3-14B-AWQ", "model_clean": "Qwen/Qwen3-14B-AWQ", "env": "TP1", "gpu_config": "single", "quant": "AWQ", "params_b": 14.0, "name_params_b": 14.0, "backend": "ROCm", "error": false, "test": "Throughput", "tps_mean": 118.62544339374007 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 8.0, "name_params_b": 8.0, "backend": "ROCm", "error": false, "test": "Throughput", "tps_mean": 320.4458308584372 }, { "model": "google/gemma-3-12b-it", "model_clean": "google/gemma-3-12b-it", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 12.0, "name_params_b": 12.0, "backend": "ROCm", "error": false, "test": "Throughput", "tps_mean": 275.34859975563967 }, { "model": "openai/gpt-oss-20b", "model_clean": "openai/gpt-oss-20b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 20.0, "name_params_b": 20.0, "backend": "ROCm", "error": false, "test": "Throughput", "tps_mean": 318.9683005103833 }, { "model": "openai/gpt-oss-120b", "model_clean": "openai/gpt-oss-120b", "env": "TP1", "gpu_config": "single", "quant": "BF16", "params_b": 120.0, "name_params_b": 120.0, "backend": "ROCm", "error": false, "test": "Throughput", "tps_mean": 114.91339037290285 } ] }