diff --git a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json index 2be46b6..39c9888 100644 --- a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 229.17851571500069, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.4363410753753066, - "tokens_per_second": 328.49937859629955 + "elapsed_time": 421.75657659699937, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.4742071875054738, + "tokens_per_second": 348.0799308087054 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json index be3df95..964d4fc 100644 --- a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json +++ b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 899.6009820629988, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.11116039443473733, - "tokens_per_second": 83.68710295019198 + "elapsed_time": 868.8101008250001, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.2301999019234296, + "tokens_per_second": 168.9724830093454 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json index f267aff..dcc2f2e 100644 --- a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 442.1101265470061, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.2261879880043141, - "tokens_per_second": 170.28562676904787 + "elapsed_time": 457.7749735690013, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.4368958801760569, + "tokens_per_second": 320.69249844623016 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json index 70d2836..f96f2c5 100644 --- a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 639.3201232059982, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.15641616206061223, - "tokens_per_second": 117.75790760733192 + "elapsed_time": 644.1538858940003, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.3104848148551126, + "tokens_per_second": 227.90361622402403 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json index 24ead88..f371702 100644 --- a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 577.3050836349939, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.1732186374842766, - "tokens_per_second": 130.40765123003763 + "elapsed_time": 534.4193308840004, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.3742379596733028, + "tokens_per_second": 274.7000183491961 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json index 667e938..3036af0 100644 --- a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 865.5675225800005, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.115531136960788, - "tokens_per_second": 86.97761646092924 + "elapsed_time": 733.5017090729998, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.2726646680247824, + "tokens_per_second": 200.1426829468909 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json index 9360b46..832db98 100644 --- a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 764.7424081899953, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.13076298493329488, - "tokens_per_second": 98.44491320703105 + "elapsed_time": 879.0596038709991, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.22751585799106944, + "tokens_per_second": 167.00232766189475 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json index 22eb3c4..d0f0fcd 100644 --- a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json +++ b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1052.5878375879984, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.09500394782173208, - "tokens_per_second": 71.52372211759099 + "elapsed_time": 1109.9732099440007, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.18018452896722634, + "tokens_per_second": 132.2599488751683 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json b/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json index 7ada1ea..0e851a2 100644 --- a/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 310.4935437940003, - "num_requests": 100, - "total_num_tokens": 76062, - "requests_per_second": 0.3220678883627477, - "tokens_per_second": 244.97127724647316 + "elapsed_time": 504.69023761399876, + "num_requests": 200, + "total_num_tokens": 148857, + "requests_per_second": 0.39628268013570256, + "tokens_per_second": 294.9472545848014 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json b/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json index 1cd45b9..9a618f6 100644 --- a/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json +++ b/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 550.0459713920009, - "num_requests": 100, - "total_num_tokens": 76062, - "requests_per_second": 0.18180298593393945, - "tokens_per_second": 138.28298716107304 + "elapsed_time": 876.911706677, + "num_requests": 200, + "total_num_tokens": 148857, + "requests_per_second": 0.22807313265081958, + "tokens_per_second": 169.75141153501525 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json index fa194ba..b3657d4 100644 --- a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 193.03236384499905, - "num_requests": 100, - "total_num_tokens": 74504, - "requests_per_second": 0.5180478444552329, - "tokens_per_second": 385.96636603292677 + "elapsed_time": 244.51837097500174, + "num_requests": 200, + "total_num_tokens": 145877, + "requests_per_second": 0.8179344529513773, + "tokens_per_second": 596.5891209659404 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json index 89251dc..c439d54 100644 --- a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json +++ b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 311.826995067001, - "num_requests": 100, - "total_num_tokens": 74504, - "requests_per_second": 0.3206906444341466, - "tokens_per_second": 238.92735772921657 + "elapsed_time": 380.55349342600005, + "num_requests": 200, + "total_num_tokens": 145877, + "requests_per_second": 0.5255502930730307, + "tokens_per_second": 383.3285005130725 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json index 4fb2c8a..5e00991 100644 --- a/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 639.9174838529943, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.15627014814143225, - "tokens_per_second": 117.11822522607781 + "elapsed_time": 1307.2402118169994, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.15299406963775225, + "tokens_per_second": 112.4781801162827 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json index 868843d..82e552b 100644 --- a/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json +++ b/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1055.754198749999, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.09471901709545542, - "tokens_per_second": 70.98811455236003 + "elapsed_time": 1886.751298176, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.10600231211890418, + "tokens_per_second": 77.93077982357597 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json index 4d612e7..4ae2cb0 100644 --- a/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 271.0714236530039, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.36890646255655896, - "tokens_per_second": 276.48063742763867 + "elapsed_time": 247.62527259899798, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.8076720033495051, + "tokens_per_second": 593.7843034224891 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json index 39de487..2220af1 100644 --- a/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json +++ b/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 404.31172934999995, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.24733390782594175, - "tokens_per_second": 185.3668705592303 + "elapsed_time": 341.2666312900001, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.5860520240258851, + "tokens_per_second": 430.8537270233502 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json index d4dab53..4ca4bb0 100644 --- a/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 374.65702054300345, - "num_requests": 100, - "total_num_tokens": 75027, - "requests_per_second": 0.2669107864442698, - "tokens_per_second": 200.2551557455423 + "elapsed_time": 422.7612150579989, + "num_requests": 200, + "total_num_tokens": 146278, + "requests_per_second": 0.47308029420949094, + "tokens_per_second": 346.0061963818796 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json index 837981e..de26852 100644 --- a/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json +++ b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 560.7857336160014, - "num_requests": 100, - "total_num_tokens": 75027, - "requests_per_second": 0.17832122681721982, - "tokens_per_second": 133.7890668441555 + "elapsed_time": 594.5536415039987, + "num_requests": 200, + "total_num_tokens": 146278, + "requests_per_second": 0.33638680522429343, + "tokens_per_second": 246.02994547299596 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json index 11dd77c..1a4060e 100644 --- a/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 234.33056626700272, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.42674757114723666, - "tokens_per_second": 321.2769089381971 + "elapsed_time": 395.26841144900027, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.5059852854591319, + "tokens_per_second": 371.4058491591393 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json index d967516..84a4499 100644 --- a/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 874.8529941339984, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.11430491827828541, - "tokens_per_second": 86.05445772580717 + "elapsed_time": 769.1666062429999, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.260021688898978, + "tokens_per_second": 190.86242019407229 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json index 048c4b9..58e09ea 100644 --- a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 438.29837328500435, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.2281550790401287, - "tokens_per_second": 171.7665512553609 + "elapsed_time": 464.71097393700256, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.43037503139986644, + "tokens_per_second": 315.906032423287 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json index ec3251f..ca60feb 100644 --- a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 621.1952276929987, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.16097998751758127, - "tokens_per_second": 121.19378360261106 + "elapsed_time": 638.3282979609994, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.31331839844615444, + "tokens_per_second": 229.9835374194385 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json index b000cae..7b2b31b 100644 --- a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 573.4174093670008, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.17439303091685104, - "tokens_per_second": 131.2917933257513 + "elapsed_time": 502.6907218439992, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.3978589444944367, + "tokens_per_second": 292.0384117325289 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json index c8da1d6..d048712 100644 --- a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 838.6815635629973, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.11923476602390883, - "tokens_per_second": 89.76589360109976 + "elapsed_time": 721.7994779089986, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.2770852655357769, + "tokens_per_second": 203.38751203489863 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json index 4186c55..89f11e0 100644 --- a/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 746.2110970310023, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.13401033621434522, - "tokens_per_second": 100.8896816189698 + "elapsed_time": 886.8526372269989, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.2255166096425645, + "tokens_per_second": 165.5348293928834 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json index 097b399..f5d2546 100644 --- a/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1064.0833694909998, - "num_requests": 100, - "total_num_tokens": 75285, - "requests_per_second": 0.09397759881148657, - "tokens_per_second": 70.75103526522766 + "elapsed_time": 1084.3601952080007, + "num_requests": 200, + "total_num_tokens": 146805, + "requests_per_second": 0.18444055848217136, + "tokens_per_second": 135.3839809398758 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json index cee3264..0e86f1e 100644 --- a/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 288.17777036500047, - "num_requests": 100, - "total_num_tokens": 76062, - "requests_per_second": 0.3470080286669645, - "tokens_per_second": 263.9412467646666 + "elapsed_time": 369.2837602610016, + "num_requests": 200, + "total_num_tokens": 148857, + "requests_per_second": 0.5415889392445647, + "tokens_per_second": 403.09652364564084 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json index 7e88a40..e96b629 100644 --- a/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 422.3444380089968, - "num_requests": 100, - "total_num_tokens": 76062, - "requests_per_second": 0.23677356915463818, - "tokens_per_second": 180.0947121704009 + "elapsed_time": 509.0738683320001, + "num_requests": 200, + "total_num_tokens": 148857, + "requests_per_second": 0.39287029337276264, + "tokens_per_second": 292.4074663029466 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json index 435fa83..efbb6ad 100644 --- a/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 195.17220506099693, - "num_requests": 100, - "total_num_tokens": 74504, - "requests_per_second": 0.5123680391311207, - "tokens_per_second": 381.7346838742502 + "elapsed_time": 224.76228898300178, + "num_requests": 200, + "total_num_tokens": 145877, + "requests_per_second": 0.8898289873490544, + "tokens_per_second": 649.02791593759 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json index 7efa523..4a572c5 100644 --- a/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 308.955628978998, - "num_requests": 100, - "total_num_tokens": 74504, - "requests_per_second": 0.32367107319089417, - "tokens_per_second": 241.1478963701438 + "elapsed_time": 322.171811016, + "num_requests": 200, + "total_num_tokens": 145877, + "requests_per_second": 0.620786776376495, + "tokens_per_second": 452.7925628873698 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json index ffd2b2b..0041800 100644 --- a/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 619.8506736600029, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.16132917854155862, - "tokens_per_second": 120.90976614975652 + "elapsed_time": 1315.035868578001, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.15208710635115047, + "tokens_per_second": 111.8113988472388 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json index df78dc3..e488eec 100644 --- a/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1103.3186353329984, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.09063564848591402, - "tokens_per_second": 67.92779311425312 + "elapsed_time": 1923.4690410719995, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.10397879858182421, + "tokens_per_second": 76.44313314138553 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json index 4c0bd36..ef3781e 100644 --- a/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 260.26568330699956, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.38422276317559617, - "tokens_per_second": 287.95959208958226 + "elapsed_time": 246.0529060009976, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.8128333180474167, + "tokens_per_second": 597.5787987620997 } \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json index 5e1de5c..2f47f62 100644 --- a/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 405.08143226500033, - "num_requests": 100, - "total_num_tokens": 74946, - "requests_per_second": 0.24686394397504988, - "tokens_per_second": 185.01465145154089 + "elapsed_time": 333.59849170300004, + "num_requests": 200, + "total_num_tokens": 147036, + "requests_per_second": 0.5995230943012126, + "tokens_per_second": 440.75738846836555 } \ No newline at end of file diff --git a/docs/results.json b/docs/results.json index 876284c..e457025 100644 --- a/docs/results.json +++ b/docs/results.json @@ -12,7 +12,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 238.92735772921657 + "tps_mean": 383.3285005130725 }, { "model": "google/gemma-3-12b-it", @@ -26,7 +26,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 138.28298716107304 + "tps_mean": 169.75141153501525 }, { "model": "Qwen/Qwen3-14B-AWQ", @@ -40,7 +40,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 83.68710295019198 + "tps_mean": 168.9724830093454 }, { "model": "openai/gpt-oss-20b", @@ -54,7 +54,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 185.3668705592303 + "tps_mean": 430.8537270233502 }, { "model": "openai/gpt-oss-120b", @@ -68,7 +68,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 70.98811455236003 + "tps_mean": 77.93077982357597 }, { "model": "zai-org/GLM-4.7-Flash", @@ -82,7 +82,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 133.7890668441555 + "tps_mean": 246.02994547299596 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", @@ -96,7 +96,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 117.75790760733192 + "tps_mean": 227.90361622402403 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", @@ -110,7 +110,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 86.97761646092924 + "tps_mean": 200.1426829468909 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", @@ -124,7 +124,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 71.52372211759099 + "tps_mean": 132.2599488751683 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", @@ -138,7 +138,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 385.96636603292677 + "tps_mean": 596.5891209659404 }, { "model": "google/gemma-3-12b-it", @@ -152,7 +152,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 244.97127724647316 + "tps_mean": 294.9472545848014 }, { "model": "Qwen/Qwen3-14B-AWQ", @@ -166,7 +166,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 328.49937859629955 + "tps_mean": 348.0799308087054 }, { "model": "openai/gpt-oss-20b", @@ -180,7 +180,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 276.48063742763867 + "tps_mean": 593.7843034224891 }, { "model": "openai/gpt-oss-120b", @@ -194,7 +194,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 117.11822522607781 + "tps_mean": 112.4781801162827 }, { "model": "zai-org/GLM-4.7-Flash", @@ -208,7 +208,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 200.2551557455423 + "tps_mean": 346.0061963818796 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", @@ -222,7 +222,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 170.28562676904787 + "tps_mean": 320.69249844623016 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", @@ -236,7 +236,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 130.40765123003763 + "tps_mean": 274.7000183491961 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", @@ -250,7 +250,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 98.44491320703105 + "tps_mean": 167.00232766189475 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", @@ -264,7 +264,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 241.1478963701438 + "tps_mean": 452.7925628873698 }, { "model": "google/gemma-3-12b-it", @@ -278,7 +278,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 180.0947121704009 + "tps_mean": 292.4074663029466 }, { "model": "Qwen/Qwen3-14B-AWQ", @@ -292,7 +292,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 86.05445772580717 + "tps_mean": 190.86242019407229 }, { "model": "openai/gpt-oss-20b", @@ -306,7 +306,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 185.01465145154089 + "tps_mean": 440.75738846836555 }, { "model": "openai/gpt-oss-120b", @@ -320,7 +320,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 67.92779311425312 + "tps_mean": 76.44313314138553 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", @@ -334,7 +334,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 121.19378360261106 + "tps_mean": 229.9835374194385 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", @@ -348,7 +348,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 89.76589360109976 + "tps_mean": 203.38751203489863 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", @@ -362,7 +362,7 @@ "error": false, "test": "Throughput", "tp": 1, - "tps_mean": 70.75103526522766 + "tps_mean": 135.3839809398758 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", @@ -376,7 +376,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 381.7346838742502 + "tps_mean": 649.02791593759 }, { "model": "google/gemma-3-12b-it", @@ -390,7 +390,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 263.9412467646666 + "tps_mean": 403.09652364564084 }, { "model": "Qwen/Qwen3-14B-AWQ", @@ -404,7 +404,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 321.2769089381971 + "tps_mean": 371.4058491591393 }, { "model": "openai/gpt-oss-20b", @@ -418,7 +418,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 287.95959208958226 + "tps_mean": 597.5787987620997 }, { "model": "openai/gpt-oss-120b", @@ -432,7 +432,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 120.90976614975652 + "tps_mean": 111.8113988472388 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", @@ -446,7 +446,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 171.7665512553609 + "tps_mean": 315.906032423287 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", @@ -460,7 +460,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 131.2917933257513 + "tps_mean": 292.0384117325289 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", @@ -474,7 +474,7 @@ "error": false, "test": "Throughput", "tp": 2, - "tps_mean": 100.8896816189698 + "tps_mean": 165.5348293928834 } ] } \ No newline at end of file