diff --git a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json new file mode 100644 index 0000000..2be46b6 --- /dev/null +++ b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 229.17851571500069, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.4363410753753066, + "tokens_per_second": 328.49937859629955 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json index d63771d..be3df95 100644 --- a/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json +++ b/benchmarks/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1302.7062463890015, - "num_requests": 200, - "total_num_tokens": 146805, - "requests_per_second": 0.15352655332265747, - "tokens_per_second": 112.69232830266365 + "elapsed_time": 899.6009820629988, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.11116039443473733, + "tokens_per_second": 83.68710295019198 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json new file mode 100644 index 0000000..f267aff --- /dev/null +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 442.1101265470061, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.2261879880043141, + "tokens_per_second": 170.28562676904787 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json new file mode 100644 index 0000000..70d2836 --- /dev/null +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 639.3201232059982, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.15641616206061223, + "tokens_per_second": 117.75790760733192 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json new file mode 100644 index 0000000..24ead88 --- /dev/null +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 577.3050836349939, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.1732186374842766, + "tokens_per_second": 130.40765123003763 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json new file mode 100644 index 0000000..667e938 --- /dev/null +++ b/benchmarks/benchmark_results/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 865.5675225800005, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.115531136960788, + "tokens_per_second": 86.97761646092924 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/cpatonn_Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit_tp1_throughput.json b/benchmarks/benchmark_results/cpatonn_Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit_tp1_throughput.json deleted file mode 100644 index ecc0309..0000000 --- a/benchmarks/benchmark_results/cpatonn_Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit_tp1_throughput.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "elapsed_time": 540.2676798280002, - "num_requests": 200, - "total_num_tokens": 146805, - "requests_per_second": 0.37018686748700586, - "tokens_per_second": 271.7264154071495 -} \ No newline at end of file diff --git a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json new file mode 100644 index 0000000..9360b46 --- /dev/null +++ b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 764.7424081899953, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.13076298493329488, + "tokens_per_second": 98.44491320703105 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json index 2e5e0ea..22eb3c4 100644 --- a/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json +++ b/benchmarks/benchmark_results/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1303.4944151099999, - "num_requests": 200, - "total_num_tokens": 146805, - "requests_per_second": 0.15343372221746138, - "tokens_per_second": 112.62418795067208 + "elapsed_time": 1052.5878375879984, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.09500394782173208, + "tokens_per_second": 71.52372211759099 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json b/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json new file mode 100644 index 0000000..7ada1ea --- /dev/null +++ b/benchmarks/benchmark_results/google_gemma-3-12b-it_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 310.4935437940003, + "num_requests": 100, + "total_num_tokens": 76062, + "requests_per_second": 0.3220678883627477, + "tokens_per_second": 244.97127724647316 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json b/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json index db3260c..1cd45b9 100644 --- a/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json +++ b/benchmarks/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 914.8563823220001, - "num_requests": 200, - "total_num_tokens": 148857, - "requests_per_second": 0.21861354838273012, - "tokens_per_second": 162.71078485804028 + "elapsed_time": 550.0459713920009, + "num_requests": 100, + "total_num_tokens": 76062, + "requests_per_second": 0.18180298593393945, + "tokens_per_second": 138.28298716107304 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json new file mode 100644 index 0000000..fa194ba --- /dev/null +++ b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 193.03236384499905, + "num_requests": 100, + "total_num_tokens": 74504, + "requests_per_second": 0.5180478444552329, + "tokens_per_second": 385.96636603292677 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json index 36031a5..89251dc 100644 --- a/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json +++ b/benchmarks/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 522.8661062630126, - "num_requests": 200, - "total_num_tokens": 145877, - "requests_per_second": 0.38250710383471637, - "tokens_per_second": 278.99494393048457 + "elapsed_time": 311.826995067001, + "num_requests": 100, + "total_num_tokens": 74504, + "requests_per_second": 0.3206906444341466, + "tokens_per_second": 238.92735772921657 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json new file mode 100644 index 0000000..4fb2c8a --- /dev/null +++ b/benchmarks/benchmark_results/openai_gpt-oss-120b_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 639.9174838529943, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.15627014814143225, + "tokens_per_second": 117.11822522607781 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json index b2ee048..868843d 100644 --- a/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json +++ b/benchmarks/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 1339.915984058, - "num_requests": 200, - "total_num_tokens": 147036, - "requests_per_second": 0.14926308990977954, - "tokens_per_second": 109.73523843987172 + "elapsed_time": 1055.754198749999, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.09471901709545542, + "tokens_per_second": 70.98811455236003 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json new file mode 100644 index 0000000..4d612e7 --- /dev/null +++ b/benchmarks/benchmark_results/openai_gpt-oss-20b_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 271.0714236530039, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.36890646255655896, + "tokens_per_second": 276.48063742763867 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json b/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json index ce8eecc..39de487 100644 --- a/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json +++ b/benchmarks/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json @@ -1,7 +1,7 @@ { - "elapsed_time": 468.4791132300161, - "num_requests": 200, - "total_num_tokens": 147036, - "requests_per_second": 0.42691337639593563, - "tokens_per_second": 313.85817605876395 + "elapsed_time": 404.31172934999995, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.24733390782594175, + "tokens_per_second": 185.3668705592303 } \ No newline at end of file diff --git a/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json new file mode 100644 index 0000000..d4dab53 --- /dev/null +++ b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 374.65702054300345, + "num_requests": 100, + "total_num_tokens": 75027, + "requests_per_second": 0.2669107864442698, + "tokens_per_second": 200.2551557455423 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json new file mode 100644 index 0000000..837981e --- /dev/null +++ b/benchmarks/benchmark_results/zai-org_GLM-4.7-Flash_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 560.7857336160014, + "num_requests": 100, + "total_num_tokens": 75027, + "requests_per_second": 0.17832122681721982, + "tokens_per_second": 133.7890668441555 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json new file mode 100644 index 0000000..11dd77c --- /dev/null +++ b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 234.33056626700272, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.42674757114723666, + "tokens_per_second": 321.2769089381971 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json new file mode 100644 index 0000000..d967516 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/Qwen_Qwen3-14B-AWQ_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 874.8529941339984, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.11430491827828541, + "tokens_per_second": 86.05445772580717 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json new file mode 100644 index 0000000..048c4b9 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 438.29837328500435, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.2281550790401287, + "tokens_per_second": 171.7665512553609 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json new file mode 100644 index 0000000..ec3251f --- /dev/null +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-4bit_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 621.1952276929987, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.16097998751758127, + "tokens_per_second": 121.19378360261106 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json new file mode 100644 index 0000000..b000cae --- /dev/null +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 573.4174093670008, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.17439303091685104, + "tokens_per_second": 131.2917933257513 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json new file mode 100644 index 0000000..c8da1d6 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/btbtyler09_Qwen3-Coder-30B-A3B-Instruct-gptq-8bit_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 838.6815635629973, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.11923476602390883, + "tokens_per_second": 89.76589360109976 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json new file mode 100644 index 0000000..4186c55 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 746.2110970310023, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.13401033621434522, + "tokens_per_second": 100.8896816189698 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json new file mode 100644 index 0000000..097b399 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/dazipe_Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 1064.0833694909998, + "num_requests": 100, + "total_num_tokens": 75285, + "requests_per_second": 0.09397759881148657, + "tokens_per_second": 70.75103526522766 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json new file mode 100644 index 0000000..cee3264 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 288.17777036500047, + "num_requests": 100, + "total_num_tokens": 76062, + "requests_per_second": 0.3470080286669645, + "tokens_per_second": 263.9412467646666 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json new file mode 100644 index 0000000..7e88a40 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/google_gemma-3-12b-it_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 422.3444380089968, + "num_requests": 100, + "total_num_tokens": 76062, + "requests_per_second": 0.23677356915463818, + "tokens_per_second": 180.0947121704009 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json new file mode 100644 index 0000000..435fa83 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 195.17220506099693, + "num_requests": 100, + "total_num_tokens": 74504, + "requests_per_second": 0.5123680391311207, + "tokens_per_second": 381.7346838742502 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json new file mode 100644 index 0000000..7efa523 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 308.955628978998, + "num_requests": 100, + "total_num_tokens": 74504, + "requests_per_second": 0.32367107319089417, + "tokens_per_second": 241.1478963701438 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json new file mode 100644 index 0000000..ffd2b2b --- /dev/null +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 619.8506736600029, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.16132917854155862, + "tokens_per_second": 120.90976614975652 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json new file mode 100644 index 0000000..df78dc3 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-120b_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 1103.3186353329984, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.09063564848591402, + "tokens_per_second": 67.92779311425312 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json new file mode 100644 index 0000000..4c0bd36 --- /dev/null +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_cluster_tp2_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 260.26568330699956, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.38422276317559617, + "tokens_per_second": 287.95959208958226 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json new file mode 100644 index 0000000..5e1de5c --- /dev/null +++ b/benchmarks/benchmark_results_rocm/openai_gpt-oss-20b_tp1_throughput.json @@ -0,0 +1,7 @@ +{ + "elapsed_time": 405.08143226500033, + "num_requests": 100, + "total_num_tokens": 74946, + "requests_per_second": 0.24686394397504988, + "tokens_per_second": 185.01465145154089 +} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm_attn/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json b/benchmarks/benchmark_results_rocm_attn/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json deleted file mode 100644 index b622487..0000000 --- a/benchmarks/benchmark_results_rocm_attn/benchmark_results/Qwen_Qwen3-14B-AWQ_tp1_throughput.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "elapsed_time": 1237.550695703001, - "num_requests": 200, - "total_num_tokens": 146805, - "requests_per_second": 0.16160954108339642, - "tokens_per_second": 118.62544339374007 -} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm_attn/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json b/benchmarks/benchmark_results_rocm_attn/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json deleted file mode 100644 index 0b96338..0000000 --- a/benchmarks/benchmark_results_rocm_attn/benchmark_results/google_gemma-3-12b-it_tp1_throughput.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "elapsed_time": 540.6128817510034, - "num_requests": 200, - "total_num_tokens": 148857, - "requests_per_second": 0.36995048906754757, - "tokens_per_second": 275.34859975563967 -} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm_attn/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json b/benchmarks/benchmark_results_rocm_attn/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json deleted file mode 100644 index 969237d..0000000 --- a/benchmarks/benchmark_results_rocm_attn/benchmark_results/meta-llama_Meta-Llama-3.1-8B-Instruct_tp1_throughput.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "elapsed_time": 455.23138687500614, - "num_requests": 200, - "total_num_tokens": 145877, - "requests_per_second": 0.43933701797875907, - "tokens_per_second": 320.4458308584372 -} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm_attn/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json b/benchmarks/benchmark_results_rocm_attn/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json deleted file mode 100644 index 6918030..0000000 --- a/benchmarks/benchmark_results_rocm_attn/benchmark_results/openai_gpt-oss-120b_tp1_throughput.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "elapsed_time": 1279.5375675789983, - "num_requests": 200, - "total_num_tokens": 147036, - "requests_per_second": 0.15630646967124087, - "tokens_per_second": 114.91339037290285 -} \ No newline at end of file diff --git a/benchmarks/benchmark_results_rocm_attn/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json b/benchmarks/benchmark_results_rocm_attn/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json deleted file mode 100644 index f16a219..0000000 --- a/benchmarks/benchmark_results_rocm_attn/benchmark_results/openai_gpt-oss-20b_tp1_throughput.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "elapsed_time": 460.97370730798866, - "num_requests": 200, - "total_num_tokens": 147036, - "requests_per_second": 0.43386422442175154, - "tokens_per_second": 318.9683005103833 -} \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index 0563960..ec5195a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -202,6 +202,26 @@ color: var(--primary); } + /* Diff Styling */ + .val-pos { + color: #16a34a; + font-weight: 600; + } + + .val-neg { + color: #dc2626; + font-weight: 600; + } + + .val-neu { + color: #9ca3af; + } + + .col-diff { + background: #f9fafb; + font-size: 0.9rem; + } + /* Modal/Overlay */ #loading { text-align: center; @@ -433,11 +453,40 @@
- - + + + +
+ + +
+ + +
+ Attention +
+ + +
+