feat: Add new benchmark results for various models and configurations, and update documentation UI with filtering for attention and tensor parallelism.
Tento commit je obsažen v:
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 229.17851571500069,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.4363410753753066,
|
||||||
|
"tokens_per_second": 328.49937859629955
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1302.7062463890015,
|
"elapsed_time": 899.6009820629988,
|
||||||
"num_requests": 200,
|
"num_requests": 100,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 75285,
|
||||||
"requests_per_second": 0.15352655332265747,
|
"requests_per_second": 0.11116039443473733,
|
||||||
"tokens_per_second": 112.69232830266365
|
"tokens_per_second": 83.68710295019198
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 442.1101265470061,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.2261879880043141,
|
||||||
|
"tokens_per_second": 170.28562676904787
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 639.3201232059982,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.15641616206061223,
|
||||||
|
"tokens_per_second": 117.75790760733192
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 577.3050836349939,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.1732186374842766,
|
||||||
|
"tokens_per_second": 130.40765123003763
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 865.5675225800005,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.115531136960788,
|
||||||
|
"tokens_per_second": 86.97761646092924
|
||||||
|
}
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"elapsed_time": 540.2676798280002,
|
|
||||||
"num_requests": 200,
|
|
||||||
"total_num_tokens": 146805,
|
|
||||||
"requests_per_second": 0.37018686748700586,
|
|
||||||
"tokens_per_second": 271.7264154071495
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 764.7424081899953,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.13076298493329488,
|
||||||
|
"tokens_per_second": 98.44491320703105
|
||||||
|
}
|
||||||
+5
-5
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1303.4944151099999,
|
"elapsed_time": 1052.5878375879984,
|
||||||
"num_requests": 200,
|
"num_requests": 100,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 75285,
|
||||||
"requests_per_second": 0.15343372221746138,
|
"requests_per_second": 0.09500394782173208,
|
||||||
"tokens_per_second": 112.62418795067208
|
"tokens_per_second": 71.52372211759099
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 310.4935437940003,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 76062,
|
||||||
|
"requests_per_second": 0.3220678883627477,
|
||||||
|
"tokens_per_second": 244.97127724647316
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 914.8563823220001,
|
"elapsed_time": 550.0459713920009,
|
||||||
"num_requests": 200,
|
"num_requests": 100,
|
||||||
"total_num_tokens": 148857,
|
"total_num_tokens": 76062,
|
||||||
"requests_per_second": 0.21861354838273012,
|
"requests_per_second": 0.18180298593393945,
|
||||||
"tokens_per_second": 162.71078485804028
|
"tokens_per_second": 138.28298716107304
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 193.03236384499905,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74504,
|
||||||
|
"requests_per_second": 0.5180478444552329,
|
||||||
|
"tokens_per_second": 385.96636603292677
|
||||||
|
}
|
||||||
+5
-5
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 522.8661062630126,
|
"elapsed_time": 311.826995067001,
|
||||||
"num_requests": 200,
|
"num_requests": 100,
|
||||||
"total_num_tokens": 145877,
|
"total_num_tokens": 74504,
|
||||||
"requests_per_second": 0.38250710383471637,
|
"requests_per_second": 0.3206906444341466,
|
||||||
"tokens_per_second": 278.99494393048457
|
"tokens_per_second": 238.92735772921657
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 639.9174838529943,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74946,
|
||||||
|
"requests_per_second": 0.15627014814143225,
|
||||||
|
"tokens_per_second": 117.11822522607781
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1339.915984058,
|
"elapsed_time": 1055.754198749999,
|
||||||
"num_requests": 200,
|
"num_requests": 100,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 74946,
|
||||||
"requests_per_second": 0.14926308990977954,
|
"requests_per_second": 0.09471901709545542,
|
||||||
"tokens_per_second": 109.73523843987172
|
"tokens_per_second": 70.98811455236003
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 271.0714236530039,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74946,
|
||||||
|
"requests_per_second": 0.36890646255655896,
|
||||||
|
"tokens_per_second": 276.48063742763867
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 468.4791132300161,
|
"elapsed_time": 404.31172934999995,
|
||||||
"num_requests": 200,
|
"num_requests": 100,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 74946,
|
||||||
"requests_per_second": 0.42691337639593563,
|
"requests_per_second": 0.24733390782594175,
|
||||||
"tokens_per_second": 313.85817605876395
|
"tokens_per_second": 185.3668705592303
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 374.65702054300345,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75027,
|
||||||
|
"requests_per_second": 0.2669107864442698,
|
||||||
|
"tokens_per_second": 200.2551557455423
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 560.7857336160014,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75027,
|
||||||
|
"requests_per_second": 0.17832122681721982,
|
||||||
|
"tokens_per_second": 133.7890668441555
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 234.33056626700272,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.42674757114723666,
|
||||||
|
"tokens_per_second": 321.2769089381971
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 874.8529941339984,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.11430491827828541,
|
||||||
|
"tokens_per_second": 86.05445772580717
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 438.29837328500435,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.2281550790401287,
|
||||||
|
"tokens_per_second": 171.7665512553609
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 621.1952276929987,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.16097998751758127,
|
||||||
|
"tokens_per_second": 121.19378360261106
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 573.4174093670008,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.17439303091685104,
|
||||||
|
"tokens_per_second": 131.2917933257513
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 838.6815635629973,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.11923476602390883,
|
||||||
|
"tokens_per_second": 89.76589360109976
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 746.2110970310023,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.13401033621434522,
|
||||||
|
"tokens_per_second": 100.8896816189698
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1064.0833694909998,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 75285,
|
||||||
|
"requests_per_second": 0.09397759881148657,
|
||||||
|
"tokens_per_second": 70.75103526522766
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 288.17777036500047,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 76062,
|
||||||
|
"requests_per_second": 0.3470080286669645,
|
||||||
|
"tokens_per_second": 263.9412467646666
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 422.3444380089968,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 76062,
|
||||||
|
"requests_per_second": 0.23677356915463818,
|
||||||
|
"tokens_per_second": 180.0947121704009
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 195.17220506099693,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74504,
|
||||||
|
"requests_per_second": 0.5123680391311207,
|
||||||
|
"tokens_per_second": 381.7346838742502
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 308.955628978998,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74504,
|
||||||
|
"requests_per_second": 0.32367107319089417,
|
||||||
|
"tokens_per_second": 241.1478963701438
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 619.8506736600029,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74946,
|
||||||
|
"requests_per_second": 0.16132917854155862,
|
||||||
|
"tokens_per_second": 120.90976614975652
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1103.3186353329984,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74946,
|
||||||
|
"requests_per_second": 0.09063564848591402,
|
||||||
|
"tokens_per_second": 67.92779311425312
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 260.26568330699956,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74946,
|
||||||
|
"requests_per_second": 0.38422276317559617,
|
||||||
|
"tokens_per_second": 287.95959208958226
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 405.08143226500033,
|
||||||
|
"num_requests": 100,
|
||||||
|
"total_num_tokens": 74946,
|
||||||
|
"requests_per_second": 0.24686394397504988,
|
||||||
|
"tokens_per_second": 185.01465145154089
|
||||||
|
}
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"elapsed_time": 1237.550695703001,
|
|
||||||
"num_requests": 200,
|
|
||||||
"total_num_tokens": 146805,
|
|
||||||
"requests_per_second": 0.16160954108339642,
|
|
||||||
"tokens_per_second": 118.62544339374007
|
|
||||||
}
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"elapsed_time": 540.6128817510034,
|
|
||||||
"num_requests": 200,
|
|
||||||
"total_num_tokens": 148857,
|
|
||||||
"requests_per_second": 0.36995048906754757,
|
|
||||||
"tokens_per_second": 275.34859975563967
|
|
||||||
}
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"elapsed_time": 455.23138687500614,
|
|
||||||
"num_requests": 200,
|
|
||||||
"total_num_tokens": 145877,
|
|
||||||
"requests_per_second": 0.43933701797875907,
|
|
||||||
"tokens_per_second": 320.4458308584372
|
|
||||||
}
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"elapsed_time": 1279.5375675789983,
|
|
||||||
"num_requests": 200,
|
|
||||||
"total_num_tokens": 147036,
|
|
||||||
"requests_per_second": 0.15630646967124087,
|
|
||||||
"tokens_per_second": 114.91339037290285
|
|
||||||
}
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"elapsed_time": 460.97370730798866,
|
|
||||||
"num_requests": 200,
|
|
||||||
"total_num_tokens": 147036,
|
|
||||||
"requests_per_second": 0.43386422442175154,
|
|
||||||
"tokens_per_second": 318.9683005103833
|
|
||||||
}
|
|
||||||
+255
-238
@@ -202,6 +202,26 @@
|
|||||||
color: var(--primary);
|
color: var(--primary);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Diff Styling */
|
||||||
|
.val-pos {
|
||||||
|
color: #16a34a;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.val-neg {
|
||||||
|
color: #dc2626;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.val-neu {
|
||||||
|
color: #9ca3af;
|
||||||
|
}
|
||||||
|
|
||||||
|
.col-diff {
|
||||||
|
background: #f9fafb;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
/* Modal/Overlay */
|
/* Modal/Overlay */
|
||||||
#loading {
|
#loading {
|
||||||
text-align: center;
|
text-align: center;
|
||||||
@@ -433,11 +453,40 @@
|
|||||||
</header>
|
</header>
|
||||||
|
|
||||||
<div class="controls">
|
<div class="controls">
|
||||||
<input type="text" id="searchInput" class="search" placeholder="Search models (e.g. 'llama', 'fp8')..."
|
<input type="text" id="searchInput" class="search" placeholder="Search models..." autocomplete="off">
|
||||||
autocomplete="off">
|
<select id="quantFilter" style="max-width: 150px;">
|
||||||
<select id="quantFilter">
|
<option value="">All Quants</option>
|
||||||
<option value="">All Quantizations</option>
|
|
||||||
</select>
|
</select>
|
||||||
|
|
||||||
|
<!-- Toggles -->
|
||||||
|
<div
|
||||||
|
style="display: flex; gap: 12px; align-items: center; border-left: 1px solid #e5e7eb; padding-left: 12px;">
|
||||||
|
<label
|
||||||
|
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||||
|
<input type="checkbox" id="toggleTP1" checked> TP1
|
||||||
|
</label>
|
||||||
|
<label
|
||||||
|
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||||
|
<input type="checkbox" id="toggleTP2" checked> TP2
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Attention Group -->
|
||||||
|
<div
|
||||||
|
style="display: flex; align-items: center; gap: 8px; border-left: 1px solid #e5e7eb; padding-left: 12px;">
|
||||||
|
<span
|
||||||
|
style="font-size: 0.8rem; font-weight: 600; text-transform: uppercase; color: #9ca3af; letter-spacing: 0.05em;">Attention</span>
|
||||||
|
<div style="display: flex; gap: 12px;">
|
||||||
|
<label
|
||||||
|
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||||
|
<input type="checkbox" id="toggleTriton" checked> Triton
|
||||||
|
</label>
|
||||||
|
<label
|
||||||
|
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||||
|
<input type="checkbox" id="toggleRocm"> ROCm
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<nav id="tabNav" class="tab-nav">
|
<nav id="tabNav" class="tab-nav">
|
||||||
@@ -469,6 +518,7 @@
|
|||||||
|
|
||||||
<!-- Modal Overlay -->
|
<!-- Modal Overlay -->
|
||||||
<div id="modalOverlay" class="modal-overlay">
|
<div id="modalOverlay" class="modal-overlay">
|
||||||
|
<!-- ... modal content ... -->
|
||||||
<div class="modal">
|
<div class="modal">
|
||||||
<div class="modal-header">
|
<div class="modal-header">
|
||||||
<h3 id="modalTitle">Benchmark Info</h3>
|
<h3 id="modalTitle">Benchmark Info</h3>
|
||||||
@@ -480,116 +530,74 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Script Logic Updates Below -->
|
||||||
<script>
|
<script>
|
||||||
|
// Helper - Defined at top to avoid ReferenceError
|
||||||
|
const $ = id => document.getElementById(id);
|
||||||
|
|
||||||
// State
|
// State
|
||||||
let rawRuns = [];
|
let rawRuns = [];
|
||||||
let tests = [];
|
let tests = [];
|
||||||
let state = {
|
let state = {
|
||||||
search: "",
|
search: "",
|
||||||
quant: "",
|
quant: "",
|
||||||
activeTab: "Throughput"
|
activeTab: "Throughput",
|
||||||
|
showTP1: true,
|
||||||
|
showTP2: true,
|
||||||
|
showTriton: true,
|
||||||
|
showRocm: false
|
||||||
};
|
};
|
||||||
|
|
||||||
// Benchmark Metadata
|
// Metadata
|
||||||
const BENCHMARK_INFO = {
|
const BENCHMARK_INFO = {
|
||||||
"Throughput": {
|
"Throughput": {
|
||||||
short: "Maximum raw compute capacity (Tokens/Sec).",
|
short: "Maximum raw compute capacity (Tokens/Sec).",
|
||||||
desc: "Measures the absolute maximum number of tokens the system can generate per second by fully saturating the GPU compute capability.",
|
desc: "Measures the absolute maximum number of tokens the system can generate per second by fully saturating the GPU compute capability.",
|
||||||
usecase: "Demonstrates the raw horsepower and architectural efficiency of the hardware/model combo under Heavy Load. This is the theoretical speed limit of the system.",
|
usecase: "Demonstrates the raw horsepower and architectural efficiency.",
|
||||||
details: "Command: `vllm bench throughput`\nParams: --num-prompts 100 --output-len 512\nMetric: Tokens per Second (higher is better).",
|
details: `
|
||||||
|
**Test Configuration:**
|
||||||
|
• <b>Dataset:</b> ShareGPT (Random Sample, 100 Prompts)
|
||||||
|
• <b>Output Length:</b> 512 Tokens (Fixed)
|
||||||
|
• <b>Batch Budget:</b> 8192 - 32768 Tokens (Dynamic per model)
|
||||||
|
• <b>GPU Alloc:</b> 90% VRAM per GPU
|
||||||
|
• <b>Pipeline:</b> <code>vllm bench throughput</code> (Offline)
|
||||||
|
• <b>Cluster Config:</b> Ray Distributed (RoCE v2 RDMA, TP=2)
|
||||||
|
|
||||||
|
<b>Metric:</b> Tokens per Second (higher is better).`,
|
||||||
unit: " tok/s"
|
unit: " tok/s"
|
||||||
},
|
},
|
||||||
"TTFT": {
|
"TTFT": {
|
||||||
short: "Time To First Token (Response Latency).",
|
short: "Time To First Token (Response Latency).",
|
||||||
desc: "The 'Time To First Token' is the delay between sending a request and seeing the first character of the response.",
|
desc: "Delay between sending a request and seeing the first character.",
|
||||||
usecase: "<b>Responsiveness</b>. Low TTFT makes the AI feel 'snappy' and instant. High TTFT feels like the AI is ignoring you or lagging. We measure at different QPS loads to ensure the server doesn't 'choke' when busy.",
|
usecase: "Responsiveness. Low TTFT makes the AI feel 'snappy'.",
|
||||||
context: "<b>QPS = Queries Per Second (Traffic Load)</b>.<br>• QPS 1.0 = 1 user sending a request every second.<br>• QPS 4.0 = 4 users sending requests every second (Simulates High Load).",
|
details: "Command: `vllm bench serve`\nMetric: Milliseconds (lower is better).",
|
||||||
details: "Command: `vllm bench serve`\nParams: --random-input-len 1024 --random-output-len 512\nMetric: Milliseconds (lower is better).",
|
|
||||||
unit: " ms"
|
unit: " ms"
|
||||||
},
|
},
|
||||||
"TPOT": {
|
"TPOT": {
|
||||||
short: "Time Per Output Token (Streaming Speed).",
|
short: "Time Per Output Token (Streaming Speed).",
|
||||||
desc: "The 'Time Per Output Token' measures how fast the text generates *after* the first token appears.",
|
desc: "Measures how fast the text generates *after* the first token.",
|
||||||
usecase: "<b>1. Fluidity</b>: Industry standard is <50ms (>20 tok/s) for a 'fluid' feeling. Slower feels laggy.<br><b>2. Bottlenecks</b>: We test at <b>QPS 4.0</b> to find memory bandwidth bottlenecks where the GPU can't keep up with multiple users.",
|
usecase: "Fluidity. Industry standard is <50ms (>20 tok/s).",
|
||||||
context: "<b>QPS = Queries Per Second (Traffic Load)</b>.<br>• QPS 1.0 = Light Load (Ideal conditions)<br>• QPS 4.0 = Heavy Load (Stress Test)",
|
details: "Command: `vllm bench serve`\nMetric: Milliseconds (lower is better).",
|
||||||
details: "Command: `vllm bench serve`\nParams: --random-input-len 1024 --random-output-len 512\nMetric: Milliseconds (lower is better).",
|
|
||||||
unit: " ms"
|
unit: " ms"
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const $ = id => document.getElementById(id);
|
|
||||||
|
|
||||||
async function init() {
|
async function init() {
|
||||||
try {
|
try {
|
||||||
const res = await fetch('results.json');
|
const res = await fetch('results.json');
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
rawRuns = data.runs || [];
|
rawRuns = data.runs || [];
|
||||||
processData();
|
processData();
|
||||||
|
setupControls();
|
||||||
render();
|
render();
|
||||||
populateFilters();
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
$('loading').textContent = "Error loading results.json: " + e.message;
|
$('loading').textContent = "Error loading results.json: " + e.message;
|
||||||
console.error(e);
|
console.error(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function processData() {
|
function setupControls() {
|
||||||
const testGroups = {};
|
// Filters
|
||||||
|
|
||||||
rawRuns.forEach(run => {
|
|
||||||
if (!run.test) return;
|
|
||||||
if (!testGroups[run.test]) {
|
|
||||||
testGroups[run.test] = {
|
|
||||||
name: run.test,
|
|
||||||
models: {}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize model name
|
|
||||||
const modelName = run.model_clean || run.model;
|
|
||||||
|
|
||||||
if (!testGroups[run.test].models[modelName]) {
|
|
||||||
testGroups[run.test].models[modelName] = {
|
|
||||||
name: modelName,
|
|
||||||
quant: run.quant,
|
|
||||||
params: run.params_b || run.name_params_b,
|
|
||||||
triton: null,
|
|
||||||
rocm: null
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
const m = testGroups[run.test].models[modelName];
|
|
||||||
|
|
||||||
// Assign Backend value
|
|
||||||
if (run.backend === "Triton") m.triton = run.tps_mean;
|
|
||||||
if (run.backend === "ROCm") m.rocm = run.tps_mean;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Convert map to array for sorting
|
|
||||||
tests = Object.values(testGroups).map(group => {
|
|
||||||
return {
|
|
||||||
name: group.name,
|
|
||||||
models: Object.values(group.models)
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
// Sort tests: Throughput first, then others alphabetically
|
|
||||||
tests.sort((a, b) => {
|
|
||||||
const aTp = a.name.startsWith("Throughput");
|
|
||||||
const bTp = b.name.startsWith("Throughput");
|
|
||||||
|
|
||||||
if (aTp && !bTp) return -1;
|
|
||||||
if (!aTp && bTp) return 1;
|
|
||||||
return a.name.localeCompare(b.name);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Set default tab if not set
|
|
||||||
if (!state.activeTab && tests.length > 0) {
|
|
||||||
state.activeTab = tests[0].name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function populateFilters() {
|
|
||||||
const quants = new Set(rawRuns.map(r => r.quant).filter(Boolean));
|
const quants = new Set(rawRuns.map(r => r.quant).filter(Boolean));
|
||||||
const sel = $('quantFilter');
|
const sel = $('quantFilter');
|
||||||
[...quants].sort().forEach(q => {
|
[...quants].sort().forEach(q => {
|
||||||
@@ -599,22 +607,74 @@
|
|||||||
sel.appendChild(opt);
|
sel.appendChild(opt);
|
||||||
});
|
});
|
||||||
|
|
||||||
$('searchInput').addEventListener('input', e => {
|
$('searchInput').addEventListener('input', e => { state.search = e.target.value.toLowerCase(); render(); });
|
||||||
state.search = e.target.value.toLowerCase();
|
sel.addEventListener('change', e => { state.quant = e.target.value; render(); });
|
||||||
render();
|
|
||||||
});
|
|
||||||
|
|
||||||
sel.addEventListener('change', e => {
|
// Toggles
|
||||||
state.quant = e.target.value;
|
$('toggleTP1').addEventListener('change', e => { state.showTP1 = e.target.checked; render(); });
|
||||||
render();
|
$('toggleTP2').addEventListener('change', e => { state.showTP2 = e.target.checked; render(); });
|
||||||
});
|
$('toggleTriton').addEventListener('change', e => { state.showTriton = e.target.checked; render(); });
|
||||||
|
$('toggleRocm').addEventListener('change', e => { state.showRocm = e.target.checked; render(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
function getBenchmarkMeta(testName) {
|
function processData() {
|
||||||
if (testName.includes("Throughput")) return BENCHMARK_INFO["Throughput"];
|
const testGroups = {};
|
||||||
if (testName.includes("TTFT")) return BENCHMARK_INFO["TTFT"];
|
|
||||||
if (testName.includes("TPOT")) return BENCHMARK_INFO["TPOT"];
|
rawRuns.forEach(run => {
|
||||||
return null;
|
let testName = run.test;
|
||||||
|
if (!testGroups[testName]) {
|
||||||
|
testGroups[testName] = { name: testName, models: {} };
|
||||||
|
}
|
||||||
|
|
||||||
|
const modelName = run.model_clean || run.model;
|
||||||
|
if (!testGroups[testName].models[modelName]) {
|
||||||
|
testGroups[testName].models[modelName] = {
|
||||||
|
name: modelName,
|
||||||
|
quant: run.quant,
|
||||||
|
params: run.params_b || run.name_params_b,
|
||||||
|
results: {
|
||||||
|
1: { triton: null, rocm: null },
|
||||||
|
2: { triton: null, rocm: null }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const m = testGroups[testName].models[modelName];
|
||||||
|
const tp = run.tp || 1;
|
||||||
|
|
||||||
|
if (!m.results[tp]) m.results[tp] = { triton: null, rocm: null };
|
||||||
|
|
||||||
|
if (run.backend === "Triton") m.results[tp].triton = run.tps_mean;
|
||||||
|
if (run.backend === "ROCm") m.results[tp].rocm = run.tps_mean;
|
||||||
|
});
|
||||||
|
|
||||||
|
tests = Object.values(testGroups).map(g => ({
|
||||||
|
name: g.name,
|
||||||
|
models: Object.values(g.models)
|
||||||
|
}));
|
||||||
|
|
||||||
|
tests.sort((a, b) => {
|
||||||
|
const aTp = a.name.includes("Throughput");
|
||||||
|
const bTp = b.name.includes("Throughput");
|
||||||
|
if (aTp && !bTp) return -1;
|
||||||
|
if (!aTp && bTp) return 1;
|
||||||
|
return a.name.localeCompare(b.name);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (tests.length > 0) state.activeTab = tests[0].name;
|
||||||
|
}
|
||||||
|
|
||||||
|
function formatVal(v, unit) {
|
||||||
|
if (v === null || v === undefined) return '<span class="val-na">-</span>';
|
||||||
|
if (v === 0) return '<span class="val-na" style="color:#ef4444;font-weight:bold;">X</span>';
|
||||||
|
return `<span class="val">${v.toFixed(2)}<span style="font-size:0.75em; color:#9ca3af; margin-left:2px;">${unit}</span></span>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getMeta(name) {
|
||||||
|
if (name.includes("Throughput")) return BENCHMARK_INFO["Throughput"];
|
||||||
|
if (name.includes("TTFT")) return BENCHMARK_INFO["TTFT"];
|
||||||
|
if (name.includes("TPOT")) return BENCHMARK_INFO["TPOT"];
|
||||||
|
return { short: "", desc: "", unit: "" };
|
||||||
}
|
}
|
||||||
|
|
||||||
function render() {
|
function render() {
|
||||||
@@ -627,208 +687,165 @@
|
|||||||
const btn = document.createElement('button');
|
const btn = document.createElement('button');
|
||||||
btn.className = `tab-btn ${test.name === state.activeTab ? 'active' : ''}`;
|
btn.className = `tab-btn ${test.name === state.activeTab ? 'active' : ''}`;
|
||||||
btn.textContent = test.name;
|
btn.textContent = test.name;
|
||||||
btn.onclick = () => {
|
btn.onclick = () => { state.activeTab = test.name; render(); };
|
||||||
state.activeTab = test.name;
|
|
||||||
render();
|
|
||||||
};
|
|
||||||
tabNav.appendChild(btn);
|
tabNav.appendChild(btn);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Ensure active tab exists (if search filtered it out logic?)
|
|
||||||
// Actually tabs are based on 'tests' which is processed from raw data, so they exist regardless of filters unless we want to hide tabs with no results.
|
|
||||||
// For now, let's keep tabs static based on available data types.
|
|
||||||
|
|
||||||
container.innerHTML = "";
|
container.innerHTML = "";
|
||||||
|
|
||||||
// Find active test
|
|
||||||
const activeTest = tests.find(t => t.name === state.activeTab);
|
const activeTest = tests.find(t => t.name === state.activeTab);
|
||||||
|
|
||||||
if (!activeTest) {
|
if (!activeTest) {
|
||||||
// If invalid tab (e.g. on first load if default doesn't exist), switch to first
|
container.innerHTML = '<div id="loading">No Data</div>';
|
||||||
if (tests.length > 0) {
|
|
||||||
state.activeTab = tests[0].name;
|
|
||||||
// Re-render immediately
|
|
||||||
setTimeout(render, 0);
|
|
||||||
}
|
|
||||||
container.innerHTML = '<div id="loading">No data available.</div>';
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Render Active Tab Content
|
// Simplified Info Box logic
|
||||||
const test = activeTest;
|
if (state.showTP2) {
|
||||||
|
|
||||||
// Cluster Info Box Logic
|
|
||||||
// If test name implies Tensor Parallelism > 1 (e.g. "Cluster", "TP=2", etc.)
|
|
||||||
// We default to checking if it's the "Throughput (Cluster)" tab or similar
|
|
||||||
if (test.name.toLowerCase().includes("tp=2") || test.name.toLowerCase().includes("cluster")) {
|
|
||||||
const infoBox = document.createElement('div');
|
const infoBox = document.createElement('div');
|
||||||
infoBox.className = 'info-box';
|
infoBox.style.cssText = "background:#f8fafc; border:1px solid #e2e8f0; border-radius:6px; padding:10px 16px; margin-bottom:20px; font-size:0.9rem; color:#64748b; display:flex; justify-content:space-between; align-items:center;";
|
||||||
infoBox.innerHTML = `
|
infoBox.innerHTML = `
|
||||||
<div style="font-size:1.2rem;">ℹ️</div>
|
<span><b>TP2</b> = Distributed Cluster (2x Strix Halo, RDMA RoCE v2).</span>
|
||||||
<div>
|
<a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/blob/main/rdma_cluster/setup_guide.md" target="_blank" style="color:#3b82f6; text-decoration:none; font-weight:500;">Cluster Setup Guide →</a>
|
||||||
<div style="font-weight:600; margin-bottom:4px;">Distributed Cluster (Tensor Parallelism = 2)</div>
|
|
||||||
This benchmark runs on <b>2x Strix Halo nodes</b> connected via <b>Low-Latency RDMA (RoCE v2)</b>.
|
|
||||||
The model is split across both APUs, effectively using 256GB of Unified Memory.
|
|
||||||
<br><br>
|
|
||||||
<a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/blob/main/rdma_cluster/setup_guide.md" target="_blank">View Cluster Setup Guide →</a>
|
|
||||||
</div>
|
|
||||||
`;
|
`;
|
||||||
container.appendChild(infoBox);
|
container.appendChild(infoBox);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter models within this test
|
// Models Filter & Sort
|
||||||
const models = test.models.filter(m => {
|
const models = activeTest.models.filter(m => {
|
||||||
const s = state.search;
|
const matchS = !state.search || m.name.toLowerCase().includes(state.search);
|
||||||
const matchSearch = !s || m.name.toLowerCase().includes(s);
|
const matchQ = !state.quant || m.quant === state.quant;
|
||||||
|
return matchS && matchQ;
|
||||||
const q = state.quant;
|
}).sort((a, b) => (parseFloat(a.params) || 0) - (parseFloat(b.params) || 0) || a.name.localeCompare(b.name));
|
||||||
const matchQuant = !q || m.quant === q;
|
|
||||||
|
|
||||||
return matchSearch && matchQuant;
|
|
||||||
});
|
|
||||||
|
|
||||||
if (models.length === 0) {
|
|
||||||
container.innerHTML += '<div id="loading">No models match current filters in this category.</div>';
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sorting models by size (small to large), then name
|
|
||||||
models.sort((a, b) => {
|
|
||||||
const pA = parseFloat(a.params) || 0;
|
|
||||||
const pB = parseFloat(b.params) || 0;
|
|
||||||
if (pA !== pB) return pA - pB;
|
|
||||||
return a.name.localeCompare(b.name);
|
|
||||||
});
|
|
||||||
|
|
||||||
|
// Create Table
|
||||||
const card = document.createElement('div');
|
const card = document.createElement('div');
|
||||||
card.className = "section-card";
|
card.className = "section-card";
|
||||||
|
|
||||||
// Metadata resolution
|
const meta = getMeta(activeTest.name);
|
||||||
const meta = getBenchmarkMeta(test.name);
|
const unit = meta.unit || "";
|
||||||
const shortDesc = meta ? `<span class="section-desc">${meta.short}</span>` : "";
|
|
||||||
const helpBtn = meta ? `<button class="btn-help" onclick="openModal('${test.name}')">?</button>` : "";
|
|
||||||
|
|
||||||
|
// Header
|
||||||
const header = document.createElement('div');
|
const header = document.createElement('div');
|
||||||
header.className = "section-header";
|
header.className = "section-header";
|
||||||
header.innerHTML = `
|
header.innerHTML = `
|
||||||
<div class="section-title-row">
|
<div class="section-title-row">
|
||||||
<h2>${test.name}</h2>
|
<h2>${activeTest.name}</h2>
|
||||||
${helpBtn}
|
<button class="btn-help" onclick="openModal('${activeTest.name}')">?</button>
|
||||||
</div>
|
</div>
|
||||||
${shortDesc}
|
<span class="section-desc">${meta.short}</span>
|
||||||
`;
|
`;
|
||||||
card.appendChild(header);
|
card.appendChild(header);
|
||||||
|
|
||||||
|
// Table
|
||||||
const tableResp = document.createElement('div');
|
const tableResp = document.createElement('div');
|
||||||
tableResp.className = "table-responsive";
|
tableResp.className = "table-responsive";
|
||||||
|
|
||||||
const table = document.createElement('table');
|
const table = document.createElement('table');
|
||||||
const thead = document.createElement('thead');
|
|
||||||
thead.innerHTML = `
|
|
||||||
<tr>
|
|
||||||
<th class="col-model">Model</th>
|
|
||||||
<th class="col-data">Triton Attention</th>
|
|
||||||
<th class="col-data">ROCm Attention</th>
|
|
||||||
</tr>
|
|
||||||
`;
|
|
||||||
table.appendChild(thead);
|
|
||||||
|
|
||||||
|
// Build Dynamic Columns
|
||||||
|
let cols = [];
|
||||||
|
if (state.showTP1) {
|
||||||
|
if (state.showTriton) cols.push({ id: "tp1_triton", label: "TP1 Triton" });
|
||||||
|
if (state.showRocm) cols.push({ id: "tp1_rocm", label: "TP1 ROCm" });
|
||||||
|
}
|
||||||
|
if (state.showTP2) {
|
||||||
|
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 Triton" });
|
||||||
|
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 ROCm" });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Thead
|
||||||
|
let theadHtml = `<thead><tr><th class="col-model">Model</th>`;
|
||||||
|
cols.forEach(c => {
|
||||||
|
// Style differentiation for TP2
|
||||||
|
const style = c.id.startsWith("tp2") ? "background:#eff6ff; color:#1e40af;" : "";
|
||||||
|
theadHtml += `<th class="col-data" style="${style}">${c.label}</th>`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Diff Column Header
|
||||||
|
if (cols.length === 2) {
|
||||||
|
theadHtml += `<th class="col-data col-diff">Diff</th>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
theadHtml += `</tr></thead>`;
|
||||||
|
table.innerHTML = theadHtml;
|
||||||
|
|
||||||
|
// Tbody
|
||||||
const tbody = document.createElement('tbody');
|
const tbody = document.createElement('tbody');
|
||||||
models.forEach(m => {
|
models.forEach(m => {
|
||||||
const tr = document.createElement('tr');
|
const tr = document.createElement('tr');
|
||||||
|
|
||||||
// Meta tags
|
// Model Name Cell
|
||||||
let metaHtml = "";
|
let metaHtml = "";
|
||||||
if (m.quant) metaHtml += `<span class="tag">${m.quant}</span>`;
|
if (m.quant) metaHtml += `<span class="tag">${m.quant}</span>`;
|
||||||
if (m.params) metaHtml += `<span class="tag">${m.params}B</span>`;
|
if (m.params) metaHtml += `<span class="tag">${m.params}B</span>`;
|
||||||
|
|
||||||
// Values
|
let rowHtml = `
|
||||||
// Pass unit from meta
|
<td>
|
||||||
const unit = meta ? meta.unit : "";
|
<div class="model-cell">
|
||||||
const val1 = formatVal(m.triton, unit);
|
<a href="https://huggingface.co/${m.name}" target="_blank" class="model-name" style="text-decoration:none;color:inherit;">${m.name}</a>
|
||||||
|
<div class="model-meta">${metaHtml}</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
`;
|
||||||
|
|
||||||
// Special handling for ROCm column where we want 'X' for crashes/missing if Triton has data
|
// Data Cells
|
||||||
let val2;
|
cols.forEach(c => {
|
||||||
if ((m.rocm === null || m.rocm === 0) && m.triton > 0) {
|
let val = null;
|
||||||
val2 = '<span class="val-na" style="color: #ef4444; font-weight:bold;">X</span>';
|
if (c.id === "tp1_triton") val = m.results[1]?.triton;
|
||||||
} else {
|
if (c.id === "tp1_rocm") val = m.results[1]?.rocm;
|
||||||
val2 = formatVal(m.rocm, unit);
|
if (c.id === "tp2_triton") val = m.results[2]?.triton;
|
||||||
|
if (c.id === "tp2_rocm") val = m.results[2]?.rocm;
|
||||||
|
|
||||||
|
const bg = c.id.startsWith("tp2") ? 'style="background:#fbfdff;"' : "";
|
||||||
|
rowHtml += `<td class="col-data" ${bg}>${formatVal(val, unit)}</td>`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Diff Column Data
|
||||||
|
if (cols.length === 2) {
|
||||||
|
const v1 = getVal(m, cols[0].id);
|
||||||
|
const v2 = getVal(m, cols[1].id);
|
||||||
|
rowHtml += `<td class="col-data col-diff">${formatDiff(v1, v2)}</td>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
tr.innerHTML = `
|
tr.innerHTML = rowHtml;
|
||||||
<td>
|
|
||||||
<div class="model-cell">
|
|
||||||
<a href="https://huggingface.co/${m.name}" target="_blank" class="model-name" style="text-decoration: none; color: inherit; border-bottom: 1px dotted #ccc;">${m.name}</a>
|
|
||||||
<div class="model-meta">${metaHtml}</div>
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
<td class="col-data">${val1}</td>
|
|
||||||
<td class="col-data">${val2}</td>
|
|
||||||
`;
|
|
||||||
tbody.appendChild(tr);
|
tbody.appendChild(tr);
|
||||||
});
|
});
|
||||||
|
|
||||||
table.appendChild(tbody);
|
table.appendChild(tbody);
|
||||||
tableResp.appendChild(table);
|
tableResp.appendChild(table);
|
||||||
card.appendChild(tableResp);
|
card.appendChild(tableResp);
|
||||||
|
|
||||||
container.appendChild(card);
|
container.appendChild(card);
|
||||||
}
|
}
|
||||||
|
|
||||||
function formatVal(v, unit) {
|
// Helper to get value safely
|
||||||
if (v === null || v === undefined) return '<span class="val-na">N/A</span>';
|
function getVal(m, colId) {
|
||||||
if (v === 0) return '<span class="val-na">FAIL</span>';
|
if (colId === "tp1_triton") return m.results[1]?.triton;
|
||||||
return `<span class="val">${v.toFixed(2)}<span style="font-size:0.8em; color:#888;">${unit}</span></span>`;
|
if (colId === "tp1_rocm") return m.results[1]?.rocm;
|
||||||
|
if (colId === "tp2_triton") return m.results[2]?.triton;
|
||||||
|
if (colId === "tp2_rocm") return m.results[2]?.rocm;
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modal Logic
|
function formatDiff(v1, v2) {
|
||||||
function openModal(testName) {
|
if (v1 === null || v2 === null || v1 === undefined || v2 === undefined || v1 === 0) return '<span class="val-na">-</span>';
|
||||||
const meta = getBenchmarkMeta(testName);
|
const diff = ((v2 - v1) / v1) * 100;
|
||||||
if (!meta) return;
|
const sign = diff > 0 ? "+" : "";
|
||||||
|
const cls = diff > 0.5 ? "val-pos" : (diff < -0.5 ? "val-neg" : "val-neu");
|
||||||
|
return `<span class="${cls}">${sign}${diff.toFixed(1)}%</span>`;
|
||||||
|
}
|
||||||
|
|
||||||
$('modalTitle').textContent = testName;
|
// --- Basic Modal Implementation ---
|
||||||
|
function openModal(name) {
|
||||||
let content = `
|
const m = getMeta(name);
|
||||||
<div class="modal-section">
|
$('modalTitle').textContent = name;
|
||||||
<h4>What is this?</h4>
|
$('modalContent').innerHTML = `
|
||||||
<p>${meta.desc}</p>
|
<div class="modal-section"><h4>About</h4><p>${m.desc}</p></div>
|
||||||
</div>
|
<div class="modal-section"><h4>Usage</h4><p>${m.usecase}</p></div>
|
||||||
<div class="modal-section">
|
<div class="modal-section"><h4>Details</h4><div class="code-block">${m.details}</div></div>
|
||||||
<h4>Why it matters?</h4>
|
`;
|
||||||
<p>${meta.usecase}</p>
|
|
||||||
</div>`;
|
|
||||||
|
|
||||||
if (meta.context) {
|
|
||||||
content += `
|
|
||||||
<div class="modal-section">
|
|
||||||
<h4>Terminology</h4>
|
|
||||||
<p>${meta.context}</p>
|
|
||||||
</div>`;
|
|
||||||
}
|
|
||||||
|
|
||||||
content += `
|
|
||||||
<div class="modal-section">
|
|
||||||
<h4>Technical Details</h4>
|
|
||||||
<div class="code-block">${meta.details}</div>
|
|
||||||
</div>
|
|
||||||
`;
|
|
||||||
|
|
||||||
$('modalContent').innerHTML = content;
|
|
||||||
$('modalOverlay').classList.add('active');
|
$('modalOverlay').classList.add('active');
|
||||||
}
|
}
|
||||||
|
function closeModal() { $('modalOverlay').classList.remove('active'); }
|
||||||
function closeModal() {
|
$('modalOverlay').addEventListener('click', e => { if (e.target === $('modalOverlay')) closeModal(); });
|
||||||
$('modalOverlay').classList.remove('active');
|
document.addEventListener('keydown', e => { if (e.key === "Escape") closeModal(); });
|
||||||
}
|
|
||||||
|
|
||||||
// Close on click outside
|
|
||||||
$('modalOverlay').addEventListener('click', e => {
|
|
||||||
if (e.target === $('modalOverlay')) closeModal();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Close on Escape
|
|
||||||
document.addEventListener('keydown', e => {
|
|
||||||
if (e.key === "Escape") closeModal();
|
|
||||||
});
|
|
||||||
|
|
||||||
init();
|
init();
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -71,6 +71,10 @@ def parse_logs():
|
|||||||
model_display = model_part.replace("_", "/", 1)
|
model_display = model_part.replace("_", "/", 1)
|
||||||
else:
|
else:
|
||||||
model_display = model_part
|
model_display = model_part
|
||||||
|
|
||||||
|
# Normalize: Remove _cluster suffix if present so grouping works
|
||||||
|
if model_display.endswith("_cluster"):
|
||||||
|
model_display = model_display[:-8]
|
||||||
|
|
||||||
params_b, quant = extract_meta(model_display)
|
params_b, quant = extract_meta(model_display)
|
||||||
|
|
||||||
@@ -89,7 +93,8 @@ def parse_logs():
|
|||||||
if "throughput" in fname:
|
if "throughput" in fname:
|
||||||
tps = data.get("tokens_per_second", 0)
|
tps = data.get("tokens_per_second", 0)
|
||||||
run = base_run.copy()
|
run = base_run.copy()
|
||||||
run["test"] = f"Throughput (TP{tp})"
|
run["test"] = "Throughput"
|
||||||
|
run["tp"] = tp
|
||||||
run["tps_mean"] = tps
|
run["tps_mean"] = tps
|
||||||
if tps == 0 or (isinstance(data, dict) and "error" in str(data).lower()): # checking if error string is in json dump
|
if tps == 0 or (isinstance(data, dict) and "error" in str(data).lower()): # checking if error string is in json dump
|
||||||
run["error"] = True
|
run["error"] = True
|
||||||
@@ -111,13 +116,15 @@ def parse_logs():
|
|||||||
|
|
||||||
# TTFT
|
# TTFT
|
||||||
r1 = base_run.copy()
|
r1 = base_run.copy()
|
||||||
r1["test"] = f"TTFT (TP{tp}) @ QPS {qps}"
|
r1["test"] = f"TTFT (QPS {qps})"
|
||||||
|
r1["tp"] = tp
|
||||||
r1["tps_mean"] = ttft
|
r1["tps_mean"] = ttft
|
||||||
runs.append(r1)
|
runs.append(r1)
|
||||||
|
|
||||||
# TPOT
|
# TPOT
|
||||||
r2 = base_run.copy()
|
r2 = base_run.copy()
|
||||||
r2["test"] = f"TPOT (TP{tp}) @ QPS {qps}"
|
r2["test"] = f"TPOT (QPS {qps})"
|
||||||
|
r2["tp"] = tp
|
||||||
r2["tps_mean"] = tpot
|
r2["tps_mean"] = tpot
|
||||||
runs.append(r2)
|
runs.append(r2)
|
||||||
|
|
||||||
|
|||||||
+368
-48
@@ -1,18 +1,5 @@
|
|||||||
{
|
{
|
||||||
"runs": [
|
"runs": [
|
||||||
{
|
|
||||||
"model": "Qwen/Qwen3-14B-AWQ",
|
|
||||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "AWQ",
|
|
||||||
"params_b": 14.0,
|
|
||||||
"name_params_b": 14.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tps_mean": 112.69232830266365
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
@@ -24,7 +11,8 @@
|
|||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 278.99494393048457
|
"tp": 1,
|
||||||
|
"tps_mean": 238.92735772921657
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "google/gemma-3-12b-it",
|
"model": "google/gemma-3-12b-it",
|
||||||
@@ -37,7 +25,92 @@
|
|||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 162.71078485804028
|
"tp": 1,
|
||||||
|
"tps_mean": 138.28298716107304
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 83.68710295019198
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-20b",
|
||||||
|
"model_clean": "openai/gpt-oss-20b",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 20.0,
|
||||||
|
"name_params_b": 20.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 185.3668705592303
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-120b",
|
||||||
|
"model_clean": "openai/gpt-oss-120b",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 120.0,
|
||||||
|
"name_params_b": 120.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 70.98811455236003
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "zai-org/GLM-4.7-Flash",
|
||||||
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": null,
|
||||||
|
"name_params_b": null,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 133.7890668441555
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 117.75790760733192
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 86.97761646092924
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
@@ -50,59 +123,134 @@
|
|||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 112.62418795067208
|
"tp": 1,
|
||||||
|
"tps_mean": 71.52372211759099
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 8.0,
|
||||||
|
"name_params_b": 8.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 385.96636603292677
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "google/gemma-3-12b-it",
|
||||||
|
"model_clean": "google/gemma-3-12b-it",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 12.0,
|
||||||
|
"name_params_b": 12.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 244.97127724647316
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 328.49937859629955
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-20b",
|
"model": "openai/gpt-oss-20b",
|
||||||
"model_clean": "openai/gpt-oss-20b",
|
"model_clean": "openai/gpt-oss-20b",
|
||||||
"env": "TP1",
|
"env": "TP2",
|
||||||
"gpu_config": "single",
|
"gpu_config": "dual",
|
||||||
"quant": "BF16",
|
"quant": "BF16",
|
||||||
"params_b": 20.0,
|
"params_b": 20.0,
|
||||||
"name_params_b": 20.0,
|
"name_params_b": 20.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 313.85817605876395
|
"tp": 2,
|
||||||
},
|
"tps_mean": 276.48063742763867
|
||||||
{
|
|
||||||
"model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
|
|
||||||
"model_clean": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "GPTQ",
|
|
||||||
"params_b": 30.0,
|
|
||||||
"name_params_b": 30.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tps_mean": 271.7264154071495
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-120b",
|
"model": "openai/gpt-oss-120b",
|
||||||
"model_clean": "openai/gpt-oss-120b",
|
"model_clean": "openai/gpt-oss-120b",
|
||||||
"env": "TP1",
|
"env": "TP2",
|
||||||
"gpu_config": "single",
|
"gpu_config": "dual",
|
||||||
"quant": "BF16",
|
"quant": "BF16",
|
||||||
"params_b": 120.0,
|
"params_b": 120.0,
|
||||||
"name_params_b": 120.0,
|
"name_params_b": 120.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 109.73523843987172
|
"tp": 2,
|
||||||
|
"tps_mean": 117.11822522607781
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "Qwen/Qwen3-14B-AWQ",
|
"model": "zai-org/GLM-4.7-Flash",
|
||||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||||
"env": "TP1",
|
"env": "TP2",
|
||||||
"gpu_config": "single",
|
"gpu_config": "dual",
|
||||||
"quant": "AWQ",
|
"quant": "BF16",
|
||||||
"params_b": 14.0,
|
"params_b": null,
|
||||||
"name_params_b": 14.0,
|
"name_params_b": null,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 118.62544339374007
|
"tp": 2,
|
||||||
|
"tps_mean": 200.2551557455423
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 170.28562676904787
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 130.40765123003763
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 80.0,
|
||||||
|
"name_params_b": 80.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 98.44491320703105
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
@@ -115,7 +263,8 @@
|
|||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 320.4458308584372
|
"tp": 1,
|
||||||
|
"tps_mean": 241.1478963701438
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "google/gemma-3-12b-it",
|
"model": "google/gemma-3-12b-it",
|
||||||
@@ -128,7 +277,22 @@
|
|||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 275.34859975563967
|
"tp": 1,
|
||||||
|
"tps_mean": 180.0947121704009
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 86.05445772580717
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-20b",
|
"model": "openai/gpt-oss-20b",
|
||||||
@@ -141,7 +305,8 @@
|
|||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 318.9683005103833
|
"tp": 1,
|
||||||
|
"tps_mean": 185.01465145154089
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-120b",
|
"model": "openai/gpt-oss-120b",
|
||||||
@@ -154,7 +319,162 @@
|
|||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tps_mean": 114.91339037290285
|
"tp": 1,
|
||||||
|
"tps_mean": 67.92779311425312
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 121.19378360261106
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 89.76589360109976
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 80.0,
|
||||||
|
"name_params_b": 80.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 70.75103526522766
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 8.0,
|
||||||
|
"name_params_b": 8.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 381.7346838742502
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "google/gemma-3-12b-it",
|
||||||
|
"model_clean": "google/gemma-3-12b-it",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 12.0,
|
||||||
|
"name_params_b": 12.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 263.9412467646666
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 321.2769089381971
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-20b",
|
||||||
|
"model_clean": "openai/gpt-oss-20b",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 20.0,
|
||||||
|
"name_params_b": 20.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 287.95959208958226
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-120b",
|
||||||
|
"model_clean": "openai/gpt-oss-120b",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 120.0,
|
||||||
|
"name_params_b": 120.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 120.90976614975652
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 171.7665512553609
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 131.2917933257513
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 80.0,
|
||||||
|
"name_params_b": 80.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 100.8896816189698
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
Odkázat v novém úkolu
Zablokovat Uživatele