<!DOCTYPE html>
<!-- Static dashboard for vLLM benchmark results; data is loaded at runtime from results.json. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>AMD Strix Halo (gfx1151) vLLM Benchmarks</title>
<style>
  /* Design tokens: every colour and font stack used below is defined once here. */
  :root {
    --bg-body: #f9fafb;
    --bg-card: #ffffff;
    --text-main: #111827;
    --text-muted: #6b7280;
    --border: #e5e7eb;
    --primary: #ef4444; /* AMD Red-ish */
    --primary-bg: #fef2f2;
    --font-sans: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
    --font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
  }

  body {
    background-color: var(--bg-body);
    color: var(--text-main);
    font-family: var(--font-sans);
    margin: 0;
    padding: 20px;
    line-height: 1.5;
  }

  .container {
    max-width: 1000px;
    margin: 20px auto;
  }

  /* Header */
  header {
    margin-bottom: 20px;
    text-align: center;
  }

  h1 {
    font-size: 2.25rem;
    font-weight: 800;
    margin: 0 0 10px 0;
    letter-spacing: -0.05rem;
  }

  p.subtitle {
    color: var(--text-muted);
    font-size: 1.1rem;
    margin: 0;
  }

  /* Controls */
  .controls {
    display: flex;
    gap: 16px;
    margin-bottom: 24px;
    background: var(--bg-card);
    padding: 16px;
    border-radius: 12px;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
    border: 1px solid var(--border);
    align-items: center;
    flex-wrap: wrap;
  }

  input[type="text"],
  select {
    padding: 10px 14px;
    border: 1px solid var(--border);
    border-radius: 8px;
    font-size: 0.95rem;
    /* The default outline is replaced by the border + ring in the :focus rule below. */
    outline: none;
    transition: border-color 0.15s;
  }

  input[type="text"]:focus,
  select:focus {
    border-color: var(--primary);
    box-shadow: 0 0 0 2px var(--primary-bg);
  }

  .search {
    flex: 1;
    min-width: 200px;
  }

  /* Section Cards */
  .section-card {
    background: var(--bg-card);
    border-radius: 12px;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
    border: 1px solid var(--border);
    margin-bottom: 32px;
    overflow: hidden;
  }

  .section-header {
    padding: 12px 16px;
    border-bottom: 1px solid var(--border);
    background: #fcfcfc;
    display: flex;
    justify-content: space-between;
    align-items: center;
  }

  .section-header h2 {
    margin: 0;
    font-size: 1.1rem;
    font-weight: 600;
  }

  /* Table */
  .table-responsive {
    overflow-x: auto;
  }

  table {
    width: 100%;
    border-collapse: collapse;
    font-size: 0.95rem;
  }

  th,
  td {
    padding: 8px 12px;
    text-align: left;
    border-bottom: 1px solid var(--border);
  }

  th {
    background: #f9fafb;
    color: var(--text-muted);
    font-weight: 600;
    font-size: 0.75rem;
    text-transform: uppercase;
    letter-spacing: 0.05em;
  }

  tr:last-child td {
    border-bottom: none;
  }

  /* Columns */
  .col-model {
    width: auto;
  }

  /* width: 1% + nowrap shrinks numeric columns to their content so the model column takes the rest. */
  .col-data {
    text-align: right;
    width: 1%;
    white-space: nowrap;
    font-family: var(--font-mono);
    font-feature-settings: "tnum";
    font-variant-numeric: tabular-nums;
  }

  /* Model Cell Styling */
  .model-cell {
    display: flex;
    flex-direction: column;
  }

  .model-name {
    font-weight: 600;
    color: var(--text-main);
  }

  .model-meta {
    font-size: 0.8rem;
    color: var(--text-muted);
    margin-top: 4px;
    display: flex;
    gap: 8px;
    align-items: center;
  }

  /* Tags */
  .tag {
    display: inline-block;
    padding: 2px 6px;
    border-radius: 4px;
    background: #f3f4f6;
    color: #4b5563;
    font-size: 0.7rem;
    font-weight: 500;
  }

  /* Data Styling */
  .val {
    font-weight: 600;
  }

  .val-na {
    color: #d1d5db;
    font-weight: 400;
  }

  .highlight {
    color: var(--primary);
  }

  /* Loading / empty-state message */
  #loading {
    text-align: center;
    padding: 40px;
    color: var(--text-muted);
  }

  /* Modal Styles */
  /* The overlay stays in the layout at all times; it is hidden via opacity and made click-through
     via pointer-events until the script adds the "active" class. */
  .modal-overlay {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background: rgba(0, 0, 0, 0.5);
    display: flex;
    justify-content: center;
    align-items: center;
    z-index: 1000;
    opacity: 0;
    pointer-events: none;
    transition: opacity 0.2s ease;
  }

  .modal-overlay.active {
    opacity: 1;
    pointer-events: auto;
  }

  .modal {
    background: var(--bg-card);
    width: 90%;
    max-width: 600px;
    border-radius: 12px;
    box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
    display: flex;
    flex-direction: column;
    max-height: 85vh;
    overflow: hidden;
  }

  .modal-header {
    padding: 20px 24px;
    border-bottom: 1px solid var(--border);
    display: flex;
    justify-content: space-between;
    align-items: center;
    background: #f9fafb;
  }

  .modal-header h3 {
    margin: 0;
    font-size: 1.25rem;
  }

  .modal-close {
    background: none;
    border: none;
    font-size: 1.5rem;
    cursor: pointer;
    color: var(--text-muted);
    line-height: 1;
  }

  .modal-body {
    padding: 24px;
    overflow-y: auto;
  }

  .modal-section {
    margin-bottom: 24px;
  }

  .modal-section h4 {
    margin: 0 0 8px 0;
    font-size: 0.9rem;
    text-transform: uppercase;
    color: var(--text-muted);
    letter-spacing: 0.05em;
  }

  .modal-section p {
    margin: 0;
    font-size: 0.95rem;
    color: var(--text-main);
  }

  /* pre-wrap keeps the line breaks embedded in the benchmark "details" text. */
  .code-block {
    background: #f3f4f6;
    padding: 12px;
    border-radius: 6px;
    font-family: var(--font-mono);
    font-size: 0.85rem;
    color: #374151;
    margin-top: 8px;
    white-space: pre-wrap;
  }

  /* Help Button */
  .btn-help {
    background: none;
    border: 1px solid var(--border);
    color: var(--text-muted);
    width: 24px;
    height: 24px;
    border-radius: 50%;
    display: inline-flex;
    align-items: center;
    justify-content: center;
    font-size: 0.85rem;
    font-weight: 600;
    cursor: pointer;
    margin-left: 10px;
    transition: all 0.2s;
  }

  .btn-help:hover {
    border-color: var(--primary);
    color: var(--primary);
    background: var(--primary-bg);
  }

  .section-title-row {
    display: flex;
    align-items: center;
  }

  .section-desc {
    color: var(--text-muted);
    font-size: 0.9rem;
    font-weight: 400;
    margin-left: 12px;
  }

  /* Footer */
  footer {
    margin-top: 60px;
    padding-top: 20px;
    border-top: 1px solid var(--border);
    color: var(--text-muted);
    font-size: 0.85rem;
    line-height: 1.6;
  }

  .sys-config {
    display: flex;
    flex-direction: column;
    gap: 8px;
    margin-top: 12px;
    max-width: 800px;
  }

  .sys-item {
    display: grid;
    grid-template-columns: 140px 1fr;
    align-items: baseline;
  }

  .sys-label {
    font-weight: 600;
    font-size: 0.75rem;
    text-transform: uppercase;
    letter-spacing: 0.05em;
    color: #9ca3af;
  }

  /* Tabs */
  .tab-nav {
    display: flex;
    gap: 8px;
    margin-bottom: 24px;
    border-bottom: 1px solid var(--border);
    padding-bottom: 0px;
  }

  .tab-btn {
    background: none;
    border: none;
    padding: 12px 20px;
    font-size: 1rem;
    font-weight: 500;
    color: var(--text-muted);
    cursor: pointer;
    border-bottom: 2px solid transparent;
    transition: all 0.2s;
  }

  .tab-btn:hover {
    color: var(--text-main);
  }

  .tab-btn.active {
    color: var(--primary);
    border-bottom-color: var(--primary);
    font-weight: 600;
  }
</style>
</head>
<body>
  <div class="container">
    <header>
      <h1>AMD Strix Halo (gfx1151) vLLM Benchmarks</h1>
      <p style="margin: 4px 0 0 0; font-size: 0.9rem;">
        <a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/" target="_blank" rel="noopener noreferrer"
          style="color: var(--primary); text-decoration: none;">View on GitHub →</a>
      </p>
    </header>
    <!-- Filters: the script reads #searchInput and #quantFilter and appends the quantization options. -->
    <div class="controls">
      <input type="text" id="searchInput" class="search" placeholder="Search models (e.g. 'llama', 'fp8')..."
        autocomplete="off" aria-label="Search models">
      <select id="quantFilter" aria-label="Filter by quantization">
        <option value="">All Quantizations</option>
      </select>
    </div>
    <nav id="tabNav" class="tab-nav" aria-label="Benchmark categories">
      <!-- Dynamic Tabs -->
    </nav>
    <!-- The script replaces the contents of #dashboard with the table for the active tab. -->
    <div id="dashboard">
      <div id="loading">Loading benchmark results...</div>
    </div>
    <footer>
      <div style="font-weight: 600; margin-bottom: 8px;">System Configuration</div>
      <div class="sys-config">
        <div class="sys-item">
          <span class="sys-label">System</span>
          <span>Framework Desktop · AMD Ryzen AI MAX 395+ · 128GB unified RAM</span>
        </div>
        <div class="sys-item">
          <span class="sys-label">OS/Kernel</span>
          <span>Fedora 42 · Linux 6.18.0-0.rc6.243.vanilla.fc42.x86_64</span>
        </div>
      </div>
    </footer>
  </div>
  <!-- Modal Overlay: shown by adding the "active" class; title and body are filled in by the script. -->
  <div id="modalOverlay" class="modal-overlay">
    <div class="modal">
      <div class="modal-header">
        <h3 id="modalTitle">Benchmark Info</h3>
        <button type="button" class="modal-close" aria-label="Close" onclick="closeModal()">×</button>
      </div>
      <div class="modal-body" id="modalContent">
        <!-- Dynamic Content -->
      </div>
    </div>
  </div>
< script >
// State
let rawRuns = [ ] ;
let tests = [ ] ;
let state = {
search : "" ,
quant : "" ,
activeTab : "Throughput"
} ;
// Benchmark Metadata
const BENCHMARK _INFO = {
"Throughput" : {
short : "Maximum raw compute capacity (Tokens/Sec)." ,
desc : "Measures the absolute maximum number of tokens the system can generate per second by fully saturating the GPU compute capability." ,
usecase : "Demonstrates the raw horsepower and architectural efficiency of the hardware/model combo under Heavy Load. This is the theoretical speed limit of the system." ,
details : "Command: `vllm bench throughput`\nParams: --num-prompts 100 --output-len 512\nMetric: Tokens per Second (higher is better)." ,
unit : " tok/s"
} ,
"TTFT" : {
short : "Time To First Token (Response Latency)." ,
desc : "The 'Time To First Token' is the delay between sending a request and seeing the first character of the response." ,
usecase : "<b>Responsiveness</b>. Low TTFT makes the AI feel 'snappy' and instant. High TTFT feels like the AI is ignoring you or lagging. We measure at different QPS loads to ensure the server doesn't 'choke' when busy." ,
context : "<b>QPS = Queries Per Second (Traffic Load)</b>.<br>• QPS 1.0 = 1 user sending a request every second.<br>• QPS 4.0 = 4 users sending requests every second (Simulates High Load)." ,
details : "Command: `vllm bench serve`\nParams: --random-input-len 1024 --random-output-len 512\nMetric: Milliseconds (lower is better)." ,
unit : " ms"
} ,
"TPOT" : {
short : "Time Per Output Token (Streaming Speed)." ,
desc : "The 'Time Per Output Token' measures how fast the text generates *after* the first token appears." ,
usecase : "<b>1. Fluidity</b>: Industry standard is <50ms (>20 tok/s) for a 'fluid' feeling. Slower feels laggy.<br><b>2. Bottlenecks</b>: We test at <b>QPS 4.0</b> to find memory bandwidth bottlenecks where the GPU can't keep up with multiple users." ,
context : "<b>QPS = Queries Per Second (Traffic Load)</b>.<br>• QPS 1.0 = Light Load (Ideal conditions)<br>• QPS 4.0 = Heavy Load (Stress Test)" ,
details : "Command: `vllm bench serve`\nParams: --random-input-len 1024 --random-output-len 512\nMetric: Milliseconds (lower is better)." ,
unit : " ms"
}
} ;
const $ = id => document . getElementById ( id ) ;
/**
 * Loads results.json, builds the per-test model tables and draws the page.
 *
 * Any failure (network error, non-2xx response, invalid JSON, or an exception while
 * processing/rendering) is logged to the console and reported in the page.
 */
async function init() {
    try {
        const res = await fetch('results.json');
        // fetch() only rejects on network failure; without this check a 404 page would surface
        // as a confusing JSON parse error.
        if (!res.ok) {
            throw new Error(`HTTP ${res.status} ${res.statusText}`);
        }
        const data = await res.json();
        rawRuns = data && Array.isArray(data.runs) ? data.runs : [];
        processData();
        render();
        populateFilters();
    } catch (e) {
        // render() replaces the dashboard contents, so the initial #loading element may already
        // be gone when a later step throws; fall back to the dashboard container itself.
        const box = $('loading') || $('dashboard');
        if (box) box.textContent = "Error loading results.json: " + e.message;
        console.error(e);
    }
}
/**
 * Rebuilds the global `tests` array from `rawRuns`.
 *
 * Runs are grouped by `run.test`, then by model name (`model_clean`, falling back to `model`).
 * Each model entry carries `name`, `quant`, `params` and one value per attention backend
 * (`triton`, `rocm`), both initialised to null. Runs without a test are ignored; runs without
 * any model name are skipped because later code calls string methods on the name.
 * "Throughput" is sorted first, the remaining tests alphabetically.
 */
function processData() {
    // Null-prototype maps: keys come from the data file, so names like "__proto__" or
    // "constructor" must not resolve to Object.prototype members.
    const testGroups = Object.create(null);

    rawRuns.forEach(run => {
        if (!run || !run.test) return;

        if (!testGroups[run.test]) {
            testGroups[run.test] = {
                name: run.test,
                models: Object.create(null)
            };
        }

        // Normalize model name
        const modelName = run.model_clean || run.model;
        if (!modelName) return;

        const group = testGroups[run.test];
        if (!group.models[modelName]) {
            // quant/params are taken from the first run seen for this model.
            group.models[modelName] = {
                name: modelName,
                quant: run.quant,
                params: run.params_b || run.name_params_b,
                triton: null,
                rocm: null
            };
        }

        const m = group.models[modelName];
        // Assign Backend value
        // NOTE(review): tps_mean is stored for every test type, including the latency tabs that
        // are displayed in ms; confirm the exporter writes those values into this field.
        if (run.backend === "Triton") m.triton = run.tps_mean;
        if (run.backend === "ROCm") m.rocm = run.tps_mean;
    });

    // Convert map to array for sorting
    tests = Object.values(testGroups).map(group => ({
        name: group.name,
        models: Object.values(group.models)
    }));

    // Sort tests: Throughput first, then others alphabetically
    tests.sort((a, b) => {
        if (a.name === "Throughput") return -1;
        if (b.name === "Throughput") return 1;
        return a.name.localeCompare(b.name);
    });

    // Set default tab if not set
    if (!state.activeTab && tests.length > 0) {
        state.activeTab = tests[0].name;
    }
}
/**
 * Fills the quantization dropdown with the distinct, sorted `quant` values found in `rawRuns`
 * and wires both filter controls so that any change updates `state` and redraws the page.
 */
function populateFilters() {
    // Collect every truthy quant label exactly once.
    const uniqueQuants = new Set();
    for (const run of rawRuns) {
        if (run.quant) uniqueQuants.add(run.quant);
    }

    const quantSelect = $('quantFilter');
    for (const label of Array.from(uniqueQuants).sort()) {
        const option = document.createElement('option');
        option.value = label;
        option.textContent = label;
        quantSelect.appendChild(option);
    }

    // The search term is stored lower-cased; render() compares it to lower-cased model names.
    $('searchInput').addEventListener('input', function (event) {
        state.search = event.target.value.toLowerCase();
        render();
    });

    quantSelect.addEventListener('change', function (event) {
        state.quant = event.target.value;
        render();
    });
}
/**
 * Looks up the help/unit metadata for a test name.
 *
 * A test matches the first BENCHMARK_INFO key (in declaration order) that occurs anywhere in
 * its name, so variants of a test name that contain the key share its entry.
 *
 * @param {string} testName Test name as it appears in the results data.
 * @returns {object|null} The matching BENCHMARK_INFO entry, or null when nothing matches or
 *     the argument is not a string.
 */
function getBenchmarkMeta(testName) {
    if (typeof testName !== "string") return null;
    const key = Object.keys(BENCHMARK_INFO).find(k => testName.includes(k));
    return key ? BENCHMARK_INFO[key] : null;
}
/**
 * Redraws the tab bar and the results table for `state.activeTab`.
 *
 * Reads the globals `tests` and `state`; writes into #tabNav and #dashboard. Models are
 * filtered by the search text and quantization selection, then sorted by parameter count
 * (ascending) and name. All text that originates from results.json is inserted with
 * textContent / DOM properties rather than string-built HTML, so it cannot inject markup.
 */
function render() {
    const container = $('dashboard');
    const tabNav = $('tabNav');

    // Render Tabs
    // Tabs come from the processed data, so they stay visible regardless of the filters.
    tabNav.innerHTML = "";
    tests.forEach(test => {
        const btn = document.createElement('button');
        btn.type = 'button';
        btn.className = test.name === state.activeTab ? 'tab-btn active' : 'tab-btn';
        btn.textContent = test.name;
        btn.addEventListener('click', () => {
            state.activeTab = test.name;
            render();
        });
        tabNav.appendChild(btn);
    });

    container.innerHTML = "";

    // Find active test
    const activeTest = tests.find(t => t.name === state.activeTab);
    if (!activeTest) {
        // If invalid tab (e.g. on first load if default doesn't exist), switch to first
        if (tests.length > 0) {
            state.activeTab = tests[0].name;
            // Re-render on the next tick with the corrected tab
            setTimeout(render, 0);
        }
        container.innerHTML = '<div id="loading">No data available.</div>';
        return;
    }

    // Render Active Tab Content
    const test = activeTest;

    // Filter models within this test
    const models = test.models.filter(m => {
        const s = state.search;
        const matchSearch = !s || String(m.name).toLowerCase().includes(s);
        const q = state.quant;
        const matchQuant = !q || m.quant === q;
        return matchSearch && matchQuant;
    });

    if (models.length === 0) {
        container.innerHTML = '<div id="loading">No models match current filters in this category.</div>';
        return;
    }

    // Sorting models by size (small to large), then name
    models.sort((a, b) => {
        const pA = parseFloat(a.params) || 0;
        const pB = parseFloat(b.params) || 0;
        if (pA !== pB) return pA - pB;
        return String(a.name).localeCompare(String(b.name));
    });

    const card = document.createElement('div');
    card.className = "section-card";

    // Metadata resolution
    const meta = getBenchmarkMeta(test.name);
    const unit = meta ? meta.unit : "";

    // Section header: title, optional help button, optional one-line description.
    const header = document.createElement('div');
    header.className = "section-header";

    const titleRow = document.createElement('div');
    titleRow.className = "section-title-row";
    const title = document.createElement('h2');
    title.textContent = test.name;
    titleRow.appendChild(title);
    if (meta) {
        const helpBtn = document.createElement('button');
        helpBtn.type = 'button';
        helpBtn.className = 'btn-help';
        helpBtn.textContent = '?';
        helpBtn.setAttribute('aria-label', 'About the ' + test.name + ' benchmark');
        // A listener (instead of an inline onclick string) keeps test names with quotes working.
        helpBtn.addEventListener('click', () => openModal(test.name));
        titleRow.appendChild(helpBtn);
    }
    header.appendChild(titleRow);
    if (meta) {
        const desc = document.createElement('span');
        desc.className = 'section-desc';
        desc.textContent = meta.short;
        header.appendChild(desc);
    }
    card.appendChild(header);

    const tableResp = document.createElement('div');
    tableResp.className = "table-responsive";
    const table = document.createElement('table');

    const thead = document.createElement('thead');
    thead.innerHTML = `
        <tr>
            <th scope="col" class="col-model">Model</th>
            <th scope="col" class="col-data">Triton Attention</th>
            <th scope="col" class="col-data">ROCm Attention</th>
        </tr>
    `;
    table.appendChild(thead);

    const tbody = document.createElement('tbody');
    models.forEach(m => {
        const tr = document.createElement('tr');

        // Model cell: link to the model page plus quant / size tags.
        const modelTd = document.createElement('td');
        const cell = document.createElement('div');
        cell.className = 'model-cell';

        const link = document.createElement('a');
        link.href = 'https://huggingface.co/' + m.name;
        link.target = '_blank';
        link.rel = 'noopener noreferrer';
        link.className = 'model-name';
        link.style.cssText = 'text-decoration: none; color: inherit; border-bottom: 1px dotted #ccc;';
        link.textContent = m.name;
        cell.appendChild(link);

        // Meta tags
        const metaRow = document.createElement('div');
        metaRow.className = 'model-meta';
        const addTag = text => {
            const tag = document.createElement('span');
            tag.className = 'tag';
            tag.textContent = text;
            metaRow.appendChild(tag);
        };
        if (m.quant) addTag(m.quant);
        if (m.params) addTag(m.params + 'B');
        cell.appendChild(metaRow);

        modelTd.appendChild(cell);
        tr.appendChild(modelTd);

        // Values
        // formatVal() returns markup built from a formatted number and the constant unit string,
        // so assigning it via innerHTML is safe.
        const tritonTd = document.createElement('td');
        tritonTd.className = 'col-data';
        tritonTd.innerHTML = formatVal(m.triton, unit);
        tr.appendChild(tritonTd);

        // Special handling for ROCm column where we want 'X' for crashes/missing if Triton has data
        const rocmTd = document.createElement('td');
        rocmTd.className = 'col-data';
        if ((m.rocm === null || m.rocm === 0) && m.triton > 0) {
            rocmTd.innerHTML = '<span class="val-na" style="color: #ef4444; font-weight:bold;">X</span>';
        } else {
            rocmTd.innerHTML = formatVal(m.rocm, unit);
        }
        tr.appendChild(rocmTd);

        tbody.appendChild(tr);
    });

    table.appendChild(tbody);
    tableResp.appendChild(table);
    card.appendChild(tableResp);
    container.appendChild(card);
}
/**
 * Formats one benchmark value as an HTML snippet for a table cell.
 *
 * @param {*} v Measured value. null/undefined/empty string and anything that does not convert
 *     to a finite number render as "N/A"; zero renders as "FAIL". Numeric strings are accepted.
 * @param {string} [unit=""] Suffix shown after the number (e.g. " ms"); inserted as-is, so it
 *     must be trusted markup-free text.
 * @returns {string} HTML string: the value with two decimals plus the unit, or a greyed-out
 *     placeholder.
 */
function formatVal(v, unit = "") {
    const notAvailable = '<span class="val-na">N/A</span>';
    if (v === null || v === undefined || v === "") return notAvailable;

    // JSON exporters sometimes emit numbers as strings; calling toFixed on a string would throw.
    const num = Number(v);
    if (!Number.isFinite(num)) return notAvailable;
    if (num === 0) return '<span class="val-na">FAIL</span>';

    return `<span class="val">${num.toFixed(2)}<span style="font-size:0.8em; color:#888;">${unit}</span></span>`;
}
// Modal Logic
/**
 * Opens the help modal for a benchmark.
 *
 * Does nothing when no metadata matches `testName`. The title is set as plain text; the body
 * is assembled from the metadata's HTML fragments (description, use case, optional terminology,
 * technical details) and the overlay is shown by adding the "active" class.
 *
 * @param {string} testName Test name used both as the modal title and for the metadata lookup.
 */
function openModal(testName) {
    const meta = getBenchmarkMeta(testName);
    if (!meta) return;

    $('modalTitle').textContent = testName;

    // Placeholders are written flush against their tags: the code block is styled with
    // white-space: pre-wrap, so any padding around the details text would be rendered verbatim.
    let content = `
        <div class="modal-section">
            <h4>What is this?</h4>
            <p>${meta.desc}</p>
        </div>
        <div class="modal-section">
            <h4>Why it matters?</h4>
            <p>${meta.usecase}</p>
        </div>`;

    if (meta.context) {
        content += `
        <div class="modal-section">
            <h4>Terminology</h4>
            <p>${meta.context}</p>
        </div>`;
    }

    content += `
        <div class="modal-section">
            <h4>Technical Details</h4>
            <div class="code-block">${meta.details}</div>
        </div>
    `;

    $('modalContent').innerHTML = content;
    $('modalOverlay').classList.add('active');
}
/** Hides the help modal by dropping the overlay's "active" class. */
function closeModal() {
    const overlay = $('modalOverlay');
    overlay.classList.remove('active');
}
// Close on click outside
$ ( 'modalOverlay' ) . addEventListener ( 'click' , e => {
if ( e . target === $ ( 'modalOverlay' ) ) closeModal ( ) ;
} ) ;
// Close on Escape
document . addEventListener ( 'keydown' , e => {
if ( e . key === "Escape" ) closeModal ( ) ;
} ) ;
init ( ) ;
< / script >
< / body >
< / html >