Files
amd-strix-halo-vllm-toolboxes/docs/index.html
T

855 rindas
28 KiB
HTML
Neapstrādāts Vainot Vēsture

Šis fails satur neviennozīmīgus unikoda simbolus
Šis fails satur unikoda simbolus, kas var tikt sajauktas ar citām rakstzīmēm. Ja šķiet, ka tas ir ar nolūku, šo brīdinājumu var droši neņemt vērā. Jāizmanto atsoļa taustiņš (Esc), lai atklātu tās.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AMD Strix Halo (gfx1151) vLLM Benchmarks</title>
<style>
/* Design tokens: custom properties consumed through var() by the rules below. */
:root {
/* Surface colors: page background and card/modal background. */
--bg-body: #f9fafb;
--bg-card: #ffffff;
/* Text colors: primary copy and de-emphasized copy. */
--text-main: #111827;
--text-muted: #6b7280;
/* Shared 1px border color for cards, table rows and inputs. */
--border: #e5e7eb;
--primary: #ef4444;
/* AMD Red-ish */
/* Light tint of the primary color; used for focus rings and hover fills. */
--primary-bg: #fef2f2;
/* Font stacks: system UI font for copy, monospace for numbers and code. */
--font-sans: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
--font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;
}
/* Page defaults: token-driven colors and font, no default body margin. */
body {
background-color: var(--bg-body);
color: var(--text-main);
font-family: var(--font-sans);
margin: 0;
padding: 20px;
line-height: 1.5;
}
/* Centered content column, capped at 1000px. */
.container {
max-width: 1000px;
margin: 20px auto;
}
/* Header */
header {
margin-bottom: 20px;
text-align: center;
}
h1 {
font-size: 2.25rem;
font-weight: 800;
margin: 0 0 10px 0;
letter-spacing: -0.05rem;
}
/* NOTE(review): no element in the visible markup or script uses class "subtitle". */
p.subtitle {
color: var(--text-muted);
font-size: 1.1rem;
margin: 0;
}
/* Controls: filter bar holding the search box, quant select and toggles. */
.controls {
  display: flex;
  gap: 16px;
  margin-bottom: 24px;
  background: var(--bg-card);
  padding: 16px;
  border-radius: 12px;
  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
  border: 1px solid var(--border);
  align-items: center;
  flex-wrap: wrap;
}
input[type="text"],
select {
  padding: 10px 14px;
  border: 1px solid var(--border);
  border-radius: 8px;
  font-size: 0.95rem;
  /* The default outline is removed here; the :focus-visible rule below restores a visible one. */
  outline: none;
  transition: border-color 0.15s;
}
input[type="text"]:focus,
select:focus {
  border-color: var(--primary);
  box-shadow: 0 0 0 2px var(--primary-bg);
}
/* The --primary-bg ring above is almost white, so keyboard users get an
   explicit high-contrast outline as the focus indicator. */
input[type="text"]:focus-visible,
select:focus-visible {
  outline: 2px solid var(--primary);
  outline-offset: 1px;
}
/* The search box grows to fill the row but never shrinks below 200px. */
.search {
  flex: 1;
  min-width: 200px;
}
/* Section Cards */
/* Card wrapper for one benchmark table; overflow is clipped so children respect the rounded corners. */
.section-card {
background: var(--bg-card);
border-radius: 12px;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
border: 1px solid var(--border);
margin-bottom: 32px;
overflow: hidden;
}
/* Card title bar: title group on the left, short description on the right. */
.section-header {
padding: 12px 16px;
border-bottom: 1px solid var(--border);
background: #fcfcfc;
display: flex;
justify-content: space-between;
align-items: center;
}
.section-header h2 {
margin: 0;
font-size: 1.1rem;
font-weight: 600;
}
/* Table */
/* Lets a wide table scroll horizontally inside the card instead of overflowing the page. */
.table-responsive {
overflow-x: auto;
}
table {
width: 100%;
border-collapse: collapse;
font-size: 0.95rem;
}
th,
td {
padding: 8px 12px;
text-align: left;
border-bottom: 1px solid var(--border);
}
/* Column headers: small, uppercase, muted. */
th {
background: #f9fafb;
color: var(--text-muted);
font-weight: 600;
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
}
/* The last row drops its bottom border so it does not double up with the card border. */
tr:last-child td {
border-bottom: none;
}
/* Columns */
/* The model column takes whatever width the data columns leave over. */
.col-model {
width: auto;
}
/* Numeric columns: right-aligned, monospace, tabular digits; width: 1% with
   nowrap keeps each one as narrow as its content allows. */
.col-data {
text-align: right;
width: 1%;
white-space: nowrap;
font-family: var(--font-mono);
font-feature-settings: "tnum";
font-variant-numeric: tabular-nums;
}
/* Model Cell Styling */
/* Stacks the model name above its metadata tags. */
.model-cell {
display: flex;
flex-direction: column;
}
.model-name {
font-weight: 600;
color: var(--text-main);
}
.model-meta {
font-size: 0.8rem;
color: var(--text-muted);
margin-top: 4px;
display: flex;
gap: 8px;
align-items: center;
}
/* Tags */
/* Small pill used for the quantization and parameter-count labels. */
.tag {
display: inline-block;
padding: 2px 6px;
border-radius: 4px;
background: #f3f4f6;
color: #4b5563;
font-size: 0.7rem;
font-weight: 500;
}
/* Data Styling */
.val {
font-weight: 600;
}
/* Placeholder shown when a cell has no measurement. */
.val-na {
color: #d1d5db;
font-weight: 400;
}
/* NOTE(review): no element in the visible markup or script uses class "highlight". */
.highlight {
color: var(--primary);
}
/* Diff Styling */
/* Applied by formatDiff(): green for increases, red for decreases, grey for near-zero changes. */
.val-pos {
color: #16a34a;
font-weight: 600;
}
.val-neg {
color: #dc2626;
font-weight: 600;
}
.val-neu {
color: #9ca3af;
}
.col-diff {
background: #f9fafb;
font-size: 0.9rem;
}
/* Loading / empty-state placeholder rendered inside #dashboard. */
#loading {
  text-align: center;
  padding: 40px;
  color: var(--text-muted);
}
/* Modal overlay: full-viewport backdrop that centers the dialog. */
.modal-overlay {
  position: fixed;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  background: rgba(0, 0, 0, 0.5);
  display: flex;
  justify-content: center;
  align-items: center;
  z-index: 1000;
  opacity: 0;
  /* opacity: 0 alone leaves the close button focusable and readable by
     assistive technology; visibility: hidden removes the closed dialog from
     both. The visibility change is delayed so the fade-out can finish. */
  visibility: hidden;
  pointer-events: none;
  transition: opacity 0.2s ease, visibility 0s linear 0.2s;
}
/* Open state, toggled by openModal() / closeModal(). */
.modal-overlay.active {
  opacity: 1;
  visibility: visible;
  pointer-events: auto;
  /* No delay when opening, so the fade-in is visible. */
  transition: opacity 0.2s ease, visibility 0s;
}
/* Dialog panel: column layout capped at 85% of the viewport height; the
   panel clips its overflow and .modal-body is the part that scrolls. */
.modal {
background: var(--bg-card);
width: 90%;
max-width: 600px;
border-radius: 12px;
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
display: flex;
flex-direction: column;
max-height: 85vh;
overflow: hidden;
}
/* Title bar: title on the left, close button on the right. */
.modal-header {
padding: 20px 24px;
border-bottom: 1px solid var(--border);
display: flex;
justify-content: space-between;
align-items: center;
background: #f9fafb;
}
.modal-header h3 {
margin: 0;
font-size: 1.25rem;
}
/* Borderless close button. */
.modal-close {
background: none;
border: none;
font-size: 1.5rem;
cursor: pointer;
color: var(--text-muted);
line-height: 1;
}
/* Scrollable content area filled by openModal(). */
.modal-body {
padding: 24px;
overflow-y: auto;
}
.modal-section {
margin-bottom: 24px;
}
/* Section labels ("About", "Usage", "Details"). */
.modal-section h4 {
margin: 0 0 8px 0;
font-size: 0.9rem;
text-transform: uppercase;
color: var(--text-muted);
letter-spacing: 0.05em;
}
.modal-section p {
margin: 0;
font-size: 0.95rem;
color: var(--text-main);
}
/* Monospace box for benchmark details; pre-wrap preserves the newlines in the details string. */
.code-block {
background: #f3f4f6;
padding: 12px;
border-radius: 6px;
font-family: var(--font-mono);
font-size: 0.85rem;
color: #374151;
margin-top: 8px;
white-space: pre-wrap;
}
/* Help Button */
/* Round 24px "?" button placed next to a section title. */
.btn-help {
background: none;
border: 1px solid var(--border);
color: var(--text-muted);
width: 24px;
height: 24px;
border-radius: 50%;
display: inline-flex;
align-items: center;
justify-content: center;
font-size: 0.85rem;
font-weight: 600;
cursor: pointer;
margin-left: 10px;
transition: all 0.2s;
}
.btn-help:hover {
border-color: var(--primary);
color: var(--primary);
background: var(--primary-bg);
}
/* Keeps the section title and its help button on one line. */
.section-title-row {
display: flex;
align-items: center;
}
/* One-line benchmark description shown in the card header. */
.section-desc {
color: var(--text-muted);
font-size: 0.9rem;
font-weight: 400;
margin-left: 12px;
}
/* Info Box for Cluster */
/* NOTE(review): render() styles its info box with inline cssText and never
   applies this class, so these two rules look unused in the visible file. */
.info-box {
background: #eff6ff;
border: 1px solid #bfdbfe;
border-radius: 8px;
padding: 16px;
margin-bottom: 24px;
display: flex;
align-items: flex-start;
gap: 12px;
color: #1e40af;
font-size: 0.95rem;
}
.info-box a {
color: #1d4ed8;
font-weight: 600;
text-decoration: underline;
}
/* Footer */
footer {
margin-top: 60px;
padding-top: 20px;
border-top: 1px solid var(--border);
color: var(--text-muted);
font-size: 0.85rem;
line-height: 1.6;
}
/* Vertical list of system configuration rows. */
.sys-config {
display: flex;
flex-direction: column;
gap: 8px;
margin-top: 12px;
max-width: 800px;
}
/* One row: fixed 140px label column, value fills the rest. */
.sys-item {
display: grid;
grid-template-columns: 140px 1fr;
align-items: baseline;
}
.sys-label {
font-weight: 600;
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #9ca3af;
}
/* Tabs */
/* Tab strip; the buttons are created by render(). */
.tab-nav {
display: flex;
gap: 8px;
margin-bottom: 24px;
border-bottom: 1px solid var(--border);
padding-bottom: 0px;
}
/* The transparent bottom border reserves space so activating a tab does not shift the layout. */
.tab-btn {
background: none;
border: none;
padding: 12px 20px;
font-size: 1rem;
font-weight: 500;
color: var(--text-muted);
cursor: pointer;
border-bottom: 2px solid transparent;
transition: all 0.2s;
}
.tab-btn:hover {
color: var(--text-main);
}
/* Selected tab: primary-colored text and underline. */
.tab-btn.active {
color: var(--primary);
border-bottom-color: var(--primary);
font-weight: 600;
}
</style>
</head>
<body>
<div class="container">
<header>
  <h1>AMD Strix Halo (gfx1151) vLLM Benchmarks</h1>
  <p style="margin: 4px 0 0 0; font-size: 0.9rem;">
    <!-- rel="noopener noreferrer" keeps the new tab from reaching window.opener. -->
    <a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/" target="_blank" rel="noopener noreferrer"
      style="color: var(--primary); text-decoration: none;">View on GitHub &rarr;</a>
  </p>
</header>
<div class="controls">
  <!-- A placeholder is not an accessible name, so both filter controls carry an aria-label. -->
  <input type="text" id="searchInput" class="search" placeholder="Search models..." autocomplete="off"
    aria-label="Search models">
  <!-- Options after "All Quants" are appended by setupControls(). -->
  <select id="quantFilter" style="max-width: 150px;" aria-label="Filter by quantization">
    <option value="">All Quants</option>
  </select>
  <!-- Tensor-parallel toggles: choose which TP columns render() shows. -->
  <div
    style="display: flex; gap: 12px; align-items: center; border-left: 1px solid #e5e7eb; padding-left: 12px;">
    <label
      style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
      <input type="checkbox" id="toggleTP1" checked> TP1
    </label>
    <label
      style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
      <input type="checkbox" id="toggleTP2" checked> TP2
    </label>
  </div>
  <!-- Attention backend toggles: choose which backend columns render() shows. -->
  <div
    style="display: flex; align-items: center; gap: 8px; border-left: 1px solid #e5e7eb; padding-left: 12px;">
    <span
      style="font-size: 0.8rem; font-weight: 600; text-transform: uppercase; color: #9ca3af; letter-spacing: 0.05em;">Attention</span>
    <div style="display: flex; gap: 12px;">
      <label
        style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
        <input type="checkbox" id="toggleTriton" checked> Triton
      </label>
      <label
        style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
        <input type="checkbox" id="toggleRocm"> ROCm
      </label>
    </div>
  </div>
</div>
<!-- Tab strip: render() empties this element and adds one button per test. -->
<nav id="tabNav" class="tab-nav">
<!-- Dynamic Tabs -->
</nav>
<!-- Results area: render() replaces the contents, including the loading placeholder below. -->
<div id="dashboard">
<div id="loading">Loading benchmark results...</div>
</div>
<!-- Static description of the benchmark hardware and software; not generated by the script. -->
<footer>
<div style="font-weight: 600; margin-bottom: 8px;">System Configuration</div>
<!-- Each .sys-item is a label/value row laid out on a two-column grid. -->
<div class="sys-config">
<div class="sys-item">
<span class="sys-label">Node 1 & 2</span>
<span>Framework Desktop Mainboard · AMD Ryzen AI MAX 395+ (Strix Halo) · 128GB Unified RAM</span>
</div>
<div class="sys-item">
<span class="sys-label">OS/Kernel</span>
<span>Fedora 43 (Rawhide) · Linux 6.18.5-200.fc43.x86_64</span>
</div>
<div class="sys-item">
<span class="sys-label">Interconnect</span>
<span>RDMA (RoCE v2) via Intel E810 (Direct Attach) · ~5µs Latency</span>
</div>
</div>
</footer>
</div>
<!-- Modal overlay: shown when openModal() adds the "active" class, hidden again by closeModal(). -->
<div id="modalOverlay" class="modal-overlay">
  <div class="modal">
    <div class="modal-header">
      <h3 id="modalTitle">Benchmark Info</h3>
      <!-- The icon-only button needs an explicit type and an accessible name;
           "×" alone is announced as a multiplication sign. -->
      <button type="button" class="modal-close" onclick="closeModal()" aria-label="Close">×</button>
    </div>
    <div class="modal-body" id="modalContent">
      <!-- Filled by openModal() -->
    </div>
  </div>
</div>
<!-- Script Logic Updates Below -->
<script>
// DOM lookup shorthand. Declared first so every function below can use it.
const $ = (elementId) => document.getElementById(elementId);

// Raw run records loaded from results.json (assigned by init()).
let rawRuns = [];
// Per-test view model built by processData().
let tests = [];

// UI state driven by the filter controls and tabs; read by render().
let state = {
  // Filters
  search: "",
  quant: "",
  // Currently selected tab (processData() resets it to the first test).
  activeTab: "Throughput",
  // Column visibility; these mirror the initial checkbox states in the markup.
  showTP1: true,
  showTP2: true,
  showTriton: true,
  showRocm: false
};
// Metadata for each benchmark family, looked up by getMeta().
// `short` appears in the card header; `desc`, `usecase` and `details` are
// injected into the help modal with innerHTML, so they must be valid HTML:
// use tags rather than Markdown markers and escape literal < and >.
// `unit` is appended to every formatted value.
const BENCHMARK_INFO = {
  "Throughput": {
    short: "Maximum raw compute capacity (Tokens/Sec).",
    desc: "Measures the absolute maximum number of tokens the system can generate per second by fully saturating the GPU compute capability.",
    usecase: "Demonstrates the raw horsepower and architectural efficiency.",
    // Rendered inside a white-space: pre-wrap block, so line breaks and
    // leading whitespace are significant; keep these lines unindented.
    details: `
<b>Test Configuration:</b>
• <b>Dataset:</b> ShareGPT (Random Sample, 100 Prompts)
• <b>Output Length:</b> 512 Tokens (Fixed)
• <b>Batch Budget:</b> 8192 - 32768 Tokens (Dynamic per model)
• <b>GPU Alloc:</b> 90% VRAM per GPU
• <b>Pipeline:</b> <code>vllm bench throughput</code> (Offline)
• <b>Cluster Config:</b> Ray Distributed (RoCE v2 RDMA, TP=2)
<b>Metric:</b> Tokens per Second (higher is better).`,
    unit: " tok/s"
  },
  "TTFT": {
    short: "Time To First Token (Response Latency).",
    desc: "Delay between sending a request and seeing the first character.",
    usecase: "Responsiveness. Low TTFT makes the AI feel 'snappy'.",
    details: "Command: <code>vllm bench serve</code>\nMetric: Milliseconds (lower is better).",
    unit: " ms"
  },
  "TPOT": {
    short: "Time Per Output Token (Streaming Speed).",
    desc: "Measures how fast the text generates <em>after</em> the first token.",
    usecase: "Fluidity. Industry standard is &lt;50ms (&gt;20 tok/s).",
    details: "Command: <code>vllm bench serve</code>\nMetric: Milliseconds (lower is better).",
    unit: " ms"
  }
};
/**
 * Loads results.json, builds the view model, wires the controls and renders
 * the dashboard. Any failure is reported on the page and logged to the console.
 */
async function init() {
  try {
    const res = await fetch('results.json');
    // fetch() resolves on HTTP errors too; without this check a 404 page
    // would surface as a confusing JSON parse error.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`.trim());
    }
    const data = await res.json();
    // Accept only an array of runs; anything else is treated as "no data".
    rawRuns = Array.isArray(data?.runs) ? data.runs : [];
    processData();
    setupControls();
    render();
  } catch (e) {
    // render() may already have removed #loading, so fall back to the
    // dashboard container rather than dereferencing a missing element.
    const target = $('loading') || $('dashboard');
    if (target) target.textContent = "Error loading results.json: " + e.message;
    console.error(e);
  }
}
/**
 * Fills the quantization dropdown from the loaded runs and attaches the
 * listeners that copy each control's value into `state` and re-render.
 */
function setupControls() {
  const quantSelect = $('quantFilter');

  // Collect the distinct, non-empty quant labels.
  const uniqueQuants = new Set();
  for (const run of rawRuns) {
    if (run.quant) uniqueQuants.add(run.quant);
  }

  // One <option> per label, in default sort order.
  for (const quant of Array.from(uniqueQuants).sort()) {
    const option = document.createElement('option');
    option.value = quant;
    option.textContent = quant;
    quantSelect.appendChild(option);
  }

  // Text search is stored lower-cased; render() lower-cases model names to match.
  $('searchInput').addEventListener('input', (event) => {
    state.search = event.target.value.toLowerCase();
    render();
  });
  quantSelect.addEventListener('change', (event) => {
    state.quant = event.target.value;
    render();
  });

  // Checkbox id -> state flag it controls.
  const toggleBindings = [
    ['toggleTP1', 'showTP1'],
    ['toggleTP2', 'showTP2'],
    ['toggleTriton', 'showTriton'],
    ['toggleRocm', 'showRocm']
  ];
  for (const [elementId, stateKey] of toggleBindings) {
    $(elementId).addEventListener('change', (event) => {
      state[stateKey] = event.target.checked;
      render();
    });
  }
}
/**
 * Groups `rawRuns` by test and then by model, storing each run's `tps_mean`
 * under results[tp][backend]. The result is written to `tests`, sorted with
 * tests whose name contains "Throughput" first and the rest alphabetically;
 * the first test becomes the active tab.
 */
function processData() {
  const emptyResult = () => ({ triton: null, rocm: null });
  const groupsByTest = {};

  for (const run of rawRuns) {
    const testName = run.test;
    let group = groupsByTest[testName];
    if (!group) {
      group = { name: testName, models: {} };
      groupsByTest[testName] = group;
    }

    // Prefer the cleaned-up model name when the run provides one.
    const modelName = run.model_clean || run.model;
    let entry = group.models[modelName];
    if (!entry) {
      // Quant and params are taken from the first run seen for this model.
      entry = {
        name: modelName,
        quant: run.quant,
        params: run.params_b || run.name_params_b,
        results: { 1: emptyResult(), 2: emptyResult() }
      };
      group.models[modelName] = entry;
    }

    // A missing or zero tp is treated as TP1.
    const tp = run.tp || 1;
    if (!entry.results[tp]) entry.results[tp] = emptyResult();

    switch (run.backend) {
      case "Triton":
        entry.results[tp].triton = run.tps_mean;
        break;
      case "ROCm":
        entry.results[tp].rocm = run.tps_mean;
        break;
    }
  }

  tests = Object.values(groupsByTest).map((group) => ({
    name: group.name,
    models: Object.values(group.models)
  }));

  tests.sort((a, b) => {
    const aIsThroughput = a.name.includes("Throughput");
    const bIsThroughput = b.name.includes("Throughput");
    if (aIsThroughput !== bIsThroughput) return aIsThroughput ? -1 : 1;
    return a.name.localeCompare(b.name);
  });

  if (tests.length > 0) state.activeTab = tests[0].name;
}
/**
 * Formats one measurement as an HTML snippet for a table cell.
 *
 * @param {number|string|null|undefined} v - Measured value. Numeric strings
 *   are accepted; anything that is not a finite number renders as "-".
 * @param {string} unit - Unit suffix appended after the number.
 * @returns {string} "-" for a missing or invalid value, a red "X" for exactly
 *   0, otherwise the value with two decimals followed by the unit.
 */
function formatVal(v, unit) {
  const NA = '<span class="val-na">-</span>';
  if (v === null || v === undefined) return NA;
  // Coerce defensively: a single string or NaN in results.json would
  // otherwise make toFixed() throw (or print "NaN") and abort the whole render.
  let num = NaN;
  if (typeof v === "number") {
    num = v;
  } else if (typeof v === "string" && v.trim() !== "") {
    num = Number(v);
  }
  if (!Number.isFinite(num)) return NA;
  if (num === 0) return '<span class="val-na" style="color:#ef4444;font-weight:bold;">X</span>';
  return `<span class="val">${num.toFixed(2)}<span style="font-size:0.75em; color:#9ca3af; margin-left:2px;">${unit}</span></span>`;
}
/**
 * Returns the BENCHMARK_INFO entry whose key occurs in the test name.
 * Keys are checked in the order Throughput, TTFT, TPOT; when none matches,
 * a stub with empty `short`, `desc` and `unit` is returned.
 */
function getMeta(name) {
  for (const key of ["Throughput", "TTFT", "TPOT"]) {
    if (name.includes(key)) return BENCHMARK_INFO[key];
  }
  return { short: "", desc: "", unit: "" };
}
// Escapes a value for safe interpolation into HTML text and quoted attributes.
function escapeHtml(value) {
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  return String(value).replace(/[&<>"']/g, ch => entities[ch]);
}

/**
 * Rebuilds the tab strip and the results table for the active test from
 * `tests` and `state`. Called after every filter, toggle or tab change.
 *
 * Strings that come from results.json (test names, model names, quant and
 * parameter labels) are HTML-escaped before being placed in markup. Text from
 * BENCHMARK_INFO is authored HTML and is inserted as-is.
 */
function render() {
  const container = $('dashboard');
  const tabNav = $('tabNav');

  // Tabs: one button per test; textContent keeps the name inert.
  tabNav.innerHTML = "";
  tests.forEach(test => {
    const btn = document.createElement('button');
    btn.type = "button";
    btn.className = `tab-btn ${test.name === state.activeTab ? 'active' : ''}`;
    btn.textContent = test.name;
    btn.onclick = () => { state.activeTab = test.name; render(); };
    tabNav.appendChild(btn);
  });

  container.innerHTML = "";
  const activeTest = tests.find(t => t.name === state.activeTab);
  if (!activeTest) {
    container.innerHTML = '<div id="loading">No Data</div>';
    return;
  }

  // Explain what TP2 means whenever TP2 columns can be shown.
  if (state.showTP2) {
    const infoBox = document.createElement('div');
    infoBox.style.cssText = "background:#f8fafc; border:1px solid #e2e8f0; border-radius:6px; padding:10px 16px; margin-bottom:20px; font-size:0.9rem; color:#64748b; display:flex; justify-content:space-between; align-items:center;";
    infoBox.innerHTML = `
      <span><b>TP2</b> = Distributed Cluster (2x Strix Halo, RDMA RoCE v2).</span>
      <a href="https://github.com/kyuz0/amd-strix-halo-vllm-toolboxes/blob/main/rdma_cluster/setup_guide.md" target="_blank" rel="noopener noreferrer" style="color:#3b82f6; text-decoration:none; font-weight:500;">Cluster Setup Guide &rarr;</a>
    `;
    container.appendChild(infoBox);
  }

  // Filter by search text and quant, then sort by parameter count and name.
  const models = activeTest.models.filter(m => {
    const matchS = !state.search || m.name.toLowerCase().includes(state.search);
    const matchQ = !state.quant || m.quant === state.quant;
    return matchS && matchQ;
  }).sort((a, b) => (parseFloat(a.params) || 0) - (parseFloat(b.params) || 0) || a.name.localeCompare(b.name));

  const card = document.createElement('div');
  card.className = "section-card";
  const meta = getMeta(activeTest.name);
  const unit = meta.unit || "";

  // Card header. The help button gets a listener instead of an inline onclick
  // string, so a test name containing quotes cannot break or inject script.
  const header = document.createElement('div');
  header.className = "section-header";
  header.innerHTML = `
    <div class="section-title-row">
      <h2>${escapeHtml(activeTest.name)}</h2>
      <button type="button" class="btn-help" aria-label="About this benchmark">?</button>
    </div>
    <span class="section-desc">${meta.short}</span>
  `;
  header.querySelector('.btn-help').addEventListener('click', () => openModal(activeTest.name));
  card.appendChild(header);

  const tableResp = document.createElement('div');
  tableResp.className = "table-responsive";
  const table = document.createElement('table');

  // Visible data columns: every enabled TP x backend combination.
  const cols = [];
  if (state.showTP1) {
    if (state.showTriton) cols.push({ id: "tp1_triton", label: "TP1 Triton" });
    if (state.showRocm) cols.push({ id: "tp1_rocm", label: "TP1 ROCm" });
  }
  if (state.showTP2) {
    if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 Triton" });
    if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 ROCm" });
  }
  // A Diff column is only meaningful when exactly two columns are compared.
  const showDiff = cols.length === 2;

  // Thead
  let theadHtml = `<thead><tr><th scope="col" class="col-model">Model</th>`;
  cols.forEach(c => {
    // TP2 columns are tinted so cluster results stand out.
    const style = c.id.startsWith("tp2") ? "background:#eff6ff; color:#1e40af;" : "";
    theadHtml += `<th scope="col" class="col-data" style="${style}">${c.label}</th>`;
  });
  if (showDiff) {
    theadHtml += `<th scope="col" class="col-data col-diff">Diff</th>`;
  }
  theadHtml += `</tr></thead>`;
  table.innerHTML = theadHtml;

  // Tbody
  const tbody = document.createElement('tbody');
  if (models.length === 0) {
    // Without this row an over-restrictive filter leaves a bare header.
    const emptyRow = document.createElement('tr');
    const colSpan = 1 + cols.length + (showDiff ? 1 : 0);
    emptyRow.innerHTML = `<td colspan="${colSpan}" style="text-align:center; color:var(--text-muted);">No models match the current filters.</td>`;
    tbody.appendChild(emptyRow);
  }
  models.forEach(m => {
    const tr = document.createElement('tr');
    const safeName = escapeHtml(m.name);

    let metaHtml = "";
    if (m.quant) metaHtml += `<span class="tag">${escapeHtml(m.quant)}</span>`;
    if (m.params) metaHtml += `<span class="tag">${escapeHtml(m.params)}B</span>`;

    let rowHtml = `
      <td>
        <div class="model-cell">
          <a href="https://huggingface.co/${safeName}" target="_blank" rel="noopener noreferrer" class="model-name" style="text-decoration:none;color:inherit;">${safeName}</a>
          <div class="model-meta">${metaHtml}</div>
        </div>
      </td>
    `;

    // Data cells share getVal() with the diff column so the two cannot drift apart.
    cols.forEach(c => {
      const bg = c.id.startsWith("tp2") ? 'style="background:#fbfdff;"' : "";
      rowHtml += `<td class="col-data" ${bg}>${formatVal(getVal(m, c.id), unit)}</td>`;
    });

    if (showDiff) {
      const v1 = getVal(m, cols[0].id);
      const v2 = getVal(m, cols[1].id);
      rowHtml += `<td class="col-data col-diff">${formatDiff(v1, v2)}</td>`;
    }

    tr.innerHTML = rowHtml;
    tbody.appendChild(tr);
  });

  table.appendChild(tbody);
  tableResp.appendChild(table);
  card.appendChild(tableResp);
  container.appendChild(card);
}
/**
 * Looks up the measurement for one table column on a model entry.
 *
 * @param {object} m - Model entry with a `results` map keyed by TP size.
 * @param {string} colId - One of "tp1_triton", "tp1_rocm", "tp2_triton", "tp2_rocm".
 * @returns {*} The stored value, undefined when that TP size has no results
 *   object, or null for an unrecognized column id.
 */
function getVal(m, colId) {
  switch (colId) {
    case "tp1_triton":
      return m.results[1]?.triton;
    case "tp1_rocm":
      return m.results[1]?.rocm;
    case "tp2_triton":
      return m.results[2]?.triton;
    case "tp2_rocm":
      return m.results[2]?.rocm;
    default:
      return null;
  }
}
/**
 * Formats the relative change from v1 to v2 as a colored percentage.
 *
 * @param {number|null|undefined} v1 - Baseline value (first visible column).
 * @param {number|null|undefined} v2 - Compared value (second visible column).
 * @returns {string} "-" when either value is missing, the baseline is 0 or
 *   the result is not a finite number; otherwise a span with class val-pos
 *   (> +0.5%), val-neg (< -0.5%) or val-neu, showing the change to one decimal.
 */
function formatDiff(v1, v2) {
  const NA = '<span class="val-na">-</span>';
  if (v1 === null || v2 === null || v1 === undefined || v2 === undefined || v1 === 0) return NA;
  const diff = ((v2 - v1) / v1) * 100;
  // Non-numeric input would otherwise be displayed as "NaN%".
  if (!Number.isFinite(diff)) return NA;
  const cls = diff > 0.5 ? "val-pos" : (diff < -0.5 ? "val-neg" : "val-neu");
  const rounded = diff.toFixed(1);
  // A change that rounds to zero is shown as plain "0.0%", not "+0.0%" or "-0.0%".
  const isZero = Number(rounded) === 0;
  const text = isZero ? "0.0" : rounded;
  const sign = !isZero && diff > 0 ? "+" : "";
  return `<span class="${cls}">${sign}${text}%</span>`;
}
// --- Basic Modal Implementation ---
/**
 * Opens the help modal for a benchmark.
 *
 * @param {string} name - Test name; used as the modal title and passed to
 *   getMeta() to find the descriptive text.
 *
 * Sections without text are omitted. When getMeta() has nothing for this
 * test, a short notice is shown instead of the literal string "undefined".
 */
function openModal(name) {
  const m = getMeta(name);
  // Metadata text is authored HTML from BENCHMARK_INFO, so it is inserted as markup.
  const section = (title, bodyHtml) =>
    bodyHtml ? `<div class="modal-section"><h4>${title}</h4>${bodyHtml}</div>` : "";
  const html =
    section("About", m.desc ? `<p>${m.desc}</p>` : "") +
    section("Usage", m.usecase ? `<p>${m.usecase}</p>` : "") +
    section("Details", m.details ? `<div class="code-block">${m.details}</div>` : "");

  $('modalTitle').textContent = name;
  $('modalContent').innerHTML = html ||
    '<div class="modal-section"><p>No additional information is available for this benchmark.</p></div>';
  $('modalOverlay').classList.add('active');
}
// Hides the help modal by removing the "active" class from the overlay.
function closeModal() {
  const overlay = $('modalOverlay');
  overlay.classList.remove('active');
}
// Close on a click that lands on the backdrop itself, not on the dialog inside it.
$('modalOverlay').addEventListener('click', (event) => {
  if (event.target !== $('modalOverlay')) return;
  closeModal();
});
// Close on Escape from anywhere in the document.
document.addEventListener('keydown', (event) => {
  if (event.key !== "Escape") return;
  closeModal();
});
// Start loading data once the script has been parsed.
init();
</script>
</body>
</html>