updated benchmarks including thunderbolt and configuration guides

Tento commit je obsažen v:
Donato Capitella
2026-02-25 10:48:42 +00:00
rodič 6875f62ccf
revize b035bcb482
26 změnil soubory, kde provedl 820 přidání a 83 odebrání
@@ -0,0 +1,7 @@
{
"elapsed_time": 485.412814248004,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.4120204373051785,
"tokens_per_second": 302.43330149293365
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 503.28860085096676,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.397386310084984,
"tokens_per_second": 291.6914862601304
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 571.4193902639672,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.35000562355367393,
"tokens_per_second": 256.91287782898553
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 824.4905019259895,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.2425740497104635,
"tokens_per_second": 178.05541683872298
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 572.292031740013,
"num_requests": 200,
"total_num_tokens": 148857,
"requests_per_second": 0.3494719285046033,
"tokens_per_second": 260.10671430704866
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 284.23000320699066,
"num_requests": 200,
"total_num_tokens": 145877,
"requests_per_second": 0.7036554823325597,
"tokens_per_second": 513.235753981134
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 1474.255295659008,
"num_requests": 200,
"total_num_tokens": 146523,
"requests_per_second": 0.13566171380825723,
"tokens_per_second": 99.38780646163637
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 1338.8605944840237,
"num_requests": 200,
"total_num_tokens": 147036,
"requests_per_second": 0.1493807501871223,
"tokens_per_second": 109.82173992256857
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 282.0738571010297,
"num_requests": 200,
"total_num_tokens": 147036,
"requests_per_second": 0.7090341588386423,
"tokens_per_second": 521.2677328949931
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 455.7690629530116,
"num_requests": 200,
"total_num_tokens": 146278,
"requests_per_second": 0.4388187269758136,
"tokens_per_second": 320.9476287228403
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 471.133652363962,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.4245079904534079,
"tokens_per_second": 311.59947769256274
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 490.5911466999678,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.40767144157681784,
"tokens_per_second": 299.24102990342374
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 548.4156070559984,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.3646869225214776,
"tokens_per_second": 267.6893183038276
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 839.8958681730437,
"num_requests": 200,
"total_num_tokens": 146805,
"requests_per_second": 0.23812475757863116,
"tokens_per_second": 174.78952518165474
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 434.26602390100015,
"num_requests": 200,
"total_num_tokens": 148857,
"requests_per_second": 0.46054719686197254,
"tokens_per_second": 342.7783704164132
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 264.80451649799943,
"num_requests": 200,
"total_num_tokens": 145877,
"requests_per_second": 0.7552741269105618,
"tokens_per_second": 550.8856190566602
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 1493.5249834020506,
"num_requests": 200,
"total_num_tokens": 146523,
"requests_per_second": 0.13391138562973798,
"tokens_per_second": 98.10548978313048
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 1320.7432732739835,
"num_requests": 200,
"total_num_tokens": 147036,
"requests_per_second": 0.1514298834959962,
"tokens_per_second": 111.32822174858649
}
@@ -0,0 +1,7 @@
{
"elapsed_time": 282.87595599895576,
"num_requests": 200,
"total_num_tokens": 147036,
"requests_per_second": 0.707023682142636,
"tokens_per_second": 519.7896706376232
}
+45 -17
Zobrazit soubor
@@ -179,31 +179,59 @@ def run_throughput(model, tp_size, backend_name="Default", output_dir=RESULTS_DI
def print_summary(tps):
print(f"\n{'MODEL':<40} | {'TP':<2} | {'Triton':<8} | {'ROCm':<8}")
print("-" * 75)
print(f"\n{'MODEL':<40} | {'TP':<2} | {'Tag':<15} | {'Triton':<8} | {'ROCm':<8}")
print("-" * 92)
for m in MODELS_TO_RUN:
msafe = m.replace("/", "_")
name_cell = m.split('/')[-1]
for tp in tps:
if tp not in MODEL_TABLE[m]["valid_tp"]: continue
# Default
try:
p1 = RESULTS_DIR / f"{msafe}_tp{tp}_throughput.json"
d1 = json.loads(p1.read_text())
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
except: val1 = "N/A"
prefix = f"{msafe}_tp{tp}"
# ROCm
try:
p2 = Path("benchmark_results_rocm") / f"{msafe}_tp{tp}_throughput.json"
d2 = json.loads(p2.read_text())
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
except: val2 = "N/A"
tags = set()
for p in RESULTS_DIR.glob(f"{prefix}*_throughput.json"):
name_part = p.name[len(prefix):-len("_throughput.json")]
tag = name_part.lstrip("_")
tags.add(tag)
for p in Path("benchmark_results_rocm").glob(f"{prefix}*_throughput.json"):
name_part = p.name[len(prefix):-len("_throughput.json")]
tag = name_part.lstrip("_")
tags.add(tag)
if not tags:
tags.add("") # Default empty tag if no files found
for tag in sorted(list(tags)):
tag_suffix = f"_{tag}" if tag else ""
# Default
try:
p1 = RESULTS_DIR / f"{prefix}{tag_suffix}_throughput.json"
if p1.exists():
d1 = json.loads(p1.read_text())
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
else:
val1 = "N/A"
except: val1 = "N/A"
# ROCm
try:
p2 = Path("benchmark_results_rocm") / f"{prefix}{tag_suffix}_throughput.json"
if p2.exists():
d2 = json.loads(p2.read_text())
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
else:
val2 = "N/A"
except: val2 = "N/A"
name_cell = m.split('/')[-1]
print(f"{name_cell:<40} | {tp:<2} | {val1:<8} | {val2:<8}")
print("-" * 75)
display_tag = tag if tag else "(Default)"
print(f"{name_cell:<40} | {tp:<2} | {display_tag:<15} | {val1:<8} | {val2:<8}")
print("-" * 92)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
+51 -20
Zobrazit soubor
@@ -213,7 +213,7 @@ def run_bench_set(model, backend_name, output_dir, extra_env=None, overrides=Non
log(f"START {model} [TP={CLUSTER_TP} | {backend_name}]...")
nuke_vllm_cache(HEAD_IP)
nuke_vllm_cache()
cmd = ["vllm", "bench", "throughput"] + get_model_args(model, overrides)
cmd.extend([
@@ -279,29 +279,60 @@ def run_cluster_throughput(model, overrides=None):
def print_summary():
eth_suffix = "_eth" if FORCE_ETH else ""
title_suffix = " (Ethernet ONLY)" if FORCE_ETH else ""
print(f"\n{f'MODEL (TP=2){title_suffix}':<50} | {'Triton':<8} | {'ROCm':<8}")
print("-" * 75)
print(f"\n{f'MODEL (TP={CLUSTER_TP}){title_suffix}':<50} | {'Tag':<15} | {'Triton':<8} | {'ROCm':<8}")
print("-" * 92)
for m in MODELS_TO_RUN:
msafe = m.replace("/", "_")
# Default
try:
p1 = RESULTS_DIR / f"{msafe}_cluster_tp{CLUSTER_TP}{eth_suffix}_throughput.json"
d1 = json.loads(p1.read_text())
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
except: val1 = "N/A"
# ROCm
try:
p2 = Path("benchmark_results_rocm") / f"{msafe}_cluster_tp{CLUSTER_TP}{eth_suffix}_throughput.json"
d2 = json.loads(p2.read_text())
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
except: val2 = "N/A"
name_cell = m.split('/')[-1]
print(f"{name_cell:<50} | {val1:<8} | {val2:<8}")
print("-" * 75)
# Find all tags used for this model by looking at the files in RESULTS_DIR
prefix = f"{msafe}_cluster_tp{CLUSTER_TP}{eth_suffix}"
# Gather all unique tags from both directories
tags = set()
for p in RESULTS_DIR.glob(f"{prefix}*_throughput.json"):
# Extract tag: {prefix}_{tag}_throughput.json or {prefix}_throughput.json
name_part = p.name[len(prefix):-len("_throughput.json")]
tag = name_part.lstrip("_")
tags.add(tag)
for p in Path("benchmark_results_rocm").glob(f"{prefix}*_throughput.json"):
name_part = p.name[len(prefix):-len("_throughput.json")]
tag = name_part.lstrip("_")
tags.add(tag)
if not tags:
tags.add("") # Default empty tag if no files found
# Sort so empty tag (Default) comes first
for tag in sorted(list(tags)):
tag_suffix = f"_{tag}" if tag else ""
# Default (Triton)
try:
p1 = RESULTS_DIR / f"{prefix}{tag_suffix}_throughput.json"
if p1.exists():
d1 = json.loads(p1.read_text())
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
else:
val1 = "N/A"
except: val1 = "N/A"
# ROCm
try:
p2 = Path("benchmark_results_rocm") / f"{prefix}{tag_suffix}_throughput.json"
if p2.exists():
d2 = json.loads(p2.read_text())
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
else:
val2 = "N/A"
except: val2 = "N/A"
display_tag = tag if tag else "(Default)"
print(f"{name_cell:<50} | {display_tag:<15} | {val1:<8} | {val2:<8}")
print("-" * 92)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="VLLM Cluster Benchmark")