updated benchmarks including thunderbolt and configuration guides
Tento commit je obsažen v:
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 485.412814248004,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.4120204373051785,
|
||||
"tokens_per_second": 302.43330149293365
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 503.28860085096676,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.397386310084984,
|
||||
"tokens_per_second": 291.6914862601304
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 571.4193902639672,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.35000562355367393,
|
||||
"tokens_per_second": 256.91287782898553
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 824.4905019259895,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.2425740497104635,
|
||||
"tokens_per_second": 178.05541683872298
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 572.292031740013,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 148857,
|
||||
"requests_per_second": 0.3494719285046033,
|
||||
"tokens_per_second": 260.10671430704866
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 284.23000320699066,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 145877,
|
||||
"requests_per_second": 0.7036554823325597,
|
||||
"tokens_per_second": 513.235753981134
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 1474.255295659008,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146523,
|
||||
"requests_per_second": 0.13566171380825723,
|
||||
"tokens_per_second": 99.38780646163637
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 1338.8605944840237,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 147036,
|
||||
"requests_per_second": 0.1493807501871223,
|
||||
"tokens_per_second": 109.82173992256857
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 282.0738571010297,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 147036,
|
||||
"requests_per_second": 0.7090341588386423,
|
||||
"tokens_per_second": 521.2677328949931
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 455.7690629530116,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146278,
|
||||
"requests_per_second": 0.4388187269758136,
|
||||
"tokens_per_second": 320.9476287228403
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 471.133652363962,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.4245079904534079,
|
||||
"tokens_per_second": 311.59947769256274
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 490.5911466999678,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.40767144157681784,
|
||||
"tokens_per_second": 299.24102990342374
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 548.4156070559984,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.3646869225214776,
|
||||
"tokens_per_second": 267.6893183038276
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 839.8958681730437,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146805,
|
||||
"requests_per_second": 0.23812475757863116,
|
||||
"tokens_per_second": 174.78952518165474
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 434.26602390100015,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 148857,
|
||||
"requests_per_second": 0.46054719686197254,
|
||||
"tokens_per_second": 342.7783704164132
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 264.80451649799943,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 145877,
|
||||
"requests_per_second": 0.7552741269105618,
|
||||
"tokens_per_second": 550.8856190566602
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 1493.5249834020506,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 146523,
|
||||
"requests_per_second": 0.13391138562973798,
|
||||
"tokens_per_second": 98.10548978313048
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 1320.7432732739835,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 147036,
|
||||
"requests_per_second": 0.1514298834959962,
|
||||
"tokens_per_second": 111.32822174858649
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"elapsed_time": 282.87595599895576,
|
||||
"num_requests": 200,
|
||||
"total_num_tokens": 147036,
|
||||
"requests_per_second": 0.707023682142636,
|
||||
"tokens_per_second": 519.7896706376232
|
||||
}
|
||||
@@ -179,31 +179,59 @@ def run_throughput(model, tp_size, backend_name="Default", output_dir=RESULTS_DI
|
||||
|
||||
|
||||
def print_summary(tps):
|
||||
print(f"\n{'MODEL':<40} | {'TP':<2} | {'Triton':<8} | {'ROCm':<8}")
|
||||
print("-" * 75)
|
||||
print(f"\n{'MODEL':<40} | {'TP':<2} | {'Tag':<15} | {'Triton':<8} | {'ROCm':<8}")
|
||||
print("-" * 92)
|
||||
|
||||
for m in MODELS_TO_RUN:
|
||||
msafe = m.replace("/", "_")
|
||||
name_cell = m.split('/')[-1]
|
||||
|
||||
for tp in tps:
|
||||
if tp not in MODEL_TABLE[m]["valid_tp"]: continue
|
||||
|
||||
# Default
|
||||
try:
|
||||
p1 = RESULTS_DIR / f"{msafe}_tp{tp}_throughput.json"
|
||||
d1 = json.loads(p1.read_text())
|
||||
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
|
||||
except: val1 = "N/A"
|
||||
prefix = f"{msafe}_tp{tp}"
|
||||
|
||||
# ROCm
|
||||
try:
|
||||
p2 = Path("benchmark_results_rocm") / f"{msafe}_tp{tp}_throughput.json"
|
||||
d2 = json.loads(p2.read_text())
|
||||
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
|
||||
except: val2 = "N/A"
|
||||
tags = set()
|
||||
for p in RESULTS_DIR.glob(f"{prefix}*_throughput.json"):
|
||||
name_part = p.name[len(prefix):-len("_throughput.json")]
|
||||
tag = name_part.lstrip("_")
|
||||
tags.add(tag)
|
||||
|
||||
for p in Path("benchmark_results_rocm").glob(f"{prefix}*_throughput.json"):
|
||||
name_part = p.name[len(prefix):-len("_throughput.json")]
|
||||
tag = name_part.lstrip("_")
|
||||
tags.add(tag)
|
||||
|
||||
if not tags:
|
||||
tags.add("") # Default empty tag if no files found
|
||||
|
||||
for tag in sorted(list(tags)):
|
||||
tag_suffix = f"_{tag}" if tag else ""
|
||||
|
||||
# Default
|
||||
try:
|
||||
p1 = RESULTS_DIR / f"{prefix}{tag_suffix}_throughput.json"
|
||||
if p1.exists():
|
||||
d1 = json.loads(p1.read_text())
|
||||
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
|
||||
else:
|
||||
val1 = "N/A"
|
||||
except: val1 = "N/A"
|
||||
|
||||
# ROCm
|
||||
try:
|
||||
p2 = Path("benchmark_results_rocm") / f"{prefix}{tag_suffix}_throughput.json"
|
||||
if p2.exists():
|
||||
d2 = json.loads(p2.read_text())
|
||||
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
|
||||
else:
|
||||
val2 = "N/A"
|
||||
except: val2 = "N/A"
|
||||
|
||||
name_cell = m.split('/')[-1]
|
||||
print(f"{name_cell:<40} | {tp:<2} | {val1:<8} | {val2:<8}")
|
||||
print("-" * 75)
|
||||
display_tag = tag if tag else "(Default)"
|
||||
print(f"{name_cell:<40} | {tp:<2} | {display_tag:<15} | {val1:<8} | {val2:<8}")
|
||||
|
||||
print("-" * 92)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
@@ -213,7 +213,7 @@ def run_bench_set(model, backend_name, output_dir, extra_env=None, overrides=Non
|
||||
|
||||
log(f"START {model} [TP={CLUSTER_TP} | {backend_name}]...")
|
||||
|
||||
nuke_vllm_cache(HEAD_IP)
|
||||
nuke_vllm_cache()
|
||||
|
||||
cmd = ["vllm", "bench", "throughput"] + get_model_args(model, overrides)
|
||||
cmd.extend([
|
||||
@@ -279,29 +279,60 @@ def run_cluster_throughput(model, overrides=None):
|
||||
def print_summary():
|
||||
eth_suffix = "_eth" if FORCE_ETH else ""
|
||||
title_suffix = " (Ethernet ONLY)" if FORCE_ETH else ""
|
||||
print(f"\n{f'MODEL (TP=2){title_suffix}':<50} | {'Triton':<8} | {'ROCm':<8}")
|
||||
print("-" * 75)
|
||||
print(f"\n{f'MODEL (TP={CLUSTER_TP}){title_suffix}':<50} | {'Tag':<15} | {'Triton':<8} | {'ROCm':<8}")
|
||||
print("-" * 92)
|
||||
|
||||
for m in MODELS_TO_RUN:
|
||||
msafe = m.replace("/", "_")
|
||||
|
||||
# Default
|
||||
try:
|
||||
p1 = RESULTS_DIR / f"{msafe}_cluster_tp{CLUSTER_TP}{eth_suffix}_throughput.json"
|
||||
d1 = json.loads(p1.read_text())
|
||||
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
|
||||
except: val1 = "N/A"
|
||||
|
||||
# ROCm
|
||||
try:
|
||||
p2 = Path("benchmark_results_rocm") / f"{msafe}_cluster_tp{CLUSTER_TP}{eth_suffix}_throughput.json"
|
||||
d2 = json.loads(p2.read_text())
|
||||
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
|
||||
except: val2 = "N/A"
|
||||
|
||||
name_cell = m.split('/')[-1]
|
||||
print(f"{name_cell:<50} | {val1:<8} | {val2:<8}")
|
||||
print("-" * 75)
|
||||
|
||||
# Find all tags used for this model by looking at the files in RESULTS_DIR
|
||||
prefix = f"{msafe}_cluster_tp{CLUSTER_TP}{eth_suffix}"
|
||||
|
||||
# Gather all unique tags from both directories
|
||||
tags = set()
|
||||
for p in RESULTS_DIR.glob(f"{prefix}*_throughput.json"):
|
||||
# Extract tag: {prefix}_{tag}_throughput.json or {prefix}_throughput.json
|
||||
name_part = p.name[len(prefix):-len("_throughput.json")]
|
||||
tag = name_part.lstrip("_")
|
||||
tags.add(tag)
|
||||
|
||||
for p in Path("benchmark_results_rocm").glob(f"{prefix}*_throughput.json"):
|
||||
name_part = p.name[len(prefix):-len("_throughput.json")]
|
||||
tag = name_part.lstrip("_")
|
||||
tags.add(tag)
|
||||
|
||||
if not tags:
|
||||
tags.add("") # Default empty tag if no files found
|
||||
|
||||
# Sort so empty tag (Default) comes first
|
||||
for tag in sorted(list(tags)):
|
||||
tag_suffix = f"_{tag}" if tag else ""
|
||||
|
||||
# Default (Triton)
|
||||
try:
|
||||
p1 = RESULTS_DIR / f"{prefix}{tag_suffix}_throughput.json"
|
||||
if p1.exists():
|
||||
d1 = json.loads(p1.read_text())
|
||||
val1 = f"{d1.get('tokens_per_second', 0):.1f}"
|
||||
else:
|
||||
val1 = "N/A"
|
||||
except: val1 = "N/A"
|
||||
|
||||
# ROCm
|
||||
try:
|
||||
p2 = Path("benchmark_results_rocm") / f"{prefix}{tag_suffix}_throughput.json"
|
||||
if p2.exists():
|
||||
d2 = json.loads(p2.read_text())
|
||||
val2 = f"{d2.get('tokens_per_second', 0):.1f}"
|
||||
else:
|
||||
val2 = "N/A"
|
||||
except: val2 = "N/A"
|
||||
|
||||
display_tag = tag if tag else "(Default)"
|
||||
print(f"{name_cell:<50} | {display_tag:<15} | {val1:<8} | {val2:<8}")
|
||||
|
||||
print("-" * 92)
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="VLLM Cluster Benchmark")
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele