feat: Update ROCm benchmark result paths, improve cluster node discovery and cache clearing, and rename the cluster benchmark results directory.

2026-02-02 07:35:50 +00:00
commit 6f118ff936
@@ -181,7 +181,7 @@ def print_summary(tps):
            
            # ROCm
            try:
-                p2 = Path("benchmark_results_rocm_attn/benchmark_results") / f"{msafe}_tp{tp}_throughput.json"
+                p2 = Path("benchmark_results_rocm") / f"{msafe}_tp{tp}_throughput.json"
                d2 = json.loads(p2.read_text())
                val2 = f"{d2.get('tokens_per_second', 0):.1f}"
            except: val2 = "N/A"
@@ -210,7 +210,7 @@ if __name__ == "__main__":
            run_throughput(m, tp, "Default", RESULTS_DIR)
            
            # 2. ROCm Attention
-            run_throughput(m, tp, "ROCm-Attn", "benchmark_results_rocm_attn/benchmark_results", {
+            run_throughput(m, tp, "ROCm-Attn", "benchmark_results_rocm", {
                "VLLM_V1_USE_PREFILL_DECODE_ATTENTION": "1",
                "VLLM_USE_TRITON_FLASH_ATTN": "0"
            })
@@ -16,7 +16,7 @@ OFF_NUM_PROMPTS      = 200
 OFF_FORCED_OUTPUT    = "512"
 DEFAULT_BATCH_TOKENS = "8192"

-RESULTS_DIR = Path("cluster_benchmark_results")
+RESULTS_DIR = Path("benchmark_results")
 RESULTS_DIR.mkdir(exist_ok=True)

 # Reuse the model table from the main benchmark script
@@ -93,7 +93,8 @@ def get_local_ip(iface):
    return cluster_manager.get_local_ip(iface)

 def nuke_vllm_cache():
-    cluster_manager.nuke_vllm_cache_cluster()
+    # Pass explicit node IPs: `ray status` may report nodes as hex IDs, which cannot be used as SSH targets.
+    cluster_manager.nuke_vllm_cache_cluster(nodes=[HEAD_IP, WORKER_IP])


 def get_dataset():
@@ -223,7 +224,7 @@ def run_cluster_throughput(model):
    run_bench_set(
        model,
        "ROCm-Attn",
-        "benchmark_results_rocm_attn/benchmark_results",
+        "benchmark_results_rocm",
        extra_env={
            "VLLM_V1_USE_PREFILL_DECODE_ATTENTION": "1",
            "VLLM_USE_TRITON_FLASH_ATTN": "0"
@@ -247,7 +248,7 @@ def print_summary():
        
        # ROCm
        try:
-            p2 = Path("benchmark_results_rocm_attn/benchmark_results") / f"{msafe}_cluster_tp{CLUSTER_TP}_throughput.json"
+            p2 = Path("benchmark_results_rocm") / f"{msafe}_cluster_tp{CLUSTER_TP}_throughput.json"
            d2 = json.loads(p2.read_text())
            val2 = f"{d2.get('tokens_per_second', 0):.1f}"
        except: val2 = "N/A"