Update benchmarks, fix start-vllm

2026-02-23 19:39:19 +00:00
@@ -36,6 +36,22 @@ else:
 HOST = os.getenv("HOST", "0.0.0.0")
 PORT = os.getenv("PORT", "8000")

+def detect_gpus():  # takes no arguments; returns an int GPU count
+    """Detects AMD GPUs via rocm-smi or /dev/dri."""
+    try:
+        # Try rocm-smi first; its output is only used when the exit code is 0
+        res = subprocess.run(["rocm-smi", "--showid", "--csv"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        if res.returncode == 0:
+            count = res.stdout.count("GPU")  # substring occurrences in stdout, not parsed CSV rows -- NOTE(review): confirm this equals the device count
+            if count > 0: return count
+    except: pass  # bare except: any failure (e.g. rocm-smi not installed) falls through to the /dev/dri scan
+    
+    # Fallback: count the /dev/dri/renderD* entries
+    try:
+        return len(list(Path("/dev/dri").glob("renderD*")))  # NOTE(review): this is 0 when nothing matches
+    except:
+        return 1  # reached only if the scan above raises
+
 def get_discovered_models():
    """
    Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
@@ -93,22 +109,6 @@ def check_dependencies():
        print("Error: 'dialog' is required. Please install it (apt-get install dialog).")
        sys.exit(1)

-def detect_gpus():
-    """Detects AMD GPUs via rocm-smi or /dev/dri."""
-    try:
-        # Try rocm-smi first
-        res = subprocess.run(["rocm-smi", "--showid", "--csv"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-        if res.returncode == 0:
-            count = res.stdout.count("GPU")
-            if count > 0: return count
-    except: pass
-    
-    # Fallback to /dev/dri/render*
-    try:
-        return len(list(Path("/dev/dri").glob("renderD*")))
-    except:
-        return 1
-
 def get_verified_config(model_id, tp_size, max_seqs):
    """
    Reads max_context_results.json to find the best verified configuration.
@@ -334,7 +334,15 @@ def configure_and_launch(model_idx, gpu_count):
    print(f" Backend:   {'ROCm' if use_rocm_attn else 'Triton'}")
    if clear_cache:
        print(f" Action:    Clearing vLLM Cache (~/.cache/vllm)")
-    print(f" Command:   {' '.join(cmd)}")
+        
+    # Variables that represent the custom environment overrides for models
+    custom_env = config.get("env", {})
+    if custom_env:
+        print("\n --- Environment Variables ---")
+        for k, v in custom_env.items():
+            print(f" export {k}={v}")
+            
+    print(f"\n Command:   {' '.join(cmd)}")
    print("="*60 + "\n")
    
    os.execvpe("vllm", cmd, env)