config: Set VLLM_DISABLE_COMPILE_CACHE=1 in the environment built by each vLLM script.

2026-03-09 14:07:43 +00:00
commit 16405e8943
@@ -137,6 +137,7 @@ def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs):
    
    # Env Setup
    env = os.environ.copy()
+    env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
    env.update(config.get("env", {}))

    # CLUSTER / RAY LOGIC
@@ -163,6 +163,7 @@ def run_throughput(model, tp_size, backend_name="Default", output_dir=RESULTS_DI

    # ENV Setup: Global + Model Specific
    env = os.environ.copy()
+    env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
    
    # Inject model specific env vars (e.g. for AWQ)
    model_env = MODEL_TABLE[model].get("env", {})
@@ -140,6 +140,7 @@ def get_cluster_env():
    host_ip = get_local_ip(rdma_iface)
    
    env = os.environ.copy()
+    env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
    
    # Critical Cluster Envs (Match start_vllm_cluster.py)
    env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
@@ -322,6 +322,7 @@ def configure_and_launch(model_idx, gpu_count):
    
    # Env Vars
    env = os.environ.copy()
+    env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
    env.update(config.get("env", {}))
    
    if use_rocm_attn:
@@ -263,6 +263,7 @@ def configure_and_launch_vllm(model_idx, head_ip):
    print(f"Detected RDMA Interface: {rdma_iface}")
    
    env = os.environ.copy()
+    env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
    env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
    env["VLLM_HOST_IP"] = head_ip
    env["NCCL_SOCKET_IFNAME"] = rdma_iface