diff --git a/benchmarks/find_max_context.py b/benchmarks/find_max_context.py index a3eeed8..388dcf1 100644 --- a/benchmarks/find_max_context.py +++ b/benchmarks/find_max_context.py @@ -137,6 +137,7 @@ def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs): # Env Setup env = os.environ.copy() + env["VLLM_DISABLE_COMPILE_CACHE"] = "1" env.update(config.get("env", {})) # CLUSTER / RAY LOGIC diff --git a/benchmarks/run_vllm_bench.py b/benchmarks/run_vllm_bench.py index 8c335b3..5a0ec3e 100644 --- a/benchmarks/run_vllm_bench.py +++ b/benchmarks/run_vllm_bench.py @@ -163,6 +163,7 @@ def run_throughput(model, tp_size, backend_name="Default", output_dir=RESULTS_DI # ENV Setup: Global + Model Specific env = os.environ.copy() + env["VLLM_DISABLE_COMPILE_CACHE"] = "1" # Inject model specific env vars (e.g. for AWQ) model_env = MODEL_TABLE[model].get("env", {}) diff --git a/benchmarks/vllm_cluster_bench.py b/benchmarks/vllm_cluster_bench.py index 2f2b2e2..4d42851 100755 --- a/benchmarks/vllm_cluster_bench.py +++ b/benchmarks/vllm_cluster_bench.py @@ -140,6 +140,7 @@ def get_cluster_env(): host_ip = get_local_ip(rdma_iface) env = os.environ.copy() + env["VLLM_DISABLE_COMPILE_CACHE"] = "1" # Critical Cluster Envs (Match start_vllm_cluster.py) env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1" diff --git a/scripts/start_vllm.py b/scripts/start_vllm.py index 3eebc26..bac1c7e 100644 --- a/scripts/start_vllm.py +++ b/scripts/start_vllm.py @@ -322,6 +322,7 @@ def configure_and_launch(model_idx, gpu_count): # Env Vars env = os.environ.copy() + env["VLLM_DISABLE_COMPILE_CACHE"] = "1" env.update(config.get("env", {})) if use_rocm_attn: diff --git a/scripts/start_vllm_cluster.py b/scripts/start_vllm_cluster.py index 5b2c64d..74c7702 100755 --- a/scripts/start_vllm_cluster.py +++ b/scripts/start_vllm_cluster.py @@ -263,6 +263,7 @@ def configure_and_launch_vllm(model_idx, head_ip): print(f"Detected RDMA Interface: {rdma_iface}") env = os.environ.copy() + env["VLLM_DISABLE_COMPILE_CACHE"] = "1" env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1" env["VLLM_HOST_IP"] = head_ip env["NCCL_SOCKET_IFNAME"] = rdma_iface