config: Add VLLM_DISABLE_COMPILE_CACHE=1 to environment variables across VLLM scripts.
This commit is contained in:
@@ -137,6 +137,7 @@ def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs):
|
||||
|
||||
# Env Setup
|
||||
env = os.environ.copy()
|
||||
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||
env.update(config.get("env", {}))
|
||||
|
||||
# CLUSTER / RAY LOGIC
|
||||
|
||||
@@ -163,6 +163,7 @@ def run_throughput(model, tp_size, backend_name="Default", output_dir=RESULTS_DI
|
||||
|
||||
# ENV Setup: Global + Model Specific
|
||||
env = os.environ.copy()
|
||||
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||
|
||||
# Inject model specific env vars (e.g. for AWQ)
|
||||
model_env = MODEL_TABLE[model].get("env", {})
|
||||
|
||||
@@ -140,6 +140,7 @@ def get_cluster_env():
|
||||
host_ip = get_local_ip(rdma_iface)
|
||||
|
||||
env = os.environ.copy()
|
||||
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||
|
||||
# Critical Cluster Envs (Match start_vllm_cluster.py)
|
||||
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
||||
|
||||
@@ -322,6 +322,7 @@ def configure_and_launch(model_idx, gpu_count):
|
||||
|
||||
# Env Vars
|
||||
env = os.environ.copy()
|
||||
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||
env.update(config.get("env", {}))
|
||||
|
||||
if use_rocm_attn:
|
||||
|
||||
@@ -263,6 +263,7 @@ def configure_and_launch_vllm(model_idx, head_ip):
|
||||
print(f"Detected RDMA Interface: {rdma_iface}")
|
||||
|
||||
env = os.environ.copy()
|
||||
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
||||
env["VLLM_HOST_IP"] = head_ip
|
||||
env["NCCL_SOCKET_IFNAME"] = rdma_iface
|
||||
|
||||
Reference in new issue
Block a user