config: Add VLLM_DISABLE_COMPILE_CACHE=1 to environment variables across vLLM scripts.
Tento commit je obsažen v:
@@ -137,6 +137,7 @@ def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs):
|
|||||||
|
|
||||||
# Env Setup
|
# Env Setup
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
|
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||||
env.update(config.get("env", {}))
|
env.update(config.get("env", {}))
|
||||||
|
|
||||||
# CLUSTER / RAY LOGIC
|
# CLUSTER / RAY LOGIC
|
||||||
|
|||||||
@@ -163,6 +163,7 @@ def run_throughput(model, tp_size, backend_name="Default", output_dir=RESULTS_DI
|
|||||||
|
|
||||||
# ENV Setup: Global + Model Specific
|
# ENV Setup: Global + Model Specific
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
|
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||||
|
|
||||||
# Inject model specific env vars (e.g. for AWQ)
|
# Inject model specific env vars (e.g. for AWQ)
|
||||||
model_env = MODEL_TABLE[model].get("env", {})
|
model_env = MODEL_TABLE[model].get("env", {})
|
||||||
|
|||||||
@@ -140,6 +140,7 @@ def get_cluster_env():
|
|||||||
host_ip = get_local_ip(rdma_iface)
|
host_ip = get_local_ip(rdma_iface)
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
|
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||||
|
|
||||||
# Critical Cluster Envs (Match start_vllm_cluster.py)
|
# Critical Cluster Envs (Match start_vllm_cluster.py)
|
||||||
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
||||||
|
|||||||
@@ -322,6 +322,7 @@ def configure_and_launch(model_idx, gpu_count):
|
|||||||
|
|
||||||
# Env Vars
|
# Env Vars
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
|
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||||
env.update(config.get("env", {}))
|
env.update(config.get("env", {}))
|
||||||
|
|
||||||
if use_rocm_attn:
|
if use_rocm_attn:
|
||||||
|
|||||||
@@ -263,6 +263,7 @@ def configure_and_launch_vllm(model_idx, head_ip):
|
|||||||
print(f"Detected RDMA Interface: {rdma_iface}")
|
print(f"Detected RDMA Interface: {rdma_iface}")
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
|
env["VLLM_DISABLE_COMPILE_CACHE"] = "1"
|
||||||
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
||||||
env["VLLM_HOST_IP"] = head_ip
|
env["VLLM_HOST_IP"] = head_ip
|
||||||
env["NCCL_SOCKET_IFNAME"] = rdma_iface
|
env["NCCL_SOCKET_IFNAME"] = rdma_iface
|
||||||
|
|||||||
Odkázat v novém úkolu
Zablokovat Uživatele