feat: Add RAY_DISABLE_METRICS=1 to disable Ray metrics across cluster configurations and scripts.
This commit is contained in:
@@ -117,6 +117,7 @@ export GLOO_SOCKET_IFNAME=$RDMA_IFACE
|
||||
export NCCL_SOCKET_IFNAME=$RDMA_IFACE
|
||||
|
||||
# Disable Ray metrics and prevent Ray from masking the APU (Strix Halo Requirement)
|
||||
export RAY_DISABLE_METRICS=1
|
||||
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||
|
||||
# Start Head
|
||||
@@ -132,6 +133,7 @@ ray stop --force
|
||||
export RDMA_IFACE=$(ip -o addr show to 192.168.100.0/24 | awk '{print $2}' | head -n1)
|
||||
export GLOO_SOCKET_IFNAME=$RDMA_IFACE
|
||||
export NCCL_SOCKET_IFNAME=$RDMA_IFACE
|
||||
export RAY_DISABLE_METRICS=1
|
||||
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||
|
||||
ray start --address='192.168.100.1:6379' --num-gpus=1 --num-cpus=8 --disable-usage-stats
|
||||
|
||||
@@ -54,6 +54,7 @@ setup_head() {
|
||||
ray stop --force
|
||||
|
||||
# Critical Config
|
||||
export RAY_DISABLE_METRICS=1
|
||||
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||
export RAY_memory_monitor_refresh_ms=0
|
||||
export VLLM_HOST_IP=$HEAD_IP
|
||||
@@ -78,6 +79,7 @@ setup_worker() {
|
||||
ray stop --force
|
||||
|
||||
# Critical Config
|
||||
export RAY_DISABLE_METRICS=1
|
||||
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||
export RAY_memory_monitor_refresh_ms=0
|
||||
export VLLM_HOST_IP=$WORKER_IP
|
||||
|
||||
@@ -141,6 +141,7 @@ def setup_worker_node(worker_ip, head_ip):
|
||||
source /etc/profile
|
||||
# Silence the kill command
|
||||
ray stop --force > /dev/null 2>&1 || true
|
||||
export RAY_DISABLE_METRICS=1
|
||||
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||
export RAY_memory_monitor_refresh_ms=0
|
||||
export VLLM_HOST_IP={worker_ip}
|
||||
@@ -175,6 +176,7 @@ def setup_head_node(head_ip):
|
||||
script = f"""
|
||||
# Silence the kill command
|
||||
ray stop --force > /dev/null 2>&1 || true
|
||||
export RAY_DISABLE_METRICS=1
|
||||
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
|
||||
export RAY_memory_monitor_refresh_ms=0
|
||||
export VLLM_HOST_IP={head_ip}
|
||||
|
||||
Viittaa uudessa ongelmassa
Block a user