feat: Add RAY_DISABLE_METRICS=1 to disable Ray metrics across cluster configurations and scripts.

Esse commit está contido em:
Donato Capitella
2026-02-01 21:52:48 +00:00
commit 0d8afba093
3 arquivos alterados com 6 adições e 0 exclusões
+2
Ver Arquivo
@@ -117,6 +117,7 @@ export GLOO_SOCKET_IFNAME=$RDMA_IFACE
export NCCL_SOCKET_IFNAME=$RDMA_IFACE export NCCL_SOCKET_IFNAME=$RDMA_IFACE
# Prevent Ray from masking the APU (Strix Halo Requirement) # Prevent Ray from masking the APU (Strix Halo Requirement)
export RAY_DISABLE_METRICS=1
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
# Start Head # Start Head
@@ -132,6 +133,7 @@ ray stop --force
export RDMA_IFACE=$(ip -o addr show to 192.168.100.0/24 | awk '{print $2}' | head -n1) export RDMA_IFACE=$(ip -o addr show to 192.168.100.0/24 | awk '{print $2}' | head -n1)
export GLOO_SOCKET_IFNAME=$RDMA_IFACE export GLOO_SOCKET_IFNAME=$RDMA_IFACE
export NCCL_SOCKET_IFNAME=$RDMA_IFACE export NCCL_SOCKET_IFNAME=$RDMA_IFACE
export RAY_DISABLE_METRICS=1
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
ray start --address='192.168.100.1:6379' --num-gpus=1 --num-cpus=8 --disable-usage-stats ray start --address='192.168.100.1:6379' --num-gpus=1 --num-cpus=8 --disable-usage-stats
+2
Ver Arquivo
@@ -54,6 +54,7 @@ setup_head() {
ray stop --force ray stop --force
# Critical Config # Critical Config
export RAY_DISABLE_METRICS=1
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
export RAY_memory_monitor_refresh_ms=0 export RAY_memory_monitor_refresh_ms=0
export VLLM_HOST_IP=$HEAD_IP export VLLM_HOST_IP=$HEAD_IP
@@ -78,6 +79,7 @@ setup_worker() {
ray stop --force ray stop --force
# Critical Config # Critical Config
export RAY_DISABLE_METRICS=1
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
export RAY_memory_monitor_refresh_ms=0 export RAY_memory_monitor_refresh_ms=0
export VLLM_HOST_IP=$WORKER_IP export VLLM_HOST_IP=$WORKER_IP
+2
Ver Arquivo
@@ -141,6 +141,7 @@ def setup_worker_node(worker_ip, head_ip):
source /etc/profile source /etc/profile
# Silence the kill command # Silence the kill command
ray stop --force > /dev/null 2>&1 || true ray stop --force > /dev/null 2>&1 || true
export RAY_DISABLE_METRICS=1
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
export RAY_memory_monitor_refresh_ms=0 export RAY_memory_monitor_refresh_ms=0
export VLLM_HOST_IP={worker_ip} export VLLM_HOST_IP={worker_ip}
@@ -175,6 +176,7 @@ def setup_head_node(head_ip):
script = f""" script = f"""
# Silence the kill command # Silence the kill command
ray stop --force > /dev/null 2>&1 || true ray stop --force > /dev/null 2>&1 || true
export RAY_DISABLE_METRICS=1
export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 export RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
export RAY_memory_monitor_refresh_ms=0 export RAY_memory_monitor_refresh_ms=0
export VLLM_HOST_IP={head_ip} export VLLM_HOST_IP={head_ip}