feat: Introduce measure_bandwidth.sh script, install perfquery, and add the script to the Docker image for RDMA bandwidth monitoring.
Этот коммит содержится в:
@@ -127,6 +127,7 @@ COPY scripts/99-toolbox-banner.sh /etc/profile.d/99-toolbox-banner.sh
|
||||
COPY scripts/zz-venv-last.sh /etc/profile.d/zz-venv-last.sh
|
||||
COPY scripts/start_vllm.py /opt/start-vllm
|
||||
COPY scripts/start_vllm_cluster.py /opt/start-vllm-cluster
|
||||
COPY scripts/measure_bandwidth.sh /opt/measure_bandwidth.sh
|
||||
COPY scripts/cluster_manager.py /opt/cluster_manager.py
|
||||
COPY scripts/models.py /opt/models.py
|
||||
|
||||
|
||||
@@ -8,5 +8,5 @@ dnf -y install --setopt=install_weak_deps=False --nodocs \
|
||||
gcc gcc-c++ binutils make ffmpeg-free \
|
||||
cmake ninja-build aria2c tar xz vim nano dialog \
|
||||
libdrm-devel zlib-devel openssl-devel pgrep \
|
||||
numactl-devel gperftools-libs iproute libibverbs-utils patch perftest ping iperf3 \
|
||||
numactl-devel gperftools-libs iproute libibverbs-utils patch perftest ping iperf3 perfquery \
|
||||
&& dnf clean all && rm -rf /var/cache/dnf/*
|
||||
|
||||
Исполняемый файл
+18
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
#
# measure_bandwidth.sh - print RDMA receive/transmit throughput periodically.
#
# Usage: measure_bandwidth.sh [interval_seconds]   (default: 1)
#
# Takes one snapshot of `rdma statistic` per tick, sums the ip4InOctets and
# ip4OutOctets counters over every link that reports them, and prints the
# per-second delta in bits (scaled by numfmt) until interrupted.
#
# `set -e` is deliberately not used: failures are handled explicitly so the
# script can report what went wrong instead of dying silently.
set -u

readonly RX_COUNTER='ip4InOctets'
readonly TX_COUNTER='ip4OutOctets'

#######################################
# Sum every occurrence of one counter in a block of `rdma statistic` text.
# The value is taken as the token that follows the counter name, so both the
# "one counter per line" layout and the "all counters of a link on one line"
# layout are understood, and several links are added together.
# Arguments: $1 - exact counter name, $2 - text to scan
# Outputs:   the total on stdout
# Returns:   0 if at least one numeric value was found, 1 otherwise
#######################################
sum_counter() {
  local name=$1 stats=$2
  local total=0 found=1 value

  while IFS= read -r value; do
    # Ignore anything that is not a plain unsigned integer.
    [[ $value =~ ^[0-9]+$ ]] || continue
    # 10# keeps a value with leading zeros from being read as octal.
    total=$((total + 10#$value))
    found=0
  done < <(awk -v key="$name" \
    '{ for (i = 1; i < NF; i++) if ($i == key) print $(i + 1) }' \
    <<<"$stats")

  ((found == 0)) || return 1
  printf '%s\n' "$total"
}

#######################################
# Convert two octet readings into a bits-per-second rate.
# A decreasing counter (reset or wrap) yields 0 instead of a negative rate.
# Arguments: $1 - previous octets, $2 - current octets, $3 - seconds between
# Outputs:   the rate in bit/s on stdout
#######################################
rate_bits() {
  local delta=$(($2 - $1))
  ((delta >= 0)) || delta=0
  printf '%s\n' "$((delta * 8 / $3))"
}

#######################################
# Take one snapshot of the RX and TX octet totals.
# Outputs: "<rx_octets> <tx_octets>" on stdout, diagnostics on stderr
# Returns: non-zero if rdma fails or the counters are not reported
#######################################
read_octets() {
  local stats rx tx

  stats=$(rdma statistic) || {
    printf '%s: "rdma statistic" failed\n' "${0##*/}" >&2
    return 1
  }
  rx=$(sum_counter "$RX_COUNTER" "$stats") \
    && tx=$(sum_counter "$TX_COUNTER" "$stats") || {
    printf '%s: no %s/%s counters reported by "rdma statistic"\n' \
      "${0##*/}" "$RX_COUNTER" "$TX_COUNTER" >&2
    return 1
  }
  printf '%s %s\n' "$rx" "$tx"
}

main() {
  local interval=${1:-1}
  local sample rx_prev tx_prev rx_now tx_now rx_rate tx_rate

  if [[ ! $interval =~ ^[1-9][0-9]*$ ]]; then
    printf 'Usage: %s [interval_seconds]\n' "${0##*/}" >&2
    return 2
  fi
  if ! command -v rdma >/dev/null 2>&1; then
    printf '%s: rdma tool not found in PATH\n' "${0##*/}" >&2
    return 1
  fi

  sample=$(read_octets) || return 1
  read -r rx_prev tx_prev <<<"$sample"

  while true; do
    sleep "$interval"

    # One snapshot per tick: RX and TX come from the same reading, and the
    # reading is carried forward so no traffic falls between iterations.
    sample=$(read_octets) || return 1
    read -r rx_now tx_now <<<"$sample"

    rx_rate=$(rate_bits "$rx_prev" "$rx_now" "$interval")
    tx_rate=$(rate_bits "$tx_prev" "$tx_now" "$interval")

    printf "%s RDMA RX: %7sbit/s TX: %7sbit/s SUM: %7sbit/s\n" \
      "$(date +%T)" \
      "$(numfmt --to=iec "$rx_rate")" \
      "$(numfmt --to=iec "$tx_rate")" \
      "$(numfmt --to=iec "$((rx_rate + tx_rate))")"

    rx_prev=$rx_now
    tx_prev=$tx_now
  done
}

main "$@"
|
||||
Ссылка в новой задаче
Block a user