Files
rocm-systems/internal/workloads/Makefile
T
2024-07-01 09:57:08 -05:00

106 行
3.2 KiB
Makefile

# Compiler driver and directory layout used by every rule below.
HIPCC   = hipcc
BUILD   = ./build
SRC     = ./src
RESULTS = ./results

# rocshmem_DIR and MPI_HOME are referenced by the rules but are not assigned
# in this part of the file; the sample values below are left disabled.
#rocshmem_DIR = $(HOME)/rocshmem
#MPI_HOME = $(HOME)/mpich/install
NCCL_HOME = $(HOME)/rccl/build

# Link flags. SHMEM_FLAGS and RCCL_FLAGS both extend MPI_FLAGS.
MPI_FLAGS   = -lmpi -lhsa-runtime64 -lrt -L$(MPI_HOME)/lib -fgpu-rdc
SHMEM_FLAGS = $(MPI_FLAGS) -lmlx5 -libverbs
RCCL_FLAGS  = $(MPI_FLAGS) -Wl,-rpath,$(NCCL_HOME) -L$(NCCL_HOME) -lrccl
# Targets that name commands rather than files. Declaring them phony keeps a
# stray file with the same name from masking the target.
.PHONY: all run_sort run_sort_shmem run_sort_rccl run_sort_mpi extract_sort clean
# Suppress command echo for the benchmark and report targets so that only
# their own printf output reaches the terminal. run_sort_mpi is listed so it
# behaves like its sibling run targets. run_scan and extract_scan are kept
# from the original list; they are not defined in this part of the file.
.SILENT: run_scan extract_scan run_sort run_sort_shmem run_sort_rccl run_sort_mpi extract_sort
# Build all three sort benchmark binaries.
all: $(addprefix $(BUILD)/,sort_shmem sort_rccl sort_mpi)
# Link sort_shmem from its object file and the static librocshmem.a archive,
# followed by the SHMEM link flags.
$(BUILD)/sort_shmem: $(BUILD)/sort_shmem.o $(rocshmem_DIR)/lib/librocshmem.a
	$(HIPCC) -o $@ $^ $(SHMEM_FLAGS)
# Compile sort_shmem.cu to an object file, with the rocshmem and MPI include
# directories on the search path. The output directory is created first so a
# fresh checkout without $(BUILD) still builds. $< passes only the source
# file to the compiler, even if more prerequisites are added later.
$(BUILD)/sort_shmem.o: $(SRC)/sort_shmem.cu
	@mkdir -p $(@D)
	$(HIPCC) -I$(rocshmem_DIR)/include -I$(MPI_HOME)/include -fgpu-rdc -c $< -o $@
# Link sort_rccl from its object file using the RCCL link flags.
$(BUILD)/sort_rccl: $(BUILD)/sort_rccl.o
	$(HIPCC) -o $@ $^ $(RCCL_FLAGS)
# Compile sort_rccl.cu to an object file, with $(NCCL_HOME)/include/rccl and
# the MPI include directory on the search path. The output directory is
# created first so a fresh checkout without $(BUILD) still builds. $< passes
# only the source file to the compiler.
$(BUILD)/sort_rccl.o: $(SRC)/sort_rccl.cu
	@mkdir -p $(@D)
	$(HIPCC) -I$(NCCL_HOME)/include/rccl -I$(MPI_HOME)/include -fgpu-rdc -c $< -o $@
# Link sort_mpi from its object file using the MPI link flags.
$(BUILD)/sort_mpi: $(BUILD)/sort_mpi.o
	$(HIPCC) -o $@ $^ $(MPI_FLAGS)
# Compile sort_mpi.cu to an object file, with the MPI include directory on
# the search path. The output directory is created first so a fresh checkout
# without $(BUILD) still builds. $< passes only the source file to the
# compiler.
$(BUILD)/sort_mpi.o: $(SRC)/sort_mpi.cu
	@mkdir -p $(@D)
	$(HIPCC) -I$(MPI_HOME)/include -fgpu-rdc -c $< -o $@
# Environment assignments placed in front of the sort_shmem launch command.
RO_FLAGS = ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1
# One benchmark launch is made per word in ITERS.
ITERS ?= 0 1 2 3 4 5 6 7 8 9
# Duration argument given to timeout for each launch.
TIMEOUT = 1m
# Host list given to mpirun -hosts.
HOSTS = sv-pdp-0,sv-pdp-1,sv-pdp-2,sv-pdp-3
# Not referenced by any rule in this part of the file.
SCAN_SIZE = 1024
# Process counts swept by run_sort; PES_RCCL is the sweep for run_sort_rccl.
PES = 2 4 8 12 16
PES_RCCL = 2 4 8
# Defaults for one run_sort_shmem invocation; run_sort overrides them on the
# sub-make command line.
TYPE    ?= Naive
LABEL   ?= naive
PARAM   ?= 0
NUM_PES ?= 2
# Launch sort_shmem once per word in ITERS on NUM_PES processes, passing PARAM
# as its only argument. The output file sort_<LABEL>_<NUM_PES>.out is reset
# and then appended to by each launch. TYPE is printed as a progress marker.
# The results directory is created first so the redirections cannot fail on a
# fresh tree.
run_sort_shmem: $(BUILD)/sort_shmem
	@mkdir -p $(RESULTS)
	printf "%s " "$(TYPE)"; \
	out=$(RESULTS)/sort_$(LABEL)_$(NUM_PES).out; \
	echo "" > "$$out"; \
	for j in $(ITERS); do \
		$(RO_FLAGS) timeout $(TIMEOUT) mpirun -np $(NUM_PES) -hosts $(HOSTS) $(BUILD)/sort_shmem $(PARAM) >> "$$out"; \
	done
# Launch sort_rccl once per word in ITERS on NUM_PES processes. The output
# file sort_rccl_<NUM_PES>.out is reset and then appended to by each launch.
# "RCCL" is printed as a progress marker. The results directory is created
# first so the redirections cannot fail on a fresh tree.
run_sort_rccl: $(BUILD)/sort_rccl
	@mkdir -p $(RESULTS)
	printf "RCCL "; \
	out=$(RESULTS)/sort_rccl_$(NUM_PES).out; \
	echo "" > "$$out"; \
	for j in $(ITERS); do \
		timeout $(TIMEOUT) mpirun -np $(NUM_PES) -hosts $(HOSTS) $(BUILD)/sort_rccl >> "$$out"; \
	done
# Launch sort_mpi once per word in ITERS on NUM_PES processes. The output
# file sort_mpi2_<NUM_PES>.out is reset and then appended to by each launch.
# "MPI2" is printed as a progress marker.
# The prerequisite is the sort_mpi binary this recipe runs; it previously
# named sort_rccl, so sort_mpi could be missing or stale at run time.
# The recipe is @-silenced so its output matches the other run_sort_* targets.
run_sort_mpi: $(BUILD)/sort_mpi
	@mkdir -p $(RESULTS)
	@printf "MPI2 "; \
	out=$(RESULTS)/sort_mpi2_$(NUM_PES).out; \
	echo "" > "$$out"; \
	for j in $(ITERS); do \
		timeout $(TIMEOUT) mpirun -np $(NUM_PES) -hosts $(HOSTS) $(BUILD)/sort_mpi >> "$$out"; \
	done
# Full benchmark sweep.
# For each count in PES, run the four sort_shmem configurations (PARAM 0-3)
# and then run_sort_mpi, each through a sub-make. Next, run run_sort_rccl for
# each count in PES_RCCL. Finally, summarise the results with extract_sort.
# sort_mpi is listed as a prerequisite because the sweep launches it through
# run_sort_mpi; all three binaries are now built before any benchmark starts.
run_sort: $(BUILD)/sort_shmem $(BUILD)/sort_rccl $(BUILD)/sort_mpi
	for i in $(PES); do \
		printf "%d " $$i; \
		$(MAKE) --no-print-directory run_sort_shmem TYPE=NAIVE LABEL=naive PARAM=0 NUM_PES=$${i}; \
		$(MAKE) --no-print-directory run_sort_shmem TYPE=MPI LABEL=mpi PARAM=1 NUM_PES=$${i}; \
		$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN LABEL=gcen PARAM=2 NUM_PES=$${i}; \
		$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN2 LABEL=gcen2 PARAM=3 NUM_PES=$${i}; \
		$(MAKE) --no-print-directory run_sort_mpi NUM_PES=$${i}; \
		printf "\n"; \
	done
	for i in $(PES_RCCL); do \
		$(MAKE) --no-print-directory run_sort_rccl NUM_PES=$${i}; \
		printf "%d " $$i; \
	done
	$(MAKE) extract_sort
# Print a tab-separated table of the decimal values found on the "Avg time"
# lines of each results file: one row per (process count, type) pair, one
# column per matching line.
# A row whose results file does not exist is printed with no values instead
# of letting grep report a missing file. The rccl runs only cover PES_RCCL,
# so that case is expected for the larger counts in PES.
# The unused "latency" assignment, which ran a redundant grep, was removed.
extract_sort:
	printf "Sort latency\n"
	printf "PROCS\tType\tRuns"
	for i in $(PES); do \
		for type in mpi mpi2 rccl naive gcen gcen2; do \
			printf "\n%d\t%s\t" "$$i" "$$type"; \
			file=$(RESULTS)/sort_$${type}_$${i}.out; \
			if [ -f "$$file" ]; then \
				grep -E "Avg time" "$$file" | while read -r j; do \
					val=$$(echo $$j | grep -oE -m1 "[0-9]+\.[0-9]+"); \
					printf "%s\t" $${val}; \
				done; \
			fi; \
		done; \
	done
	printf "\n"
# Remove the files in the build directory.
# The path follows $(BUILD) instead of a hard-coded "build/", so overriding
# BUILD cleans the directory that was actually used. $(RM) is "rm -f", so an
# already-empty directory is not an error. The first recipe line aborts if
# BUILD expands to nothing, which would otherwise turn the glob into "/*".
.PHONY: clean
clean:
	$(if $(strip $(BUILD)),,$(error BUILD is empty; refusing to run rm))
	$(RM) $(BUILD)/*