From edcd1ed57e1bc095eabd171e5811111b6eb6db2c Mon Sep 17 00:00:00 2001 From: Yiltan Date: Wed, 30 Apr 2025 08:57:55 -0400 Subject: [PATCH] Added XNACK support (#94) * Added xnack flags * Updated examples compile command --- CMakeLists.txt | 11 ++++---- examples/CMakeLists.txt | 9 +++--- examples/rocshmem_allreduce_test.cc | 38 ++++++++++++++++++------- examples/rocshmem_alltoall_test.cc | 40 +++++++++++++++++++-------- examples/rocshmem_broadcast_test.cc | 40 +++++++++++++++++++-------- examples/rocshmem_getmem_test.cc | 38 ++++++++++++++++++------- examples/rocshmem_init_attr_test.cc | 40 +++++++++++++++++++-------- examples/rocshmem_put_signal_test.cc | 38 ++++++++++++++++++------- tests/functional_tests/CMakeLists.txt | 9 +++--- tests/unit_tests/CMakeLists.txt | 8 +++--- 10 files changed, 189 insertions(+), 82 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 954d952dcc..c90975486e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,8 +97,10 @@ set(ROCMCHECKS_WARN_TOOLCHAIN_VAR OFF) include(cmake/rocm_local_targets.cmake) set(DEFAULT_GPUS - gfx90a - gfx942) + gfx90a:xnack-; + gfx90a:xnack+; + gfx942:xnack-; + gfx942:xnack+) ############################################################################### # PROJECT @@ -146,10 +148,9 @@ if (NOT BUILD_TESTS_ONLY) message(STATUS "Compiling for ${COMPILING_TARGETS}") foreach (target ${COMPILING_TARGETS}) - list(APPEND static_link_flags --offload-arch=${target}) + list(APPEND offload_flags --offload-arch=${target}) endforeach() - list(JOIN static_link_flags " " flags_str) - add_compile_options(${flags_str}) + add_compile_options(${offload_flags}) ############################################################################# # PACKAGE DEPENDENCIES diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0a8eed03ec..d8df62843e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -50,15 +50,14 @@ foreach(SOURCE_FILE IN LISTS EXAMPLE_SOURCES) ) foreach (target ${DEFAULT_GPUS}) - 
list(APPEND static_link_flags --offload-arch=${target}) + list(APPEND offload_flags --offload-arch=${target}) endforeach() - list(JOIN static_link_flags " " flags_str) target_compile_options( ${EXECUTABLE_NAME} PRIVATE - ${flags_str} - -fgpu-rdc + ${offload_flags} + -fgpu-rdc ) target_link_libraries( @@ -66,7 +65,7 @@ foreach(SOURCE_FILE IN LISTS EXAMPLE_SOURCES) PRIVATE ${MPI_mpi_LIBRARY} ${MPI_mpicxx_LIBRARY} - ${flags_str} + ${offload_flags} -L${ROCSHMEM_HOME}/lib -lamdhip64 -lhsa-runtime64 diff --git a/examples/rocshmem_allreduce_test.cc b/examples/rocshmem_allreduce_test.cc index cc31cf8560..cdf8d21c01 100644 --- a/examples/rocshmem_allreduce_test.cc +++ b/examples/rocshmem_allreduce_test.cc @@ -23,18 +23,36 @@ *****************************************************************************/ /* -hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \ - -I/opt/rocm/include \ - -I$ROCSHMEM_INSTALL_DIR/include \ - -I$OPENMPI_UCX_INSTALL_DIR/include/ + * First find your offload target, and if xnack is enabled/disabled using -hipcc -fgpu-rdc --hip-link rocshmem_allreduce_test.o -o rocshmem_allreduce_test \ - $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ - $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ - -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + rocminfo | grep amdgcn -ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_allreduce_test -*/ + * It should output a string like so: + + "Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-" + + * This lists the offload target (gfx942) and that xnack is disabled (xnack-). + * Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands. 
+ * Please modify the compile and link commands to suit your system + + * To compile: + hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \ + --offload-arch=<offload-target>:<xnack-mode> \ + -I/opt/rocm/include \ + -I$ROCSHMEM_INSTALL_DIR/include \ + -I$OPENMPI_UCX_INSTALL_DIR/include/ + + * To link: + hipcc -fgpu-rdc --hip-link rocshmem_allreduce_test.o -o rocshmem_allreduce_test \ + --offload-arch=<offload-target>:<xnack-mode> \ + $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ + $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ + -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + + * To run: + mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_allreduce_test + + */ #include diff --git a/examples/rocshmem_alltoall_test.cc b/examples/rocshmem_alltoall_test.cc index 1a5a9e3056..3d5f0fb153 100644 --- a/examples/rocshmem_alltoall_test.cc +++ b/examples/rocshmem_alltoall_test.cc @@ -23,18 +23,36 @@ *****************************************************************************/ /* -hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \ - -I/opt/rocm/include \ - -I$ROCSHMEM_INSTALL_DIR/include \ - -I$OPENMPI_UCX_INSTALL_DIR/include/ + * First find your offload target, and if xnack is enabled/disabled using -hipcc -fgpu-rdc --hip-link rocshmem_alltoall_test.o -o rocshmem_alltoall_test \ - $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ - $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ - -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + rocminfo | grep amdgcn -ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_alltoall_test -*/ + * It should output a string like so: + + "Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-" + + * This lists the offload target (gfx942) and that xnack is disabled (xnack-). + * Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands. 
+ * Please modify the compile and link commands to suit your system + + * To compile: + hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \ + --offload-arch=<offload-target>:<xnack-mode> \ + -I/opt/rocm/include \ + -I$ROCSHMEM_INSTALL_DIR/include \ + -I$OPENMPI_UCX_INSTALL_DIR/include/ + + * To link: + hipcc -fgpu-rdc --hip-link rocshmem_alltoall_test.o -o rocshmem_alltoall_test \ + --offload-arch=<offload-target>:<xnack-mode> \ + $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ + $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ + -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + + * To run: + mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_alltoall_test + + */ #include @@ -149,7 +167,7 @@ int main (int argc, char **argv) bool pass = check_recvbuf(dest, nelem, my_pe, npes); printf("Test %s \t nelem %d %s\n", argv[0], nelem, pass ? "[PASS]" : "[FAIL]"); - + rocshmem_free(source); rocshmem_free(dest); diff --git a/examples/rocshmem_broadcast_test.cc b/examples/rocshmem_broadcast_test.cc index a02ddcf995..382dd1237c 100644 --- a/examples/rocshmem_broadcast_test.cc +++ b/examples/rocshmem_broadcast_test.cc @@ -23,18 +23,36 @@ *****************************************************************************/ /* -hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \ - -I/opt/rocm/include \ - -I$ROCSHMEM_INSTALL_DIR/include \ - -I$OPENMPI_UCX_INSTALL_DIR/include/ + * First find your offload target, and if xnack is enabled/disabled using -hipcc -fgpu-rdc --hip-link rocshmem_broadcast_test.o -o rocshmem_broadcast_test \ - $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ - $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ - -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + rocminfo | grep amdgcn -ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_broadcast_test -*/ + * It should output a string like so: + + "Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-" + + * This lists the offload target (gfx942) and that xnack is disabled (xnack-). + * Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands. 
+ * Please modify the compile and link commands to suit your system + + * To compile: + hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \ + --offload-arch=<offload-target>:<xnack-mode> \ + -I/opt/rocm/include \ + -I$ROCSHMEM_INSTALL_DIR/include \ + -I$OPENMPI_UCX_INSTALL_DIR/include/ + + * To link: + hipcc -fgpu-rdc --hip-link rocshmem_broadcast_test.o -o rocshmem_broadcast_test \ + --offload-arch=<offload-target>:<xnack-mode> \ + $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ + $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ + -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + + * To run: + mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_broadcast_test + + */ #include @@ -144,7 +162,7 @@ int main(int argc, char **argv) bool pass = check_recvbuf(dest, nelem, my_pe, npes); printf("Test %s \t nelem %d %s\n", argv[0], nelem, pass ? "[PASS]" : "[FAIL]"); } - + rocshmem_free(source); rocshmem_free(dest); diff --git a/examples/rocshmem_getmem_test.cc b/examples/rocshmem_getmem_test.cc index 71bd780ab0..13bc05d3f2 100644 --- a/examples/rocshmem_getmem_test.cc +++ b/examples/rocshmem_getmem_test.cc @@ -23,18 +23,36 @@ *****************************************************************************/ /* -hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \ - -I/opt/rocm/include \ - -I$ROCSHMEM_INSTALL_DIR/include \ - -I$OPENMPI_UCX_INSTALL_DIR/include/ + * First find your offload target, and if xnack is enabled/disabled using -hipcc -fgpu-rdc --hip-link rocshmem_getmem_test.o -o rocshmem_getmem_test \ - $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ - $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ - -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + rocminfo | grep amdgcn -ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_getmem_test -*/ + * It should output a string like so: + + "Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-" + + * This lists the offload target (gfx942) and that xnack is disabled (xnack-). + * Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands. 
+ * Please modify the compile and link commands to suit your system + + * To compile: + hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \ + --offload-arch=<offload-target>:<xnack-mode> \ + -I/opt/rocm/include \ + -I$ROCSHMEM_INSTALL_DIR/include \ + -I$OPENMPI_UCX_INSTALL_DIR/include/ + + * To link: + hipcc -fgpu-rdc --hip-link rocshmem_getmem_test.o -o rocshmem_getmem_test \ + --offload-arch=<offload-target>:<xnack-mode> \ + $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ + $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ + -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + + * To run: + mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_getmem_test + + */ #include diff --git a/examples/rocshmem_init_attr_test.cc b/examples/rocshmem_init_attr_test.cc index d1b4d77f6c..ef1353077c 100644 --- a/examples/rocshmem_init_attr_test.cc +++ b/examples/rocshmem_init_attr_test.cc @@ -23,18 +23,36 @@ *****************************************************************************/ /* -hipcc -c -fgpu-rdc -x hip rocshmem_init_attr_test.cc \ - -I/opt/rocm/include \ - -I$ROCSHMEM_INSTALL_DIR/include \ - -I$OPENMPI_UCX_INSTALL_DIR/include/ + * First find your offload target, and if xnack is enabled/disabled using -hipcc -fgpu-rdc --hip-link rocshmem_init_attr_test.o -o rocshmem_init_attr_test \ - $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ - $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ - -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + rocminfo | grep amdgcn -ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_init_attr_test -*/ + * It should output a string like so: + + "Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-" + + * This lists the offload target (gfx942) and that xnack is disabled (xnack-). + * Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands. 
+ * Please modify the compile and link commands to suit your system + + * To compile: + hipcc -c -fgpu-rdc -x hip rocshmem_init_attr_test.cc \ + --offload-arch=<offload-target>:<xnack-mode> \ + -I/opt/rocm/include \ + -I$ROCSHMEM_INSTALL_DIR/include \ + -I$OPENMPI_UCX_INSTALL_DIR/include/ + + * To link: + hipcc -fgpu-rdc --hip-link rocshmem_init_attr_test.o -o rocshmem_init_attr_test \ + --offload-arch=<offload-target>:<xnack-mode> \ + $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ + $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ + -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + + * To run: + mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_init_attr_test + + */ #include @@ -92,7 +110,7 @@ int main (int argc, char **argv) std::cout << rank << ": Error in rocshmem_set_attr_uniqueid_args. Aborting.\n"; MPI_Abort (MPI_COMM_WORLD, ret); } - + ret = rocshmem_init_attr(ROCSHMEM_INIT_WITH_UNIQUEID, &attr); if (ret != ROCSHMEM_SUCCESS) { std::cout << rank << ": Error in rocshmem_init_attr. Aborting.\n"; diff --git a/examples/rocshmem_put_signal_test.cc b/examples/rocshmem_put_signal_test.cc index 78ee837325..08a38355ba 100644 --- a/examples/rocshmem_put_signal_test.cc +++ b/examples/rocshmem_put_signal_test.cc @@ -23,18 +23,36 @@ *****************************************************************************/ /* -hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \ - -I/opt/rocm/include \ - -I$ROCSHMEM_INSTALL_DIR/include \ - -I$OPENMPI_UCX_INSTALL_DIR/include/ + * First find your offload target, and if xnack is enabled/disabled using -hipcc -fgpu-rdc --hip-link rocshmem_put_signal_test.o -o rocshmem_getmem_test \ - $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ - $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ - -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + rocminfo | grep amdgcn -ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_put_signal_test -*/ + * It should output a string like so: + + "Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-" + + * This lists the offload target (gfx942) and that xnack is disabled (xnack-). 
+ * Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands. + * Please modify the compile and link commands to suit your system + + * To compile: + hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \ + --offload-arch=<offload-target>:<xnack-mode> \ + -I/opt/rocm/include \ + -I$ROCSHMEM_INSTALL_DIR/include \ + -I$OPENMPI_UCX_INSTALL_DIR/include/ + + * To link: + hipcc -fgpu-rdc --hip-link rocshmem_put_signal_test.o -o rocshmem_put_signal_test \ + --offload-arch=<offload-target>:<xnack-mode> \ + $ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \ + $OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \ + -L/opt/rocm/lib -lamdhip64 -lhsa-runtime64 + + * To run: + mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_put_signal_test + + */ #include diff --git a/tests/functional_tests/CMakeLists.txt b/tests/functional_tests/CMakeLists.txt index 6417e9d192..df6ccc5980 100644 --- a/tests/functional_tests/CMakeLists.txt +++ b/tests/functional_tests/CMakeLists.txt @@ -78,15 +78,14 @@ if (BUILD_TESTS_ONLY) ) foreach (target ${DEFAULT_GPUS}) - list(APPEND static_link_flags --offload-arch=${target}) + list(APPEND offload_flags --offload-arch=${target}) endforeach() - list(JOIN static_link_flags " " flags_str) target_compile_options( ${TESTS_NAME} PRIVATE - ${flags_str} - -fgpu-rdc + ${offload_flags} + -fgpu-rdc ) target_link_libraries( @@ -94,7 +93,7 @@ if (BUILD_TESTS_ONLY) PRIVATE ${MPI_mpi_LIBRARY} ${MPI_mpicxx_LIBRARY} - ${flags_str} + ${offload_flags} -L${ROCSHMEM_HOME}/lib -lamdhip64 -lhsa-runtime64 diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index 5c52f9aead..2ccf33380c 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -121,15 +121,14 @@ if (BUILD_TESTS_ONLY) ) foreach (target ${DEFAULT_GPUS}) - list(APPEND static_link_flags --offload-arch=${target}) + list(APPEND offload_flags --offload-arch=${target}) endforeach() - list(JOIN static_link_flags " " flags_str) target_compile_options( ${PROJECT_NAME} PRIVATE - ${flags_str} - -fgpu-rdc + 
${offload_flags} + -fgpu-rdc ) target_link_libraries( @@ -137,6 +136,7 @@ if (BUILD_TESTS_ONLY) PRIVATE ${MPI_mpi_LIBRARY} ${MPI_mpicxx_LIBRARY} + ${offload_flags} -L${ROCSHMEM_HOME}/lib -lamdhip64 -lhsa-runtime64