Added XNACK support (#94)

* Added xnack flags
* Updated examples compile command
Esse commit está contido em:
Yiltan
2025-04-30 08:57:55 -04:00
commit de GitHub
commit edcd1ed57e
10 arquivos alterados com 189 adições e 82 exclusões
+6 -5
Ver Arquivo
@@ -97,8 +97,10 @@ set(ROCMCHECKS_WARN_TOOLCHAIN_VAR OFF)
include(cmake/rocm_local_targets.cmake)
set(DEFAULT_GPUS
gfx90a
gfx942)
gfx90a:xnack-;
gfx90a:xnack+;
gfx942:xnack-;
gfx942:xnack+)
###############################################################################
# PROJECT
@@ -146,10 +148,9 @@ if (NOT BUILD_TESTS_ONLY)
message(STATUS "Compiling for ${COMPILING_TARGETS}")
foreach (target ${COMPILING_TARGETS})
list(APPEND static_link_flags --offload-arch=${target})
list(APPEND offload_flags --offload-arch=${target})
endforeach()
list(JOIN static_link_flags " " flags_str)
add_compile_options(${flags_str})
add_compile_options(${offload_flags})
#############################################################################
# PACKAGE DEPENDENCIES
+4 -5
Ver Arquivo
@@ -50,15 +50,14 @@ foreach(SOURCE_FILE IN LISTS EXAMPLE_SOURCES)
)
foreach (target ${DEFAULT_GPUS})
list(APPEND static_link_flags --offload-arch=${target})
list(APPEND offload_flags --offload-arch=${target})
endforeach()
list(JOIN static_link_flags " " flags_str)
target_compile_options(
${EXECUTABLE_NAME}
PRIVATE
${flags_str}
-fgpu-rdc
${offload_flags}
-fgpu-rdc
)
target_link_libraries(
@@ -66,7 +65,7 @@ foreach(SOURCE_FILE IN LISTS EXAMPLE_SOURCES)
PRIVATE
${MPI_mpi_LIBRARY}
${MPI_mpicxx_LIBRARY}
${flags_str}
${offload_flags}
-L${ROCSHMEM_HOME}/lib
-lamdhip64
-lhsa-runtime64
+28 -10
Ver Arquivo
@@ -23,18 +23,36 @@
*****************************************************************************/
/*
hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* First find your offload target, and if xnack is enabled/disabled using
hipcc -fgpu-rdc --hip-link rocshmem_allreduce_test.o -o rocshmem_allreduce_test \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
rocminfo | grep amdgcn
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_allreduce_test
*/
* It should output a string like so:
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
* Please modify the compile and link commands to suit your system
* To compile:
hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \
--offload-arch=<target>:<xnack> \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* To link:
hipcc -fgpu-rdc --hip-link rocshmem_allreduce_test.o -o rocshmem_allreduce_test \
--offload-arch=<target>:<xnack> \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
* To run:
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_allreduce_test
*/
#include <iostream>
+29 -11
Ver Arquivo
@@ -23,18 +23,36 @@
*****************************************************************************/
/*
hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* First find your offload target, and if xnack is enabled/disabled using
hipcc -fgpu-rdc --hip-link rocshmem_alltoall_test.o -o rocshmem_alltoall_test \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
rocminfo | grep amdgcn
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_alltoall_test
*/
* It should output a string like so:
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
* Please modify the compile and link commands to suit your system
* To compile:
hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \
--offload-arch=<target>:<xnack> \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* To link:
hipcc -fgpu-rdc --hip-link rocshmem_alltoall_test.o -o rocshmem_alltoall_test \
--offload-arch=<target>:<xnack> \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
* To run:
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_alltoall_test
*/
#include <iostream>
@@ -149,7 +167,7 @@ int main (int argc, char **argv)
bool pass = check_recvbuf(dest, nelem, my_pe, npes);
printf("Test %s \t nelem %d %s\n", argv[0], nelem, pass ? "[PASS]" : "[FAIL]");
rocshmem_free(source);
rocshmem_free(dest);
+29 -11
Ver Arquivo
@@ -23,18 +23,36 @@
*****************************************************************************/
/*
hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* First find your offload target, and if xnack is enabled/disabled using
hipcc -fgpu-rdc --hip-link rocshmem_broadcast_test.o -o rocshmem_broadcast_test \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
rocminfo | grep amdgcn
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_broadcast_test
*/
* It should output a string like so:
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
* Please modify the compile and link commands to suit your system
* To compile:
hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \
--offload-arch=<target>:<xnack> \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* To link:
hipcc -fgpu-rdc --hip-link rocshmem_broadcast_test.o -o rocshmem_broadcast_test \
--offload-arch=<target>:<xnack> \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
* To run:
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_broadcast_test
*/
#include <iostream>
@@ -144,7 +162,7 @@ int main(int argc, char **argv)
bool pass = check_recvbuf(dest, nelem, my_pe, npes);
printf("Test %s \t nelem %d %s\n", argv[0], nelem, pass ? "[PASS]" : "[FAIL]");
}
rocshmem_free(source);
rocshmem_free(dest);
+28 -10
Ver Arquivo
@@ -23,18 +23,36 @@
*****************************************************************************/
/*
hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* First find your offload target, and if xnack is enabled/disabled using
hipcc -fgpu-rdc --hip-link rocshmem_getmem_test.o -o rocshmem_getmem_test \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
rocminfo | grep amdgcn
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_getmem_test
*/
* It should output a string like so:
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
* Please modify the compile and link commands to suit your system
* To compile:
hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \
--offload-arch=<target>:<xnack> \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* To link:
hipcc -fgpu-rdc --hip-link rocshmem_getmem_test.o -o rocshmem_getmem_test \
--offload-arch=<target>:<xnack> \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
* To run:
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_getmem_test
*/
#include <iostream>
+29 -11
Ver Arquivo
@@ -23,18 +23,36 @@
*****************************************************************************/
/*
hipcc -c -fgpu-rdc -x hip rocshmem_init_attr_test.cc \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* First find your offload target, and if xnack is enabled/disabled using
hipcc -fgpu-rdc --hip-link rocshmem_init_attr_test.o -o rocshmem_init_attr_test \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
rocminfo | grep amdgcn
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_init_attr_test
*/
* It should output a string like so:
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
* Please modify the compile and link commands to suit your system
* To compile:
hipcc -c -fgpu-rdc -x hip rocshmem_init_attr_test.cc \
--offload-arch=<target>:<xnack> \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* To link:
hipcc -fgpu-rdc --hip-link rocshmem_init_attr_test.o -o rocshmem_init_attr_test \
--offload-arch=<target>:<xnack> \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
* To run:
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_init_attr_test
*/
#include <iostream>
@@ -92,7 +110,7 @@ int main (int argc, char **argv)
std::cout << rank << ": Error in rocshmem_set_attr_uniqueid_args. Aborting.\n";
MPI_Abort (MPI_COMM_WORLD, ret);
}
ret = rocshmem_init_attr(ROCSHMEM_INIT_WITH_UNIQUEID, &attr);
if (ret != ROCSHMEM_SUCCESS) {
std::cout << rank << ": Error in rocshmem_init_attr. Aborting.\n";
+28 -10
Ver Arquivo
@@ -23,18 +23,36 @@
*****************************************************************************/
/*
hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* First find your offload target, and if xnack is enabled/disabled using
hipcc -fgpu-rdc --hip-link rocshmem_put_signal_test.o -o rocshmem_getmem_test \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
rocminfo | grep amdgcn
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_put_signal_test
*/
* It should output a string like so:
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
* Please modify the compile and link commands to suit your system
* To compile:
hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \
--offload-arch=<target>:<xnack> \
-I/opt/rocm/include \
-I$ROCSHMEM_INSTALL_DIR/include \
-I$OPENMPI_UCX_INSTALL_DIR/include/
* To link:
hipcc -fgpu-rdc --hip-link rocshmem_put_signal_test.o -o rocshmem_put_signal_test \
--offload-arch=<target>:<xnack> \
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
* To run:
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_put_signal_test
*/
#include <iostream>
+4 -5
Ver Arquivo
@@ -78,15 +78,14 @@ if (BUILD_TESTS_ONLY)
)
foreach (target ${DEFAULT_GPUS})
list(APPEND static_link_flags --offload-arch=${target})
list(APPEND offload_flags --offload-arch=${target})
endforeach()
list(JOIN static_link_flags " " flags_str)
target_compile_options(
${TESTS_NAME}
PRIVATE
${flags_str}
-fgpu-rdc
${offload_flags}
-fgpu-rdc
)
target_link_libraries(
@@ -94,7 +93,7 @@ if (BUILD_TESTS_ONLY)
PRIVATE
${MPI_mpi_LIBRARY}
${MPI_mpicxx_LIBRARY}
${flags_str}
${offload_flags}
-L${ROCSHMEM_HOME}/lib
-lamdhip64
-lhsa-runtime64
+4 -4
Ver Arquivo
@@ -121,15 +121,14 @@ if (BUILD_TESTS_ONLY)
)
foreach (target ${DEFAULT_GPUS})
list(APPEND static_link_flags --offload-arch=${target})
list(APPEND offload_flags --offload-arch=${target})
endforeach()
list(JOIN static_link_flags " " flags_str)
target_compile_options(
${PROJECT_NAME}
PRIVATE
${flags_str}
-fgpu-rdc
${offload_flags}
-fgpu-rdc
)
target_link_libraries(
@@ -137,6 +136,7 @@ if (BUILD_TESTS_ONLY)
PRIVATE
${MPI_mpi_LIBRARY}
${MPI_mpicxx_LIBRARY}
${offload_flags}
-L${ROCSHMEM_HOME}/lib
-lamdhip64
-lhsa-runtime64