Added XNACK support (#94)
* Added xnack flags * Updated examples compile command
Esse commit está contido em:
+6
-5
@@ -97,8 +97,10 @@ set(ROCMCHECKS_WARN_TOOLCHAIN_VAR OFF)
|
||||
include(cmake/rocm_local_targets.cmake)
|
||||
|
||||
set(DEFAULT_GPUS
|
||||
gfx90a
|
||||
gfx942)
|
||||
gfx90a:xnack-;
|
||||
gfx90a:xnack+;
|
||||
gfx942:xnack-;
|
||||
gfx942:xnack+)
|
||||
|
||||
###############################################################################
|
||||
# PROJECT
|
||||
@@ -146,10 +148,9 @@ if (NOT BUILD_TESTS_ONLY)
|
||||
message(STATUS "Compiling for ${COMPILING_TARGETS}")
|
||||
|
||||
foreach (target ${COMPILING_TARGETS})
|
||||
list(APPEND static_link_flags --offload-arch=${target})
|
||||
list(APPEND offload_flags --offload-arch=${target})
|
||||
endforeach()
|
||||
list(JOIN static_link_flags " " flags_str)
|
||||
add_compile_options(${flags_str})
|
||||
add_compile_options(${offload_flags})
|
||||
|
||||
#############################################################################
|
||||
# PACKAGE DEPENDENCIES
|
||||
|
||||
@@ -50,15 +50,14 @@ foreach(SOURCE_FILE IN LISTS EXAMPLE_SOURCES)
|
||||
)
|
||||
|
||||
foreach (target ${DEFAULT_GPUS})
|
||||
list(APPEND static_link_flags --offload-arch=${target})
|
||||
list(APPEND offload_flags --offload-arch=${target})
|
||||
endforeach()
|
||||
list(JOIN static_link_flags " " flags_str)
|
||||
|
||||
target_compile_options(
|
||||
${EXECUTABLE_NAME}
|
||||
PRIVATE
|
||||
${flags_str}
|
||||
-fgpu-rdc
|
||||
${offload_flags}
|
||||
-fgpu-rdc
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
@@ -66,7 +65,7 @@ foreach(SOURCE_FILE IN LISTS EXAMPLE_SOURCES)
|
||||
PRIVATE
|
||||
${MPI_mpi_LIBRARY}
|
||||
${MPI_mpicxx_LIBRARY}
|
||||
${flags_str}
|
||||
${offload_flags}
|
||||
-L${ROCSHMEM_HOME}/lib
|
||||
-lamdhip64
|
||||
-lhsa-runtime64
|
||||
|
||||
@@ -23,18 +23,36 @@
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
* First find your offload target, and whether xnack is enabled or disabled, using:
|
||||
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_allreduce_test.o -o rocshmem_allreduce_test \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
rocminfo | grep amdgcn
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_allreduce_test
|
||||
*/
|
||||
* It should output a string like so:
|
||||
|
||||
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
|
||||
|
||||
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
|
||||
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
|
||||
* Please modify the compile and link commands to suit your system
|
||||
|
||||
* To compile:
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_allreduce_test.cc \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
* To link:
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_allreduce_test.o -o rocshmem_allreduce_test \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
* To run:
|
||||
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_allreduce_test
|
||||
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
@@ -23,18 +23,36 @@
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
* First find your offload target, and whether xnack is enabled or disabled, using:
|
||||
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_alltoall_test.o -o rocshmem_alltoall_test \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
rocminfo | grep amdgcn
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_alltoall_test
|
||||
*/
|
||||
* It should output a string like so:
|
||||
|
||||
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
|
||||
|
||||
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
|
||||
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
|
||||
* Please modify the compile and link commands to suit your system
|
||||
|
||||
* To compile:
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_alltoall_test.cc \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
* To link:
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_alltoall_test.o -o rocshmem_alltoall_test \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
* To run:
|
||||
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_alltoall_test
|
||||
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -149,7 +167,7 @@ int main (int argc, char **argv)
|
||||
bool pass = check_recvbuf(dest, nelem, my_pe, npes);
|
||||
|
||||
printf("Test %s \t nelem %d %s\n", argv[0], nelem, pass ? "[PASS]" : "[FAIL]");
|
||||
|
||||
|
||||
rocshmem_free(source);
|
||||
rocshmem_free(dest);
|
||||
|
||||
|
||||
@@ -23,18 +23,36 @@
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
* First find your offload target, and whether xnack is enabled or disabled, using:
|
||||
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_broadcast_test.o -o rocshmem_broadcast_test \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
rocminfo | grep amdgcn
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 8 ./rocshmem_broadcast_test
|
||||
*/
|
||||
* It should output a string like so:
|
||||
|
||||
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
|
||||
|
||||
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
|
||||
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
|
||||
* Please modify the compile and link commands to suit your system
|
||||
|
||||
* To compile:
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_broadcast_test.cc \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
* To link:
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_broadcast_test.o -o rocshmem_broadcast_test \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
* To run:
|
||||
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_broadcast_test
|
||||
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -144,7 +162,7 @@ int main(int argc, char **argv)
|
||||
bool pass = check_recvbuf(dest, nelem, my_pe, npes);
|
||||
printf("Test %s \t nelem %d %s\n", argv[0], nelem, pass ? "[PASS]" : "[FAIL]");
|
||||
}
|
||||
|
||||
|
||||
rocshmem_free(source);
|
||||
rocshmem_free(dest);
|
||||
|
||||
|
||||
@@ -23,18 +23,36 @@
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
* First find your offload target, and whether xnack is enabled or disabled, using:
|
||||
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_getmem_test.o -o rocshmem_getmem_test \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
rocminfo | grep amdgcn
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_getmem_test
|
||||
*/
|
||||
* It should output a string like so:
|
||||
|
||||
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
|
||||
|
||||
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
|
||||
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
|
||||
* Please modify the compile and link commands to suit your system
|
||||
|
||||
* To compile:
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_getmem_test.cc \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
* To link:
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_getmem_test.o -o rocshmem_getmem_test \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
* To run:
|
||||
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_getmem_test
|
||||
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
@@ -23,18 +23,36 @@
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_init_attr_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
* First find your offload target, and whether xnack is enabled or disabled, using:
|
||||
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_init_attr_test.o -o rocshmem_init_attr_test \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
rocminfo | grep amdgcn
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_init_attr_test
|
||||
*/
|
||||
* It should output a string like so:
|
||||
|
||||
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
|
||||
|
||||
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
|
||||
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
|
||||
* Please modify the compile and link commands to suit your system
|
||||
|
||||
* To compile:
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_init_attr_test.cc \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
* To link:
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_init_attr_test.o -o rocshmem_init_attr_test \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
* To run:
|
||||
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_init_attr_test
|
||||
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -92,7 +110,7 @@ int main (int argc, char **argv)
|
||||
std::cout << rank << ": Error in rocshmem_set_attr_uniqueid_args. Aborting.\n";
|
||||
MPI_Abort (MPI_COMM_WORLD, ret);
|
||||
}
|
||||
|
||||
|
||||
ret = rocshmem_init_attr(ROCSHMEM_INIT_WITH_UNIQUEID, &attr);
|
||||
if (ret != ROCSHMEM_SUCCESS) {
|
||||
std::cout << rank << ": Error in rocshmem_init_attr. Aborting.\n";
|
||||
|
||||
@@ -23,18 +23,36 @@
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
* First find your offload target, and whether xnack is enabled or disabled, using:
|
||||
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_put_signal_test.o -o rocshmem_getmem_test \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
rocminfo | grep amdgcn
|
||||
|
||||
ROCSHMEM_MAX_NUM_CONTEXTS=2 mpirun -np 2 ./rocshmem_put_signal_test
|
||||
*/
|
||||
* It should output a string like so:
|
||||
|
||||
"Name: amdgcn-amd-amdhsa--gfx942:sramecc+:xnack-"
|
||||
|
||||
* This lists the offload target (gfx942) and that xnack is disabled (xnack-).
|
||||
* Therefore, we need to specify --offload-arch=gfx942:xnack- to our link and compile commands.
|
||||
* Please modify the compile and link commands to suit your system
|
||||
|
||||
* To compile:
|
||||
hipcc -c -fgpu-rdc -x hip rocshmem_put_signal_test.cc \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
-I/opt/rocm/include \
|
||||
-I$ROCSHMEM_INSTALL_DIR/include \
|
||||
-I$OPENMPI_UCX_INSTALL_DIR/include/
|
||||
|
||||
* To link:
|
||||
hipcc -fgpu-rdc --hip-link rocshmem_put_signal_test.o -o rocshmem_put_signal_test \
|
||||
--offload-arch=<target>:<xnack> \
|
||||
$ROCSHMEM_INSTALL_DIR/lib/librocshmem.a \
|
||||
$OPENMPI_UCX_INSTALL_DIR/lib/libmpi.so \
|
||||
-L/opt/rocm/lib -lamdhip64 -lhsa-runtime64
|
||||
|
||||
* To run:
|
||||
mpirun -np 8 -x ROCSHMEM_MAX_NUM_CONTEXTS=2 ./rocshmem_put_signal_test
|
||||
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
@@ -78,15 +78,14 @@ if (BUILD_TESTS_ONLY)
|
||||
)
|
||||
|
||||
foreach (target ${DEFAULT_GPUS})
|
||||
list(APPEND static_link_flags --offload-arch=${target})
|
||||
list(APPEND offload_flags --offload-arch=${target})
|
||||
endforeach()
|
||||
list(JOIN static_link_flags " " flags_str)
|
||||
|
||||
target_compile_options(
|
||||
${TESTS_NAME}
|
||||
PRIVATE
|
||||
${flags_str}
|
||||
-fgpu-rdc
|
||||
${offload_flags}
|
||||
-fgpu-rdc
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
@@ -94,7 +93,7 @@ if (BUILD_TESTS_ONLY)
|
||||
PRIVATE
|
||||
${MPI_mpi_LIBRARY}
|
||||
${MPI_mpicxx_LIBRARY}
|
||||
${flags_str}
|
||||
${offload_flags}
|
||||
-L${ROCSHMEM_HOME}/lib
|
||||
-lamdhip64
|
||||
-lhsa-runtime64
|
||||
|
||||
@@ -121,15 +121,14 @@ if (BUILD_TESTS_ONLY)
|
||||
)
|
||||
|
||||
foreach (target ${DEFAULT_GPUS})
|
||||
list(APPEND static_link_flags --offload-arch=${target})
|
||||
list(APPEND offload_flags --offload-arch=${target})
|
||||
endforeach()
|
||||
list(JOIN static_link_flags " " flags_str)
|
||||
|
||||
target_compile_options(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
${flags_str}
|
||||
-fgpu-rdc
|
||||
${offload_flags}
|
||||
-fgpu-rdc
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
@@ -137,6 +136,7 @@ if (BUILD_TESTS_ONLY)
|
||||
PRIVATE
|
||||
${MPI_mpi_LIBRARY}
|
||||
${MPI_mpicxx_LIBRARY}
|
||||
${offload_flags}
|
||||
-L${ROCSHMEM_HOME}/lib
|
||||
-lamdhip64
|
||||
-lhsa-runtime64
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário