GDA support for alltoall via rocshmem integration (#2099)
* ROCSHMEM linking/building to match MSCCL++ style
* add rocSHMEM as a submodule
* Move rocSHMEM submodule to ext-src/rocSHMEM
* Adding submodule support proper, as well as a patch for rocshmem
* Cleaning up INCLUDE_DIR vs INCLUDE_DIRS mixup
* updating patch file
* Pointing rocshmem submodule to edgars fixup patch
* Adding IBVERBS link to the submodule build
* More IBVERBS patching
* pin rocshmem submodule to b534423
* Adding IPC support in rocSHMEM build
* updating rocshmem submodule to resolve CQ errors
* Updating submodule to include recent a2a optimizations
* invoke rocshmem alltoall from rccl
* Updating submodule to CQ error number hang
* Updating submodule to include a2a improvements and bug fixes
* Updating submodule to point to Yiltan's fork and doorbell ring removal commit
* Updating hash to correspond with submodule change
* Updating to no-ctx wg call and updating submodule
* copy-in/copy-out using multiples CUs
* Updating rocSHMEM submodule to include doorbell improvs
* updating gitmodule to point to upstream
* code cleanup and adjust threashold
* guard rocshmem a2a invocation
* Only build with rocshmem when specified
* code cleanup
* address review comments
* Removing debugging failure case
Signed-off-by: Thomas Huber <thomas.huber@amd.com>
* whitespace fix
* Adding rocshmem compile guard
* Removing unneccesary comment
Signed-off-by: Thomas Huber <thomas.huber@amd.com>
* remove commented lines
* address review comments
* cleanup
---------
Signed-off-by: Thomas Huber <thomas.huber@amd.com>
Co-authored-by: Thomas Huber <thomas.huber@amd.com>
Co-authored-by: Nusrat Islam <nusislam@dell300x-ccs-aus-k12-27.cs-aus.dcgpu>
Co-authored-by: Nusrat Islam <nusislam@dell300x-ccs-aus-k13-09.cs-aus.dcgpu>
Co-authored-by: Islam <nusislam@amd.com>
Co-authored-by: Nusrat Islam <nusislam@dell300x-ccs-aus-k13-03.cs-aus.dcgpu>
Цей коміт міститься в:
+12
-1
@@ -41,6 +41,7 @@ force_reduce_pipeline=false
|
||||
generate_sym_kernels=false
|
||||
warp_speed_enabled=true # note that this flag will be overridden to false for non MI350/MI300 platforms
|
||||
quiet_warnings=false
|
||||
build_rocshmem_support=false
|
||||
|
||||
# #################################################
|
||||
# helper functions
|
||||
@@ -82,6 +83,7 @@ function display_help()
|
||||
echo " --force-reduce-pipeline Force reduce_copy sw pipeline to be used for every reduce-based collectives and datatypes"
|
||||
echo " --generate-sym-kernels Generate symmetric memory kernels"
|
||||
echo " -q|--quiet-warnings Suppress majority of compiler warnings (not recommended)"
|
||||
echo " --rocshmem Build with rocSHMEM support"
|
||||
}
|
||||
|
||||
# #################################################
|
||||
@@ -91,7 +93,7 @@ function display_help()
|
||||
# check if we have a modern version of getopt that can handle whitespace and long parameters
|
||||
getopt -T
|
||||
if [[ "$?" -eq 4 ]]; then
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprtq --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,kernel-resource-use,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,quiet-warnings,disable-warp-speed,verbose -- "$@")
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --options cdfhij:lprtq --longoptions address-sanitizer,dependencies,debug,dump-asm,enable-code-coverage,enable_backtrace,disable-colltrace,disable-msccl-kernel,enable-mscclpp,fast,help,install,jobs:,kernel-resource-use,local_gpu_only,amdgpu_targets:,no_clean,npkit-enable,log-trace,openmp-test-enable,roctx-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,static,tests_build,time-trace,force-reduce-pipeline,generate-sym-kernels,quiet-warnings,disable-warp-speed,verbose,rocshmem -- "$@")
|
||||
else
|
||||
echo "Need a new version of getopt"
|
||||
exit 1
|
||||
@@ -140,6 +142,7 @@ while true; do
|
||||
--generate-sym-kernels) generate_sym_kernels=true; shift ;;
|
||||
--disable-warp-speed) warp_speed_enabled=false; shift ;;
|
||||
-q | --quiet-warnings) quiet_warnings=true; shift ;;
|
||||
--rocshmem) build_rocshmem_support=true; shift ;;
|
||||
--) shift ; break ;;
|
||||
*) echo "Unexpected command line parameter received; aborting";
|
||||
exit 1
|
||||
@@ -329,6 +332,14 @@ if [[ "${quiet_warnings}" == true ]]; then
|
||||
fi
|
||||
|
||||
|
||||
# Enable rocSHMEM support
|
||||
if [[ "${build_rocshmem_support}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DENABLE_ROCSHMEM=ON"
|
||||
cmake_common_options="${cmake_common_options} -DROCSHMEM_INSTALL_DIR=${ROCSHMEM_INSTALL_DIR}"
|
||||
else
|
||||
cmake_common_options="${cmake_common_options} -DENABLE_ROCSHMEM=OFF"
|
||||
fi
|
||||
|
||||
check_exit_code "$?"
|
||||
|
||||
# Enable ninja build for time tracing
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача