27648b0900
* ROCSHMEM linking/building to match MSCCL++ style
* add rocSHMEM as a submodule
* Move rocSHMEM submodule to ext-src/rocSHMEM
* Adding submodule support proper, as well as a patch for rocshmem
* Cleaning up INCLUDE_DIR vs INCLUDE_DIRS mixup
* updating patch file
* Pointing rocshmem submodule to edgars fixup patch
* Adding IBVERBS link to the submodule build
* More IBVERBS patching
* pin rocshmem submodule to b534423
* Adding IPC support in rocSHMEM build
* updating rocshmem submodule to resolve CQ errors
* Updating submodule to include recent a2a optimizations
* invoke rocshmem alltoall from rccl
* Updating submodule to CQ error number hang
* Updating submodule to include a2a improvements and bug fixes
* Updating submodule to point to Yiltan's fork and doorbell ring removal commit
* Updating hash to correspond with submodule change
* Updating to no-ctx wg call and updating submodule
* copy-in/copy-out using multiple CUs
* Updating rocSHMEM submodule to include doorbell improvs
* updating gitmodule to point to upstream
* code cleanup and adjust threshold
* guard rocshmem a2a invocation
* Only build with rocshmem when specified
* code cleanup
* address review comments
* Removing debugging failure case
Signed-off-by: Thomas Huber <thomas.huber@amd.com>
* whitespace fix
* Adding rocshmem compile guard
* Removing unnecessary comment
Signed-off-by: Thomas Huber <thomas.huber@amd.com>
* remove commented lines
* address review comments
* cleanup
---------
Signed-off-by: Thomas Huber <thomas.huber@amd.com>
Co-authored-by: Thomas Huber <thomas.huber@amd.com>
Co-authored-by: Nusrat Islam <nusislam@dell300x-ccs-aus-k12-27.cs-aus.dcgpu>
Co-authored-by: Nusrat Islam <nusislam@dell300x-ccs-aus-k13-09.cs-aus.dcgpu>
Co-authored-by: Islam <nusislam@amd.com>
Co-authored-by: Nusrat Islam <nusislam@dell300x-ccs-aus-k13-03.cs-aus.dcgpu>
114 行
4.9 KiB
CMake
114 行
4.9 KiB
CMake
# MIT License
|
|
#
|
|
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all
|
|
# copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
# SOFTWARE.
|
|
|
|
include(ExternalProject)
|
|
|
|
# add_rocshmem_targets()
#
# Makes rocSHMEM available to the calling scope, either from an existing
# installation or by building the ext-src/rocSHMEM submodule.
#
# Inputs (read from the calling scope):
#   ROCSHMEM_INSTALL_DIR  Optional. When set, find_package(rocshmem_static) is
#                         attempted first, with "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
#                         appended to CMAKE_MODULE_PATH.
#                         NOTE(review): presumably the find module there consumes
#                         ROCSHMEM_INSTALL_DIR -- confirm against that module.
#
# Outputs (set with PARENT_SCOPE):
#   ROCSHMEM_INCLUDE_DIR  rocSHMEM include directory.
#   ROCSHMEM_LIBRARY      Path to the rocSHMEM library.
#   IBVERBS               Path to libibverbs as located by find_library().
#
# Targets (only when rocSHMEM is built from the submodule):
#   rocshmem_checkout_submodule  Runs `git submodule update --init --recursive`.
#   rocshmem_ext                 ExternalProject that builds and installs rocSHMEM.
#   rocshmem_static              Custom target (part of ALL) that depends on
#                                rocshmem_ext; a hook for other targets.
#
# Fails the configure step with FATAL_ERROR when libibverbs cannot be found.
#
# Example:
#   add_rocshmem_targets()
function(add_rocshmem_targets)

  # Check for an existing installation via the user-provided prefix
  # ROCSHMEM_INSTALL_DIR.
  if(ROCSHMEM_INSTALL_DIR)
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
    find_package(rocshmem_static)

    # Populate the IBVERBS cache entry if nothing provided it yet. The value
    # handed to the caller is exported once, at the end of this function.
    if(NOT IBVERBS)
      find_library(IBVERBS ibverbs)
    endif()
  endif()

  if(rocshmem_static_FOUND)
    # We found a prebuilt rocSHMEM; export variables upward as-is.
    set(ROCSHMEM_INCLUDE_DIR "${ROCSHMEM_INCLUDE_DIR}" PARENT_SCOPE)
    set(ROCSHMEM_LIBRARY "${ROCSHMEM_LIBRARY}" PARENT_SCOPE)
  else()
    # No pre-existing installation: build from the submodule into ext/rocshmem.
    # NOTE(review): CMAKE_SOURCE_DIR is the top of the whole build, so these
    # paths assume RCCL is the top-level project.
    set(_rccl_root "${CMAKE_SOURCE_DIR}")
    set(ROCSHMEM_SOURCE "${_rccl_root}/ext-src/rocSHMEM")
    set(ROCSHMEM_INSTALL_DIR "${_rccl_root}/ext/rocshmem")
    set(_rocshmem_archive "${ROCSHMEM_INSTALL_DIR}/lib/librocshmem.a")

    # Make sure the submodule exists (same style as MSCCL++: custom rule +
    # target). The rule only fires while the submodule's CMakeLists.txt is
    # missing, because that file is the rule's OUTPUT.
    add_custom_command(
      OUTPUT "${ROCSHMEM_SOURCE}/CMakeLists.txt"
      COMMAND git submodule update --init --recursive ext-src/rocSHMEM
      WORKING_DIRECTORY "${_rccl_root}"
      COMMENT "Checking out submodule: ext-src/rocSHMEM"
      VERBATIM
    )
    add_custom_target(rocshmem_checkout_submodule
      DEPENDS "${ROCSHMEM_SOURCE}/CMakeLists.txt")

    # Rocshmem submodule commit hash -> commit b28a56bd54ccc581d05a439ffa466c3dacb3385
    #
    # Build and install rocSHMEM. The project has its own scripts; we run
    # `../scripts/build_configs/gda_bnxt` from a 'build' directory, then
    # re-configure that directory with our build type and install prefix.
    #
    # Each step is a separate COMMAND (they run in order and stop at the first
    # failure) instead of one `&&`-joined shell line. The inner tree is built
    # and installed through `cmake --build` so the tool matching the inner
    # tree's generator is used, rather than the outer CMAKE_MAKE_PROGRAM.
    # BUILD_IN_SOURCE makes the submodule directory the working directory of
    # the build and install steps, hence the relative `build` paths.
    ExternalProject_Add(rocshmem_ext
      SOURCE_DIR "${ROCSHMEM_SOURCE}"
      INSTALL_DIR "${ROCSHMEM_INSTALL_DIR}"
      UPDATE_DISCONNECTED TRUE
      LOG_DOWNLOAD FALSE
      LOG_CONFIGURE FALSE
      LOG_BUILD FALSE
      LOG_INSTALL FALSE
      BUILD_IN_SOURCE TRUE
      DOWNLOAD_COMMAND "" # using the submodule checkout above
      TEST_COMMAND ""
      DEPENDS rocshmem_checkout_submodule
      CONFIGURE_COMMAND ""
      BUILD_COMMAND
        ${CMAKE_COMMAND} -E make_directory build
      COMMAND
        ${CMAKE_COMMAND} -E chdir build bash -lc "../scripts/build_configs/gda_bnxt -DUSE_EXTERNAL_MPI=OFF -DUSE_IPC=ON -DBUILD_EXAMPLES=OFF "
      COMMAND
        ${CMAKE_COMMAND} -E chdir build ${CMAKE_COMMAND}
          -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
          -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
          -DBUILD_EXAMPLES=OFF ..
      COMMAND
        ${CMAKE_COMMAND} --build build --parallel
      INSTALL_COMMAND
        ${CMAKE_COMMAND} --build build --target install
      # Declare the archive so generators that require a known producer for
      # every input (Ninja) can order consumers after this project.
      BUILD_BYPRODUCTS "${_rocshmem_archive}"
    )

    # After the build, define the variables RCCL expects.
    set(ROCSHMEM_INCLUDE_DIR "${ROCSHMEM_INSTALL_DIR}/include" PARENT_SCOPE)
    set(ROCSHMEM_LIBRARY "${_rocshmem_archive}" PARENT_SCOPE)

    # Provide a dummy target other code can depend on. add_dependencies() is
    # the documented way to express a target-to-target ordering.
    add_custom_target(rocshmem_static ALL)
    add_dependencies(rocshmem_static rocshmem_ext)
  endif()

  # libibverbs is required on both paths; look it up once and export it.
  find_library(_IBVERBS ibverbs)
  if(NOT _IBVERBS)
    message(FATAL_ERROR "libibverbs not found (install rdma-core/libibverbs-dev)")
  endif()
  set(IBVERBS "${_IBVERBS}" PARENT_SCOPE)

endfunction()
|