مقایسه کامیت‌ها
21 کامیت
develop
...
gfx1151-rccl
| مؤلف | SHA1 | تاریخ | |
|---|---|---|---|
| | a2686c9f41 | | |
| | 532214edfb | | |
| | aec38e7dde | | |
| | f4b6e5f450 | | |
| | 0586700b06 | | |
| | 3f31d17ae7 | | |
| | f227312867 | | |
| | 54de8024d3 | | |
| | 3bd4e81a8b | | |
| | 7504897fe4 | | |
| | 1d5c0c1add | | |
| | 2e6df33acc | | |
| | cd91b85935 | | |
| | 484bd5bf0f | | |
| | 95b150d96a | | |
| | 6289de70ad | | |
| | f1f0851398 | | |
| | b4f25507ec | | |
| | d2ea5d5d4c | | |
| | 8126402d12 | | |
| | 0b8251289a | | |
@@ -44,6 +44,9 @@ option(TRACE "Enable additional tracing"
|
|||||||
option(FAULT_INJECTION "Enable fault injection" ON)
|
option(FAULT_INJECTION "Enable fault injection" ON)
|
||||||
option(QUIET_WARNINGS "Supress compiler warnings" OFF)
|
option(QUIET_WARNINGS "Supress compiler warnings" OFF)
|
||||||
option(ENABLE_ROCSHMEM "Enable rocSHMEM support in RCCL" OFF)
|
option(ENABLE_ROCSHMEM "Enable rocSHMEM support in RCCL" OFF)
|
||||||
|
option(ENABLE_AMDSMI "Enable AMD/ROCm SMI support" ON)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Default GPU architectures to build
|
# Default GPU architectures to build
|
||||||
#==================================================================================================
|
#==================================================================================================
|
||||||
@@ -58,7 +61,8 @@ set(DEFAULT_GPUS
|
|||||||
gfx1101
|
gfx1101
|
||||||
gfx1102
|
gfx1102
|
||||||
gfx1200
|
gfx1200
|
||||||
gfx1201)
|
gfx1201
|
||||||
|
gfx1151)
|
||||||
|
|
||||||
# Load CMake modules
|
# Load CMake modules
|
||||||
#==================================================================================================
|
#==================================================================================================
|
||||||
@@ -116,9 +120,23 @@ else()
|
|||||||
set(SUPPORTED_GPUS ${DEFAULT_GPUS})
|
set(SUPPORTED_GPUS ${DEFAULT_GPUS})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(GPU_TARGETS "${SUPPORTED_GPUS}")
|
||||||
set(GPU_TARGETS "${SUPPORTED_GPUS}")
|
set(GPU_TARGETS "${SUPPORTED_GPUS}")
|
||||||
message(STATUS "Compiling for ${GPU_TARGETS}")
|
message(STATUS "Compiling for ${GPU_TARGETS}")
|
||||||
|
|
||||||
|
# Auto-disable SMI for gfx1151 as it is not supported
|
||||||
|
if("${GPU_TARGETS}" MATCHES "gfx1151")
|
||||||
|
message(STATUS "Detected gfx1151 target: Forcing ENABLE_AMDSMI=OFF")
|
||||||
|
set(ENABLE_AMDSMI OFF CACHE BOOL "Force disable SMI for gfx1151" FORCE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(ENABLE_AMDSMI)
|
||||||
|
message(STATUS "SMI Support: ENABLED")
|
||||||
|
add_compile_definitions(RCCL_SMI_ENABLED)
|
||||||
|
else()
|
||||||
|
message(STATUS "SMI Support: DISABLED")
|
||||||
|
endif()
|
||||||
|
|
||||||
## NOTE: Reload rocm-cmake in order to update GPU_TARGETS
|
## NOTE: Reload rocm-cmake in order to update GPU_TARGETS
|
||||||
include(cmake/Dependencies.cmake) # Reloading to use desired GPU_TARGETS instead of defaults
|
include(cmake/Dependencies.cmake) # Reloading to use desired GPU_TARGETS instead of defaults
|
||||||
|
|
||||||
@@ -264,7 +282,7 @@ get_target_property(HSA_INCLUDE_PATH hsa-runtime64::hsa-runtime64 INTERFACE_INCL
|
|||||||
message(STATUS "HSA runtime: ${HSA_INCLUDE_PATH}")
|
message(STATUS "HSA runtime: ${HSA_INCLUDE_PATH}")
|
||||||
|
|
||||||
## Check for amd-smi if ROCm 7.11.0 or newer
|
## Check for amd-smi if ROCm 7.11.0 or newer
|
||||||
if(ROCM_VERSION VERSION_GREATER_EQUAL "71100")
|
if(ROCM_VERSION VERSION_GREATER_EQUAL "71100" AND ENABLE_AMDSMI)
|
||||||
find_package(amd_smi PATHS ${ROCM_PATH}/lib/cmake/amd_smi)
|
find_package(amd_smi PATHS ${ROCM_PATH}/lib/cmake/amd_smi)
|
||||||
if(amd_smi_FOUND)
|
if(amd_smi_FOUND)
|
||||||
message(STATUS "amd_smi_INCLUDE_DIR: ${amd_smi_INCLUDE_DIR}")
|
message(STATUS "amd_smi_INCLUDE_DIR: ${amd_smi_INCLUDE_DIR}")
|
||||||
@@ -281,38 +299,7 @@ if(ROCM_VERSION VERSION_GREATER_EQUAL "71100")
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT USE_AMDSMI)
|
|
||||||
## Fallback to rocm-smi if amd-smi not found or ROCm < 7.11.0
|
|
||||||
message(WARNING "Could not find amd_smi. Falling back to rocm_smi.")
|
|
||||||
find_package(rocm_smi PATHS ${ROCM_PATH}/lib/cmake/rocm_smi)
|
|
||||||
if(rocm_smi_FOUND)
|
|
||||||
set(SMI_INCLUDE_DIR "${rocm_smi_INCLUDE_DIR}" CACHE INTERNAL "rocm-smi include directory")
|
|
||||||
set(SMI_LIB_DIR "${rocm_smi_LIB_DIR}" CACHE INTERNAL "rocm-smi library directory")
|
|
||||||
else()
|
|
||||||
message(WARNING "CMake could not find rocm-smi. Checking old include directory structure for rocm_smi")
|
|
||||||
set(SMI_INCLUDE_DIR "${ROCM_PATH}/rocm_smi/include")
|
|
||||||
set(SMI_LIB_DIR "${ROCM_PATH}/rocm_smi/lib")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT EXISTS "${SMI_INCLUDE_DIR}" OR NOT EXISTS "${SMI_LIB_DIR}")
|
|
||||||
message(FATAL_ERROR "rocm_smi not found in ${SMI_INCLUDE_DIR}")
|
|
||||||
endif()
|
|
||||||
message(STATUS "Found rocm_smi at ${SMI_INCLUDE_DIR}")
|
|
||||||
set(SMI_LIB_NAME "rocm-smi-lib" CACHE INTERNAL "rocm-smi-lib for packaging")
|
|
||||||
set(SMI_LIBRARIES rocm_smi64)
|
|
||||||
|
|
||||||
check_include_file_cxx("${SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG)
|
|
||||||
|
|
||||||
### Check for RSMI_INIT_FLAG_THRAD_ONLY_MUTEX support
|
|
||||||
file(READ "${SMI_INCLUDE_DIR}/rocm_smi/rocm_smi.h" rocm_smi_incl)
|
|
||||||
string(FIND "${rocm_smi_incl}" "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX" matchres)
|
|
||||||
if(${matchres} EQUAL -1)
|
|
||||||
message(STATUS "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX not supported")
|
|
||||||
else()
|
|
||||||
message(STATUS "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX supported")
|
|
||||||
set(HAVE_ROCM_SMI_THREAD_ONLY_MUTEX True)
|
|
||||||
endif ()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
## Check for BFD library if custom backtrace is requested
|
## Check for BFD library if custom backtrace is requested
|
||||||
if(BUILD_BFD)
|
if(BUILD_BFD)
|
||||||
@@ -792,16 +779,26 @@ set(SRC_FILES
|
|||||||
src/misc/latency_profiler/CollTraceUtils.cc
|
src/misc/latency_profiler/CollTraceUtils.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
if(USE_AMDSMI)
|
# Unconditionally include SMI headers so they are hipified/available
|
||||||
set(SMI_SOURCES
|
set(SMI_HEADERS
|
||||||
src/include/amdsmi_wrap.h
|
src/include/rocm_smi_wrap.h
|
||||||
src/misc/amdsmi_wrap.cc
|
src/include/amdsmi_wrap.h
|
||||||
)
|
)
|
||||||
|
list(APPEND SRC_FILES ${SMI_HEADERS})
|
||||||
|
|
||||||
|
if(ENABLE_AMDSMI)
|
||||||
|
# Only compile the wrapper sources if SMI is enabled
|
||||||
|
if(USE_AMDSMI)
|
||||||
|
list(APPEND SRC_FILES src/misc/amdsmi_wrap.cc)
|
||||||
|
else()
|
||||||
|
list(APPEND SRC_FILES src/misc/rocm_smi_wrap.cc)
|
||||||
|
endif()
|
||||||
else()
|
else()
|
||||||
set(SMI_SOURCES
|
# When SMI is disabled, compile the shim to provide dummy symbols (rsmi_init)
|
||||||
src/include/rocm_smi_wrap.h
|
# This satisfies external dependencies (like PyTorch) that expect SMI symbols
|
||||||
src/misc/rocm_smi_wrap.cc
|
# to be present, preventing them from failing to load or trying to load
|
||||||
)
|
# the broken system library.
|
||||||
|
list(APPEND SRC_FILES src/misc/smi_shim.cc)
|
||||||
endif()
|
endif()
|
||||||
list(APPEND SRC_FILES ${SMI_SOURCES})
|
list(APPEND SRC_FILES ${SMI_SOURCES})
|
||||||
|
|
||||||
@@ -1206,12 +1203,12 @@ if (HAVE_PARALLEL_JOBS)
|
|||||||
target_compile_options(rccl PRIVATE -parallel-jobs=12)
|
target_compile_options(rccl PRIVATE -parallel-jobs=12)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ROCM_VERSION VERSION_GREATER_EQUAL "60200")
|
if (ROCM_VERSION VERSION_GREATER_EQUAL "60200" AND NOT NO_COMPRESS)
|
||||||
target_compile_options(rccl PRIVATE --offload-compress) # Compress GPU code at compile time.
|
target_compile_options(rccl PRIVATE --offload-compress) # Compress GPU code at compile time.
|
||||||
target_link_libraries(rccl PRIVATE --offload-compress) # Compress GPU code at link time.
|
target_link_libraries(rccl PRIVATE --offload-compress) # Compress GPU code at link time.
|
||||||
message(STATUS "--offload-compress enabled - ROCm version >= 6.2.0")
|
message(STATUS "--offload-compress enabled - ROCm version >= 6.2.0")
|
||||||
else()
|
else()
|
||||||
message(STATUS "--offload-compress disabled - ROCm version < 6.2.0")
|
message(STATUS "--offload-compress disabled (ROCM < 6.2.0 or NO_COMPRESS=ON)")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_compile_options(rccl PRIVATE -Werror=uninitialized)
|
target_compile_options(rccl PRIVATE -Werror=uninitialized)
|
||||||
|
|||||||
@@ -26,7 +26,7 @@
|
|||||||
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
|
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
|
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1200__) || defined(__gfx1201__)
|
||||||
#define __trace_hwreg() \
|
#define __trace_hwreg() \
|
||||||
collTrace->data_0 = 0;
|
collTrace->data_0 = 0;
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -1019,7 +1019,7 @@ ncclResult_t ncclTopoGetAlgoTime(struct ncclComm* comm, int coll, int algorithm,
|
|||||||
int rcclGetTuningIndexForArch(const char* gfxarch) {
|
int rcclGetTuningIndexForArch(const char* gfxarch) {
|
||||||
static const std::vector<std::pair<std::string, int>> tuningIndexMap = {
|
static const std::vector<std::pair<std::string, int>> tuningIndexMap = {
|
||||||
{"gfx906", 0}, {"gfx908", 0}, {"gfx90a", 0}, {"gfx942", 5},
|
{"gfx906", 0}, {"gfx908", 0}, {"gfx90a", 0}, {"gfx942", 5},
|
||||||
{"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0},
|
{"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0}, {"gfx1151", 0},
|
||||||
{"gfx1200", 7}, {"gfx1201", 7}
|
{"gfx1200", 7}, {"gfx1201", 7}
|
||||||
};
|
};
|
||||||
if (gfxarch == nullptr) return 0;
|
if (gfxarch == nullptr) return 0;
|
||||||
|
|||||||
@@ -7,11 +7,20 @@
|
|||||||
#include "amd_smi/amdsmi.h"
|
#include "amd_smi/amdsmi.h"
|
||||||
#include "nccl.h"
|
#include "nccl.h"
|
||||||
|
|
||||||
|
#if defined(USE_AMDSMI) && defined(RCCL_SMI_ENABLED)
|
||||||
ncclResult_t amd_smi_init();
|
ncclResult_t amd_smi_init();
|
||||||
ncclResult_t amd_smi_shutdown();
|
ncclResult_t amd_smi_shutdown();
|
||||||
ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs);
|
ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs);
|
||||||
ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
|
ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
|
||||||
ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
|
ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
|
||||||
ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count);
|
ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count);
|
||||||
|
#else
|
||||||
|
inline ncclResult_t amd_smi_init() { return ncclSuccess; }
|
||||||
|
inline ncclResult_t amd_smi_shutdown() { return ncclSuccess; }
|
||||||
|
inline ncclResult_t amd_smi_getNumDevice(uint32_t* num_devs) { *num_devs = 0; return ncclSuccess; }
|
||||||
|
inline ncclResult_t amd_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len) { if (len > 0) pciBusId[0] = '\0'; return ncclSuccess; }
|
||||||
|
inline ncclResult_t amd_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex) { return ncclInternalError; }
|
||||||
|
inline ncclResult_t amd_smi_getLinkInfo(int srcDev, int dstDev, amdsmi_link_type_t* type, int *hops, int *count) { *hops=1; *count=1; return ncclSuccess; }
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ typedef struct
|
|||||||
} rccl_bfloat8;
|
} rccl_bfloat8;
|
||||||
|
|
||||||
// __cplusplus < 201103L || (!defined(__HIP_PLATFORM_AMD__) && !defined(__HIPCC__))
|
// __cplusplus < 201103L || (!defined(__HIP_PLATFORM_AMD__) && !defined(__HIPCC__))
|
||||||
#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1030__))
|
#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1030__))
|
||||||
|
|
||||||
#include <hip/hip_fp8.h>
|
#include <hip/hip_fp8.h>
|
||||||
|
|
||||||
|
|||||||
@@ -29,10 +29,18 @@ THE SOFTWARE.
|
|||||||
#endif
|
#endif
|
||||||
#include "nccl.h"
|
#include "nccl.h"
|
||||||
|
|
||||||
|
#if defined(USE_ROCMSMI) && defined(RCCL_SMI_ENABLED)
|
||||||
ncclResult_t rocm_smi_init();
|
ncclResult_t rocm_smi_init();
|
||||||
ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs);
|
ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs);
|
||||||
ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
|
ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len);
|
||||||
ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
|
ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
|
||||||
ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count);
|
ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count);
|
||||||
|
#else
|
||||||
|
inline ncclResult_t rocm_smi_init() { return ncclSuccess; }
|
||||||
|
inline ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs) { *num_devs = 0; return ncclSuccess; }
|
||||||
|
inline ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len) { if (len > 0) pciBusId[0] = '\0'; return ncclSuccess; }
|
||||||
|
inline ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex) { *deviceIndex = 0; return ncclSuccess; }
|
||||||
|
inline ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count) { *hops=1; *count=1; return ncclSuccess; }
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
#if defined(USE_ROCMSMI)
|
||||||
/*
|
/*
|
||||||
Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
|
||||||
@@ -201,3 +202,4 @@ ncclResult_t rocm_smi_getLinkInfo(int srcIndex, int dstIndex, RSMI_IO_LINK_TYPE*
|
|||||||
|
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -0,0 +1,11 @@
|
|||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
// Dummy implementation of rsmi_init to satisfy linker dependencies
|
||||||
|
// when the real ROCm SMI library is broken or causes Bus Errors (gfx1151).
|
||||||
|
// Returns 0 (RSMI_STATUS_SUCCESS).
|
||||||
|
__attribute__((visibility("default")))
|
||||||
|
int rsmi_init(uint64_t flags) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -742,7 +742,7 @@ int getFirmwareVersion() {
|
|||||||
|
|
||||||
fw_version = info.fw_info_list[0].fw_version;
|
fw_version = info.fw_info_list[0].fw_version;
|
||||||
|
|
||||||
#else
|
#elif defined(USE_ROCMSMI)
|
||||||
rsmi_status_t ret;
|
rsmi_status_t ret;
|
||||||
ret = rsmi_init(0);
|
ret = rsmi_init(0);
|
||||||
if (ret != RSMI_STATUS_SUCCESS) {
|
if (ret != RSMI_STATUS_SUCCESS) {
|
||||||
@@ -755,6 +755,9 @@ int getFirmwareVersion() {
|
|||||||
ERROR("Could not query firmware info using rocm-smi");
|
ERROR("Could not query firmware info using rocm-smi");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
// SMI disabled
|
||||||
|
fw_version = -1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return fw_version;
|
return fw_version;
|
||||||
|
|||||||
@@ -260,6 +260,7 @@ static bool ncclSymkImplemented(ncclFunc_t coll, int/*ncclDevRedOp_t*/ red, nccl
|
|||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t ncclSymkMask(struct ncclComm* comm, ncclFunc_t coll, int/*ncclDevRedOp_t*/ red, ncclDataType_t ty, size_t nElts) {
|
static uint32_t ncclSymkMask(struct ncclComm* comm, ncclFunc_t coll, int/*ncclDevRedOp_t*/ red, ncclDataType_t ty, size_t nElts) {
|
||||||
|
#if defined(GENERATE_SYM_KERNELS) && GENERATE_SYM_KERNELS
|
||||||
uint32_t kmask = kernelMask_coll(coll);
|
uint32_t kmask = kernelMask_coll(coll);
|
||||||
kmask &= kernelMask_user();
|
kmask &= kernelMask_user();
|
||||||
|
|
||||||
@@ -299,6 +300,9 @@ static uint32_t ncclSymkMask(struct ncclComm* comm, ncclFunc_t coll, int/*ncclDe
|
|||||||
if (nBusBytes >= 32*(size_t(2)<<30)) kmask = 0;
|
if (nBusBytes >= 32*(size_t(2)<<30)) kmask = 0;
|
||||||
|
|
||||||
return kmask;
|
return kmask;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ncclSymkAvailable(struct ncclComm* comm, ncclFunc_t coll, int/*ncclDevRedOp_t*/ red,
|
bool ncclSymkAvailable(struct ncclComm* comm, ncclFunc_t coll, int/*ncclDevRedOp_t*/ red,
|
||||||
@@ -354,7 +358,7 @@ const char* ncclSymkKernelIdToString(int kernelId) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifndef GENERATE_SYM_KERNELS
|
#ifndef GENERATE_SYM_KERNELS
|
||||||
void* ncclSymGetKernelPtr(ncclSymkKernelId kernelId, int/*ncclDevRedOp_t*/ red, ncclDataType_t ty) {
|
void* ncclSymkGetKernelPtr(ncclSymkKernelId kernelId, int/*ncclDevRedOp_t*/ red, ncclDataType_t ty) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ THE SOFTWARE.
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Macro for collecting HW_REG_HW_ID
|
// Macro for collecting HW_REG_HW_ID
|
||||||
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__)
|
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__NVCC__)
|
||||||
#define GetHwId(val) \
|
#define GetHwId(val) \
|
||||||
val = 0
|
val = 0
|
||||||
#else
|
#else
|
||||||
|
|||||||
مرجع در شماره جدید
Block a user