Two temporary workarounds for cuda-clang issues.
[ROCm/rccl commit: 346fc49514]
Tento commit je obsažen v:
odevzdal
Sylvain Jeaugey
rodič
457c2ab5ae
revize
fcf027b42d
@@ -56,9 +56,17 @@
|
||||
|
||||
// Device-side table of collective functions. Its contents must be kept
// consistent with the ncclFuncSet enum.
__device__ ncclKern_t ncclFuncs[ncclCollCount * ncclNumOps * ncclNumTypes * 2] = {
// Temporary clang workaround: the entries are only emitted when compiling for
// the device (i.e. when __CUDA_ARCH__ is defined). The host-side shadow copy
// of this device global is deliberately left without initializers, because a
// host pointer to a device-side function does not exist and trying to produce
// one confuses clang. This is expected to be fixed in the next clang release.
#ifdef __CUDA_ARCH__
  NCCL_FUNCS2B(ncclBroadcast),
  NCCL_FUNCS2A(ncclReduce),
  NCCL_FUNCS2B(ncclAllGather),
  NCCL_FUNCS2A(ncclReduceScatter),
  NCCL_FUNCS2A(ncclAllReduce)
#endif
};
|
||||
|
||||
// Intentionally empty device function. It does no work and exists solely as a
// workaround for the clang issue tracked at https://reviews.llvm.org/D55580.
// NOTE(review): presumably safe to delete once the toolchain includes that
// fix -- confirm against the minimum supported clang version first.
__device__ void ncclWorkaroundClangD55580() {
}
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele