Two temporary workarounds for cuda-clang issues.

[ROCm/rccl commit: 346fc49514]
Tento commit je obsažen v:
Christian Sigg
2018-12-13 16:09:12 +01:00
odevzdal Sylvain Jeaugey
rodič 457c2ab5ae
revize fcf027b42d
+8
Zobrazit soubor
@@ -56,9 +56,17 @@
// Device-side table of ncclKern_t entries, one slot per
// ncclCollCount*ncclNumOps*ncclNumTypes*2 combination.
// The entry order must be consistent with the ncclFuncSet enum, so do not
// reorder the NCCL_FUNCS2A/NCCL_FUNCS2B lines below.
// NOTE(review): NCCL_FUNCS2A/NCCL_FUNCS2B and the meaning of the trailing *2
// are defined outside this view -- confirm against their definitions.
__device__ ncclKern_t ncclFuncs[ncclCollCount*ncclNumOps*ncclNumTypes*2] = {
// Don't try to initialize the host shadow copy of this device-side global
// variable. There is no host pointer to a device-side function, which
// confuses clang. This will be fixed in the next clang release.
// The guard below keeps the initializer entries out of any compilation pass
// where __CUDA_ARCH__ is undefined or zero (i.e. the host pass), leaving an
// empty initializer list there.
#if __CUDA_ARCH__
NCCL_FUNCS2B(ncclBroadcast),
NCCL_FUNCS2A(ncclReduce),
NCCL_FUNCS2B(ncclAllGather),
NCCL_FUNCS2A(ncclReduceScatter),
NCCL_FUNCS2A(ncclAllReduce)
#endif
};
// Workaround for https://reviews.llvm.org/D55580
// Intentionally empty __device__ function: it performs no work and takes no
// arguments.
// NOTE(review): presumably its mere presence in this translation unit is what
// sidesteps the clang issue -- see the linked review for details, and confirm
// whether it can be removed once the toolchain contains that fix.
__device__ void ncclWorkaroundClangD55580() {}