Add detection of the cooperative multi-device launch attribute (#345)

Этот коммит содержится в:
TomSang
2021-04-11 16:29:24 -04:00
коммит произвёл GitHub
родитель def8b4ca0d
Коммит 87f12cbb86
+9 -3
Просмотреть файл
@@ -573,12 +573,18 @@ ncclResult_t ncclCommSetIntra(struct ncclComm* comm, int rank, int ranks, struct
}
if (comm->launchMode == ncclComm::GROUP) {
CUDACHECK(hipStreamCreateWithFlags(&comm->groupStream, hipStreamNonBlocking));
#if CUDART_VERSION >= 9000
if (*comm->intraCC && (ncclCudaCompCap() == *comm->intraCC)) {
// Check whether the GPU supports Cooperative Group Multi Device Launch
(void) hipDeviceGetAttribute(&cgMdLaunch, cudaDevAttrCooperativeMultiDeviceLaunch, comm->cudaDev);
hipError_t ret = hipDeviceGetAttribute(&cgMdLaunch, hipDeviceAttributeCooperativeMultiDeviceLaunch, comm->cudaDev);
if (ret != hipSuccess) {
INFO(NCCL_INIT, "hipDeviceGetAttribute(hipDeviceAttributeCooperativeMultiDeviceLaunch, %d) failed with %s",
comm->cudaDev, hipGetErrorString(ret));
return ncclInternalError;
}
if (!cgMdLaunch) {
INFO(NCCL_INIT, "Multi-GPU cooperative launch support not available for device %d", comm->cudaDev);
}
}
#endif
}
// Disable cgMdLaunch if any rank does not support it