diff --git a/src/common.cu b/src/common.cu index 0990621e88..edcb1d389c 100644 --- a/src/common.cu +++ b/src/common.cu @@ -1040,9 +1040,15 @@ testResult_t AllocateBuffs(void **sendbuff, size_t sendBytes, void **recvbuff, s #endif } else { +#if NCCL_VERSION_CODE >= NCCL_VERSION(2,19,0) + NCCLCHECK(ncclMemAlloc(sendbuff, nbytes)); + NCCLCHECK(ncclMemAlloc(recvbuff, nbytes)); + if (datacheck) NCCLCHECK(ncclMemAlloc(expected, recvBytes)); +#else CUDACHECK(cudaMalloc(sendbuff, nbytes)); CUDACHECK(cudaMalloc(recvbuff, nbytes)); if (datacheck) CUDACHECK(cudaMalloc(expected, recvBytes)); +#endif } CUDACHECK(hipMemset(*sendbuff, 1, nbytes)); if (datacheck) CUDACHECK(hipMemset(*expected, 1, recvBytes)); @@ -1676,9 +1682,15 @@ testResult_t run() { // Free off CUDA allocated memory for (int i=0; i= NCCL_VERSION(2,19,0) + if (sendbuffs[i]) NCCLCHECK(ncclMemFree((char*)sendbuffs[i])); + if (recvbuffs[i]) NCCLCHECK(ncclMemFree((char*)recvbuffs[i])); + if (datacheck) NCCLCHECK(ncclMemFree(expected[i])); +#else if (sendbuffs[i]) CUDACHECK(cudaFree((char*)sendbuffs[i])); if (recvbuffs[i]) CUDACHECK(cudaFree((char*)recvbuffs[i])); if (datacheck) CUDACHECK(cudaFree(expected[i])); +#endif } CUDACHECK(cudaFreeHost(delta)); #if NCCL_VERSION_CODE >= NCCL_VERSION(2,19,0)