From b0e3a2ac640f60892faeb539faa4cf0d66bb5e2d Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Wed, 1 May 2019 12:58:04 -0700 Subject: [PATCH] allow using different memory types for input and output buffers [ROCm/rccl-tests commit: 3f89175af5bc93db0d36758a0e4217f49b7147fa] --- projects/rccl-tests/src/common.cu | 37 ++++++++++++++++++++++++++----- projects/rccl-tests/src/common.h | 15 +++++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/projects/rccl-tests/src/common.cu b/projects/rccl-tests/src/common.cu index 81351e0433..61084eb1bd 100644 --- a/projects/rccl-tests/src/common.cu +++ b/projects/rccl-tests/src/common.cu @@ -22,6 +22,7 @@ const char *test_typenames[ncclNumTypes] = {"char", "int", "half", "float", "dou #endif ncclRedOp_t test_ops[ncclNumOps] = {ncclSum, ncclProd, ncclMax, ncclMin}; const char *test_opnames[ncclNumOps] = {"sum", "prod", "max", "min"}; +const char *test_memorytypes[nccl_NUM_MTYPES] = {"coarse", "fine", "host"}; thread_local int is_main_thread = 0; @@ -41,6 +42,7 @@ static int nccltype = ncclFloat; static int ncclroot = 0; static int parallel_init = 0; static int blocking_coll = 0; +static int memorytype = 0; double parsesize(char *value) { long long int units; @@ -579,10 +581,22 @@ testResult_t threadLaunch(struct testThread* thread) { } testResult_t AllocateBuffs(void **sendbuff, size_t sendBytes, void **recvbuff, size_t recvBytes, void **expected, size_t nbytes, int nranks) { + if (memorytype == ncclFine) { + HIPCHECK(hipExtMallocWithFlags(sendbuff, nbytes, hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags(recvbuff, nbytes, hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags(expected, recvBytes, hipDeviceMallocFinegrained)); + } + else if (memorytype == ncclHost) { + HIPCHECK(hipHostMalloc(sendbuff, nbytes)); + HIPCHECK(hipHostMalloc(recvbuff, nbytes)); + HIPCHECK(hipHostMalloc(expected, recvBytes)); + } + else { HIPCHECK(hipMalloc(sendbuff, nbytes)); HIPCHECK(hipMalloc(recvbuff, nbytes)); 
HIPCHECK(hipMalloc(expected, recvBytes)); - return testSuccess; + } + return testSuccess; } testResult_t run(); // Main function @@ -609,12 +623,13 @@ int main(int argc, char* argv[]) { {"datatype", required_argument, 0, 'd'}, {"root", required_argument, 0, 'r'}, {"blocking", required_argument, 0, 'z'}, + {"memory_type", required_argument, 0, 'y'}, {"help", no_argument, 0, 'h'} }; while(1) { int c; - c = getopt_long(argc, argv, "t:g:b:e:i:f:n:m:w:p:c:o:d:r:z:h", longopts, &longindex); + c = getopt_long(argc, argv, "t:g:b:e:i:f:n:m:w:p:c:o:d:r:z:y:h", longopts, &longindex); if (c == -1) break; @@ -669,6 +684,9 @@ int main(int argc, char* argv[]) { case 'z': blocking_coll = strtol(optarg, NULL, 0); break; + case 'y': + memorytype = ncclstringtomtype(optarg); + break; case 'h': printf("USAGE: %s \n\t" "[-t,--nthreads ] \n\t" @@ -686,6 +704,7 @@ int main(int argc, char* argv[]) { "[-d,--datatype ] \n\t" "[-r,--root ] \n\t" "[-z,--blocking <0/1>] \n\t" + "[-y,--memory_type <coarse/fine/host>] \n\t" "[-h,--help]\n", basename(argv[0])); return 0; @@ -707,6 +726,7 @@ int main(int argc, char* argv[]) { "[-d,--datatype ] \n\t" "[-r,--root ] \n\t" "[-z,--blocking <0/1>] \n\t" + "[-y,--memory_type <coarse/fine/host>] \n\t" "[-h,--help]\n", basename(argv[0])); return 0; @@ -890,9 +910,16 @@ testResult_t run() { // Free off HIP allocated memory for (int i=0; i