From cfc04a8aef91c1630960baa43e656d8d3c58ab53 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Thu, 4 Jan 2024 13:36:48 -0800 Subject: [PATCH] p2p-latency-tests: fix build by switching to gcnArchName (#1030) * p2p-latency-tests: fix build by switching to gcnArchName * rccl-prim-test: switch to gcnArchName --- tools/p2p-latency-test/ll_latency_test.cpp | 8 ++++---- tools/p2p-latency-test/p2p_latency_test.cpp | 8 ++++---- tools/rccl-prim-test/rccl_prim_test.cpp | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/p2p-latency-test/ll_latency_test.cpp b/tools/p2p-latency-test/ll_latency_test.cpp index f05b2fe72d..a26980c0c2 100644 --- a/tools/p2p-latency-test/ll_latency_test.cpp +++ b/tools/p2p-latency-test/ll_latency_test.cpp @@ -147,7 +147,7 @@ int main(int argc, char** argv) { HIPCHECK(hipStreamCreateWithFlags(&stream[0], hipStreamNonBlocking)); HIPCHECK(hipDeviceEnablePeerAccess(device_id[1], 0)); HIPCHECK(hipGetDeviceProperties(&prop[0], device_id[0])); - HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, prop[0].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipHostMalloc ((void**)&time_delta[0], sizeof(uint64_t), hipHostMallocDefault)); HIPCHECK(hipMalloc((void**)&abortFlag[0], sizeof(uint32_t))); HIPCHECK(hipMemsetAsync(flag[0], 0, HIP_IPC_MEM_MIN_SIZE, stream[0])); @@ -158,7 +158,7 @@ int main(int argc, char** argv) { HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); HIPCHECK(hipDeviceEnablePeerAccess(device_id[0], 0)); HIPCHECK(hipGetDeviceProperties(&prop[1], device_id[1])); - HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, prop[1].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipHostMalloc((void**)&time_delta[1], sizeof(uint64_t), hipHostMallocDefault)); HIPCHECK(hipMalloc((void**)&abortFlag[1], sizeof(uint32_t))); HIPCHECK(hipMemsetAsync(flag[1], 0, HIP_IPC_MEM_MIN_SIZE, stream[1])); @@ -174,11 +174,11 @@ int main(int argc, char** argv) { double vega_gpu_rtc_freq; HIPCHECK(hipStreamSynchronize(stream[0])); - vega_gpu_rtc_freq = (prop[0].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7; + vega_gpu_rtc_freq = strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7; fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[0]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2); HIPCHECK(hipStreamSynchronize(stream[1])); - vega_gpu_rtc_freq = (prop[1].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7; + vega_gpu_rtc_freq = strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7; fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[1]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2); HIPCHECK(hipFree(flag[0])); diff --git a/tools/p2p-latency-test/p2p_latency_test.cpp b/tools/p2p-latency-test/p2p_latency_test.cpp index 7f1e6bc58c..d92f995ed1 100644 --- a/tools/p2p-latency-test/p2p_latency_test.cpp +++ b/tools/p2p-latency-test/p2p_latency_test.cpp @@ -86,7 +86,7 @@ int main(int argc, char** argv) { HIPCHECK(hipStreamCreateWithFlags(&stream[0], hipStreamNonBlocking)); HIPCHECK(hipDeviceEnablePeerAccess(device_id[1], 0)); HIPCHECK(hipGetDeviceProperties(&prop[0], device_id[0])); - HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, prop[0].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipMalloc((void**)&time_delta[0], HIP_IPC_MEM_MIN_SIZE)); HIPCHECK(hipMemsetAsync(flag[0], 0, HIP_IPC_MEM_MIN_SIZE, stream[0])); HIPCHECK(hipStreamSynchronize(stream[0])); @@ -95,7 +95,7 @@ int main(int argc, char** argv) { HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); HIPCHECK(hipDeviceEnablePeerAccess(device_id[0], 0)); HIPCHECK(hipGetDeviceProperties(&prop[1], device_id[1])); - HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, prop[1].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipMalloc((void**)&time_delta[1], HIP_IPC_MEM_MIN_SIZE)); HIPCHECK(hipMemsetAsync(flag[1], 0, HIP_IPC_MEM_MIN_SIZE, stream[1])); HIPCHECK(hipStreamSynchronize(stream[1])); @@ -109,11 +109,11 @@ int main(int argc, char** argv) { double vega_gpu_rtc_freq; HIPCHECK(hipStreamSynchronize(stream[0])); - vega_gpu_rtc_freq = (prop[0].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7; + vega_gpu_rtc_freq = strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7; fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[0]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2); HIPCHECK(hipStreamSynchronize(stream[1])); - vega_gpu_rtc_freq = (prop[1].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7; + vega_gpu_rtc_freq = strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7; fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[1]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2); HIPCHECK(hipFree(flag[0])); diff --git a/tools/rccl-prim-test/rccl_prim_test.cpp b/tools/rccl-prim-test/rccl_prim_test.cpp index fca8b7db9f..05ea1912fc 100644 --- a/tools/rccl-prim-test/rccl_prim_test.cpp +++ b/tools/rccl-prim-test/rccl_prim_test.cpp @@ -436,7 +436,7 @@ int main(int argc,char* argv[]) if (nGpu == 8 && !cr8g) { hipDeviceProp_t prop; HIPCHECK(hipGetDeviceProperties(&prop, 0)); - if (prop.gcnArch/10 == 94) { + if (strncmp(prop.gcnArchName, "gfx94", 5) == 0) { r = (char *)ring_gfx940_8p; if(!workgroups) workgroups = 28; } else { @@ -521,11 +521,11 @@ int main(int argc,char* argv[]) profiling_data[i] = (struct profiling_data_t *)malloc(sizeof(struct profiling_data_t)*iters); HIPCHECK(hipMalloc((void**) &d_profiling_data[i], sizeof(struct profiling_data_t)*iters)); - HIPCHECK(hipExtMallocWithFlags((void**) &transfer_data[i], sizeof(struct transfer_data_t), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**) &transfer_data[i], sizeof(struct transfer_data_t), strncmp(prop.gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); for (int j = 0; j < workgroups; j++) { - HIPCHECK(hipExtMallocWithFlags((void**) &buff[i*MAX_WORKGROUPS+j], 2*N*sizeof(float), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**) &buff[i*MAX_WORKGROUPS+j], 2*N*sizeof(float), strncmp(prop.gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); // additional fine grained buffer for local doublecopy, only need 1 buffer (not used by remote) - HIPCHECK(hipExtMallocWithFlags((void**) &buff_fine[i*MAX_WORKGROUPS+j], N*sizeof(float), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); + HIPCHECK(hipExtMallocWithFlags((void**) &buff_fine[i*MAX_WORKGROUPS+j], N*sizeof(float), strncmp(prop.gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipMalloc((void**) &buff_coarse[i*MAX_WORKGROUPS+j], 2*N*sizeof(float))); //randomize test data hipLaunchKernelGGL(initTestDataKernel, @@ -670,7 +670,7 @@ int main(int argc,char* argv[]) hipDeviceProp_t prop; HIPCHECK(hipGetDeviceProperties(&prop, i)); double vega_gpu_rtc_freq, bw_std_dev = 0, mean_write_cycle = 0; - if (prop.gcnArch/10 == 94) + if (strncmp(prop.gcnArchName, "gfx94", 5) == 0) vega_gpu_rtc_freq = 1.0E8; else vega_gpu_rtc_freq = 2.5E7;