p2p-latency-tests: fix build by switching to gcnArchName (#1030)
* p2p-latency-tests: fix build by switching to gcnArchName
* rccl-prim-test: switch to gcnArchName
This commit is contained in:
@@ -147,7 +147,7 @@ int main(int argc, char** argv) {
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream[0], hipStreamNonBlocking));
|
||||
HIPCHECK(hipDeviceEnablePeerAccess(device_id[1], 0));
|
||||
HIPCHECK(hipGetDeviceProperties(&prop[0], device_id[0]));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, prop[0].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipHostMalloc ((void**)&time_delta[0], sizeof(uint64_t), hipHostMallocDefault));
|
||||
HIPCHECK(hipMalloc((void**)&abortFlag[0], sizeof(uint32_t)));
|
||||
HIPCHECK(hipMemsetAsync(flag[0], 0, HIP_IPC_MEM_MIN_SIZE, stream[0]));
|
||||
@@ -158,7 +158,7 @@ int main(int argc, char** argv) {
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||
HIPCHECK(hipDeviceEnablePeerAccess(device_id[0], 0));
|
||||
HIPCHECK(hipGetDeviceProperties(&prop[1], device_id[1]));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, prop[1].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipHostMalloc((void**)&time_delta[1], sizeof(uint64_t), hipHostMallocDefault));
|
||||
HIPCHECK(hipMalloc((void**)&abortFlag[1], sizeof(uint32_t)));
|
||||
HIPCHECK(hipMemsetAsync(flag[1], 0, HIP_IPC_MEM_MIN_SIZE, stream[1]));
|
||||
@@ -174,11 +174,11 @@ int main(int argc, char** argv) {
|
||||
double vega_gpu_rtc_freq;
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(stream[0]));
|
||||
vega_gpu_rtc_freq = (prop[0].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7;
|
||||
vega_gpu_rtc_freq = strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7;
|
||||
fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[0]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2);
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(stream[1]));
|
||||
vega_gpu_rtc_freq = (prop[1].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7;
|
||||
vega_gpu_rtc_freq = strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7;
|
||||
fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[1]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2);
|
||||
|
||||
HIPCHECK(hipFree(flag[0]));
|
||||
|
||||
@@ -86,7 +86,7 @@ int main(int argc, char** argv) {
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream[0], hipStreamNonBlocking));
|
||||
HIPCHECK(hipDeviceEnablePeerAccess(device_id[1], 0));
|
||||
HIPCHECK(hipGetDeviceProperties(&prop[0], device_id[0]));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, prop[0].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipMalloc((void**)&time_delta[0], HIP_IPC_MEM_MIN_SIZE));
|
||||
HIPCHECK(hipMemsetAsync(flag[0], 0, HIP_IPC_MEM_MIN_SIZE, stream[0]));
|
||||
HIPCHECK(hipStreamSynchronize(stream[0]));
|
||||
@@ -95,7 +95,7 @@ int main(int argc, char** argv) {
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||
HIPCHECK(hipDeviceEnablePeerAccess(device_id[0], 0));
|
||||
HIPCHECK(hipGetDeviceProperties(&prop[1], device_id[1]));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, prop[1].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipMalloc((void**)&time_delta[1], HIP_IPC_MEM_MIN_SIZE));
|
||||
HIPCHECK(hipMemsetAsync(flag[1], 0, HIP_IPC_MEM_MIN_SIZE, stream[1]));
|
||||
HIPCHECK(hipStreamSynchronize(stream[1]));
|
||||
@@ -109,11 +109,11 @@ int main(int argc, char** argv) {
|
||||
double vega_gpu_rtc_freq;
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(stream[0]));
|
||||
vega_gpu_rtc_freq = (prop[0].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7;
|
||||
vega_gpu_rtc_freq = strncmp(prop[0].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7;
|
||||
fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[0]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2);
|
||||
|
||||
HIPCHECK(hipStreamSynchronize(stream[1]));
|
||||
vega_gpu_rtc_freq = (prop[1].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7;
|
||||
vega_gpu_rtc_freq = strncmp(prop[1].gcnArchName, "gfx94", 5) == 0 ? 1.0E8 : 2.5E7;
|
||||
fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[1]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2);
|
||||
|
||||
HIPCHECK(hipFree(flag[0]));
|
||||
|
||||
@@ -436,7 +436,7 @@ int main(int argc,char* argv[])
|
||||
if (nGpu == 8 && !cr8g) {
|
||||
hipDeviceProp_t prop;
|
||||
HIPCHECK(hipGetDeviceProperties(&prop, 0));
|
||||
if (prop.gcnArch/10 == 94) {
|
||||
if (strncmp(prop.gcnArchName, "gfx94", 5) == 0) {
|
||||
r = (char *)ring_gfx940_8p;
|
||||
if(!workgroups) workgroups = 28;
|
||||
} else {
|
||||
@@ -521,11 +521,11 @@ int main(int argc,char* argv[])
|
||||
profiling_data[i] = (struct profiling_data_t *)malloc(sizeof(struct profiling_data_t)*iters);
|
||||
HIPCHECK(hipMalloc((void**) &d_profiling_data[i], sizeof(struct profiling_data_t)*iters));
|
||||
|
||||
HIPCHECK(hipExtMallocWithFlags((void**) &transfer_data[i], sizeof(struct transfer_data_t), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**) &transfer_data[i], sizeof(struct transfer_data_t), strncmp(prop.gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
for (int j = 0; j < workgroups; j++) {
|
||||
HIPCHECK(hipExtMallocWithFlags((void**) &buff[i*MAX_WORKGROUPS+j], 2*N*sizeof(float), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**) &buff[i*MAX_WORKGROUPS+j], 2*N*sizeof(float), strncmp(prop.gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
// additional fine grained buffer for local doublecopy, only need 1 buffer (not used by remote)
|
||||
HIPCHECK(hipExtMallocWithFlags((void**) &buff_fine[i*MAX_WORKGROUPS+j], N*sizeof(float), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipExtMallocWithFlags((void**) &buff_fine[i*MAX_WORKGROUPS+j], N*sizeof(float), strncmp(prop.gcnArchName, "gfx94", 5) == 0 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained));
|
||||
HIPCHECK(hipMalloc((void**) &buff_coarse[i*MAX_WORKGROUPS+j], 2*N*sizeof(float)));
|
||||
//randomize test data
|
||||
hipLaunchKernelGGL(initTestDataKernel,
|
||||
@@ -670,7 +670,7 @@ int main(int argc,char* argv[])
|
||||
hipDeviceProp_t prop;
|
||||
HIPCHECK(hipGetDeviceProperties(&prop, i));
|
||||
double vega_gpu_rtc_freq, bw_std_dev = 0, mean_write_cycle = 0;
|
||||
if (prop.gcnArch/10 == 94)
|
||||
if (strncmp(prop.gcnArchName, "gfx94", 5) == 0)
|
||||
vega_gpu_rtc_freq = 1.0E8;
|
||||
else
|
||||
vega_gpu_rtc_freq = 2.5E7;
|
||||
|
||||
Reference in New Issue
Block a user