From 76eb4b7b1e20b5de7bb7adfe8a2e78e28cb728e5 Mon Sep 17 00:00:00 2001 From: DURGESH KROTTAPALLI Date: Mon, 11 Jan 2021 21:10:55 +0530 Subject: [PATCH] [dtest] Kernel Execution time fix for hipExtLaunchKernelGGL and hipExtModuleLaunchKernel API's Fix for kernel execution time using clock rate SWDEV-238517 for enhancing hip unit tests Change-Id: Id06df31387ecab96f219e0c1fbcdda1609f75605 [ROCm/hip commit: 0b871a24f6b0ad4fbe63a465f0432a7a9935db45] --- .../module/hipExtLaunchKernelGGL.cpp | 37 ++++++++++++------- .../module/hipExtModuleLaunchKernel.cpp | 7 ++++ .../tests/src/runtimeApi/module/matmul.cpp | 27 +++++++------- 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/projects/hip/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp b/projects/hip/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp index efd102ae9b..d370670537 100755 --- a/projects/hip/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipExtLaunchKernelGGL.cpp @@ -38,25 +38,25 @@ #define TWOSEC_KERNEL 2999 __device__ int globalvar = 1; -__global__ void TwoSecKernel() { +__global__ void TwoSecKernel(int clockrate) { if (globalvar == 0x2222) { globalvar = 0x3333; } - uint64_t wait_t = 4000000000, - start = clock64(), cur; - do { cur = clock64()-start;}while (cur < wait_t); + uint64_t wait_t = 2000, + start = clock64()/clockrate, cur; + do { cur = (clock64()/clockrate)-start;}while (cur < wait_t); if (globalvar != 0x3333) { globalvar = 0x5555; } } -__global__ void FourSecKernel() { +__global__ void FourSecKernel(int clockrate) { if (globalvar == 1) { globalvar = 0x2222; } - uint64_t wait_t = 8000000000, - start = clock64(), cur; - do { cur = clock64()-start;}while (cur < wait_t); + uint64_t wait_t = 4000, + start = clock64()/clockrate, cur; + do { cur = (clock64()/clockrate)-start;}while (cur < wait_t); if (globalvar == 0x2222) { globalvar = 0x4444; } @@ -71,16 +71,19 @@ __global__ void FourSecKernel() { bool DisableTimeFlag() { bool testStatus = true; hipStream_t stream1; + HIPCHECK(hipSetDevice(0)); hipError_t e; float time_4sec, time_2sec; hipEvent_t start_event1, end_event1; + int clkRate = 0; + HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); HIPCHECK(hipEventCreateWithFlags(&start_event1, hipEventDisableTiming)); HIPCHECK(hipEventCreateWithFlags(&end_event1, hipEventDisableTiming)); HIPCHECK(hipStreamCreate(&stream1)); hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0, - stream1, start_event1, end_event1, 0); + stream1, start_event1, end_event1, 0, clkRate); HIPCHECK(hipStreamSynchronize(stream1)); e = hipEventElapsedTime(&time_2sec, start_event1, end_event1); if (e == hipErrorInvalidHandle) { @@ -108,12 +111,14 @@ bool ConcurencyCheck_GlobalVar(int conc_flag) { bool testStatus = true; hipStream_t stream1; int deviceGlobal_h = 0; - + HIPCHECK(hipSetDevice(0)); + int clkRate = 0; + HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); HIPCHECK(hipStreamCreate(&stream1)); hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0, - stream1, nullptr, nullptr, conc_flag); + stream1, nullptr, nullptr, conc_flag, clkRate); hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0, - stream1, nullptr, nullptr, conc_flag); + stream1, nullptr, nullptr, conc_flag, clkRate); HIPCHECK(hipStreamSynchronize(stream1)); HIPCHECK(hipMemcpyFromSymbol(&deviceGlobal_h, globalvar, sizeof(int))); @@ -123,6 +128,7 @@ bool ConcurencyCheck_GlobalVar(int conc_flag) { } else if (!conc_flag && deviceGlobal_h == 0x5555) { testStatus = true; } else { + printf("Concurrency check failed when conc_flag is %d ", conc_flag); testStatus = false; } HIPCHECK(hipStreamDestroy(stream1)); @@ -138,8 +144,11 @@ bool KernelTimeExecution() { bool testStatus = true; hipStream_t stream1; hipError_t e; + HIPCHECK(hipSetDevice(0)); hipEvent_t start_event1, end_event1, start_event2, end_event2; float time_4sec, time_2sec; + int clkRate = 0; + HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); HIPCHECK(hipEventCreate(&start_event1)); HIPCHECK(hipEventCreate(&end_event1)); @@ -147,9 +156,9 @@ bool KernelTimeExecution() { HIPCHECK(hipEventCreate(&end_event2)); HIPCHECK(hipStreamCreate(&stream1)); hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0, - stream1, start_event1, end_event1, 0); + stream1, start_event1, end_event1, 0, clkRate); hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0, - stream1, start_event2, end_event2, 0); + stream1, start_event2, end_event2, 0, clkRate); HIPCHECK(hipStreamSynchronize(stream1)); e = hipEventElapsedTime(&time_4sec, start_event1, end_event1); e = hipEventElapsedTime(&time_2sec, start_event2, end_event2); diff --git a/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp b/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp index 96e3cd7635..55cf0fba57 100755 --- a/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp +++ b/projects/hip/tests/src/runtimeApi/module/hipExtModuleLaunchKernel.cpp @@ -63,6 +63,7 @@ class ModuleLaunchKernel { hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel, TwoSecKernel, KernelandExtraParamKernel; struct { + int clockRate; void* _Ad; void* _Bd; void* _Cd; @@ -101,14 +102,18 @@ void ModuleLaunchKernel::AllocateMemory() { HIPCHECK(hipHostMalloc(reinterpret_cast(&C), SIZE*sizeof(int))); HIPCHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice)); HIPCHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice)); + int clkRate = 0; + HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); args1._Ad = Ad; args1._Bd = Bd; args1._Cd = C; args1._n = N; + args1.clockRate = clkRate; args2._Ad = NULL; args2._Bd = NULL; args2._Cd = NULL; args2._n = 0; + args2.clockRate = clkRate; size1 = sizeof(args1); size2 = sizeof(args2); HIPCHECK(hipEventCreate(&start_event1)); @@ -156,10 +161,12 @@ void ModuleLaunchKernel::DeAllocateMemory() { */ bool ModuleLaunchKernel::ExtModule_KernelExecutionTime() { bool testStatus = true; + HIPCHECK(hipSetDevice(0)); AllocateMemory(); ModuleLoad(); hipError_t e; float time_4sec, time_2sec; + void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2, HIP_LAUNCH_PARAM_END}; diff --git a/projects/hip/tests/src/runtimeApi/module/matmul.cpp b/projects/hip/tests/src/runtimeApi/module/matmul.cpp index f968a0d852..2d1641443e 100755 --- a/projects/hip/tests/src/runtimeApi/module/matmul.cpp +++ b/projects/hip/tests/src/runtimeApi/module/matmul.cpp @@ -20,7 +20,8 @@ THE SOFTWARE. #include"hip/hip_runtime.h" __device__ int deviceGlobal = 1; -extern "C" __global__ void matmulK(int* A, int* B, int* C, int N) { +extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C, + int N) { int ROW = blockIdx.y*blockDim.y+threadIdx.y; int COL = blockIdx.x*blockDim.x+threadIdx.x; int tmpSum = 0; @@ -48,31 +49,31 @@ extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C, D[ROW * N + COL] = tmpSum; } -extern "C" __global__ void SixteenSecKernel() { - uint64_t wait_t = 32000000000, - start = clock64(), cur; - do { cur = clock64()-start;}while (cur < wait_t); +extern "C" __global__ void SixteenSecKernel(int clockrate) { + uint64_t wait_t = 16000, + start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < wait_t); } -extern "C" __global__ void TwoSecKernel() { +extern "C" __global__ void TwoSecKernel(int clockrate) { if (deviceGlobal == 0x2222) { deviceGlobal = 0x3333; } - uint64_t wait_t = 4000000000, - start = clock64(), cur; - do { cur = clock64()-start;}while (cur < wait_t); + uint64_t wait_t = 2000, + start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < wait_t); if (deviceGlobal != 0x3333) { deviceGlobal = 0x5555; } } -extern "C" __global__ void FourSecKernel() { +extern "C" __global__ void FourSecKernel(int clockrate) { if (deviceGlobal == 1) { deviceGlobal = 0x2222; } - uint64_t wait_t = 8000000000, - start = clock64(), cur; - do { cur = clock64()-start;}while (cur < wait_t); + uint64_t wait_t = 4000, + start = clock64()/clockrate, cur; + do { cur = clock64()/clockrate-start;}while (cur < wait_t); if (deviceGlobal == 0x2222) { deviceGlobal = 0x4444; }