[dtest] Kernel Execution time fix for hipExtLaunchKernelGGL and hipExtModuleLaunchKernel API's

Fix for kernel execution time using clock rate

SWDEV-238517 for enhancing hip unit tests

Change-Id: Id06df31387ecab96f219e0c1fbcdda1609f75605


[ROCm/hip commit: 0b871a24f6]
Этот коммит содержится в:
DURGESH KROTTAPALLI
2021-01-11 21:10:55 +05:30
коммит произвёл Mohan Kumar Mithur
родитель e7073fc17c
Коммит 76eb4b7b1e
3 изменённых файлов: 44 добавлений и 27 удалений
+23 -14
Просмотреть файл
@@ -38,25 +38,25 @@
#define TWOSEC_KERNEL 2999
__device__ int globalvar = 1;
__global__ void TwoSecKernel() {
__global__ void TwoSecKernel(int clockrate) {
if (globalvar == 0x2222) {
globalvar = 0x3333;
}
uint64_t wait_t = 4000000000,
start = clock64(), cur;
do { cur = clock64()-start;}while (cur < wait_t);
uint64_t wait_t = 2000,
start = clock64()/clockrate, cur;
do { cur = (clock64()/clockrate)-start;}while (cur < wait_t);
if (globalvar != 0x3333) {
globalvar = 0x5555;
}
}
__global__ void FourSecKernel() {
__global__ void FourSecKernel(int clockrate) {
if (globalvar == 1) {
globalvar = 0x2222;
}
uint64_t wait_t = 8000000000,
start = clock64(), cur;
do { cur = clock64()-start;}while (cur < wait_t);
uint64_t wait_t = 4000,
start = clock64()/clockrate, cur;
do { cur = (clock64()/clockrate)-start;}while (cur < wait_t);
if (globalvar == 0x2222) {
globalvar = 0x4444;
}
@@ -71,16 +71,19 @@ __global__ void FourSecKernel() {
bool DisableTimeFlag() {
bool testStatus = true;
hipStream_t stream1;
HIPCHECK(hipSetDevice(0));
hipError_t e;
float time_4sec, time_2sec;
hipEvent_t start_event1, end_event1;
int clkRate = 0;
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
HIPCHECK(hipEventCreateWithFlags(&start_event1,
hipEventDisableTiming));
HIPCHECK(hipEventCreateWithFlags(&end_event1,
hipEventDisableTiming));
HIPCHECK(hipStreamCreate(&stream1));
hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0,
stream1, start_event1, end_event1, 0);
stream1, start_event1, end_event1, 0, clkRate);
HIPCHECK(hipStreamSynchronize(stream1));
e = hipEventElapsedTime(&time_2sec, start_event1, end_event1);
if (e == hipErrorInvalidHandle) {
@@ -108,12 +111,14 @@ bool ConcurencyCheck_GlobalVar(int conc_flag) {
bool testStatus = true;
hipStream_t stream1;
int deviceGlobal_h = 0;
HIPCHECK(hipSetDevice(0));
int clkRate = 0;
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
HIPCHECK(hipStreamCreate(&stream1));
hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0,
stream1, nullptr, nullptr, conc_flag);
stream1, nullptr, nullptr, conc_flag, clkRate);
hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0,
stream1, nullptr, nullptr, conc_flag);
stream1, nullptr, nullptr, conc_flag, clkRate);
HIPCHECK(hipStreamSynchronize(stream1));
HIPCHECK(hipMemcpyFromSymbol(&deviceGlobal_h, globalvar,
sizeof(int)));
@@ -123,6 +128,7 @@ bool ConcurencyCheck_GlobalVar(int conc_flag) {
} else if (!conc_flag && deviceGlobal_h == 0x5555) {
testStatus = true;
} else {
printf("Concurrency check failed when conc_flag is %d ", conc_flag);
testStatus = false;
}
HIPCHECK(hipStreamDestroy(stream1));
@@ -138,8 +144,11 @@ bool KernelTimeExecution() {
bool testStatus = true;
hipStream_t stream1;
hipError_t e;
HIPCHECK(hipSetDevice(0));
hipEvent_t start_event1, end_event1, start_event2, end_event2;
float time_4sec, time_2sec;
int clkRate = 0;
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
HIPCHECK(hipEventCreate(&start_event1));
HIPCHECK(hipEventCreate(&end_event1));
@@ -147,9 +156,9 @@ bool KernelTimeExecution() {
HIPCHECK(hipEventCreate(&end_event2));
HIPCHECK(hipStreamCreate(&stream1));
hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0,
stream1, start_event1, end_event1, 0);
stream1, start_event1, end_event1, 0, clkRate);
hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0,
stream1, start_event2, end_event2, 0);
stream1, start_event2, end_event2, 0, clkRate);
HIPCHECK(hipStreamSynchronize(stream1));
e = hipEventElapsedTime(&time_4sec, start_event1, end_event1);
e = hipEventElapsedTime(&time_2sec, start_event2, end_event2);
+7
Просмотреть файл
@@ -63,6 +63,7 @@ class ModuleLaunchKernel {
hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel,
TwoSecKernel, KernelandExtraParamKernel;
struct {
int clockRate;
void* _Ad;
void* _Bd;
void* _Cd;
@@ -101,14 +102,18 @@ void ModuleLaunchKernel::AllocateMemory() {
HIPCHECK(hipHostMalloc(reinterpret_cast<void**>(&C), SIZE*sizeof(int)));
HIPCHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice));
HIPCHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice));
int clkRate = 0;
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
args1._Ad = Ad;
args1._Bd = Bd;
args1._Cd = C;
args1._n = N;
args1.clockRate = clkRate;
args2._Ad = NULL;
args2._Bd = NULL;
args2._Cd = NULL;
args2._n = 0;
args2.clockRate = clkRate;
size1 = sizeof(args1);
size2 = sizeof(args2);
HIPCHECK(hipEventCreate(&start_event1));
@@ -156,10 +161,12 @@ void ModuleLaunchKernel::DeAllocateMemory() {
*/
bool ModuleLaunchKernel::ExtModule_KernelExecutionTime() {
bool testStatus = true;
HIPCHECK(hipSetDevice(0));
AllocateMemory();
ModuleLoad();
hipError_t e;
float time_4sec, time_2sec;
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
HIP_LAUNCH_PARAM_END};
+14 -13
Просмотреть файл
@@ -20,7 +20,8 @@ THE SOFTWARE.
#include"hip/hip_runtime.h"
__device__ int deviceGlobal = 1;
extern "C" __global__ void matmulK(int* A, int* B, int* C, int N) {
extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C,
int N) {
int ROW = blockIdx.y*blockDim.y+threadIdx.y;
int COL = blockIdx.x*blockDim.x+threadIdx.x;
int tmpSum = 0;
@@ -48,31 +49,31 @@ extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C,
D[ROW * N + COL] = tmpSum;
}
extern "C" __global__ void SixteenSecKernel() {
uint64_t wait_t = 32000000000,
start = clock64(), cur;
do { cur = clock64()-start;}while (cur < wait_t);
extern "C" __global__ void SixteenSecKernel(int clockrate) {
uint64_t wait_t = 16000,
start = clock64()/clockrate, cur;
do { cur = clock64()/clockrate-start;}while (cur < wait_t);
}
extern "C" __global__ void TwoSecKernel() {
extern "C" __global__ void TwoSecKernel(int clockrate) {
if (deviceGlobal == 0x2222) {
deviceGlobal = 0x3333;
}
uint64_t wait_t = 4000000000,
start = clock64(), cur;
do { cur = clock64()-start;}while (cur < wait_t);
uint64_t wait_t = 2000,
start = clock64()/clockrate, cur;
do { cur = clock64()/clockrate-start;}while (cur < wait_t);
if (deviceGlobal != 0x3333) {
deviceGlobal = 0x5555;
}
}
extern "C" __global__ void FourSecKernel() {
extern "C" __global__ void FourSecKernel(int clockrate) {
if (deviceGlobal == 1) {
deviceGlobal = 0x2222;
}
uint64_t wait_t = 8000000000,
start = clock64(), cur;
do { cur = clock64()-start;}while (cur < wait_t);
uint64_t wait_t = 4000,
start = clock64()/clockrate, cur;
do { cur = clock64()/clockrate-start;}while (cur < wait_t);
if (deviceGlobal == 0x2222) {
deviceGlobal = 0x4444;
}