[dtest] Kernel Execution time fix for hipExtLaunchKernelGGL and hipExtModuleLaunchKernel API's
Fix for kernel execution time using clock rate
SWDEV-238517 for enhancing hip unit tests
Change-Id: Id06df31387ecab96f219e0c1fbcdda1609f75605
[ROCm/hip commit: 0b871a24f6]
Этот коммит содержится в:
коммит произвёл
Mohan Kumar Mithur
родитель
e7073fc17c
Коммит
76eb4b7b1e
@@ -38,25 +38,25 @@
|
||||
#define TWOSEC_KERNEL 2999
|
||||
|
||||
__device__ int globalvar = 1;
|
||||
__global__ void TwoSecKernel() {
|
||||
__global__ void TwoSecKernel(int clockrate) {
|
||||
if (globalvar == 0x2222) {
|
||||
globalvar = 0x3333;
|
||||
}
|
||||
uint64_t wait_t = 4000000000,
|
||||
start = clock64(), cur;
|
||||
do { cur = clock64()-start;}while (cur < wait_t);
|
||||
uint64_t wait_t = 2000,
|
||||
start = clock64()/clockrate, cur;
|
||||
do { cur = (clock64()/clockrate)-start;}while (cur < wait_t);
|
||||
if (globalvar != 0x3333) {
|
||||
globalvar = 0x5555;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void FourSecKernel() {
|
||||
__global__ void FourSecKernel(int clockrate) {
|
||||
if (globalvar == 1) {
|
||||
globalvar = 0x2222;
|
||||
}
|
||||
uint64_t wait_t = 8000000000,
|
||||
start = clock64(), cur;
|
||||
do { cur = clock64()-start;}while (cur < wait_t);
|
||||
uint64_t wait_t = 4000,
|
||||
start = clock64()/clockrate, cur;
|
||||
do { cur = (clock64()/clockrate)-start;}while (cur < wait_t);
|
||||
if (globalvar == 0x2222) {
|
||||
globalvar = 0x4444;
|
||||
}
|
||||
@@ -71,16 +71,19 @@ __global__ void FourSecKernel() {
|
||||
bool DisableTimeFlag() {
|
||||
bool testStatus = true;
|
||||
hipStream_t stream1;
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
hipError_t e;
|
||||
float time_4sec, time_2sec;
|
||||
hipEvent_t start_event1, end_event1;
|
||||
int clkRate = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
HIPCHECK(hipEventCreateWithFlags(&start_event1,
|
||||
hipEventDisableTiming));
|
||||
HIPCHECK(hipEventCreateWithFlags(&end_event1,
|
||||
hipEventDisableTiming));
|
||||
HIPCHECK(hipStreamCreate(&stream1));
|
||||
hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0,
|
||||
stream1, start_event1, end_event1, 0);
|
||||
stream1, start_event1, end_event1, 0, clkRate);
|
||||
HIPCHECK(hipStreamSynchronize(stream1));
|
||||
e = hipEventElapsedTime(&time_2sec, start_event1, end_event1);
|
||||
if (e == hipErrorInvalidHandle) {
|
||||
@@ -108,12 +111,14 @@ bool ConcurencyCheck_GlobalVar(int conc_flag) {
|
||||
bool testStatus = true;
|
||||
hipStream_t stream1;
|
||||
int deviceGlobal_h = 0;
|
||||
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
int clkRate = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
HIPCHECK(hipStreamCreate(&stream1));
|
||||
hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0,
|
||||
stream1, nullptr, nullptr, conc_flag);
|
||||
stream1, nullptr, nullptr, conc_flag, clkRate);
|
||||
hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0,
|
||||
stream1, nullptr, nullptr, conc_flag);
|
||||
stream1, nullptr, nullptr, conc_flag, clkRate);
|
||||
HIPCHECK(hipStreamSynchronize(stream1));
|
||||
HIPCHECK(hipMemcpyFromSymbol(&deviceGlobal_h, globalvar,
|
||||
sizeof(int)));
|
||||
@@ -123,6 +128,7 @@ bool ConcurencyCheck_GlobalVar(int conc_flag) {
|
||||
} else if (!conc_flag && deviceGlobal_h == 0x5555) {
|
||||
testStatus = true;
|
||||
} else {
|
||||
printf("Concurrency check failed when conc_flag is %d ", conc_flag);
|
||||
testStatus = false;
|
||||
}
|
||||
HIPCHECK(hipStreamDestroy(stream1));
|
||||
@@ -138,8 +144,11 @@ bool KernelTimeExecution() {
|
||||
bool testStatus = true;
|
||||
hipStream_t stream1;
|
||||
hipError_t e;
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
hipEvent_t start_event1, end_event1, start_event2, end_event2;
|
||||
float time_4sec, time_2sec;
|
||||
int clkRate = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
|
||||
HIPCHECK(hipEventCreate(&start_event1));
|
||||
HIPCHECK(hipEventCreate(&end_event1));
|
||||
@@ -147,9 +156,9 @@ bool KernelTimeExecution() {
|
||||
HIPCHECK(hipEventCreate(&end_event2));
|
||||
HIPCHECK(hipStreamCreate(&stream1));
|
||||
hipExtLaunchKernelGGL((FourSecKernel), dim3(1), dim3(1), 0,
|
||||
stream1, start_event1, end_event1, 0);
|
||||
stream1, start_event1, end_event1, 0, clkRate);
|
||||
hipExtLaunchKernelGGL((TwoSecKernel), dim3(1), dim3(1), 0,
|
||||
stream1, start_event2, end_event2, 0);
|
||||
stream1, start_event2, end_event2, 0, clkRate);
|
||||
HIPCHECK(hipStreamSynchronize(stream1));
|
||||
e = hipEventElapsedTime(&time_4sec, start_event1, end_event1);
|
||||
e = hipEventElapsedTime(&time_2sec, start_event2, end_event2);
|
||||
|
||||
@@ -63,6 +63,7 @@ class ModuleLaunchKernel {
|
||||
hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel,
|
||||
TwoSecKernel, KernelandExtraParamKernel;
|
||||
struct {
|
||||
int clockRate;
|
||||
void* _Ad;
|
||||
void* _Bd;
|
||||
void* _Cd;
|
||||
@@ -101,14 +102,18 @@ void ModuleLaunchKernel::AllocateMemory() {
|
||||
HIPCHECK(hipHostMalloc(reinterpret_cast<void**>(&C), SIZE*sizeof(int)));
|
||||
HIPCHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice));
|
||||
int clkRate = 0;
|
||||
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
args1._Ad = Ad;
|
||||
args1._Bd = Bd;
|
||||
args1._Cd = C;
|
||||
args1._n = N;
|
||||
args1.clockRate = clkRate;
|
||||
args2._Ad = NULL;
|
||||
args2._Bd = NULL;
|
||||
args2._Cd = NULL;
|
||||
args2._n = 0;
|
||||
args2.clockRate = clkRate;
|
||||
size1 = sizeof(args1);
|
||||
size2 = sizeof(args2);
|
||||
HIPCHECK(hipEventCreate(&start_event1));
|
||||
@@ -156,10 +161,12 @@ void ModuleLaunchKernel::DeAllocateMemory() {
|
||||
*/
|
||||
bool ModuleLaunchKernel::ExtModule_KernelExecutionTime() {
|
||||
bool testStatus = true;
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
hipError_t e;
|
||||
float time_4sec, time_2sec;
|
||||
|
||||
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
@@ -20,7 +20,8 @@ THE SOFTWARE.
|
||||
#include"hip/hip_runtime.h"
|
||||
__device__ int deviceGlobal = 1;
|
||||
|
||||
extern "C" __global__ void matmulK(int* A, int* B, int* C, int N) {
|
||||
extern "C" __global__ void matmulK(int clockrate, int* A, int* B, int* C,
|
||||
int N) {
|
||||
int ROW = blockIdx.y*blockDim.y+threadIdx.y;
|
||||
int COL = blockIdx.x*blockDim.x+threadIdx.x;
|
||||
int tmpSum = 0;
|
||||
@@ -48,31 +49,31 @@ extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C,
|
||||
D[ROW * N + COL] = tmpSum;
|
||||
}
|
||||
|
||||
extern "C" __global__ void SixteenSecKernel() {
|
||||
uint64_t wait_t = 32000000000,
|
||||
start = clock64(), cur;
|
||||
do { cur = clock64()-start;}while (cur < wait_t);
|
||||
extern "C" __global__ void SixteenSecKernel(int clockrate) {
|
||||
uint64_t wait_t = 16000,
|
||||
start = clock64()/clockrate, cur;
|
||||
do { cur = clock64()/clockrate-start;}while (cur < wait_t);
|
||||
}
|
||||
|
||||
extern "C" __global__ void TwoSecKernel() {
|
||||
extern "C" __global__ void TwoSecKernel(int clockrate) {
|
||||
if (deviceGlobal == 0x2222) {
|
||||
deviceGlobal = 0x3333;
|
||||
}
|
||||
uint64_t wait_t = 4000000000,
|
||||
start = clock64(), cur;
|
||||
do { cur = clock64()-start;}while (cur < wait_t);
|
||||
uint64_t wait_t = 2000,
|
||||
start = clock64()/clockrate, cur;
|
||||
do { cur = clock64()/clockrate-start;}while (cur < wait_t);
|
||||
if (deviceGlobal != 0x3333) {
|
||||
deviceGlobal = 0x5555;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" __global__ void FourSecKernel() {
|
||||
extern "C" __global__ void FourSecKernel(int clockrate) {
|
||||
if (deviceGlobal == 1) {
|
||||
deviceGlobal = 0x2222;
|
||||
}
|
||||
uint64_t wait_t = 8000000000,
|
||||
start = clock64(), cur;
|
||||
do { cur = clock64()-start;}while (cur < wait_t);
|
||||
uint64_t wait_t = 4000,
|
||||
start = clock64()/clockrate, cur;
|
||||
do { cur = clock64()/clockrate-start;}while (cur < wait_t);
|
||||
if (deviceGlobal == 0x2222) {
|
||||
deviceGlobal = 0x4444;
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user