From 87c8a0417c6f4ef2c7d905b71eb42e02a0c4ef4a Mon Sep 17 00:00:00 2001
From: Jaydeep Patel
Date: Tue, 27 Aug 2024 10:33:45 +0000
Subject: [PATCH] SWDEV-481514, SWDEV-482400 - Use correct way to add delay/wait on GPU.

Change-Id: I163896333bf741053173d636a499ed6406e77a09
---
 catch/unit/module/hipExtModuleLaunchKernel.cc |  5 +++
 catch/unit/module/matmul.cpp                  | 40 +++++++++----------
 2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/catch/unit/module/hipExtModuleLaunchKernel.cc b/catch/unit/module/hipExtModuleLaunchKernel.cc
index 4a050a31d9..d5c2f88a4f 100644
--- a/catch/unit/module/hipExtModuleLaunchKernel.cc
+++ b/catch/unit/module/hipExtModuleLaunchKernel.cc
@@ -346,7 +346,12 @@ void ModuleLaunchKernel::AllocateMemory() {
   HIP_CHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice));
   HIP_CHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice));
   int clkRate = 0;
+  #if HT_AMD
+  HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0));
+  #endif
+  #if HT_NVIDIA
   HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
+  #endif
   args1._Ad = Ad;
   args1._Bd = Bd;
   args1._Cd = C;
diff --git a/catch/unit/module/matmul.cpp b/catch/unit/module/matmul.cpp
index e2931549ad..02993ac725 100644
--- a/catch/unit/module/matmul.cpp
+++ b/catch/unit/module/matmul.cpp
@@ -48,34 +48,32 @@ extern "C" __global__ void KernelandExtraParams(int* A, int* B, int* C,
   D[ROW * N + COL] = tmpSum;
 }
 
+__device__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) {
+  while (interval--) {
+    #if HT_AMD
+    uint64_t start = wall_clock64();
+    while (wall_clock64() - start < ticks_per_ms) {
+      __builtin_amdgcn_s_sleep(10);
+    }
+    #endif
+    #if HT_NVIDIA
+    uint64_t start = clock64();
+    while (clock64() - start < ticks_per_ms) {
+    }
+    #endif
+  }
+}
+
 extern "C" __global__ void SixteenSecKernel(int clockrate) {
-  uint64_t wait_t = 16000,
-  start = clock64()/clockrate, cur;
-  do { cur = clock64()/clockrate-start;}while (cur < wait_t);
+  Delay(16000, clockrate);
 }
 
 extern "C" __global__ void TwoSecKernel(int clockrate) {
-  if (deviceGlobal == 0x2222) {
-    deviceGlobal = 0x3333;
-  }
-  uint64_t wait_t = 2000,
-  start = clock64()/clockrate, cur;
-  do { cur = clock64()/clockrate-start;}while (cur < wait_t);
-  if (deviceGlobal != 0x3333) {
-    deviceGlobal = 0x5555;
-  }
+  Delay(2000, clockrate);
 }
 
 extern "C" __global__ void FourSecKernel(int clockrate) {
-  if (deviceGlobal == 1) {
-    deviceGlobal = 0x2222;
-  }
-  uint64_t wait_t = 4000,
-  start = clock64()/clockrate, cur;
-  do { cur = clock64()/clockrate-start;}while (cur < wait_t);
-  if (deviceGlobal == 0x2222) {
-    deviceGlobal = 0x4444;
-  }
+  Delay(4000, clockrate);
 }
 
 extern "C" __global__ void dummyKernel() {