SWDEV-444265 - No need to add delay.

As sync between stream is via event record, There
is no really need of having delay in GPU.

Change-Id: Iee835752395fff5bf380535643835f6da0a84b80


[ROCm/hip-tests commit: 598fa2e8a5]
Este commit está contenido en:
Jaydeep Patel
2024-02-05 06:30:21 +00:00
cometido por Sai-poorna Pasham
padre b60e8dca7c
commit 49a7085d38
@@ -54,24 +54,9 @@ Testcase Scenarios :
/**
* Kernel Functions to perform square and introduce delay in device.
*/
static __global__ void sqr_ker_func(int* a, int* b, size_t N, int clockrate, size_t delayMs) {
static __global__ void sqr_ker_func(int* a, int* b, size_t N) {
int tx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if (tx < N) b[tx] = a[tx] * a[tx];
uint64_t wait_t = delayMs, start = clock64() / clockrate, cur;
do {
cur = clock64() / clockrate - start;
} while (cur < wait_t);
}
static __global__ void sqr_ker_func_gfx11(int* a, int* b, size_t N, int clockrate, size_t delayMs) {
#if HT_AMD
int tx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if (tx < N) b[tx] = a[tx] * a[tx];
uint64_t wait_t = delayMs, start = wall_clock64() / clockrate, cur;
do {
cur = wall_clock64() / clockrate - start;
} while (cur < wait_t);
#endif
}
/**
@@ -109,24 +94,13 @@ TEST_CASE("Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") {
// graph1 creation ...........
// Create memcpy and kernel nodes for graph1
// MemcpyH2D -> kernel1 -> event_rec
size_t NElem{N};
hipGraphNode_t memcpyH2D, kernelnode1;
hipKernelNodeParams kernelNodeParams1{};
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph1, nullptr, 0, inp_d, inp_h, memsize,
hipMemcpyHostToDevice));
// Get device clock rate
int clkRate = 0;
size_t NElem{N};
size_t delayMs{2000};
if (IsGfx11()) {
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeWallClockRate, 0));
} else {
HIPCHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
}
// kernel1
auto sqr_ker_func_used = IsGfx11() ? sqr_ker_func_gfx11 : sqr_ker_func;
void* kernelArgs[] = {&inp_d, &out_d, reinterpret_cast<void*>(&NElem),
reinterpret_cast<void*>(&clkRate), reinterpret_cast<void*>(&delayMs)};
kernelNodeParams1.func = reinterpret_cast<void*>(sqr_ker_func_used);
void* kernelArgs[] = {&inp_d, &out_d, reinterpret_cast<void*>(&NElem)};
kernelNodeParams1.func = reinterpret_cast<void*>(sqr_ker_func);
kernelNodeParams1.gridDim = dim3(gridSize);
kernelNodeParams1.blockDim = dim3(blockSize);
kernelNodeParams1.sharedMemBytes = 0;