SWDEV-314080 - Testing All hipMemsetxx() apis with hipStreamPerThread (#2494)
Change-Id: I452edb7d0047146b22500750bf529a59ab7f99d5
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
fe2bb1bf3e
Коммит
bdc8f76306
@@ -61,7 +61,6 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") {
|
||||
size_t elements = NUM_W * NUM_H;
|
||||
unsigned blocks{};
|
||||
int validateCount{};
|
||||
hipStream_t stream;
|
||||
|
||||
blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A,
|
||||
@@ -81,21 +80,42 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") {
|
||||
}
|
||||
HIP_CHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H,
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
SECTION("Using User created stream") {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
for (size_t k = 0; k < ITER; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, B_d, C_d, elements);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H,
|
||||
stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t p = 0 ; p < elements ; p++) {
|
||||
if (A_h[p] == memsetval) {
|
||||
validateCount+= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
SECTION("Using hipStreamPerThread stream") {
|
||||
for (size_t k = 0; k < ITER; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, hipStreamPerThread, B_d, C_d, elements);
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H,
|
||||
hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t k = 0; k < ITER; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, stream, B_d, C_d, elements);
|
||||
|
||||
HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t p = 0 ; p < elements ; p++) {
|
||||
if (A_h[p] == memsetval) {
|
||||
validateCount+= 1;
|
||||
for (size_t p = 0 ; p < elements ; p++) {
|
||||
if (A_h[p] == memsetval) {
|
||||
validateCount+= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -104,7 +124,6 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") {
|
||||
|
||||
HIP_CHECK(hipFree(A_d)); HIP_CHECK(hipFree(B_d)); HIP_CHECK(hipFree(C_d));
|
||||
free(A_h); free(B_h);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -184,11 +184,17 @@ static void testMemsetMaxValue(bool bAsync) {
|
||||
|
||||
HIP_CHECK(hipMalloc3D(&devPitchedPtr, extent));
|
||||
if (bAsync) {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
SECTION("Using user created stream") {
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
SECTION("Using hipStreamPerThread") {
|
||||
HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
}
|
||||
} else {
|
||||
HIP_CHECK(hipMemset3D(devPitchedPtr, memsetval, extent));
|
||||
}
|
||||
@@ -236,7 +242,7 @@ static void seekAndSet3DArraySlice(bool bAsync) {
|
||||
|
||||
// select random slice for memset
|
||||
unsigned int seed = time(nullptr);
|
||||
int slice_index = HipTest::RAND_R(&seed) % ZSIZE_S;
|
||||
int slice_index = rand_r(&seed) % ZSIZE_S;
|
||||
|
||||
INFO("memset3d for sliceindex " << slice_index);
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
* Test for checking order of execution of device kernel and
|
||||
* hipMemsetAsync apis on all gpus
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
@@ -83,16 +82,26 @@ class MemSetKernelTest {
|
||||
}
|
||||
};
|
||||
|
||||
static bool testhipMemsetAsyncWithKernel() {
|
||||
static bool testhipMemsetAsyncWithKernel(bool UseStrmPerThrd) {
|
||||
MemSetKernelTest<char> obj;
|
||||
constexpr char memsetval = 0x42;
|
||||
|
||||
obj.memAllocate(memsetval);
|
||||
for (int k = 0 ; k < ITER ; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
|
||||
HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal , N , obj.stream));
|
||||
HIP_CHECK(hipStreamSynchronize(obj.stream));
|
||||
for (int k = 0 ; k < ITER ; ++k) {
|
||||
if (UseStrmPerThrd) { // will use hipStreamPerThread stream object
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, hipStreamPerThread, obj.B_d,
|
||||
obj.C_d, N);
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal, N, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
} else {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d,
|
||||
N);
|
||||
HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal , N , obj.stream));
|
||||
HIP_CHECK(hipStreamSynchronize(obj.stream));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
obj.validateExecutionOrder();
|
||||
@@ -109,7 +118,7 @@ static bool testhipMemsetD32AsyncWithKernel() {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
|
||||
HIP_CHECK(hipMemsetD32Async((hipDeviceptr_t)obj.C_d , obj.memSetVal,
|
||||
N, obj.stream));
|
||||
N, obj.stream));
|
||||
HIP_CHECK(hipStreamSynchronize(obj.stream));
|
||||
HIP_CHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -161,7 +170,7 @@ static bool testhipMemsetD8AsyncWithKernel() {
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemsetAsync_VerifyExecutionWithKernel") {
|
||||
int numDevices = 0;
|
||||
bool ret;
|
||||
bool ret, UseStrmPerThrd = false;
|
||||
|
||||
blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
@@ -172,7 +181,13 @@ TEST_CASE("Unit_hipMemsetAsync_VerifyExecutionWithKernel") {
|
||||
HIP_CHECK(hipSetDevice(devNum));
|
||||
|
||||
SECTION("hipMemsetAsync With Kernel") {
|
||||
ret = testhipMemsetAsyncWithKernel();
|
||||
UseStrmPerThrd = false;
|
||||
ret = testhipMemsetAsyncWithKernel(UseStrmPerThrd);
|
||||
REQUIRE(ret == true);
|
||||
}
|
||||
SECTION("hipMemsetAsync With Kernel using hipStreamPerThread stream obj") {
|
||||
UseStrmPerThrd = true;
|
||||
ret = testhipMemsetAsyncWithKernel(UseStrmPerThrd);
|
||||
REQUIRE(ret == true);
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user