SWDEV-294470 - [dtest] Catch2 unit tests for hipMemset2d, hipMemset2d Mthread, hipMemset3d files. (#2347)

Change-Id: Ia503f9dd12b8c576dee17c3fcbb018eeac305a7e

Co-authored-by: Maneesh Gupta <maneesh.gupta@amd.com>

[ROCm/hip-tests commit: e3996d3b92]
This commit is contained in:
sumanthtg
2021-09-17 11:54:39 +05:30
committed by GitHub
parent 595ec1b4bb
commit 19857aae5f
5 changed files with 531 additions and 1 deletions
@@ -23,10 +23,46 @@ THE SOFTWARE.
#pragma once
#include "hip_test_common.hh"
#ifdef __linux__
#include <sys/sysinfo.h>
#endif
namespace HipTest {
static inline int getGeviceCount() {
int dev = 0;
HIPCHECK(hipGetDeviceCount(&dev));
HIP_CHECK(hipGetDeviceCount(&dev));
return dev;
}
// Get Free Memory from the system
static size_t getMemoryAmount() {
#ifdef __linux__
struct sysinfo info{};
sysinfo(&info);
return info.freeram / (1024 * 1024); // MB
#elif defined(_WIN32)
MEMORYSTATUSEX statex;
statex.dwLength = sizeof(statex);
GlobalMemoryStatusEx(&statex);
return (statex.ullAvailPhys / (1024 * 1024)); // MB
#endif
}
static size_t getHostThreadCount(const size_t memPerThread,
const size_t maxThreads) {
if (memPerThread == 0) return 0;
auto memAmount = getMemoryAmount();
const auto processor_count = std::thread::hardware_concurrency();
if (processor_count == 0 || memAmount == 0) return 0;
size_t thread_count = 0;
if ((processor_count * memPerThread) < memAmount)
thread_count = processor_count;
else
thread_count = reinterpret_cast<size_t>(memAmount / memPerThread);
if (maxThreads > 0) {
return (thread_count > maxThreads) ? maxThreads : thread_count;
}
return thread_count;
}
} // namespace HipTest
@@ -37,6 +37,9 @@ set(TEST_SRC
hipMemset.cc
hipMemsetAsyncMultiThread.cc
hipMemsetAsyncAndKernel.cc
hipMemset3D.cc
hipMemset2D.cc
hipMemset2DAsyncMultiThreadAndKernel.cc
)
else()
set(TEST_SRC
@@ -74,6 +77,9 @@ set(TEST_SRC
hipMemset.cc
hipMemsetAsyncMultiThread.cc
hipMemsetAsyncAndKernel.cc
hipMemset3D.cc
hipMemset2D.cc
hipMemset2DAsyncMultiThreadAndKernel.cc
)
endif()
# Create shared lib of all tests
@@ -0,0 +1,175 @@
/*
* Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
Testcase Scenarios :
1) hipMemset2D api with basic functionality.
2) hipMemset2DAsync api with basic functionality.
3) hipMemset2D api with partial memset and unique width/height.
*/
#include <hip_test_common.hh>
// Table with unique width/height and memset values.
// (width2D, height2D, memsetWidth, memsetHeight)
typedef std::tuple<int, int, int, int> tupletype;
static constexpr std::initializer_list<tupletype> tableItems {
std::make_tuple(20, 20, 20, 20),
std::make_tuple(10, 10, 4, 4),
std::make_tuple(100, 100, 20, 40),
std::make_tuple(256, 256, 39, 19),
std::make_tuple(100, 100, 20, 0),
std::make_tuple(100, 100, 0, 20),
std::make_tuple(100, 100, 0, 0),
};
/**
* Basic Functionality of hipMemset2D
*/
TEST_CASE("Unit_hipMemset2D_BasicFunctional") {
constexpr int memsetval = 0x24;
constexpr size_t numH = 256;
constexpr size_t numW = 256;
size_t pitch_A;
size_t width = numW * sizeof(char);
size_t sizeElements = width * numH;
size_t elements = numW * numH;
char *A_d, *A_h;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
numH));
A_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
for (size_t i = 0; i < elements; i++) {
A_h[i] = 1;
}
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
HIP_CHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
hipMemcpyDeviceToHost));
for (size_t i = 0; i < elements; i++) {
if (A_h[i] != memsetval) {
INFO("Memset2D mismatch at index:" << i << " computed:"
<< A_h[i] << " memsetval:" << memsetval);
REQUIRE(false);
}
}
hipFree(A_d);
free(A_h);
}
/**
* Basic Functionality of hipMemset2DAsync
*/
TEST_CASE("Unit_hipMemset2DAsync_BasicFunctional") {
constexpr int memsetval = 0x26;
constexpr size_t numH = 256;
constexpr size_t numW = 256;
size_t pitch_A;
size_t width = numW * sizeof(char);
size_t sizeElements = width * numH;
size_t elements = numW * numH;
char *A_d, *A_h;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A,
width, numH));
A_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
for (size_t i = 0; i < elements; i++) {
A_h[i] = 1;
}
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
hipMemcpyDeviceToHost));
for (size_t i=0; i < elements; i++) {
if (A_h[i] != memsetval) {
INFO("Memset2DAsync mismatch at index:" << i << " computed:"
<< A_h[i] << " memsetval:" << memsetval);
REQUIRE(false);
}
}
hipFree(A_d);
HIP_CHECK(hipStreamDestroy(stream));
free(A_h);
}
/**
* Memset partial buffer with unique Width and Height
*/
TEST_CASE("Unit_hipMemset2D_UniqueWidthHeight") {
int width2D, height2D;
int memsetWidth, memsetHeight;
char *A_d, *A_h;
size_t pitch_A;
constexpr int memsetval = 0x26;
std::tie(width2D, height2D, memsetWidth, memsetHeight) =
GENERATE(table<int, int, int, int>(tableItems));
size_t width = width2D * sizeof(char);
size_t sizeElements = width * height2D;
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A,
width, height2D));
A_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
for (size_t index = 0; index < sizeElements; index++) {
A_h[index] = 'c';
}
INFO("2D Dimension: Width:" << width2D << " Height:" << height2D <<
" MemsetWidth:" << memsetWidth << " MemsetHeight:" << memsetHeight);
HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, memsetWidth, memsetHeight));
HIP_CHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, width2D, height2D,
hipMemcpyDeviceToHost));
for (int row = 0; row < memsetHeight; row++) {
for (int column = 0; column < memsetWidth; column++) {
if (A_h[(row * width) + column] != memsetval) {
INFO("A_h[" << row << "][" << column << "]" <<
" didnot match " << memsetval);
REQUIRE(false);
}
}
}
hipFree(A_d);
free(A_h);
}
@@ -0,0 +1,185 @@
/*
* Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
Testcase Scenarios :
1) Order of execution of device kernel and hipMemset2DAsync api
2) hipMemSet2DAsync execution in multiple threads
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_helper.hh>
#include <hip_test_kernels.hh>
/* Defines */
#define NUM_THREADS 1000
#define ITER 100
#define NUM_H 256
#define NUM_W 256
void queueJobsForhipMemset2DAsync(char* A_d, char* A_h, size_t pitch,
size_t width, hipStream_t stream) {
constexpr int memsetval = 0x22;
HIPCHECK(hipMemset2DAsync(A_d, pitch, memsetval, NUM_W, NUM_H, stream));
HIPCHECK(hipMemcpy2DAsync(A_h, width, A_d, pitch, NUM_W, NUM_H,
hipMemcpyDeviceToHost, stream));
}
/**
* Order of execution of device kernel and hipMemset2DAsync api.
*/
TEST_CASE("Unit_hipMemset2DAsync_WithKernel") {
constexpr auto N = 4 * 1024 * 1024;
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
constexpr int memsetval = 0x22;
char *A_d, *A_h, *B_d, *B_h, *C_d;
size_t pitch_A, pitch_B, pitch_C;
size_t width = NUM_W * sizeof(char);
size_t sizeElements = width * NUM_H;
size_t elements = NUM_W * NUM_H;
unsigned blocks{};
int validateCount{};
hipStream_t stream;
blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A,
width, NUM_H));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d), &pitch_B,
width, NUM_H));
A_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
B_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(B_h != nullptr);
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&C_d), &pitch_C,
width, NUM_H));
for (size_t i = 0; i < elements; i++) {
B_h[i] = i;
}
HIP_CHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H,
hipMemcpyHostToDevice));
HIP_CHECK(hipStreamCreate(&stream));
for (size_t k = 0; k < ITER; k++) {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, stream, B_d, C_d, elements);
HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H,
hipMemcpyDeviceToHost));
for (size_t p = 0 ; p < elements ; p++) {
if (A_h[p] == memsetval) {
validateCount+= 1;
}
}
}
REQUIRE(static_cast<size_t>(validateCount) == (ITER * elements));
HIP_CHECK(hipFree(A_d)); HIP_CHECK(hipFree(B_d)); HIP_CHECK(hipFree(C_d));
free(A_h); free(B_h);
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* hipMemSet2DAsync execution in multiple threads.
*/
TEST_CASE("Unit_hipMemset2DAsync_MultiThread") {
constexpr auto N = 4 * 1024 * 1024;
constexpr auto blocksPerCU = 6; // to hide latency
constexpr auto threadsPerBlock = 256;
constexpr auto memPerThread = 200;
constexpr int memsetval = 0x22;
char *A_d, *A_h, *B_d, *B_h, *C_d;
size_t pitch_A, pitch_B, pitch_C;
size_t width = NUM_W * sizeof(char);
size_t sizeElements = width * NUM_H;
size_t elements = NUM_W * NUM_H;
unsigned blocks{};
int validateCount{};
hipStream_t stream;
blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
auto thread_count = HipTest::getHostThreadCount(memPerThread, NUM_THREADS);
if (thread_count == 0) {
WARN("Resources not available for thread creation");
return;
}
std::thread *t = new std::thread[thread_count];
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A,
width, NUM_H));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&B_d), &pitch_B,
width, NUM_H));
A_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
B_h = reinterpret_cast<char*>(malloc(sizeElements));
REQUIRE(B_h != nullptr);
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&C_d), &pitch_C,
width, NUM_H));
for (size_t i = 0 ; i < elements ; i++) {
B_h[i] = i;
}
HIP_CHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H,
hipMemcpyHostToDevice));
HIP_CHECK(hipStreamCreate(&stream));
for (int i = 0 ; i < ITER ; i++) {
for (size_t k = 0 ; k < thread_count; k++) {
if (k%2) {
t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, A_h, pitch_A,
width, stream);
} else {
t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, B_h, pitch_A,
width, stream);
}
}
for (size_t j = 0 ; j < thread_count; j++) {
t[j].join();
}
HIP_CHECK(hipStreamSynchronize(stream));
for (size_t k = 0 ; k < elements ; k++) {
if ((A_h[k] == memsetval) && (B_h[k] == memsetval)) {
validateCount+= 1;
}
}
}
REQUIRE(static_cast<size_t>(validateCount) == (ITER * elements));
HIP_CHECK(hipFree(A_d)); HIP_CHECK(hipFree(B_d)); HIP_CHECK(hipFree(C_d));
free(A_h); free(B_h);
HIP_CHECK(hipStreamDestroy(stream));
delete[] t;
}
@@ -0,0 +1,128 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Functional test for Memset3D and Memset3DAsync
*/
#include <hip_test_common.hh>
/**
* Basic Functional test of hipMemset3D
*/
TEST_CASE("Unit_hipMemset3D_BasicFunctional") {
constexpr int memsetval = 0x22;
constexpr size_t numH = 256;
constexpr size_t numW = 256;
constexpr size_t depth = 10;
size_t width = numW * sizeof(char);
size_t sizeElements = width * numH * depth;
size_t elements = numW * numH * depth;
char *A_h;
hipExtent extent = make_hipExtent(width, numH, depth);
hipPitchedPtr devPitchedPtr;
HIP_CHECK(hipMalloc3D(&devPitchedPtr, extent));
A_h = reinterpret_cast<char *>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
for (size_t i = 0; i < elements; i++) {
A_h[i] = 1;
}
HIP_CHECK(hipMemset3D(devPitchedPtr, memsetval, extent));
hipMemcpy3DParms myparms{};
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
myparms.srcPtr = devPitchedPtr;
myparms.extent = extent;
#if HT_NVIDIA
myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost);
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
HIP_CHECK(hipMemcpy3D(&myparms));
for (size_t i = 0; i < elements; i++) {
if (A_h[i] != memsetval) {
INFO("Memset3D mismatch at index:" << i << " computed:"
<< A_h[i] << " memsetval:" << memsetval);
REQUIRE(false);
}
}
HIP_CHECK(hipFree(devPitchedPtr.ptr));
free(A_h);
}
/**
* Basic Functional test of hipMemset3DAsync
*/
TEST_CASE("Unit_hipMemset3DAsync_BasicFunctional") {
constexpr int memsetval = 0x22;
constexpr size_t numH = 256;
constexpr size_t numW = 256;
constexpr size_t depth = 10;
size_t width = numW * sizeof(char);
size_t sizeElements = width * numH * depth;
size_t elements = numW * numH * depth;
hipExtent extent = make_hipExtent(width, numH, depth);
hipPitchedPtr devPitchedPtr;
char *A_h;
HIP_CHECK(hipMalloc3D(&devPitchedPtr, extent));
A_h = reinterpret_cast<char *>(malloc(sizeElements));
REQUIRE(A_h != nullptr);
for (size_t i = 0; i < elements; i++) {
A_h[i] = 1;
}
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
HIP_CHECK(hipStreamSynchronize(stream));
hipMemcpy3DParms myparms{};
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
myparms.srcPtr = devPitchedPtr;
myparms.extent = extent;
#if HT_NVIDIA
myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost);
#else
myparms.kind = hipMemcpyDeviceToHost;
#endif
HIP_CHECK(hipMemcpy3D(&myparms));
for (size_t i = 0; i < elements; i++) {
if (A_h[i] != memsetval) {
INFO("Memset3DAsync mismatch at index:" << i << " computed:"
<< A_h[i] << " memsetval:" << memsetval);
REQUIRE(false);
}
}
HIP_CHECK(hipFree(devPitchedPtr.ptr));
free(A_h);
}