diff --git a/projects/hip-tests/catch/include/hip_test_helper.hh b/projects/hip-tests/catch/include/hip_test_helper.hh index 08b6bec0c6..0aec8bac00 100644 --- a/projects/hip-tests/catch/include/hip_test_helper.hh +++ b/projects/hip-tests/catch/include/hip_test_helper.hh @@ -23,10 +23,46 @@ THE SOFTWARE. #pragma once #include "hip_test_common.hh" +#ifdef __linux__ +#include +#endif + namespace HipTest { static inline int getGeviceCount() { int dev = 0; - HIPCHECK(hipGetDeviceCount(&dev)); + HIP_CHECK(hipGetDeviceCount(&dev)); return dev; } + +// Get Free Memory from the system +static size_t getMemoryAmount() { +#ifdef __linux__ + struct sysinfo info{}; + sysinfo(&info); + return info.freeram / (1024 * 1024); // MB +#elif defined(_WIN32) + MEMORYSTATUSEX statex; + statex.dwLength = sizeof(statex); + GlobalMemoryStatusEx(&statex); + return (statex.ullAvailPhys / (1024 * 1024)); // MB +#endif +} + +static size_t getHostThreadCount(const size_t memPerThread, + const size_t maxThreads) { + if (memPerThread == 0) return 0; + auto memAmount = getMemoryAmount(); + const auto processor_count = std::thread::hardware_concurrency(); + if (processor_count == 0 || memAmount == 0) return 0; + size_t thread_count = 0; + if ((processor_count * memPerThread) < memAmount) + thread_count = processor_count; + else + thread_count = reinterpret_cast(memAmount / memPerThread); + if (maxThreads > 0) { + return (thread_count > maxThreads) ? maxThreads : thread_count; + } + return thread_count; +} + } // namespace HipTest diff --git a/projects/hip-tests/catch/unit/memory/CMakeLists.txt b/projects/hip-tests/catch/unit/memory/CMakeLists.txt index 093b6d8ba9..773abef797 100644 --- a/projects/hip-tests/catch/unit/memory/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/memory/CMakeLists.txt @@ -37,6 +37,9 @@ set(TEST_SRC hipMemset.cc hipMemsetAsyncMultiThread.cc hipMemsetAsyncAndKernel.cc + hipMemset3D.cc + hipMemset2D.cc + hipMemset2DAsyncMultiThreadAndKernel.cc ) else() set(TEST_SRC @@ -74,6 +77,9 @@ set(TEST_SRC hipMemset.cc hipMemsetAsyncMultiThread.cc hipMemsetAsyncAndKernel.cc + hipMemset3D.cc + hipMemset2D.cc + hipMemset2DAsyncMultiThreadAndKernel.cc ) endif() # Create shared lib of all tests diff --git a/projects/hip-tests/catch/unit/memory/hipMemset2D.cc b/projects/hip-tests/catch/unit/memory/hipMemset2D.cc new file mode 100644 index 0000000000..e48b8931f0 --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemset2D.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/** + Testcase Scenarios : + 1) hipMemset2D api with basic functionality. + 2) hipMemset2DAsync api with basic functionality. + 3) hipMemset2D api with partial memset and unique width/height. +*/ + + +#include + + +// Table with unique width/height and memset values. +// (width2D, height2D, memsetWidth, memsetHeight) +typedef std::tuple tupletype; + +static constexpr std::initializer_list tableItems { + std::make_tuple(20, 20, 20, 20), + std::make_tuple(10, 10, 4, 4), + std::make_tuple(100, 100, 20, 40), + std::make_tuple(256, 256, 39, 19), + std::make_tuple(100, 100, 20, 0), + std::make_tuple(100, 100, 0, 20), + std::make_tuple(100, 100, 0, 0), + }; + + + +/** + * Basic Functionality of hipMemset2D + */ +TEST_CASE("Unit_hipMemset2D_BasicFunctional") { + constexpr int memsetval = 0x24; + constexpr size_t numH = 256; + constexpr size_t numW = 256; + size_t pitch_A; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH; + size_t elements = numW * numH; + char *A_d, *A_h; + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, + numH)); + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + + for (size_t i = 0; i < elements; i++) { + A_h[i] = 1; + } + + HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH)); + HIP_CHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH, + hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < elements; i++) { + if (A_h[i] != memsetval) { + INFO("Memset2D mismatch at index:" << i << " computed:" + << A_h[i] << " memsetval:" << memsetval); + REQUIRE(false); + } + } + + hipFree(A_d); + free(A_h); +} + + +/** + * Basic Functionality of hipMemset2DAsync + */ +TEST_CASE("Unit_hipMemset2DAsync_BasicFunctional") { + constexpr int memsetval = 0x26; + constexpr size_t numH = 256; + constexpr size_t numW = 256; + size_t pitch_A; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH; + size_t elements = numW * numH; + char *A_d, *A_h; + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, + width, numH)); + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + + for (size_t i = 0; i < elements; i++) { + A_h[i] = 1; + } + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH, + hipMemcpyDeviceToHost)); + + for (size_t i=0; i < elements; i++) { + if (A_h[i] != memsetval) { + INFO("Memset2DAsync mismatch at index:" << i << " computed:" + << A_h[i] << " memsetval:" << memsetval); + REQUIRE(false); + } + } + + hipFree(A_d); + HIP_CHECK(hipStreamDestroy(stream)); + free(A_h); +} + + +/** + * Memset partial buffer with unique Width and Height + */ +TEST_CASE("Unit_hipMemset2D_UniqueWidthHeight") { + int width2D, height2D; + int memsetWidth, memsetHeight; + char *A_d, *A_h; + size_t pitch_A; + constexpr int memsetval = 0x26; + + std::tie(width2D, height2D, memsetWidth, memsetHeight) = + GENERATE(table(tableItems)); + + size_t width = width2D * sizeof(char); + size_t sizeElements = width * height2D; + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, + width, height2D)); + + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + + for (size_t index = 0; index < sizeElements; index++) { + A_h[index] = 'c'; + } + + INFO("2D Dimension: Width:" << width2D << " Height:" << height2D << + " MemsetWidth:" << memsetWidth << " MemsetHeight:" << memsetHeight); + + HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, memsetWidth, memsetHeight)); + HIP_CHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, width2D, height2D, + hipMemcpyDeviceToHost)); + + for (int row = 0; row < memsetHeight; row++) { + for (int column = 0; column < memsetWidth; column++) { + if (A_h[(row * width) + column] != memsetval) { + INFO("A_h[" << row << "][" << column << "]" << + " didnot match " << memsetval); + REQUIRE(false); + } + } + } + + hipFree(A_d); + free(A_h); +} + diff --git a/projects/hip-tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc b/projects/hip-tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc new file mode 100644 index 0000000000..04240a4104 --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemset2DAsyncMultiThreadAndKernel.cc @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. +*/ + +/** + Testcase Scenarios : + 1) Order of execution of device kernel and hipMemset2DAsync api + 2) hipMemSet2DAsync execution in multiple threads +*/ + +#include +#include +#include +#include + + +/* Defines */ +#define NUM_THREADS 1000 +#define ITER 100 +#define NUM_H 256 +#define NUM_W 256 + + + +void queueJobsForhipMemset2DAsync(char* A_d, char* A_h, size_t pitch, + size_t width, hipStream_t stream) { + constexpr int memsetval = 0x22; + HIPCHECK(hipMemset2DAsync(A_d, pitch, memsetval, NUM_W, NUM_H, stream)); + HIPCHECK(hipMemcpy2DAsync(A_h, width, A_d, pitch, NUM_W, NUM_H, + hipMemcpyDeviceToHost, stream)); +} + + +/** + * Order of execution of device kernel and hipMemset2DAsync api. + */ +TEST_CASE("Unit_hipMemset2DAsync_WithKernel") { + constexpr auto N = 4 * 1024 * 1024; + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + constexpr int memsetval = 0x22; + char *A_d, *A_h, *B_d, *B_h, *C_d; + size_t pitch_A, pitch_B, pitch_C; + size_t width = NUM_W * sizeof(char); + size_t sizeElements = width * NUM_H; + size_t elements = NUM_W * NUM_H; + unsigned blocks{}; + int validateCount{}; + hipStream_t stream; + + blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, + width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), &pitch_B, + width, NUM_H)); + + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + B_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(B_h != nullptr); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&C_d), &pitch_C, + width, NUM_H)); + + for (size_t i = 0; i < elements; i++) { + B_h[i] = i; + } + HIP_CHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H, + hipMemcpyHostToDevice)); + HIP_CHECK(hipStreamCreate(&stream)); + + + for (size_t k = 0; k < ITER; k++) { + hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks), + dim3(threadsPerBlock), 0, stream, B_d, C_d, elements); + + HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipMemcpy2D(A_h, width, C_d, pitch_C, NUM_W, NUM_H, + hipMemcpyDeviceToHost)); + + for (size_t p = 0 ; p < elements ; p++) { + if (A_h[p] == memsetval) { + validateCount+= 1; + } + } + } + + REQUIRE(static_cast(validateCount) == (ITER * elements)); + + HIP_CHECK(hipFree(A_d)); HIP_CHECK(hipFree(B_d)); HIP_CHECK(hipFree(C_d)); + free(A_h); free(B_h); + HIP_CHECK(hipStreamDestroy(stream)); +} + + +/** + * hipMemSet2DAsync execution in multiple threads. + */ +TEST_CASE("Unit_hipMemset2DAsync_MultiThread") { + constexpr auto N = 4 * 1024 * 1024; + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + constexpr auto memPerThread = 200; + constexpr int memsetval = 0x22; + char *A_d, *A_h, *B_d, *B_h, *C_d; + size_t pitch_A, pitch_B, pitch_C; + size_t width = NUM_W * sizeof(char); + size_t sizeElements = width * NUM_H; + size_t elements = NUM_W * NUM_H; + unsigned blocks{}; + int validateCount{}; + hipStream_t stream; + + blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + auto thread_count = HipTest::getHostThreadCount(memPerThread, NUM_THREADS); + if (thread_count == 0) { + WARN("Resources not available for thread creation"); + return; + } + + std::thread *t = new std::thread[thread_count]; + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, + width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), &pitch_B, + width, NUM_H)); + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + B_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(B_h != nullptr); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&C_d), &pitch_C, + width, NUM_H)); + + for (size_t i = 0 ; i < elements ; i++) { + B_h[i] = i; + } + HIP_CHECK(hipMemcpy2D(B_d, width, B_h, pitch_B, NUM_W, NUM_H, + hipMemcpyHostToDevice)); + HIP_CHECK(hipStreamCreate(&stream)); + + for (int i = 0 ; i < ITER ; i++) { + for (size_t k = 0 ; k < thread_count; k++) { + if (k%2) { + t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, A_h, pitch_A, + width, stream); + } else { + t[k] = std::thread(queueJobsForhipMemset2DAsync, A_d, B_h, pitch_A, + width, stream); + } + } + for (size_t j = 0 ; j < thread_count; j++) { + t[j].join(); + } + + HIP_CHECK(hipStreamSynchronize(stream)); + for (size_t k = 0 ; k < elements ; k++) { + if ((A_h[k] == memsetval) && (B_h[k] == memsetval)) { + validateCount+= 1; + } + } + } + + REQUIRE(static_cast(validateCount) == (ITER * elements)); + + HIP_CHECK(hipFree(A_d)); HIP_CHECK(hipFree(B_d)); HIP_CHECK(hipFree(C_d)); + free(A_h); free(B_h); + HIP_CHECK(hipStreamDestroy(stream)); + + delete[] t; +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemset3D.cc b/projects/hip-tests/catch/unit/memory/hipMemset3D.cc new file mode 100644 index 0000000000..0fc1a83818 --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemset3D.cc @@ -0,0 +1,128 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + Functional test for Memset3D and Memset3DAsync + */ + + +#include + + +/** + * Basic Functional test of hipMemset3D + */ +TEST_CASE("Unit_hipMemset3D_BasicFunctional") { + constexpr int memsetval = 0x22; + constexpr size_t numH = 256; + constexpr size_t numW = 256; + constexpr size_t depth = 10; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH * depth; + size_t elements = numW * numH * depth; + char *A_h; + + hipExtent extent = make_hipExtent(width, numH, depth); + hipPitchedPtr devPitchedPtr; + + HIP_CHECK(hipMalloc3D(&devPitchedPtr, extent)); + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + + for (size_t i = 0; i < elements; i++) { + A_h[i] = 1; + } + HIP_CHECK(hipMemset3D(devPitchedPtr, memsetval, extent)); + hipMemcpy3DParms myparms{}; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH); + myparms.srcPtr = devPitchedPtr; + myparms.extent = extent; +#if HT_NVIDIA + myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); +#else + myparms.kind = hipMemcpyDeviceToHost; +#endif + HIP_CHECK(hipMemcpy3D(&myparms)); + + for (size_t i = 0; i < elements; i++) { + if (A_h[i] != memsetval) { + INFO("Memset3D mismatch at index:" << i << " computed:" + << A_h[i] << " memsetval:" << memsetval); + REQUIRE(false); + } + } + HIP_CHECK(hipFree(devPitchedPtr.ptr)); + free(A_h); +} + +/** + * Basic Functional test of hipMemset3DAsync + */ +TEST_CASE("Unit_hipMemset3DAsync_BasicFunctional") { + constexpr int memsetval = 0x22; + constexpr size_t numH = 256; + constexpr size_t numW = 256; + constexpr size_t depth = 10; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH * depth; + size_t elements = numW * numH * depth; + hipExtent extent = make_hipExtent(width, numH, depth); + hipPitchedPtr devPitchedPtr; + char *A_h; + + HIP_CHECK(hipMalloc3D(&devPitchedPtr, extent)); + A_h = reinterpret_cast(malloc(sizeElements)); + REQUIRE(A_h != nullptr); + + for (size_t i = 0; i < elements; i++) { + A_h[i] = 1; + } + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + hipMemcpy3DParms myparms{}; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH); + myparms.srcPtr = devPitchedPtr; + myparms.extent = extent; +#if HT_NVIDIA + myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost); +#else + myparms.kind = hipMemcpyDeviceToHost; +#endif + HIP_CHECK(hipMemcpy3D(&myparms)); + + for (size_t i = 0; i < elements; i++) { + if (A_h[i] != memsetval) { + INFO("Memset3DAsync mismatch at index:" << i << " computed:" + << A_h[i] << " memsetval:" << memsetval); + REQUIRE(false); + } + } + HIP_CHECK(hipFree(devPitchedPtr.ptr)); + free(A_h); +}