From 8b291f071acff90b6974560af569850b4339028b Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary Date: Fri, 10 Jun 2022 16:46:23 +0100 Subject: [PATCH] Add missing tests for hipHostMalloc (#2566) --- catch/include/hip_test_helper.hh | 3 +- catch/stress/memory/CMakeLists.txt | 1 + catch/stress/memory/hipHostMalloc.cc | 52 +++++++++++++++ catch/stress/memory/hipMallocManagedStress.cc | 1 - catch/unit/memory/hipHostMallocTests.cc | 27 +++++--- catch/unit/memory/hipMemCoherencyTst.cc | 66 ++++++++----------- 6 files changed, 100 insertions(+), 50 deletions(-) create mode 100644 catch/stress/memory/hipHostMalloc.cc diff --git a/catch/include/hip_test_helper.hh b/catch/include/hip_test_helper.hh index 04aec9318b..fbbcb6cb06 100644 --- a/catch/include/hip_test_helper.hh +++ b/catch/include/hip_test_helper.hh @@ -51,8 +51,7 @@ static size_t getMemoryAmount() { #endif } -static size_t getHostThreadCount(const size_t memPerThread, - const size_t maxThreads) { +static inline size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) { if (memPerThread == 0) return 0; auto memAmount = getMemoryAmount(); const auto processor_count = std::thread::hardware_concurrency(); diff --git a/catch/stress/memory/CMakeLists.txt b/catch/stress/memory/CMakeLists.txt index da5dfa8182..e55869f8e3 100644 --- a/catch/stress/memory/CMakeLists.txt +++ b/catch/stress/memory/CMakeLists.txt @@ -4,6 +4,7 @@ set(TEST_SRC hipMemcpyMThreadMSize.cc hipMallocManagedStress.cc hipMemPrftchAsyncStressTst.cc + hipHostMalloc.cc ) hip_add_exe_to_target(NAME memory diff --git a/catch/stress/memory/hipHostMalloc.cc b/catch/stress/memory/hipHostMalloc.cc new file mode 100644 index 0000000000..da9fa977ff --- /dev/null +++ b/catch/stress/memory/hipHostMalloc.cc @@ -0,0 +1,52 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip_test_common.hh" +#include "hip_test_helper.hh" + +// Stress allocation tests +// Try to allocate as much memory as possible +// But since max allocation can fail, we need to try the next value + +TEST_CASE("Stress_hipHostMalloc_MaxAllocation") { + size_t devMemAvail{0}, devMemFree{0}; + HIP_CHECK(hipMemGetInfo(&devMemFree, &devMemAvail)); + auto hostMemFree = HipTest::getMemoryAmount() /* In MB */ * 1024 * 1024; // In bytes + REQUIRE(devMemFree > 0); + REQUIRE(devMemAvail > 0); + REQUIRE(hostMemFree > 0); + + size_t memFree = std::min(devMemFree, hostMemFree); // which is the limiter cpu or gpu + char* d_ptr{nullptr}; + size_t counter{0}; + + INFO("Max Allocation of " << memFree << " bytes!"); + while (hipHostMalloc(&d_ptr, memFree) != hipSuccess && memFree > 1) { + counter++; + INFO("Attempt to allocate " << memFree << " bytes out of " << devMemFree << "bytes Failed!"); + memFree >>= 1; // reduce the memory to be allocated by half + REQUIRE(counter <= 2); // Make sure that we are atleast able to allocate 1/4th of max memory + } + + HIP_CHECK(hipMemset(d_ptr, 1, memFree)); + HIP_CHECK(hipDeviceSynchronize()); // Flush caches + REQUIRE(std::all_of(d_ptr, d_ptr + memFree, [](unsigned char n) { return n == 1; })); + HIP_CHECK(hipHostFree(d_ptr)); +} + diff --git a/catch/stress/memory/hipMallocManagedStress.cc b/catch/stress/memory/hipMallocManagedStress.cc index e582056685..8a11ab35ef 100644 --- a/catch/stress/memory/hipMallocManagedStress.cc +++ b/catch/stress/memory/hipMallocManagedStress.cc @@ -67,7 +67,6 @@ __global__ void KernelMul_MngdMem(int *Hmm, int *Dptr, size_t n) { Hmm[i] = Dptr[i] * 10; } } -static bool IfTestPassed = true; static void LaunchKrnl4(size_t NumElms, int InitVal) { int *Hmm = NULL, *Dptr = NULL, blockSize = 64, DataMismatch = 0; diff --git a/catch/unit/memory/hipHostMallocTests.cc b/catch/unit/memory/hipHostMallocTests.cc index de445e2e24..b0f3d4dbfa 100644 --- a/catch/unit/memory/hipHostMallocTests.cc +++ b/catch/unit/memory/hipHostMallocTests.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -29,33 +29,40 @@ Testcase Scenarios : */ #include +#include /** * Performs argument validation of hipHostMalloc api. */ TEST_CASE("Unit_hipHostMalloc_ArgValidation") { - hipError_t ret; +#if HT_NVIDIA + HipTest::HIP_SKIP_TEST("TODO: Need to debug"); +#endif constexpr size_t allocSize = 1000; - char *ptr; + char* ptr; SECTION("Pass ptr as nullptr") { - ret = hipHostMalloc(static_cast(nullptr), allocSize); - REQUIRE(ret != hipSuccess); + HIP_CHECK_ERROR(hipHostMalloc(static_cast(nullptr), allocSize), hipErrorInvalidValue); } SECTION("Size as max(size_t)") { - ret = hipHostMalloc(&ptr, std::numeric_limits::max()); - REQUIRE(ret != hipSuccess); + HIP_CHECK_ERROR(hipHostMalloc(&ptr, std::numeric_limits::max()), + hipErrorMemoryAllocation); } SECTION("Flags as max(uint)") { - ret = hipHostMalloc(&ptr, allocSize, - std::numeric_limits::max()); - REQUIRE(ret != hipSuccess); + HIP_CHECK_ERROR(hipHostMalloc(&ptr, allocSize, std::numeric_limits::max()), + hipErrorInvalidValue); } SECTION("Pass size as zero and check ptr reset") { HIP_CHECK(hipHostMalloc(&ptr, 0)); REQUIRE(ptr == nullptr); } + + SECTION("Pass hipHostMallocCoherent and hipHostMallocNonCoherent simultaneously") { + HIP_CHECK_ERROR( + hipHostMalloc(&ptr, allocSize, hipHostMallocCoherent | hipHostMallocNonCoherent), + hipErrorInvalidValue); + } } diff --git a/catch/unit/memory/hipMemCoherencyTst.cc b/catch/unit/memory/hipMemCoherencyTst.cc index 73bf4aeb09..67f455de29 100644 --- a/catch/unit/memory/hipMemCoherencyTst.cc +++ b/catch/unit/memory/hipMemCoherencyTst.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -34,17 +34,9 @@ #include #include -__global__ void CoherentTst(int *ptr, int PeakClk) { - // Incrementing the value by 1 - int64_t GpuFrq = (PeakClk * 1000); - int64_t StrtTck = clock64(); - atomicAdd(ptr, 1); - // The following while loop checks the value in ptr for around 3-4 seconds - while ((clock64() - StrtTck) <= (3 * GpuFrq)) { - if (*ptr == 3) { - atomicAdd(ptr, 1); - return; - } +__global__ void CoherentTst(int* ptr) { // ptr was set to 1 + atomicAdd(ptr, 1); // now ptr is 2 + while (atomicCAS(ptr, 3, 4) != 3) { // wait till ptr is 3, then change it to 4 } } @@ -59,34 +51,34 @@ __global__ void SquareKrnl(int *ptr) { static bool YES_COHERENT = false; // The function tests the coherency of allocated memory -static void TstCoherency(int *Ptr, bool HmmMem) { - int *Dptr = nullptr, peak_clk; - hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); +// If this test hangs, means there is issue in coherency +static void TstCoherency(int* ptr, bool hmmMem) { + int* dptr = nullptr; + hipStream_t stream{}; + HIP_CHECK(hipStreamCreate(&stream)); + // storing value 1 in the memory created above - *Ptr = 1; - // Getting gpu frequency - HIP_CHECK(hipDeviceGetAttribute(&peak_clk, hipDeviceAttributeClockRate, 0)); - if (!HmmMem) { - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&Dptr), Ptr, - 0)); - CoherentTst<<<1, 1, 0, strm>>>(Dptr, peak_clk); + *ptr = 1; + + if (!hmmMem) { + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&dptr), ptr, 0)); + CoherentTst<<<1, 1, 0, stream>>>(dptr); } else { - CoherentTst<<<1, 1, 0, strm>>>(Ptr, peak_clk); + CoherentTst<<<1, 1, 0, stream>>>(ptr); } - // looping until the value is 2 for 3 seconds - std::chrono::steady_clock::time_point start = - std::chrono::steady_clock::now(); - while (std::chrono::duration_cast( - std::chrono::steady_clock::now() - start).count() < 3) { - if (*Ptr == 2) { - *Ptr += 1; - break; - } - } - HIP_CHECK(hipStreamSynchronize(strm)); - HIP_CHECK(hipStreamDestroy(strm)); - if (*Ptr == 4) { + + std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); + while (std::chrono::duration_cast(std::chrono::steady_clock::now() - start) + .count() < 3 && + *ptr == 2) { + } // wait till ptr is 2 from kernel or 3 seconds + + *ptr += 1; // increment it to 3 + + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + + if (*ptr == 4) { YES_COHERENT = true; } }