From 7e6f301b4e45504284b3f9b13d60a74f3b8efdee Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Mon, 14 Aug 2023 20:50:29 +0530 Subject: [PATCH] SWDEV-383372 - [catch2][dtest] Adding corner and stress tests for hipHostMalloc(). (#208) Change-Id: I2308059e50a951a1e70de4f90fef9e2c76af21f1 [ROCm/hip-tests commit: 0b56a9d38c10f657f7190ffd28dc40c3c91b0afc] --- .../config/config_amd_windows_MI2xx.json | 23 +++--- .../stress/memory/hipHostMallocStress.cc | 54 ++++++++++--- .../catch/unit/memory/hipHostMalloc.cc | 76 +++++++++++++++++-- 3 files changed, 125 insertions(+), 28 deletions(-) diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json index f5d29920e0..92ce9593a5 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows_MI2xx.json @@ -94,19 +94,20 @@ "Unit_hipGraphAddEventRecordNode_Functional_ElapsedTime", "Unit_hipStreamBeginCapture_captureComplexGraph", "Unit_hipGraphAddChildGraphNode_MultGraphsAsSingleGraph", - "Unit_hipMemGetAddressRange_Negative", + "Unit_hipMemGetAddressRange_Negative", "Unit_hipStreamValue_Wait64_Blocking_NoMask_Nor", "Unit_hipLaunchHostFunc_Graph", "Unit_hipLaunchHostFunc_KernelHost", - "=== Below hiprtc tests are disabled temporarily, will be renabled once patches for SWDEV-395996 are merged ===", - "Unit_hiprtc_saxpy.Unit_hiprtc_saxpy", - "Unit_hiprtc_warpsize.Unit_hiprtc_warpsize", - "Unit_hiprtc_functional.Unit_hiprtc_functional", - "Unit_hipStreamCaptureRtc.Unit_hipStreamCaptureRtc", - "Unit_hiprtc_cpp17.Unit_hiprtc_cpp17", - "Unit_hiprtc_namehandling.Unit_hiprtc_namehandling", - "Unit_hiprtc_getloweredname.Unit_hiprtc_getloweredname", - "Unit_hiprtc_test_hip_bfloat16.Unit_hiprtc_test_hip_bfloat16", - "Unit_RTC_LinkerAPI.Unit_RTC_LinkerAPI" + "Unit_hipDeviceGetUuid_Positive", + "=== Below hiprtc tests are disabled temporarily, will be renabled once patches for SWDEV-395996 are merged ===", + "Unit_hiprtc_saxpy.Unit_hiprtc_saxpy", + "Unit_hiprtc_warpsize.Unit_hiprtc_warpsize", + "Unit_hiprtc_functional.Unit_hiprtc_functional", + "Unit_hipStreamCaptureRtc.Unit_hipStreamCaptureRtc", + "Unit_hiprtc_cpp17.Unit_hiprtc_cpp17", + "Unit_hiprtc_namehandling.Unit_hiprtc_namehandling", + "Unit_hiprtc_getloweredname.Unit_hiprtc_getloweredname", + "Unit_hiprtc_test_hip_bfloat16.Unit_hiprtc_test_hip_bfloat16", + "Unit_RTC_LinkerAPI.Unit_RTC_LinkerAPI" ] } diff --git a/projects/hip-tests/catch/stress/memory/hipHostMallocStress.cc b/projects/hip-tests/catch/stress/memory/hipHostMallocStress.cc index da9fa977ff..bb48e7f54a 100644 --- a/projects/hip-tests/catch/stress/memory/hipHostMallocStress.cc +++ b/projects/hip-tests/catch/stress/memory/hipHostMallocStress.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,8 +17,10 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "hip_test_common.hh" -#include "hip_test_helper.hh" +#include +#include + +#define ADDITIONAL_MEMORY_PERCENT 10 // Stress allocation tests // Try to allocate as much memory as possible @@ -27,26 +29,60 @@ THE SOFTWARE. TEST_CASE("Stress_hipHostMalloc_MaxAllocation") { size_t devMemAvail{0}, devMemFree{0}; HIP_CHECK(hipMemGetInfo(&devMemFree, &devMemAvail)); - auto hostMemFree = HipTest::getMemoryAmount() /* In MB */ * 1024 * 1024; // In bytes + auto hostMemFree = HipTest::getMemoryAmount() * 1024 * 1024; // In bytes REQUIRE(devMemFree > 0); REQUIRE(devMemAvail > 0); REQUIRE(hostMemFree > 0); - - size_t memFree = std::min(devMemFree, hostMemFree); // which is the limiter cpu or gpu + // which is the limiter cpu or gpu + size_t memFree = std::min(devMemFree, hostMemFree); char* d_ptr{nullptr}; size_t counter{0}; INFO("Max Allocation of " << memFree << " bytes!"); while (hipHostMalloc(&d_ptr, memFree) != hipSuccess && memFree > 1) { counter++; - INFO("Attempt to allocate " << memFree << " bytes out of " << devMemFree << "bytes Failed!"); + INFO("Attempt to allocate " << memFree << \ + " bytes out of " << devMemFree << "bytes Failed!"); memFree >>= 1; // reduce the memory to be allocated by half - REQUIRE(counter <= 2); // Make sure that we are atleast able to allocate 1/4th of max memory + REQUIRE(counter <= 2); // Make sure that we are atleast able to allocate + // 1/4th of max memory } HIP_CHECK(hipMemset(d_ptr, 1, memFree)); HIP_CHECK(hipDeviceSynchronize()); // Flush caches - REQUIRE(std::all_of(d_ptr, d_ptr + memFree, [](unsigned char n) { return n == 1; })); + REQUIRE(std::all_of(d_ptr, d_ptr + memFree, + [](unsigned char n) { return n == 1; })); HIP_CHECK(hipHostFree(d_ptr)); } +// Allocate more memory than total GPU memory in each available GPU. +// hipHostMalloc should return hipSuccess. + +TEST_CASE("Stress_hipHostMalloc_MaxAllocation_AllGpu") { + char* A = nullptr; + size_t maxGpuMem = 0, availableMem = 0; + int count = 0; + HIP_CHECK(hipGetDeviceCount(&count)); + for (int dev = 0; dev < count; dev++) { + // Get available GPU memory and total GPU memory + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipMemGetInfo(&availableMem, &maxGpuMem)); + size_t allocsize = maxGpuMem + + ((maxGpuMem*ADDITIONAL_MEMORY_PERCENT)/100); + // Get free host In bytes + size_t hostMemFree = HipTest::getMemoryAmount() * 1024 * 1024; + if (allocsize < hostMemFree) { + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A), allocsize)); + // Check accessibility of memory + constexpr size_t samplesize = 1024; + constexpr int val = 32; + // Write at beginning of memory chunk for a size of samplesize + HIP_CHECK(hipMemset(A, val, samplesize)); + // Write at end of memory chunk for a size of samplesize + HIP_CHECK(hipMemset((A + allocsize - 1 - samplesize), val, samplesize)); + HIP_CHECK(hipHostFree(A)); + } else { + WARN("Skipping test as CPU memory is less than GPU memory"); + } + } +} diff --git a/projects/hip-tests/catch/unit/memory/hipHostMalloc.cc b/projects/hip-tests/catch/unit/memory/hipHostMalloc.cc index f0b1fafe8d..bebea366bf 100644 --- a/projects/hip-tests/catch/unit/memory/hipHostMalloc.cc +++ b/projects/hip-tests/catch/unit/memory/hipHostMalloc.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -29,19 +29,30 @@ This testfile verifies the following scenarios of hipHostMalloc API 5. Allocating memory using hipHostMalloc with default flag */ -#include -#include -#include +#include +#include +#include #include +#include #define SYNC_EVENT 0 #define SYNC_STREAM 1 #define SYNC_DEVICE 2 +#define ADDITIONAL_MEMORY_PERCENT 10 +#define BLOCK_SIZE 512 +#define VALUE 32 std::vector syncMsg = {"event", "stream", "device"}; static constexpr int numElements{1024 * 16}; static constexpr size_t sizeBytes{numElements * sizeof(int)}; +#if HT_AMD +static __global__ void kerTestMemAccess(char *buf) { + int myId = threadIdx.x + blockDim.x * blockIdx.x; + buf[myId] = VALUE; +} +#endif + void CheckHostPointer(int numElements, int* ptr, unsigned eventFlags, int syncMethod, std::string msg) { std::cerr << "test: CheckHostPointer " @@ -134,8 +145,8 @@ TEST_CASE("Unit_hipHostMalloc_Basic") { dim3 dimGrid(LEN / 512, 1, 1); dim3 dimBlock(512, 1, 1); HipTest::launchKernel(HipTest::vectorADD, dimGrid, dimBlock, - 0, 0, static_cast(A_d), - static_cast(B_d), C_d, static_cast(LEN)); + 0, 0, static_cast(A_d), + static_cast(B_d), C_d, static_cast(LEN)); HIP_CHECK(hipMemcpy(C_h, C_d, LEN*sizeof(float), hipMemcpyDeviceToHost)); HIP_CHECK(hipDeviceSynchronize()); @@ -229,16 +240,65 @@ TEST_CASE("Unit_hipHostMalloc_Default") { CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType); CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType); CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType); - } TEST_CASE("Unit_hipHostGetDevicePointer_NullCheck") { int* d_a; HIP_CHECK(hipHostMalloc(reinterpret_cast(&d_a), sizeof(int))); - auto res = hipHostGetDevicePointer(nullptr,d_a,0); + auto res = hipHostGetDevicePointer(nullptr, d_a, 0); REQUIRE(res == hipErrorInvalidValue); HIP_CHECK(hipHostFree(d_a)); } +/* +This testcase verifies the hipHostMalloc API by +1. Allocating more memory than total GPU memory. Should return hipSuccess. +2. Allocating more memory than the total GPU memory and accessing the memory + in a device function. +*/ +TEST_CASE("Unit_hipHostMalloc_AllocateMoreThanAvailGPUMemory") { + char* A = nullptr; + size_t maxGpuMem = 0, availableMem = 0; + // Get available GPU memory and total GPU memory + HIP_CHECK(hipMemGetInfo(&availableMem, &maxGpuMem)); + size_t allocsize = maxGpuMem + + ((maxGpuMem*ADDITIONAL_MEMORY_PERCENT)/100); + // Get free host In bytes + size_t hostMemFree = HipTest::getMemoryAmount() * 1024 * 1024; + // Ensure that allocsize < hostMemFree + if (allocsize < hostMemFree) { + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A), allocsize)); + HIP_CHECK(hipHostFree(A)); + } else { + WARN("Skipping test as CPU memory is less than GPU memory"); + } +} + +#if HT_AMD +TEST_CASE("Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory") { + char* A = nullptr; + size_t maxGpuMem = 0, availableMem = 0; + // Get available GPU memory and total GPU memory + HIP_CHECK(hipMemGetInfo(&availableMem, &maxGpuMem)); + size_t allocsize = maxGpuMem + + ((maxGpuMem*ADDITIONAL_MEMORY_PERCENT)/100); + // Get free host In bytes + size_t hostMemFree = HipTest::getMemoryAmount() * 1024 * 1024; + // Ensure that allocsize < hostMemFree + if (allocsize < hostMemFree) { + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A), allocsize)); + constexpr int sample_size = 1024; + // memset a sample size to 0 + HIP_CHECK(hipMemset(A, 0, sample_size)); + unsigned int grid_size = allocsize/BLOCK_SIZE; + // Check if the allocated memory can be accessed in kernels + kerTestMemAccess<<>>(A); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipHostFree(A)); + } else { + WARN("Skipping test as CPU memory is less than GPU memory"); + } +} +#endif