From b8ef4025e29855fceae4f3df5f67749c3e0777ec Mon Sep 17 00:00:00 2001 From: "Hernandez, Gerardo" Date: Fri, 18 Jul 2025 09:15:19 +0100 Subject: [PATCH] SWDEV-534207 - solve phoenix mem test failures (#218) * make sure symbolStatus after hipGetProcAddress() is the expected in the tests that are failing on Phoenix * fix that calls to hipMemset2DAsync() and hipMemset2DAsync_spt() to set values on submatrices where not taking into account the pitch, causing test failures on Phoenix * before testing whether hipMemset2DAsync() works, initialize the whole matrix to a known value. This makes sure the test fails even if the uninitialized memory returned after hipMallocPitch() happens to have the expected value * Sbefore testing whether hipMemset2DAsync_spt() works, initialize the whole matrix to a known value. This makes sure the test fails even if the uninitialized memory returned after hipMallocPitch() happens to have the expected value * fix up compiler error: dyn_hipMemset2DAsync_ptr should have been dyn_hipMemset2DAsync_spt_ptr * when Unit_hipMalloc3D_Basic fails due to a potential memory leak, print the values that hipMemGetInfo() returns before and after the allocation/deallocation pair. * Also print intermediate free memory * Make sure Unit_hipMalloc3D_Basic allocates at least PalSettings::subAllocationChunkSize_ on Windows. Otherwise hipMemGetInfo() will not report an increase on available memory after hipFree() is called, as the minimum amount of memory that cause a call to the CoarseMemorySubAllocator::Create() is that chunk size * Fix up previous commit; allocated too much memory * skip some extent sizes in Unit_hipFreeMipmappedArrayImplicitSyncArray if allocating them would be require more memory than the actual totalGlobalMem of the device * Do not expect an exact match when comparing the memory available memory before and after hipMalloc3D() + hipFree() * Do not allocate more memory than the total GPU memory in Unit_hipFreeMipmappedArrayImplicitSyncArray * fix expected available memory amount comparison in hipMalloc3D - Basic * use SUCCEED() macro in Unit_hipFreeMipmappedArrayImplicitSyncArray to log more information when there is not enough memory for the mipmapped array to be allocated * fix formatting [ROCm/hip-tests commit: 4b2ed7653f25d87a346f0d2a7b9e5738fdc893d4] --- .../unit/memory/hipFreeMipmappedArray.cc | 48 ++++++++++++------ .../memory/hipGetProcAddressMemoryApis.cc | 34 +++++++------ .../catch/unit/memory/hipMalloc3D.cc | 50 +++++++++++++++---- .../hipGetProcAddressSptApis.cc | 34 +++++++------ 4 files changed, 112 insertions(+), 54 deletions(-) diff --git a/projects/hip-tests/catch/unit/memory/hipFreeMipmappedArray.cc b/projects/hip-tests/catch/unit/memory/hipFreeMipmappedArray.cc index e31467ed18..fc87b6546c 100644 --- a/projects/hip-tests/catch/unit/memory/hipFreeMipmappedArray.cc +++ b/projects/hip-tests/catch/unit/memory/hipFreeMipmappedArray.cc @@ -24,21 +24,24 @@ THE SOFTWARE. #include #include "hipArrayCommon.hh" #include "utils.hh" +#include + /* * hipFreeMipmappedArray API test scenarios * 1. Check that hipFreeMipmappedArray implicitly synchronises the device. - * 2. Perform multiple allocations and then call hipFreeMipmappedArray on each pointer concurrently (from unique - * threads) for different memory types and different allocation sizes. + * 2. Perform multiple allocations and then call hipFreeMipmappedArray on each pointer concurrently + * (from unique threads) for different memory types and different allocation sizes. * 3. Pass nullptr as argument and check that correct error code is returned. - * 4. Call hipFreeMipmappedArray twice on the same pointer and check that the implementation handles the second - * call correctly. + * 4. Call hipFreeMipmappedArray twice on the same pointer and check that the implementation handles + * the second call correctly. */ - TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayImplicitSyncArray", "", char, float) { hipMipmappedArray_t arrayPtr{}; hipExtent extent{}; hipChannelFormatDesc desc = hipCreateChannelDesc(); + hipDeviceProp_t props; + std::array levels = {1, 5, 7}; #if HT_AMD const unsigned int flags = hipArrayDefault; @@ -50,16 +53,30 @@ TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayImplicitSyncArray", "", char, floa extent.height = GENERATE(64, 256, 1024); extent.depth = GENERATE(0, 64, 256, 1024); - const unsigned int numLevels = GENERATE(1, 5, 7); + HIP_CHECK(hipGetDeviceProperties(&props, 0)) - HIP_CHECK_IGNORED_RETURN(hipMallocMipmappedArray(&arrayPtr, &desc, extent, numLevels, flags), - hipErrorNotSupported); + for (auto numLevels : levels) { + if (extent.width * extent.height * extent.depth * numLevels * sizeof(TestType) > + props.totalGlobalMem) { + // some devices will not have enough memory allocate the 6GB required for the biggest extent + // We skip the test in that case (and no warning is needed) + SUCCEED( + "Device does not have enough global memory to allocate a mipmapped array using this " + " extent: (" + << extent.width << ", " << extent.height << ", " << extent.depth << ") and " << numLevels + << " levels"); + continue; + } - LaunchDelayKernel(std::chrono::milliseconds{50}, nullptr); - // make sure device is busy - HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady); - HIP_CHECK(hipFreeMipmappedArray(arrayPtr)); - HIP_CHECK(hipStreamQuery(nullptr)); + HIP_CHECK_IGNORED_RETURN(hipMallocMipmappedArray(&arrayPtr, &desc, extent, numLevels, flags), + hipErrorNotSupported); + + LaunchDelayKernel(std::chrono::milliseconds{50}, nullptr); + // make sure device is busy + HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady); + HIP_CHECK(hipFreeMipmappedArray(arrayPtr)); + HIP_CHECK(hipStreamQuery(nullptr)); + } } TEST_CASE("Unit_hipFreeMipmappedArray_Negative_Nullptr") { @@ -111,14 +128,13 @@ TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayMultiTArray", "", char, int) { int i = 0; for (; i < ptrs.size(); i++) { - if (hipErrorOutOfMemory == hipMallocMipmappedArray(&ptrs[i], &desc, extent, - numLevels, flags)) { + if (hipErrorOutOfMemory == hipMallocMipmappedArray(&ptrs[i], &desc, extent, numLevels, flags)) { break; } } for (int j = 0; j < i; j++) { - threads.emplace_back([ptrs,j] { + threads.emplace_back([ptrs, j] { if (hipSuccess != hipFreeMipmappedArray(ptrs[j])) { return; } diff --git a/projects/hip-tests/catch/unit/memory/hipGetProcAddressMemoryApis.cc b/projects/hip-tests/catch/unit/memory/hipGetProcAddressMemoryApis.cc index e98a86b6f3..d6041beb58 100644 --- a/projects/hip-tests/catch/unit/memory/hipGetProcAddressMemoryApis.cc +++ b/projects/hip-tests/catch/unit/memory/hipGetProcAddressMemoryApis.cc @@ -2761,22 +2761,23 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemset2D3D") { void* hipMemset2DAsync_ptr = nullptr; void* hipMemset3D_ptr = nullptr; void* hipMemset3DAsync_ptr = nullptr; + hipDriverProcAddressQueryResult symbolStatus = HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND; int currentHipVersion = 0; HIP_CHECK(hipRuntimeGetVersion(¤tHipVersion)); - HIP_CHECK(hipGetProcAddress("hipMemset2D", - &hipMemset2D_ptr, - currentHipVersion, 0, nullptr)); - HIP_CHECK(hipGetProcAddress("hipMemset2DAsync", - &hipMemset2DAsync_ptr, - currentHipVersion, 0, nullptr)); - HIP_CHECK(hipGetProcAddress("hipMemset3D", - &hipMemset3D_ptr, - currentHipVersion, 0, nullptr)); - HIP_CHECK(hipGetProcAddress("hipMemset3DAsync", - &hipMemset3DAsync_ptr, - currentHipVersion, 0, nullptr)); + HIP_CHECK( + hipGetProcAddress("hipMemset2D", &hipMemset2D_ptr, currentHipVersion, 0, &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); + HIP_CHECK(hipGetProcAddress("hipMemset2DAsync", &hipMemset2DAsync_ptr, currentHipVersion, 0, + &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); + HIP_CHECK( + hipGetProcAddress("hipMemset3D", &hipMemset3D_ptr, currentHipVersion, 0, &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); + HIP_CHECK(hipGetProcAddress("hipMemset3DAsync", &hipMemset3DAsync_ptr, currentHipVersion, 0, + &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); hipError_t (*dyn_hipMemset2D_ptr)(void *, size_t, int, size_t, size_t) = reinterpret_cast @@ -2829,6 +2830,10 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemset2D3D") { &pitch, width, height)); REQUIRE(devMem != nullptr); + // set the whole matrix first to something different than 'value' + HIP_CHECK(dyn_hipMemset2DAsync_ptr(devMem, pitch, 5, width, height, 0)); + HIP_CHECK(hipStreamSynchronize(0)); + hipStream_t stream[Ns]; for ( int s = 0; s < Ns; s++ ) { HIP_CHECK(hipStreamCreate(&stream[s])); @@ -2836,8 +2841,9 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemset2D3D") { for ( int s = 0; s < Ns; s++ ) { int startIndex = s * (N/Ns); - HIP_CHECK(dyn_hipMemset2DAsync_ptr(devMem + startIndex, pitch/Ns, - value, width/Ns, height/Ns, stream[s])); + int row = startIndex / width; + HIP_CHECK(dyn_hipMemset2DAsync_ptr(devMem + row * pitch, pitch, value, width, height / Ns, + stream[s])); } for ( int s = 0; s < Ns; s++ ) { HIP_CHECK(hipStreamSynchronize(stream[s])); diff --git a/projects/hip-tests/catch/unit/memory/hipMalloc3D.cc b/projects/hip-tests/catch/unit/memory/hipMalloc3D.cc index d3edad5a2f..7512e5987b 100644 --- a/projects/hip-tests/catch/unit/memory/hipMalloc3D.cc +++ b/projects/hip-tests/catch/unit/memory/hipMalloc3D.cc @@ -59,26 +59,56 @@ static void Malloc3DThreadFunc(int gpu) { } /* - * This verifies the hipMalloc3D API by - * assigning width,height and depth as 10 + * This test allocates via hipMalloc3D and deallotes via hipFree(). + * To verify that the returned memory is again available, we use hipMemGetInfo(). + * On Windows, allocating/deallocating small amounts of device memory will not make a difference in + * the reported available memory because they go into subheaps of size + * PalSettings::subAllocationChunkSize_ and only whenever we trigger a whole subheap + * allocation/deallocation we will see a difference in the reported value. + * To make sure we make a difference in the reported available memory; we perform small several + * allocation whose combined size goes over PalSettings::subAllocationChunkSize_; + * hipMemGetInfo() should indicate the memory went down after we hipFree() all of them */ TEST_CASE("Unit_hipMalloc3D_Basic") { CHECK_IMAGE_SUPPORT - size_t width = SMALL_SIZE * sizeof(char); - size_t height{SMALL_SIZE}, depth{SMALL_SIZE}; - hipPitchedPtr devPitchedPtr; + static constexpr int ChunkSize = 64; // (in megabytes) + static constexpr int NumAllocations = 3; + + size_t width{(ChunkSize * 1024) / NumAllocations}, height{1024}, depth{1}; + hipPitchedPtr devPitchedPtr[NumAllocations]; hipExtent extent = make_hipExtent(width, height, depth); - size_t tot, avail, ptot, pavail; + size_t tot, avail, itot, iavail, ptot, pavail; HIP_CHECK(hipMemGetInfo(&pavail, &ptot)); - REQUIRE(hipMalloc3D(&devPitchedPtr, extent) == hipSuccess); - HIPCHECK(hipFree(devPitchedPtr.ptr)); + for (int i = 0; i < NumAllocations; i++) { + REQUIRE(hipMalloc3D(&devPitchedPtr[i], extent) == hipSuccess); + } + + HIP_CHECK(hipMemGetInfo(&iavail, &itot)); + + if (iavail >= pavail) + WARN( + "hipMemGetInfo() did not report increased memory usage after calling hipMalloc3D(). " + "Before: " + << pavail << " after: " << iavail); + + for (int i = 0; i < NumAllocations; i++) { + HIPCHECK(hipFree(devPitchedPtr[i].ptr)); + } HIP_CHECK(hipMemGetInfo(&avail, &tot)); - if (pavail != avail) { - WARN("Memory leak of hipMalloc3D API in multithreaded scenario"); + // as the runtime might cache some of the allocations and also it is difficult the + // available amount returned is the same as the one we got the first time because of + // other processes running on the system; we consider a success if at least a size + // equivalent to two of the allocations has become available + // This test was too brittle before, when it was expecting avail to be equal to pavail + if (avail < pavail - height * width * depth) { + WARN("Memory leak of hipMalloc3D API in multithreaded scenario." + << " Available memory before the hipMalloc3D() call (bytes): " << pavail + << " Available memory after the call: " << iavail + << " Available memory after hipFree(): " << avail); REQUIRE(false); } } diff --git a/projects/hip-tests/catch/unit/streamperthread/hipGetProcAddressSptApis.cc b/projects/hip-tests/catch/unit/streamperthread/hipGetProcAddressSptApis.cc index ceac3d0cf5..e5c5f94e01 100644 --- a/projects/hip-tests/catch/unit/streamperthread/hipGetProcAddressSptApis.cc +++ b/projects/hip-tests/catch/unit/streamperthread/hipGetProcAddressSptApis.cc @@ -672,22 +672,23 @@ TEST_CASE("Unit_hipGetProcAddress_spt_Memset2D3D") { void* hipMemset2DAsync_spt_ptr = nullptr; void* hipMemset3D_spt_ptr = nullptr; void* hipMemset3DAsync_spt_ptr = nullptr; + hipDriverProcAddressQueryResult symbolStatus = HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND; int currentHipVersion = 0; HIP_CHECK(hipRuntimeGetVersion(¤tHipVersion)); - HIP_CHECK(hipGetProcAddress("hipMemset2D_spt", - &hipMemset2D_spt_ptr, - currentHipVersion, 0, nullptr)); - HIP_CHECK(hipGetProcAddress("hipMemset2DAsync_spt", - &hipMemset2DAsync_spt_ptr, - currentHipVersion, 0, nullptr)); - HIP_CHECK(hipGetProcAddress("hipMemset3D_spt", - &hipMemset3D_spt_ptr, - currentHipVersion, 0, nullptr)); - HIP_CHECK(hipGetProcAddress("hipMemset3DAsync_spt", - &hipMemset3DAsync_spt_ptr, - currentHipVersion, 0, nullptr)); + HIP_CHECK(hipGetProcAddress("hipMemset2D_spt", &hipMemset2D_spt_ptr, currentHipVersion, 0, + &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); + HIP_CHECK(hipGetProcAddress("hipMemset2DAsync_spt", &hipMemset2DAsync_spt_ptr, currentHipVersion, + 0, &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); + HIP_CHECK(hipGetProcAddress("hipMemset3D_spt", &hipMemset3D_spt_ptr, currentHipVersion, 0, + &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); + HIP_CHECK(hipGetProcAddress("hipMemset3DAsync_spt", &hipMemset3DAsync_spt_ptr, currentHipVersion, + 0, &symbolStatus)); + REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS); hipError_t (*dyn_hipMemset2D_spt_ptr)(void *, size_t, int, size_t, size_t) = reinterpret_cast @@ -747,12 +748,17 @@ TEST_CASE("Unit_hipGetProcAddress_spt_Memset2D3D") { HIP_CHECK(hipStreamCreate(&stream[s])); } + // set the whole matrix first to something different than 'value' + HIP_CHECK(dyn_hipMemset2DAsync_spt_ptr(devMem, pitch, 5, width, height, 0)); + HIP_CHECK(hipStreamSynchronize(0)); + for ( int s = 0; s < Ns; s++ ) { int startIndex = s * (N/Ns); - HIP_CHECK(dyn_hipMemset2DAsync_spt_ptr(devMem + startIndex, pitch/Ns, - value, width/Ns, height/Ns, + int row = startIndex / width; + HIP_CHECK(dyn_hipMemset2DAsync_spt_ptr(devMem + row * pitch, pitch, value, width, height / Ns, stream[s])); } + for ( int s = 0; s < Ns; s++ ) { HIP_CHECK(hipStreamSynchronize(stream[s])); }