SWDEV-534207 - solve phoenix mem test failures (#218)

* make sure symbolStatus after hipGetProcAddress() is the expected in the tests that are failing on Phoenix
* fix that calls to hipMemset2DAsync() and hipMemset2DAsync_spt() to set values on submatrices where not taking into account the pitch, causing test failures on Phoenix
* before testing whether hipMemset2DAsync() works, initialize the whole matrix to a known value. This makes sure the test fails even if the uninitialized memory returned after hipMallocPitch() happens to have the expected value
* Sbefore testing whether hipMemset2DAsync_spt() works, initialize the whole matrix to a known value. This makes sure the test fails even if the uninitialized memory returned after hipMallocPitch() happens to have the expected value
* fix up compiler error: dyn_hipMemset2DAsync_ptr should have been dyn_hipMemset2DAsync_spt_ptr
* when Unit_hipMalloc3D_Basic fails due to a potential memory leak, print the values that hipMemGetInfo() returns before and after the allocation/deallocation pair.
* Also print intermediate free memory
* Make sure Unit_hipMalloc3D_Basic allocates at least PalSettings::subAllocationChunkSize_ on Windows. Otherwise hipMemGetInfo() will not report an increase on available memory after hipFree() is called, as the minimum amount of memory that cause a call to the CoarseMemorySubAllocator::Create() is that chunk size
* Fix up previous commit; allocated too much memory
* skip some extent sizes in Unit_hipFreeMipmappedArrayImplicitSyncArray if allocating them would be require more memory than the actual totalGlobalMem of the device
* Do not expect an exact match when comparing the memory available memory before and after hipMalloc3D() + hipFree()
* Do not allocate more memory than the total GPU memory in Unit_hipFreeMipmappedArrayImplicitSyncArray
* fix expected available memory amount comparison in hipMalloc3D - Basic
* use SUCCEED() macro in Unit_hipFreeMipmappedArrayImplicitSyncArray to log more information when there is not enough memory for the mipmapped array to be allocated
* fix formatting

[ROCm/hip-tests commit: 4b2ed7653f]
This commit is contained in:
Hernandez, Gerardo
2025-07-18 09:15:19 +01:00
committed by GitHub
orang tua 8c88657993
melakukan b8ef4025e2
4 mengubah file dengan 112 tambahan dan 54 penghapusan
@@ -24,21 +24,24 @@ THE SOFTWARE.
#include <hip_test_common.hh>
#include "hipArrayCommon.hh"
#include "utils.hh"
#include <array>
/*
* hipFreeMipmappedArray API test scenarios
* 1. Check that hipFreeMipmappedArray implicitly synchronises the device.
* 2. Perform multiple allocations and then call hipFreeMipmappedArray on each pointer concurrently (from unique
* threads) for different memory types and different allocation sizes.
* 2. Perform multiple allocations and then call hipFreeMipmappedArray on each pointer concurrently
* (from unique threads) for different memory types and different allocation sizes.
* 3. Pass nullptr as argument and check that correct error code is returned.
* 4. Call hipFreeMipmappedArray twice on the same pointer and check that the implementation handles the second
* call correctly.
* 4. Call hipFreeMipmappedArray twice on the same pointer and check that the implementation handles
* the second call correctly.
*/
TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayImplicitSyncArray", "", char, float) {
hipMipmappedArray_t arrayPtr{};
hipExtent extent{};
hipChannelFormatDesc desc = hipCreateChannelDesc<TestType>();
hipDeviceProp_t props;
std::array<unsigned int, 3> levels = {1, 5, 7};
#if HT_AMD
const unsigned int flags = hipArrayDefault;
@@ -50,16 +53,30 @@ TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayImplicitSyncArray", "", char, floa
extent.height = GENERATE(64, 256, 1024);
extent.depth = GENERATE(0, 64, 256, 1024);
const unsigned int numLevels = GENERATE(1, 5, 7);
HIP_CHECK(hipGetDeviceProperties(&props, 0))
HIP_CHECK_IGNORED_RETURN(hipMallocMipmappedArray(&arrayPtr, &desc, extent, numLevels, flags),
hipErrorNotSupported);
for (auto numLevels : levels) {
if (extent.width * extent.height * extent.depth * numLevels * sizeof(TestType) >
props.totalGlobalMem) {
// some devices will not have enough memory allocate the 6GB required for the biggest extent
// We skip the test in that case (and no warning is needed)
SUCCEED(
"Device does not have enough global memory to allocate a mipmapped array using this "
" extent: ("
<< extent.width << ", " << extent.height << ", " << extent.depth << ") and " << numLevels
<< " levels");
continue;
}
LaunchDelayKernel(std::chrono::milliseconds{50}, nullptr);
// make sure device is busy
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
HIP_CHECK(hipFreeMipmappedArray(arrayPtr));
HIP_CHECK(hipStreamQuery(nullptr));
HIP_CHECK_IGNORED_RETURN(hipMallocMipmappedArray(&arrayPtr, &desc, extent, numLevels, flags),
hipErrorNotSupported);
LaunchDelayKernel(std::chrono::milliseconds{50}, nullptr);
// make sure device is busy
HIP_CHECK_ERROR(hipStreamQuery(nullptr), hipErrorNotReady);
HIP_CHECK(hipFreeMipmappedArray(arrayPtr));
HIP_CHECK(hipStreamQuery(nullptr));
}
}
TEST_CASE("Unit_hipFreeMipmappedArray_Negative_Nullptr") {
@@ -111,14 +128,13 @@ TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayMultiTArray", "", char, int) {
int i = 0;
for (; i < ptrs.size(); i++) {
if (hipErrorOutOfMemory == hipMallocMipmappedArray(&ptrs[i], &desc, extent,
numLevels, flags)) {
if (hipErrorOutOfMemory == hipMallocMipmappedArray(&ptrs[i], &desc, extent, numLevels, flags)) {
break;
}
}
for (int j = 0; j < i; j++) {
threads.emplace_back([ptrs,j] {
threads.emplace_back([ptrs, j] {
if (hipSuccess != hipFreeMipmappedArray(ptrs[j])) {
return;
}
@@ -2761,22 +2761,23 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemset2D3D") {
void* hipMemset2DAsync_ptr = nullptr;
void* hipMemset3D_ptr = nullptr;
void* hipMemset3DAsync_ptr = nullptr;
hipDriverProcAddressQueryResult symbolStatus = HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND;
int currentHipVersion = 0;
HIP_CHECK(hipRuntimeGetVersion(&currentHipVersion));
HIP_CHECK(hipGetProcAddress("hipMemset2D",
&hipMemset2D_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset2DAsync",
&hipMemset2DAsync_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset3D",
&hipMemset3D_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset3DAsync",
&hipMemset3DAsync_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(
hipGetProcAddress("hipMemset2D", &hipMemset2D_ptr, currentHipVersion, 0, &symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
HIP_CHECK(hipGetProcAddress("hipMemset2DAsync", &hipMemset2DAsync_ptr, currentHipVersion, 0,
&symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
HIP_CHECK(
hipGetProcAddress("hipMemset3D", &hipMemset3D_ptr, currentHipVersion, 0, &symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
HIP_CHECK(hipGetProcAddress("hipMemset3DAsync", &hipMemset3DAsync_ptr, currentHipVersion, 0,
&symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
hipError_t (*dyn_hipMemset2D_ptr)(void *, size_t, int, size_t, size_t) =
reinterpret_cast<hipError_t (*)(void *, size_t, int, size_t, size_t)>
@@ -2829,6 +2830,10 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemset2D3D") {
&pitch, width, height));
REQUIRE(devMem != nullptr);
// set the whole matrix first to something different than 'value'
HIP_CHECK(dyn_hipMemset2DAsync_ptr(devMem, pitch, 5, width, height, 0));
HIP_CHECK(hipStreamSynchronize(0));
hipStream_t stream[Ns];
for ( int s = 0; s < Ns; s++ ) {
HIP_CHECK(hipStreamCreate(&stream[s]));
@@ -2836,8 +2841,9 @@ TEST_CASE("Unit_hipGetProcAddress_MemoryApisMemset2D3D") {
for ( int s = 0; s < Ns; s++ ) {
int startIndex = s * (N/Ns);
HIP_CHECK(dyn_hipMemset2DAsync_ptr(devMem + startIndex, pitch/Ns,
value, width/Ns, height/Ns, stream[s]));
int row = startIndex / width;
HIP_CHECK(dyn_hipMemset2DAsync_ptr(devMem + row * pitch, pitch, value, width, height / Ns,
stream[s]));
}
for ( int s = 0; s < Ns; s++ ) {
HIP_CHECK(hipStreamSynchronize(stream[s]));
@@ -59,26 +59,56 @@ static void Malloc3DThreadFunc(int gpu) {
}
/*
* This verifies the hipMalloc3D API by
* assigning width,height and depth as 10
* This test allocates via hipMalloc3D and deallotes via hipFree().
* To verify that the returned memory is again available, we use hipMemGetInfo().
* On Windows, allocating/deallocating small amounts of device memory will not make a difference in
* the reported available memory because they go into subheaps of size
* PalSettings::subAllocationChunkSize_ and only whenever we trigger a whole subheap
* allocation/deallocation we will see a difference in the reported value.
* To make sure we make a difference in the reported available memory; we perform small several
* allocation whose combined size goes over PalSettings::subAllocationChunkSize_;
* hipMemGetInfo() should indicate the memory went down after we hipFree() all of them
*/
TEST_CASE("Unit_hipMalloc3D_Basic") {
CHECK_IMAGE_SUPPORT
size_t width = SMALL_SIZE * sizeof(char);
size_t height{SMALL_SIZE}, depth{SMALL_SIZE};
hipPitchedPtr devPitchedPtr;
static constexpr int ChunkSize = 64; // (in megabytes)
static constexpr int NumAllocations = 3;
size_t width{(ChunkSize * 1024) / NumAllocations}, height{1024}, depth{1};
hipPitchedPtr devPitchedPtr[NumAllocations];
hipExtent extent = make_hipExtent(width, height, depth);
size_t tot, avail, ptot, pavail;
size_t tot, avail, itot, iavail, ptot, pavail;
HIP_CHECK(hipMemGetInfo(&pavail, &ptot));
REQUIRE(hipMalloc3D(&devPitchedPtr, extent) == hipSuccess);
HIPCHECK(hipFree(devPitchedPtr.ptr));
for (int i = 0; i < NumAllocations; i++) {
REQUIRE(hipMalloc3D(&devPitchedPtr[i], extent) == hipSuccess);
}
HIP_CHECK(hipMemGetInfo(&iavail, &itot));
if (iavail >= pavail)
WARN(
"hipMemGetInfo() did not report increased memory usage after calling hipMalloc3D(). "
"Before: "
<< pavail << " after: " << iavail);
for (int i = 0; i < NumAllocations; i++) {
HIPCHECK(hipFree(devPitchedPtr[i].ptr));
}
HIP_CHECK(hipMemGetInfo(&avail, &tot));
if (pavail != avail) {
WARN("Memory leak of hipMalloc3D API in multithreaded scenario");
// as the runtime might cache some of the allocations and also it is difficult the
// available amount returned is the same as the one we got the first time because of
// other processes running on the system; we consider a success if at least a size
// equivalent to two of the allocations has become available
// This test was too brittle before, when it was expecting avail to be equal to pavail
if (avail < pavail - height * width * depth) {
WARN("Memory leak of hipMalloc3D API in multithreaded scenario."
<< " Available memory before the hipMalloc3D() call (bytes): " << pavail
<< " Available memory after the call: " << iavail
<< " Available memory after hipFree(): " << avail);
REQUIRE(false);
}
}
@@ -672,22 +672,23 @@ TEST_CASE("Unit_hipGetProcAddress_spt_Memset2D3D") {
void* hipMemset2DAsync_spt_ptr = nullptr;
void* hipMemset3D_spt_ptr = nullptr;
void* hipMemset3DAsync_spt_ptr = nullptr;
hipDriverProcAddressQueryResult symbolStatus = HIP_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND;
int currentHipVersion = 0;
HIP_CHECK(hipRuntimeGetVersion(&currentHipVersion));
HIP_CHECK(hipGetProcAddress("hipMemset2D_spt",
&hipMemset2D_spt_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset2DAsync_spt",
&hipMemset2DAsync_spt_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset3D_spt",
&hipMemset3D_spt_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset3DAsync_spt",
&hipMemset3DAsync_spt_ptr,
currentHipVersion, 0, nullptr));
HIP_CHECK(hipGetProcAddress("hipMemset2D_spt", &hipMemset2D_spt_ptr, currentHipVersion, 0,
&symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
HIP_CHECK(hipGetProcAddress("hipMemset2DAsync_spt", &hipMemset2DAsync_spt_ptr, currentHipVersion,
0, &symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
HIP_CHECK(hipGetProcAddress("hipMemset3D_spt", &hipMemset3D_spt_ptr, currentHipVersion, 0,
&symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
HIP_CHECK(hipGetProcAddress("hipMemset3DAsync_spt", &hipMemset3DAsync_spt_ptr, currentHipVersion,
0, &symbolStatus));
REQUIRE(symbolStatus == HIP_GET_PROC_ADDRESS_SUCCESS);
hipError_t (*dyn_hipMemset2D_spt_ptr)(void *, size_t, int, size_t, size_t) =
reinterpret_cast<hipError_t (*)(void *, size_t, int, size_t, size_t)>
@@ -747,12 +748,17 @@ TEST_CASE("Unit_hipGetProcAddress_spt_Memset2D3D") {
HIP_CHECK(hipStreamCreate(&stream[s]));
}
// set the whole matrix first to something different than 'value'
HIP_CHECK(dyn_hipMemset2DAsync_spt_ptr(devMem, pitch, 5, width, height, 0));
HIP_CHECK(hipStreamSynchronize(0));
for ( int s = 0; s < Ns; s++ ) {
int startIndex = s * (N/Ns);
HIP_CHECK(dyn_hipMemset2DAsync_spt_ptr(devMem + startIndex, pitch/Ns,
value, width/Ns, height/Ns,
int row = startIndex / width;
HIP_CHECK(dyn_hipMemset2DAsync_spt_ptr(devMem + row * pitch, pitch, value, width, height / Ns,
stream[s]));
}
for ( int s = 0; s < Ns; s++ ) {
HIP_CHECK(hipStreamSynchronize(stream[s]));
}