SWDEV-534207 - Solve more phoenix mem test failures (#545)

* SWDEV-534207 - fix 'Unit_hipFreeMipmappedArrayImplicitSyncArray - float' out of memory error with extent (1024, 1024, 1024) and 1 levels on 740M iGPUs. totalGlobalMem is not really the amount of device memory available for compute

* SWDEV-534207 - compare expected available memory within a range in Unit_hipMalloc3D_Basic; to take into account some bookkeeping overhead (instead of in exact 64MB chunks)

* SWDEV-534207 - fix missing setting of SvmGpuMemoryCreateInfo::interprocess in the 'fine' and 'fine uncached' memory and 'MemorySubAllocator' cases. Coarse allocation was added first; the flag was missed when the other three cases were added

* SWDEV-534207 - allow more room for the check of available memory after hipFree() in Unit_hipMalloc3D_Basic; it was till failing on 740M

---------

Co-authored-by: Gerardo Hernandez <gerardo.hernandez@amd.com>
Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com>
Esse commit está contido em:
systems-assistant[bot]
2025-10-17 15:49:57 +01:00
commit de GitHub
commit dae5b30a1a
3 arquivos alterados com 26 adições e 8 exclusões
@@ -2073,6 +2073,10 @@ bool MemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
createInfo.heaps[2] = Pal::GpuHeapGartUswc;
createInfo.flags.peerWritable = device_->P2PAccessAllowed();
createInfo.mallPolicy = static_cast<Pal::GpuMemMallPolicy>(device_->settings().mallPolicy_);
if (amd::IS_HIP && PAL_HIP_IPC_FLAG) {
// set interprocess for IPC memory support
createInfo.flags.interprocess = 1;
}
GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
if (mem_ref != nullptr) {
// Workaround: some chunk memory are not guaranteed to be resident during initial allocation.
@@ -2117,6 +2121,10 @@ bool FineMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
createInfo.flags.useReservedGpuVa = (reserved_va != nullptr);
createInfo.pReservedGpuVaOwner = reserved_va;
createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never;
if (amd::IS_HIP && PAL_HIP_IPC_FLAG) {
//set interprocess for IPC memory support
createInfo.flags.interprocess = 1;
}
GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
if ((mem_ref != nullptr) && InitAllocator(mem_ref)) {
// Workaround: some chunk memory are not guaranteed to be resident during initial allocation.
@@ -2137,6 +2145,10 @@ bool FineUncachedMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved
createInfo.pReservedGpuVaOwner = reserved_va;
createInfo.flags.gl2Uncached = true;
createInfo.mallPolicy = Pal::GpuMemMallPolicy::Never;
if (amd::IS_HIP && PAL_HIP_IPC_FLAG) {
//set interprocess for IPC memory support
createInfo.flags.interprocess = 1;
}
GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
if ((mem_ref != nullptr) && InitAllocator(mem_ref)) {
// Workaround: some chunk memory are not guaranteed to be resident during initial allocation.
@@ -56,15 +56,20 @@ TEMPLATE_TEST_CASE("Unit_hipFreeMipmappedArrayImplicitSyncArray", "", char, floa
HIP_CHECK(hipGetDeviceProperties(&props, 0))
for (auto numLevels : levels) {
INFO(" extent: (" << extent.width << ", " << extent.height << ", " << extent.depth << ") and "
<< numLevels << " levels. Total VRAM: " << props.totalGlobalMem);
if (extent.width * extent.height * extent.depth * numLevels * sizeof(TestType) >
props.totalGlobalMem) {
props.totalGlobalMem / 2) {
// some devices will not have enough memory allocate the 6GB required for the biggest extent
// We skip the test in that case (and no warning is needed)
// We would skip the test if the extent would require more than half of the global memory.
// Note that totalGlobalMem is not an exact measurement of the available memory for
// compute and we cannot use it as an exact value, so we use half
// (we use SUCCEED as no warning is needed)
SUCCEED(
"Device does not have enough global memory to allocate a mipmapped array using this "
" extent: ("
<< extent.width << ", " << extent.height << ", " << extent.depth << ") and " << numLevels
<< " levels");
"Device might not have enough global memory to allocate a mipmapped array using this "
"extent; "
"test will not be run. Total global memory: "
<< props.totalGlobalMem);
continue;
}
@@ -101,8 +101,9 @@ TEST_CASE("Unit_hipMalloc3D_Basic") {
// available amount returned is the same as the one we got the first time because of
// other processes running on the system; we consider a success if at least a size
// equivalent to two of the allocations has become available
// This test was too brittle before, when it was expecting avail to be equal to pavail
if (avail < pavail - height * width * depth) {
// (give or take 4MB, as there is bookkeeping overhead)
// This test was too brittle before, when it was expecting 'avail' to be equal to 'pavail'
if (avail < pavail - height * width * depth - 4 * 1024 * 1024) {
WARN("Memory leak of hipMalloc3D API in multithreaded scenario."
<< " Available memory before the hipMalloc3D() call (bytes): " << pavail
<< " Available memory after the call: " << iavail