// Summary of this patch (free text ahead of the first `diff --git` header;
// patch tools skip such leading text, so keep notes here and not in the hunks).
//
// Files touched: rocclr/runtime/device/pal/paldevice.cpp and
// rocclr/runtime/utils/flags.hpp.
//
// flags.hpp: adds the release flag GPU_ADD_HBCC_SIZE (bool, default false),
//   described as "Add HBCC size to the reported device memory".
// paldevice.cpp, NullDevice::create: the offline-device local heap entry now
//   sets both heapSize and physicalHeapSize to 512 * Mi.
// paldevice.cpp, NullDevice::fillDeviceInfo: localRAM (which feeds
//   info_.globalMemSize_) and info_.maxMemAllocSize_ are taken from the
//   heapSize fields of GpuHeapLocal/GpuHeapInvisible when GPU_ADD_HBCC_SIZE is
//   set, and from the physicalHeapSize fields otherwise. With the default
//   (false) the reported sizes therefore switch from heapSize to
//   physicalHeapSize.
//
// NOTE(review): presumably heapSize includes the HBCC-backed portion while
//   physicalHeapSize does not; this is inferred from the flag text only.
//   Confirm against the PAL heap property definitions.
// NOTE(review): in the NullDevice::create hunk, `heaps` is declared without an
//   initializer and only the GpuHeapLocal entry is assigned in the visible
//   lines, whereas fillDeviceInfo also reads the GpuHeapInvisible entry of its
//   `heaps`. If the same array is passed through, confirm it is zeroed
//   somewhere outside this hunk.
// NOTE(review): `static_cast(` appears with no target type in a context line;
//   the angle-bracketed type was probably lost when this diff was extracted.
//   Verify against the repository before applying.
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp index 1d4a24d4b2..2fd57ee4f9 100644 --- a/rocclr/runtime/device/pal/paldevice.cpp +++ b/rocclr/runtime/device/pal/paldevice.cpp @@ -213,7 +213,8 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, // Report 512MB for all offline devices Pal::GpuMemoryHeapProperties heaps[Pal::GpuHeapCount]; - heaps[Pal::GpuHeapLocal].heapSize = 512 * Mi; + heaps[Pal::GpuHeapLocal].heapSize = + heaps[Pal::GpuHeapLocal].physicalHeapSize = 512 * Mi; Pal::WorkStationCaps wscaps = {}; @@ -301,7 +302,12 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.globalMemCacheType_ = CL_NONE; } - uint64_t localRAM = heaps[Pal::GpuHeapLocal].heapSize + heaps[Pal::GpuHeapInvisible].heapSize; + uint64_t localRAM; + if (GPU_ADD_HBCC_SIZE) { + localRAM = heaps[Pal::GpuHeapLocal].heapSize + heaps[Pal::GpuHeapInvisible].heapSize; + } else { + localRAM = heaps[Pal::GpuHeapLocal].physicalHeapSize + heaps[Pal::GpuHeapInvisible].physicalHeapSize; + } info_.globalMemSize_ = (static_cast(std::min(GPU_MAX_HEAP_SIZE, 100u)) * static_cast(localRAM) / 100u); @@ -316,8 +322,13 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, } // Find the largest heap form FB memory - info_.maxMemAllocSize_ = std::max(cl_ulong(heaps[Pal::GpuHeapLocal].heapSize), - cl_ulong(heaps[Pal::GpuHeapInvisible].heapSize)); + if (GPU_ADD_HBCC_SIZE) { + info_.maxMemAllocSize_ = std::max(cl_ulong(heaps[Pal::GpuHeapLocal].heapSize), + cl_ulong(heaps[Pal::GpuHeapInvisible].heapSize)); + } else { + info_.maxMemAllocSize_ = std::max(cl_ulong(heaps[Pal::GpuHeapLocal].physicalHeapSize), + cl_ulong(heaps[Pal::GpuHeapInvisible].physicalHeapSize)); + } #if defined(ATI_OS_WIN) if (settings().apuSystem_) { diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp index cb98f0365e..4563fe2ad2 100644 --- a/rocclr/runtime/utils/flags.hpp +++ 
b/rocclr/runtime/utils/flags.hpp @@ -187,6 +187,8 @@ release(bool, GPU_ANALYZE_HANG, false, \ "1 = Enables GPU hang analysis") \ release(uint, GPU_MAX_REMOTE_MEM_SIZE, 2, \ "Maximum size (in Ki) that allows device memory substitution with system") \ +release(bool, GPU_ADD_HBCC_SIZE, false, \ + "Add HBCC size to the reported device memory") \ release_on_stg(uint, GPU_WAVE_LIMIT_CU_PER_SH, 0, \ "Assume the number of CU per SH for wave limiter") \ release_on_stg(uint, GPU_WAVE_LIMIT_MAX_WAVE, 10, \