diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
index 09b3b34d8c..189ebf48fb 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
@@ -419,6 +419,19 @@ void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
       createInfo->heaps[0] = Pal::GpuHeapLocal;
       break;
   }
+
+#if !IS_MAINLINE
+  // Pick the MALL policy from the memory type: only Local and Scratch use the configured one
+  switch (memoryType()) {
+    case Local:
+    case Scratch:
+      createInfo->mallPolicy = static_cast<Pal::GpuMemMallPolicy>(dev().settings().mallPolicy_);
+      break;
+    default:
+      createInfo->mallPolicy = Pal::GpuMemMallPolicy::Never;
+      break;
+  }
+#endif
 }
 
 // ================================================================================================
@@ -1873,7 +1886,7 @@ bool MemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
   createInfo.priority = Pal::GpuMemPriority::Normal;
   createInfo.heapCount = 1;
   createInfo.heaps[0] = Pal::GpuHeapInvisible;
-  createInfo.flags.peerWritable = device_->P2PAccessAllowed();
+  createInfo.flags.peerWritable = device_->P2PAccessAllowed();
   GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
   if (mem_ref != nullptr) {
     return InitAllocator(mem_ref);
@@ -1892,7 +1905,7 @@ bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
   createInfo.pReservedGpuVaOwner = reserved_va;
   createInfo.heapCount = 2;
   createInfo.heaps[0] = Pal::GpuHeapInvisible;
-  createInfo.heaps[1] = Pal::GpuHeapLocal;
+  createInfo.heaps[1] = Pal::GpuHeapLocal;
   GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
   if (mem_ref != nullptr) {
     return InitAllocator(mem_ref);
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
index d9a3bd99ad..2f9df4f4cd 100644
--- 
a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
@@ -151,6 +151,7 @@ Settings::Settings() {
   enableHwP2P_ = false;
   imageBufferWar_ = false;
   disableSdma_ = PAL_DISABLE_SDMA;
+  mallPolicy_ = 0;
 }
 
 bool Settings::create(const Pal::DeviceProperties& palProp,
@@ -551,6 +552,10 @@ void Settings::override() {
     enableCoopGroups_ = GPU_ENABLE_COOP_GROUPS;
     enableCoopMultiDeviceGroups_ = GPU_ENABLE_COOP_GROUPS;
   }
+
+  if (!flagIsDefault(PAL_MALL_POLICY)) {
+    mallPolicy_ = PAL_MALL_POLICY;
+  }
 }
 
 }  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
index 65eb6f7927..c5928a5f1f 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
@@ -94,6 +94,7 @@ class Settings : public device::Settings {
   uint64_t maxAllocSize_;   //!< Maximum single allocation size
   uint rgpSqttDispCount_;   //!< The number of dispatches captured in SQTT
   uint maxCmdBuffers_;      //!< Maximum number of command buffers allocated per queue
+  uint mallPolicy_;         //!< MALL policy: 0 - default, 1 - always bypass, 2 - always put
 
   uint64_t subAllocationMinSize_;  //!< Minimum size allowed for suballocations
   uint64_t subAllocationMaxSize_;  //!< Maximum size allowed with suballocations
diff --git a/projects/clr/rocclr/runtime/utils/flags.hpp b/projects/clr/rocclr/runtime/utils/flags.hpp
index 9463bef5f7..779baade7c 100644
--- a/projects/clr/rocclr/runtime/utils/flags.hpp
+++ b/projects/clr/rocclr/runtime/utils/flags.hpp
@@ -167,6 +167,11 @@ release_on_stg(bool, PAL_DISABLE_SDMA, false, \
         "1 = Disable SDMA for PAL") \
 release(uint, PAL_RGP_DISP_COUNT, 50, \
         "The number of dispatches for RGP capture with SQTT") \
+release(uint, PAL_MALL_POLICY, 0, \
+        "Controls the behaviour of allocations with respect to the MALL: " \
+        "0 = MALL policy is decided by KMD, " \
+        "1 = Allocations are never put through the MALL, " \
+        "2 = Allocations will always be put through the MALL") \
 release(bool, GPU_ENABLE_WAVE32_MODE, true, \
         "Enables Wave32 compilation in HW if available") \
 release(bool, GPU_ENABLE_LC, true, \