From fa8e88b108ce9511bde17cb62444f4180216234c Mon Sep 17 00:00:00 2001 From: German Date: Fri, 8 Dec 2023 14:43:55 -0500 Subject: [PATCH] SWDEV-432575 - Disable direct map for persistent memory Persistent memory should use direct access for write map and indirect for read map. Change-Id: I9fc84836d60088b24012ed25f7ef8c16e33796a3 [ROCm/clr commit: c8b3253a246569431887e70b6c22e8893bd5150a] --- projects/clr/rocclr/device/pal/palmemory.cpp | 5 +++-- projects/clr/rocclr/device/pal/palmemory.hpp | 6 +++--- projects/clr/rocclr/device/pal/palresource.cpp | 13 +++++-------- projects/clr/rocclr/device/pal/palresource.hpp | 8 ++++++-- projects/clr/rocclr/device/pal/palsettings.cpp | 6 ------ projects/clr/rocclr/device/pal/palsettings.hpp | 3 +-- 6 files changed, 18 insertions(+), 23 deletions(-) diff --git a/projects/clr/rocclr/device/pal/palmemory.cpp b/projects/clr/rocclr/device/pal/palmemory.cpp index cf45c0cc34..c90725efec 100644 --- a/projects/clr/rocclr/device/pal/palmemory.cpp +++ b/projects/clr/rocclr/device/pal/palmemory.cpp @@ -109,7 +109,8 @@ bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params // Assume that allocations will be placed into visible heap when ReBar is enabled // Only enable this assumption for small size local buffers constexpr size_t kLargeAlloc = (1ull << 27); - if (!amd::IS_HIP && (memType == Local) && desc().buffer_ && (size() < kLargeAlloc) && dev().info().largeBar_) { + if (!amd::IS_HIP && (memType == Local) && desc().buffer_ && (size() < kLargeAlloc) && + dev().info().largeBar_) { memType = Persistent; } // Create a resource in PAL @@ -1071,7 +1072,7 @@ void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& regi //! runtime can't use it directly, //! because CAL volume map doesn't work properly. //! @todo arrays can be added for persistent lock with some CAL changes - else if((isPersistentDirectMap(mapFlags & CL_MAP_WRITE) && (getMapCount() == 0)) || + else if ((isPersistentDirectMap(mapFlags & CL_MAP_WRITE) && (getMapCount() == 0)) || isPersistentMapped()) { if (nullptr == map(nullptr)) { useRemoteResource = true; diff --git a/projects/clr/rocclr/device/pal/palmemory.hpp b/projects/clr/rocclr/device/pal/palmemory.hpp index 4a557c7ee7..3824769a5d 100644 --- a/projects/clr/rocclr/device/pal/palmemory.hpp +++ b/projects/clr/rocclr/device/pal/palmemory.hpp @@ -158,9 +158,9 @@ class Memory : public device::Memory, public Resource { //! forced USWC memory on APU and will cause a switch to //! indirect map for allocations with a possibility of host read bool isDirectMap() { - return (isCacheable() || !isHostMemDirectAccess() || - (owner()->getMemFlags() & - (CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY))); + return (isCacheable() || (!isHostMemDirectAccess() && !IsPersistent()) || + (owner()->getMemFlags() & + (CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY))); } //! Quick view update for managed buffers. It should avoid expensive object allocations diff --git a/projects/clr/rocclr/device/pal/palresource.cpp b/projects/clr/rocclr/device/pal/palresource.cpp index 77baa8d255..37b5338921 100644 --- a/projects/clr/rocclr/device/pal/palresource.cpp +++ b/projects/clr/rocclr/device/pal/palresource.cpp @@ -636,8 +636,7 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) { Pal::ImageTiling tiling = forceLinear ? Pal::ImageTiling::Linear : Pal::ImageTiling::Optimal; uint32_t rowPitch = 0; - if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) || - (memoryType() == ImageBuffer)) { + if (memoryType() == ImageBuffer) { tiling = Pal::ImageTiling::Linear; } else if (memoryType() == ImageExternalBuffer) { // We cannot get tiling info from vulkan/d3d driver now. So assume it to be optimal. @@ -1935,13 +1934,11 @@ bool Resource::isPersistentDirectMap(bool writeMap) const { // If direct map is possible, then validate it with the current tiling if (directMap && desc().tiled_) { - //!@note IOL for Linux doesn't support tiling aperture - // and runtime doesn't force linear images in persistent - directMap = IS_WINDOWS && !dev().settings().linearPersistentImage_; - } - + // Latest HW does have tiling apertures + directMap = false; + } if (memoryType() == View) { - directMap = viewOwner_->isPersistentDirectMap(); + directMap = viewOwner_->isPersistentDirectMap(writeMap); } return directMap; diff --git a/projects/clr/rocclr/device/pal/palresource.hpp b/projects/clr/rocclr/device/pal/palresource.hpp index 4236e6854d..0baa761863 100644 --- a/projects/clr/rocclr/device/pal/palresource.hpp +++ b/projects/clr/rocclr/device/pal/palresource.hpp @@ -371,8 +371,12 @@ class Resource : public amd::HeapObject { //! Retunrs true if memory type matches specified bool isMemoryType(MemoryType memType) const; - //! Returns TRUE if resource was allocated as cacheable - bool isCacheable() const { return (isMemoryType(Remote) || isMemoryType(Pinned)) ? true : false; } + //! Returns TRUE if resource was allocated as CPU accessible and cacheable + bool isCacheable() const { return (isMemoryType(Remote) || + isMemoryType(Pinned)) ? true : false; } + + //! Returns TRUE if resource was allocated as CPU visible device memory + bool IsPersistent() const { return isMemoryType(Persistent) ? true : false; } bool glAcquire(); bool glRelease(); diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index 704ef0292e..a4902e95f1 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -101,10 +101,6 @@ Settings::Settings() { // Number of compute rings. numComputeRings_ = 0; - // Controls tiled images in persistent - //!@note IOL for Linux doesn't setup tiling aperture in CMM/QS - linearPersistentImage_ = false; - // Device enqueuing settings numDeviceEvents_ = 1024; numWaitEvents_ = 8; @@ -250,8 +246,6 @@ bool Settings::create(const Pal::DeviceProperties& palProp, case Pal::AsicRevision::Polaris11: case Pal::AsicRevision::Polaris12: case Pal::AsicRevision::Polaris22: - // Disable tiling aperture on VI+ - linearPersistentImage_ = true; // Keep this false even though we have support // singleFpDenorm_ = true; viPlus_ = true; diff --git a/projects/clr/rocclr/device/pal/palsettings.hpp b/projects/clr/rocclr/device/pal/palsettings.hpp index 33448c9211..942c5c91bd 100644 --- a/projects/clr/rocclr/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/device/pal/palsettings.hpp @@ -69,7 +69,6 @@ class Settings : public device::Settings { uint aiPlus_ : 1; //!< AI and post AI features uint gfx10Plus_ : 1; //!< gfx10 and post gfx10 features uint threadTraceEnable_ : 1; //!< Thread trace enable - uint linearPersistentImage_ : 1; //!< Allocates linear images in persistent uint svmAtomics_ : 1; //!< SVM device atomics uint svmFineGrainSystem_ : 1; //!< SVM fine grain system support uint useDeviceQueue_ : 1; //!< Submit to separate device queue @@ -81,7 +80,7 @@ class Settings : public device::Settings { uint disableSdma_ : 1; //!< Disable SDMA support uint alwaysResident_ : 1; //!< Make resources resident at allocation time uint useDeviceKernelArg_ : 1; //!< Use persistent memory for kernel arguments - uint reserved_ : 8; + uint reserved_ : 9; }; uint value_; };