diff --git a/rocclr/device/pal/palresource.cpp b/rocclr/device/pal/palresource.cpp index 370c585ede..45b2df8bb4 100644 --- a/rocclr/device/pal/palresource.cpp +++ b/rocclr/device/pal/palresource.cpp @@ -1828,9 +1828,7 @@ void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resourc // ================================================================================================ void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const { if (desc_.cardMemory_ && !isPersistentDirectMap()) { - // @todo remove const cast Unimplemented(); - // const_cast(dev()).resUnmapLocal(resource); } else { Pal::Result result = resource->Unmap(); if (Pal::Result::Success != result) { @@ -1928,6 +1926,10 @@ bool Resource::isPersistentDirectMap(bool writeMap) const { directMap = IS_WINDOWS && !dev().settings().linearPersistentImage_; } + if (memoryType() == View) { + directMap = viewOwner_->isPersistentDirectMap(); + } + return directMap; } diff --git a/rocclr/device/pal/palsettings.cpp b/rocclr/device/pal/palsettings.cpp index 877498059a..aecb4fde84 100644 --- a/rocclr/device/pal/palsettings.cpp +++ b/rocclr/device/pal/palsettings.cpp @@ -147,6 +147,7 @@ Settings::Settings() { alwaysResident_ = amd::IS_HIP ? true : false; prepinnedMinSize_ = 0; cpDmaCopySizeMax_ = GPU_CP_DMA_COPY_SIZE * Ki; + useDeviceKernelArg_ = flagIsDefault(HIP_FORCE_DEV_KERNARG) ? false : HIP_FORCE_DEV_KERNARG; } bool Settings::create(const Pal::DeviceProperties& palProp, diff --git a/rocclr/device/pal/palsettings.hpp b/rocclr/device/pal/palsettings.hpp index 32d3ad2a14..cb8e3ce890 100644 --- a/rocclr/device/pal/palsettings.hpp +++ b/rocclr/device/pal/palsettings.hpp @@ -80,7 +80,8 @@ class Settings : public device::Settings { uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10 uint disableSdma_ : 1; //!< Disable SDMA support uint alwaysResident_ : 1; //!< Make resources resident at allocation time - uint reserved_ : 9; + uint useDeviceKernelArg_ : 1; //!< Use persistent memory for kernel arguments + uint reserved_ : 8; }; uint value_; }; diff --git a/rocclr/device/pal/palvirtual.cpp b/rocclr/device/pal/palvirtual.cpp index 96a199d9eb..2e4d23c72d 100644 --- a/rocclr/device/pal/palvirtual.cpp +++ b/rocclr/device/pal/palvirtual.cpp @@ -949,8 +949,17 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs, return false; } - if (!managedBuffer_.create(Resource::RemoteUSWC)) { - return false; + // Create buffers for kernel arg management + if (!managedBuffer_.create( + dev().settings().useDeviceKernelArg_ ? Resource::Persistent : Resource::RemoteUSWC)) { + // Try just USWC if persistent memory failed + if (dev().settings().useDeviceKernelArg_) { + if (!managedBuffer_.create(Resource::RemoteUSWC)) { + return false; + } + } else { + return false; + } } // Diable double copy optimization,