From 2404ade2efa55feceadcdbb85f1548b2ad976105 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 27 Sep 2019 18:23:31 -0400 Subject: [PATCH] P4 to Git Change 2005779 by vsytchen@vsytchen-hip-win10 on 2019/09/27 18:12:14 SWDEV-192384 - [HIP CQE][HIPonPAL][19.40] hipBindTexRef1DFetch, hipTextureRef2D are failed on all ASICs for both Win/Lnx The runtime cannot trivially determine all the resources that a kernel will use, so it can fail to make all of them resident. 1. Add a new runtime flag, PAL_ALWAYS_RESIDENT. Enabling this setting causes resources to be made resident at allocation time. 2. Enable the setting unconditionally for HIP; for OCL it defaults to false and follows PAL_ALWAYS_RESIDENT when the flag is set explicitly. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18054/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#79 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#153 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#319 edit --- rocclr/runtime/device/pal/palresource.cpp | 29 +++++++++++++++++++++++ rocclr/runtime/device/pal/palresource.hpp | 2 ++ rocclr/runtime/device/pal/palsettings.cpp | 1 + rocclr/runtime/device/pal/palsettings.hpp | 3 ++- rocclr/runtime/device/pal/palvirtual.cpp | 3 +++ rocclr/runtime/utils/flags.hpp | 4 +++- 6 files changed, 40 insertions(+), 2 deletions(-) diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp index 189ebf48fb..f7e42f5590 100644 --- a/rocclr/runtime/device/pal/palresource.cpp +++ b/rocclr/runtime/device/pal/palresource.cpp @@ -28,6 +28,17 @@ namespace pal { +// ================================================================================================ +Pal::Result GpuMemoryReference::MakeResident() const { + Pal::Result result = Pal::Result::Success; + if (device_.settings().alwaysResident_) { + Pal::GpuMemoryRef memRef = {}; + memRef.pGpuMemory = gpuMem_; + result = device_.iDev()->AddGpuMemoryReferences(1, &memRef, nullptr, Pal::GpuMemoryRefCantTrim); + } + return result; +} + // ================================================================================================ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, const Pal::GpuMemoryCreateInfo& createInfo) { @@ -46,6 +57,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, // If cache was freed, then try to allocate again result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_); } + if (result == Pal::Result::Success) { + result = memRef->MakeResident(); + } if (result != Pal::Result::Success) { memRef->release(); return nullptr; @@ -71,6 +85,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, Pal::VaRange vaRange = Pal::VaRange::Default; if (memRef != nullptr) { result = dev.iDev()->CreatePinnedGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_); + if (result == Pal::Result::Success) { + result 
= memRef->MakeResident(); + } if (result != Pal::Result::Success) { memRef->release(); return nullptr; @@ -93,6 +110,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev); if (memRef != nullptr) { result = dev.iDev()->CreateSvmGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_); + if (result == Pal::Result::Success) { + result = memRef->MakeResident(); + } if (result != Pal::Result::Success) { memRef->release(); return nullptr; @@ -117,6 +137,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, if (memRef != nullptr) { result = dev.iDev()->OpenExternalSharedGpuMemory(openInfo, &memRef[1], &createInfo, &memRef->gpuMem_); + if (result == Pal::Result::Success) { + result = memRef->MakeResident(); + } if (result != Pal::Result::Success) { memRef->release(); return nullptr; @@ -144,6 +167,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, if (memRef != nullptr) { result = dev.iDev()->OpenExternalSharedImage(openInfo, imgMem, &memRef[1], &createInfo, image, &memRef->gpuMem_); + if (result == Pal::Result::Success) { + result = memRef->MakeResident(); + } if (result != Pal::Result::Success) { memRef->release(); return nullptr; @@ -164,6 +190,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev, GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev); if (memRef != nullptr) { result = dev.iDev()->OpenPeerGpuMemory(openInfo, &memRef[1], &memRef->gpuMem_); + if (result == Pal::Result::Success) { + result = memRef->MakeResident(); + } if (result != Pal::Result::Success) { memRef->release(); return nullptr; diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp index 9458382fff..603fa9a34c 100644 --- a/rocclr/runtime/device/pal/palresource.hpp +++ b/rocclr/runtime/device/pal/palresource.hpp @@ -43,6 +43,8 @@ class GpuMemoryReference : public amd::ReferenceCountedObject { //! 
Get PAL memory object Pal::IGpuMemory* iMem() const { return gpuMem_; } + Pal::Result MakeResident() const; + Pal::IGpuMemory* gpuMem_; //!< PAL GPU memory object void* cpuAddress_; //!< CPU address of this memory const Device& device_; //!< GPU device diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp index 2f9df4f4cd..8ea11b3954 100644 --- a/rocclr/runtime/device/pal/palsettings.cpp +++ b/rocclr/runtime/device/pal/palsettings.cpp @@ -152,6 +152,7 @@ Settings::Settings() { imageBufferWar_ = false; disableSdma_ = PAL_DISABLE_SDMA; mallPolicy_ = 0; + alwaysResident_ = amd::IS_HIP ? true : ((!flagIsDefault(PAL_ALWAYS_RESIDENT)) ? PAL_ALWAYS_RESIDENT : false); } bool Settings::create(const Pal::DeviceProperties& palProp, diff --git a/rocclr/runtime/device/pal/palsettings.hpp b/rocclr/runtime/device/pal/palsettings.hpp index c5928a5f1f..b92f62df36 100644 --- a/rocclr/runtime/device/pal/palsettings.hpp +++ b/rocclr/runtime/device/pal/palsettings.hpp @@ -64,7 +64,8 @@ class Settings : public device::Settings { uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10 uint disableSdma_ : 1; //!< Disable SDMA support - uint reserved_ : 8; + uint alwaysResident_ : 1; //!< Make resources resident at allocation time + uint reserved_ : 7; }; uint value_; }; diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp index 1d4a9eb85a..925e238b51 100644 --- a/rocclr/runtime/device/pal/palvirtual.cpp +++ b/rocclr/runtime/device/pal/palvirtual.cpp @@ -254,6 +254,9 @@ Pal::Result VirtualGPU::Queue::UpdateAppPowerProfile() { } void VirtualGPU::Queue::addCmdMemRef(GpuMemoryReference* mem) { + if (gpu_.dev().settings().alwaysResident_) { + return; + } Pal::IGpuMemory* iMem = mem->iMem(); auto it = memReferences_.find(mem); if (it != memReferences_.end()) { diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp 
index 779baade7c..552438efee 100644 --- a/rocclr/runtime/utils/flags.hpp +++ b/rocclr/runtime/utils/flags.hpp @@ -202,7 +202,9 @@ release(uint, AMD_SERIALIZE_KERNEL, 0, \ "0x2 = Wait for completion after enqueue 0x3 = both") \ release(uint, AMD_SERIALIZE_COPY, 0, \ "Serialize copies, 0x1 = Wait for completion before enqueue" \ - "0x2 = Wait for completion after enqueue 0x3 = both") + "0x2 = Wait for completion after enqueue 0x3 = both") \ +release(bool, PAL_ALWAYS_RESIDENT, false, \ + "Force memory resources to become resident at allocation time") namespace amd {