From 2404ade2efa55feceadcdbb85f1548b2ad976105 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 27 Sep 2019 18:23:31 -0400
Subject: [PATCH] P4 to Git Change 2005779 by vsytchen@vsytchen-hip-win10 on
2019/09/27 18:12:14
SWDEV-192384 - [HIP CQE][HIPonPAL][19.40] hipBindTexRef1DFetch, hipTextureRef2D are failed on all ASICs for both Win/Lnx
The runtime cannot trivially determine all the resources that will be used by a kernel, thus it can fail to make all of them resident.
1. Add new runtime flag PAL_ALWAYS_RESIDENT. Enabling this setting will cause resources to become resident at allocation time.
2. Always enable the setting for HIP (the flag is not consulted there); for OCL it defaults to false and can be turned on with PAL_ALWAYS_RESIDENT.
ReviewBoardURL = http://ocltc.amd.com/reviews/r/18054/diff/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#153 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#319 edit
---
rocclr/runtime/device/pal/palresource.cpp | 29 +++++++++++++++++++++++
rocclr/runtime/device/pal/palresource.hpp | 2 ++
rocclr/runtime/device/pal/palsettings.cpp | 1 +
rocclr/runtime/device/pal/palsettings.hpp | 3 ++-
rocclr/runtime/device/pal/palvirtual.cpp | 3 +++
rocclr/runtime/utils/flags.hpp | 4 +++-
6 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp
index 189ebf48fb..f7e42f5590 100644
--- a/rocclr/runtime/device/pal/palresource.cpp
+++ b/rocclr/runtime/device/pal/palresource.cpp
@@ -28,6 +28,17 @@
namespace pal {
+// ================================================================================================
+Pal::Result GpuMemoryReference::MakeResident() const {  // Adds a PAL memory reference for gpuMem_ when alwaysResident_ is set
+ Pal::Result result = Pal::Result::Success;  // Stays Success when the setting is off and nothing is done
+ if (device_.settings().alwaysResident_) {
+ Pal::GpuMemoryRef memRef = {};  // Zero-initialized; only pGpuMemory is filled in below
+ memRef.pGpuMemory = gpuMem_;  // NOTE(review): nullptr in the call below is presumably the queue argument - confirm against PAL
+ result = device_.iDev()->AddGpuMemoryReferences(1, &memRef, nullptr, Pal::GpuMemoryRefCantTrim);
+ }
+ return result;  // Result of AddGpuMemoryReferences, or Success if the call was skipped
+}
+
// ================================================================================================
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
const Pal::GpuMemoryCreateInfo& createInfo) {
@@ -46,6 +57,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
// If cache was freed, then try to allocate again
result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
}
+ if (result == Pal::Result::Success) {
+ result = memRef->MakeResident();
+ }
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
@@ -71,6 +85,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
Pal::VaRange vaRange = Pal::VaRange::Default;
if (memRef != nullptr) {
result = dev.iDev()->CreatePinnedGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
+ if (result == Pal::Result::Success) {
+ result = memRef->MakeResident();
+ }
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
@@ -93,6 +110,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
if (memRef != nullptr) {
result = dev.iDev()->CreateSvmGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
+ if (result == Pal::Result::Success) {
+ result = memRef->MakeResident();
+ }
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
@@ -117,6 +137,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
if (memRef != nullptr) {
result = dev.iDev()->OpenExternalSharedGpuMemory(openInfo, &memRef[1], &createInfo,
&memRef->gpuMem_);
+ if (result == Pal::Result::Success) {
+ result = memRef->MakeResident();
+ }
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
@@ -144,6 +167,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
if (memRef != nullptr) {
result = dev.iDev()->OpenExternalSharedImage(openInfo, imgMem, &memRef[1], &createInfo, image,
&memRef->gpuMem_);
+ if (result == Pal::Result::Success) {
+ result = memRef->MakeResident();
+ }
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
@@ -164,6 +190,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
if (memRef != nullptr) {
result = dev.iDev()->OpenPeerGpuMemory(openInfo, &memRef[1], &memRef->gpuMem_);
+ if (result == Pal::Result::Success) {
+ result = memRef->MakeResident();
+ }
if (result != Pal::Result::Success) {
memRef->release();
return nullptr;
diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp
index 9458382fff..603fa9a34c 100644
--- a/rocclr/runtime/device/pal/palresource.hpp
+++ b/rocclr/runtime/device/pal/palresource.hpp
@@ -43,6 +43,8 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
//! Get PAL memory object
Pal::IGpuMemory* iMem() const { return gpuMem_; }
+ Pal::Result MakeResident() const;
+
Pal::IGpuMemory* gpuMem_; //!< PAL GPU memory object
void* cpuAddress_; //!< CPU address of this memory
const Device& device_; //!< GPU device
diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp
index 2f9df4f4cd..8ea11b3954 100644
--- a/rocclr/runtime/device/pal/palsettings.cpp
+++ b/rocclr/runtime/device/pal/palsettings.cpp
@@ -152,6 +152,7 @@ Settings::Settings() {
imageBufferWar_ = false;
disableSdma_ = PAL_DISABLE_SDMA;
mallPolicy_ = 0;
+ alwaysResident_ = amd::IS_HIP ? true : ((!flagIsDefault(PAL_ALWAYS_RESIDENT)) ? PAL_ALWAYS_RESIDENT : false);
}
bool Settings::create(const Pal::DeviceProperties& palProp,
diff --git a/rocclr/runtime/device/pal/palsettings.hpp b/rocclr/runtime/device/pal/palsettings.hpp
index c5928a5f1f..b92f62df36 100644
--- a/rocclr/runtime/device/pal/palsettings.hpp
+++ b/rocclr/runtime/device/pal/palsettings.hpp
@@ -64,7 +64,8 @@ class Settings : public device::Settings {
uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing
uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10
uint disableSdma_ : 1; //!< Disable SDMA support
- uint reserved_ : 8;
+ uint alwaysResident_ : 1; //!< Make resources resident at allocation time
+ uint reserved_ : 7;
};
uint value_;
};
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index 1d4a9eb85a..925e238b51 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -254,6 +254,9 @@ Pal::Result VirtualGPU::Queue::UpdateAppPowerProfile() {
}
void VirtualGPU::Queue::addCmdMemRef(GpuMemoryReference* mem) {
+ if (gpu_.dev().settings().alwaysResident_) {
+ return;
+ }
Pal::IGpuMemory* iMem = mem->iMem();
auto it = memReferences_.find(mem);
if (it != memReferences_.end()) {
diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp
index 779baade7c..552438efee 100644
--- a/rocclr/runtime/utils/flags.hpp
+++ b/rocclr/runtime/utils/flags.hpp
@@ -202,7 +202,9 @@ release(uint, AMD_SERIALIZE_KERNEL, 0, \
"0x2 = Wait for completion after enqueue 0x3 = both") \
release(uint, AMD_SERIALIZE_COPY, 0, \
"Serialize copies, 0x1 = Wait for completion before enqueue" \
- "0x2 = Wait for completion after enqueue 0x3 = both")
+ "0x2 = Wait for completion after enqueue 0x3 = both") \
+release(bool, PAL_ALWAYS_RESIDENT, false, \
+ "Force memory resources to become resident at allocation time")
namespace amd {