P4 to Git Change 2005779 by vsytchen@vsytchen-hip-win10 on 2019/09/27 18:12:14
SWDEV-192384 - [HIP CQE][HIPonPAL][19.40] hipBindTexRef1DFetch, hipTextureRef2D are failed on all ASICs for both Win/Lnx. The runtime cannot trivially determine all the resources that will be used by a kernel, thus it can fail to make all of them resident. 1. Add a new runtime flag PAL_ALWAYS_RESIDENT. Enabling this setting will cause resources to become resident at allocation time. 2. Set the default value of the above flag to true for HIP and false for OCL. ReviewBoardURL = http://ocltc.amd.com/reviews/r/18054/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#79 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#27 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#153 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#319 edit
This commit is contained in:
@@ -28,6 +28,17 @@
|
||||
|
||||
namespace pal {
|
||||
|
||||
// ================================================================================================
|
||||
//! Registers this object's PAL GPU memory with the device so that it becomes
//! resident at allocation time, when the device setting alwaysResident_ is on.
//!
//! \return Pal::Result::Success when alwaysResident_ is off (nothing is done);
//!         otherwise the result reported by AddGpuMemoryReferences().
Pal::Result GpuMemoryReference::MakeResident() const {
  // Nothing to register unless the always-resident policy is enabled
  if (!device_.settings().alwaysResident_) {
    return Pal::Result::Success;
  }

  // Describe the single allocation owned by this reference
  Pal::GpuMemoryRef residentRef = {};
  residentRef.pGpuMemory = gpuMem_;

  // One reference, no queue supplied (nullptr), flagged as GpuMemoryRefCantTrim
  return device_.iDev()->AddGpuMemoryReferences(1, &residentRef, nullptr,
                                                Pal::GpuMemoryRefCantTrim);
}
|
||||
|
||||
// ================================================================================================
|
||||
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
const Pal::GpuMemoryCreateInfo& createInfo) {
|
||||
@@ -46,6 +57,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
// If cache was freed, then try to allocate again
|
||||
result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
|
||||
}
|
||||
if (result == Pal::Result::Success) {
|
||||
result = memRef->MakeResident();
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
memRef->release();
|
||||
return nullptr;
|
||||
@@ -71,6 +85,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
Pal::VaRange vaRange = Pal::VaRange::Default;
|
||||
if (memRef != nullptr) {
|
||||
result = dev.iDev()->CreatePinnedGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
|
||||
if (result == Pal::Result::Success) {
|
||||
result = memRef->MakeResident();
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
memRef->release();
|
||||
return nullptr;
|
||||
@@ -93,6 +110,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
|
||||
if (memRef != nullptr) {
|
||||
result = dev.iDev()->CreateSvmGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
|
||||
if (result == Pal::Result::Success) {
|
||||
result = memRef->MakeResident();
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
memRef->release();
|
||||
return nullptr;
|
||||
@@ -117,6 +137,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
if (memRef != nullptr) {
|
||||
result = dev.iDev()->OpenExternalSharedGpuMemory(openInfo, &memRef[1], &createInfo,
|
||||
&memRef->gpuMem_);
|
||||
if (result == Pal::Result::Success) {
|
||||
result = memRef->MakeResident();
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
memRef->release();
|
||||
return nullptr;
|
||||
@@ -144,6 +167,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
if (memRef != nullptr) {
|
||||
result = dev.iDev()->OpenExternalSharedImage(openInfo, imgMem, &memRef[1], &createInfo, image,
|
||||
&memRef->gpuMem_);
|
||||
if (result == Pal::Result::Success) {
|
||||
result = memRef->MakeResident();
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
memRef->release();
|
||||
return nullptr;
|
||||
@@ -164,6 +190,9 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
|
||||
GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
|
||||
if (memRef != nullptr) {
|
||||
result = dev.iDev()->OpenPeerGpuMemory(openInfo, &memRef[1], &memRef->gpuMem_);
|
||||
if (result == Pal::Result::Success) {
|
||||
result = memRef->MakeResident();
|
||||
}
|
||||
if (result != Pal::Result::Success) {
|
||||
memRef->release();
|
||||
return nullptr;
|
||||
|
||||
@@ -43,6 +43,8 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
|
||||
//! Get PAL memory object
|
||||
Pal::IGpuMemory* iMem() const { return gpuMem_; }
|
||||
|
||||
Pal::Result MakeResident() const;
|
||||
|
||||
Pal::IGpuMemory* gpuMem_; //!< PAL GPU memory object
|
||||
void* cpuAddress_; //!< CPU address of this memory
|
||||
const Device& device_; //!< GPU device
|
||||
|
||||
@@ -152,6 +152,7 @@ Settings::Settings() {
|
||||
imageBufferWar_ = false;
|
||||
disableSdma_ = PAL_DISABLE_SDMA;
|
||||
mallPolicy_ = 0;
|
||||
alwaysResident_ = amd::IS_HIP ? true : ((!flagIsDefault(PAL_ALWAYS_RESIDENT)) ? PAL_ALWAYS_RESIDENT : false);
|
||||
}
|
||||
|
||||
bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
|
||||
@@ -64,7 +64,8 @@ class Settings : public device::Settings {
|
||||
uint enableHwP2P_ : 1; //!< Forces HW P2P path for testing
|
||||
uint imageBufferWar_ : 1; //!< Image buffer workaround for Gfx10
|
||||
uint disableSdma_ : 1; //!< Disable SDMA support
|
||||
uint reserved_ : 8;
|
||||
uint alwaysResident_ : 1; //!< Make resources resident at allocation time
|
||||
uint reserved_ : 7;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
@@ -254,6 +254,9 @@ Pal::Result VirtualGPU::Queue::UpdateAppPowerProfile() {
|
||||
}
|
||||
|
||||
void VirtualGPU::Queue::addCmdMemRef(GpuMemoryReference* mem) {
|
||||
if (gpu_.dev().settings().alwaysResident_) {
|
||||
return;
|
||||
}
|
||||
Pal::IGpuMemory* iMem = mem->iMem();
|
||||
auto it = memReferences_.find(mem);
|
||||
if (it != memReferences_.end()) {
|
||||
|
||||
@@ -202,7 +202,9 @@ release(uint, AMD_SERIALIZE_KERNEL, 0, \
|
||||
"0x2 = Wait for completion after enqueue 0x3 = both") \
|
||||
release(uint, AMD_SERIALIZE_COPY, 0, \
|
||||
"Serialize copies, 0x1 = Wait for completion before enqueue" \
|
||||
"0x2 = Wait for completion after enqueue 0x3 = both")
|
||||
"0x2 = Wait for completion after enqueue 0x3 = both") \
|
||||
release(bool, PAL_ALWAYS_RESIDENT, false, \
|
||||
"Force memory resources to become resident at allocation time")
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user