From b925842e4ebb3df5ab39478d960dbd33def59f5d Mon Sep 17 00:00:00 2001
From: foreman
Date: Mon, 31 Oct 2016 10:47:23 -0400
Subject: [PATCH] P4 to Git Change 1333973 by rili@rili-opencl-pal-stg on
2016/10/31 10:41:30
SWDEV-95903 - Implement the SVM feature on PAL.
1. Implement SVM Coarse Grain
2. Implement SVM Fine Grain Buffer
Review: http://ocltc.amd.com/reviews/r/11706/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#36 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#259 edit
---
rocclr/runtime/device/pal/paldevice.cpp | 4 ++
rocclr/runtime/device/pal/palresource.cpp | 56 +++++++++++++++++++++++
rocclr/runtime/device/pal/palresource.hpp | 4 ++
rocclr/runtime/device/pal/palsettings.cpp | 3 ++
rocclr/runtime/device/pal/palvirtual.cpp | 3 +-
rocclr/runtime/utils/flags.hpp | 2 +-
6 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index d8afae94e6..824f713afb 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -1063,8 +1063,12 @@ Device::init()
info.flags.disableGpuTimeout = true;
#ifdef ATI_BITS_32
info.flags.force32BitVaSpace = true;
+ info.flags.enableSvmMode = false;
+#else
+ info.flags.enableSvmMode = true;
#endif
info.pSettingsPath = "OCL";
+ info.maxSvmSize = static_cast(OCL_SET_SVM_SIZE * Mi);
// PAL init
if (Pal::Result::Success !=
diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp
index 56691f4dee..cb9ce1ecb6 100644
--- a/rocclr/runtime/device/pal/palresource.cpp
+++ b/rocclr/runtime/device/pal/palresource.cpp
@@ -80,6 +80,32 @@ GpuMemoryReference::Create(
return memRef;
}
+GpuMemoryReference*  // SVM overload: builds a reference from a Pal::SvmGpuMemoryCreateInfo; returns nullptr on failure
+GpuMemoryReference::Create(
+ const Device& dev,  // device whose PAL interface (iDev()) performs the allocation
+ const Pal::SvmGpuMemoryCreateInfo& createInfo)  // SVM allocation request; its size field is also used for the counter update below
+{
+ Pal::Result result;
+ size_t gpuMemSize = dev.iDev()->GetSvmGpuMemorySize(createInfo, &result);  // presumably the storage PAL needs for the memory object; status is returned through result
+ if (result != Pal::Result::Success) {
+ return nullptr;
+ }
+
+ GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference();  // gpuMemSize goes to a class-specific operator new (not shown in this hunk) - presumably reserves trailing storage
+ if (memRef != nullptr) {
+ result = dev.iDev()->CreateSvmGpuMemory(createInfo,
+ &memRef[1], &memRef->gpuMem_);  // &memRef[1] (address just past the header object) is passed as placement; the resulting interface pointer lands in gpuMem_
+ if (result != Pal::Result::Success) {
+ memRef->release();  // give up the reference created above before reporting failure
+ return nullptr;
+ }
+ }
+ // Update free memory size counters. NOTE(review): this is reached even when memRef is nullptr (the null check above does not return) - confirm that is intended
+ const_cast(dev).updateFreeMemory(  // NOTE(review): const_cast seems to have lost its template argument in this copy of the patch (likely <Device&>) - confirm against the repository
+ Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size, false);  // accounted against GpuHeapGartCacheable; the meaning of the trailing false is defined by Device::updateFreeMemory (not shown)
+ return memRef;
+}
+
GpuMemoryReference*
GpuMemoryReference::Create(
const Device& dev,
@@ -997,6 +1023,36 @@ Resource::create(MemoryType memType, CreateParams* params)
return true;
}
+ if ((nullptr != params) &&
+ (nullptr != params->owner_) &&
+ (nullptr != params->owner_->getSvmPtr())) {
+ // @todo 64K alignment is too big
+ uint allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment);
+ if (memoryType() == Remote) {
+ Pal::SvmGpuMemoryCreateInfo createInfo = {};
+ createInfo.size = allocSize;
+ createInfo.alignment = MaxGpuAlignment;
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ }
+ else {
+ Pal::GpuMemoryCreateInfo createInfo = {};
+ createInfo.size = allocSize;
+ createInfo.alignment = MaxGpuAlignment;
+ createInfo.vaRange = Pal::VaRange::Svm;
+ createInfo.priority = Pal::GpuMemPriority::Normal;
+ memTypeToHeap(&createInfo);
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ }
+ if (nullptr == memRef_) {
+ LogError("Failed PAL memory allocation!");
+ return false;
+ }
+ desc_.cardMemory_ = false;
+ desc_.SVMRes_ = true;
+ params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr));
+ return true;
+ }
+
Pal::GpuMemoryCreateInfo createInfo = {};
createInfo.size = desc().width_ * elementSize_;
// @todo 64K alignment is too big
diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp
index ca489a6d69..f4343ec7e9 100644
--- a/rocclr/runtime/device/pal/palresource.hpp
+++ b/rocclr/runtime/device/pal/palresource.hpp
@@ -28,6 +28,10 @@ public:
const Device& dev,
const Pal::PinnedGpuMemoryCreateInfo& createInfo);
+ static GpuMemoryReference* Create(
+ const Device& dev,
+ const Pal::SvmGpuMemoryCreateInfo& createInfo);
+
static GpuMemoryReference* Create(
const Device& dev,
const Pal::ExternalResourceOpenInfo& openInfo);
diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp
index fee8716d74..12ff991110 100644
--- a/rocclr/runtime/device/pal/palsettings.cpp
+++ b/rocclr/runtime/device/pal/palsettings.cpp
@@ -74,6 +74,9 @@ Settings::Settings()
// GPU device by default
apuSystem_ = false;
+ // Fine-Grained System is disabled by default
+ svmFineGrainSystem_ = false;
+
// Disable 64 bit pointers support by default
use64BitPtr_ = false;
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index f92ba673cc..407cccd3bf 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -3192,8 +3192,7 @@ VirtualGPU::processMemObjectsHSA(
svmMem = amd::SvmManager::FindSvmBuffer(
*reinterpret_cast(params + desc.offset_));
if (!svmMem) {
- Unimplemented();
- //flushCUCaches();
+ flushCUCaches();
// Clear memory dependency state
const static bool All = true;
memoryDependency().clear(!All);
diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp
index 587dc71c41..de052b77d7 100644
--- a/rocclr/runtime/utils/flags.hpp
+++ b/rocclr/runtime/utils/flags.hpp
@@ -171,7 +171,7 @@ release(int, AMD_GPU_FORCE_SINGLE_FP_DENORM, -1, \
"Force denorm for single precision: -1 - don't force, 0 - disable, 1 - enable") \
debug(bool, OCL_FORCE_CPU_SVM, false, \
"force svm support for CPU") \
-release(uint, OCL_SET_SVM_SIZE, 4096, \
+release(uint, OCL_SET_SVM_SIZE, 16384, \
"set SVM space size for discrete GPU") \
debug(uint, OCL_SYSMEM_REQUIREMENT, 2, \
"Use flag to change the minimum requirement of system memory not to downgrade") \