From b925842e4ebb3df5ab39478d960dbd33def59f5d Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 31 Oct 2016 10:47:23 -0400 Subject: [PATCH] P4 to Git Change 1333973 by rili@rili-opencl-pal-stg on 2016/10/31 10:41:30 SWDEV-95903 - Implement SVM on PAL feature. 1. Implement SVM Coarse Grain 2. Implement SVM Fine Grain Buffer Review:http://ocltc.amd.com/reviews/r/11706/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#17 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#36 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#259 edit --- rocclr/runtime/device/pal/paldevice.cpp | 4 ++ rocclr/runtime/device/pal/palresource.cpp | 56 +++++++++++++++++++++++ rocclr/runtime/device/pal/palresource.hpp | 4 ++ rocclr/runtime/device/pal/palsettings.cpp | 3 ++ rocclr/runtime/device/pal/palvirtual.cpp | 3 +- rocclr/runtime/utils/flags.hpp | 2 +- 6 files changed, 69 insertions(+), 3 deletions(-) diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp index d8afae94e6..824f713afb 100644 --- a/rocclr/runtime/device/pal/paldevice.cpp +++ b/rocclr/runtime/device/pal/paldevice.cpp @@ -1063,8 +1063,12 @@ Device::init() info.flags.disableGpuTimeout = true; #ifdef ATI_BITS_32 info.flags.force32BitVaSpace = true; + info.flags.enableSvmMode = false; +#else + info.flags.enableSvmMode = true; #endif info.pSettingsPath = "OCL"; + info.maxSvmSize = static_cast<Pal::gpusize>(OCL_SET_SVM_SIZE * Mi); // PAL init if (Pal::Result::Success != diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp index 56691f4dee..cb9ce1ecb6 100644 --- a/rocclr/runtime/device/pal/palresource.cpp +++ 
b/rocclr/runtime/device/pal/palresource.cpp @@ -80,6 +80,32 @@ GpuMemoryReference::Create( return memRef; } +GpuMemoryReference* +GpuMemoryReference::Create( + const Device& dev, + const Pal::SvmGpuMemoryCreateInfo& createInfo) +{ + Pal::Result result; + size_t gpuMemSize = dev.iDev()->GetSvmGpuMemorySize(createInfo, &result); + if (result != Pal::Result::Success) { + return nullptr; + } + + GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(); + if (memRef != nullptr) { + result = dev.iDev()->CreateSvmGpuMemory(createInfo, + &memRef[1], &memRef->gpuMem_); + if (result != Pal::Result::Success) { + memRef->release(); + return nullptr; + } + } + // Update free memory size counters + const_cast<Device&>(dev).updateFreeMemory( + Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size, false); + return memRef; +} + GpuMemoryReference* GpuMemoryReference::Create( const Device& dev, @@ -997,6 +1023,36 @@ Resource::create(MemoryType memType, CreateParams* params) return true; } + if ((nullptr != params) && + (nullptr != params->owner_) && + (nullptr != params->owner_->getSvmPtr())) { + // @todo 64K alignment is too big + uint allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment); + if (memoryType() == Remote) { + Pal::SvmGpuMemoryCreateInfo createInfo = {}; + createInfo.size = allocSize; + createInfo.alignment = MaxGpuAlignment; + memRef_ = GpuMemoryReference::Create(dev(), createInfo); + } + else { + Pal::GpuMemoryCreateInfo createInfo = {}; + createInfo.size = allocSize; + createInfo.alignment = MaxGpuAlignment; + createInfo.vaRange = Pal::VaRange::Svm; + createInfo.priority = Pal::GpuMemPriority::Normal; + memTypeToHeap(&createInfo); + memRef_ = GpuMemoryReference::Create(dev(), createInfo); + } + if (nullptr == memRef_) { + LogError("Failed PAL memory allocation!"); + return false; + } + desc_.cardMemory_ = false; + desc_.SVMRes_ = true; + params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr)); + return true; + } + 
Pal::GpuMemoryCreateInfo createInfo = {}; createInfo.size = desc().width_ * elementSize_; // @todo 64K alignment is too big diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp index ca489a6d69..f4343ec7e9 100644 --- a/rocclr/runtime/device/pal/palresource.hpp +++ b/rocclr/runtime/device/pal/palresource.hpp @@ -28,6 +28,10 @@ public: const Device& dev, const Pal::PinnedGpuMemoryCreateInfo& createInfo); + static GpuMemoryReference* Create( + const Device& dev, + const Pal::SvmGpuMemoryCreateInfo& createInfo); + static GpuMemoryReference* Create( const Device& dev, const Pal::ExternalResourceOpenInfo& openInfo); diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp index fee8716d74..12ff991110 100644 --- a/rocclr/runtime/device/pal/palsettings.cpp +++ b/rocclr/runtime/device/pal/palsettings.cpp @@ -74,6 +74,9 @@ Settings::Settings() // GPU device by default apuSystem_ = false; + // Fine-Grained System is disabled by default + svmFineGrainSystem_ = false; + // Disable 64 bit pointers support by default use64BitPtr_ = false; diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp index f92ba673cc..407cccd3bf 100644 --- a/rocclr/runtime/device/pal/palvirtual.cpp +++ b/rocclr/runtime/device/pal/palvirtual.cpp @@ -3192,8 +3192,7 @@ VirtualGPU::processMemObjectsHSA( svmMem = amd::SvmManager::FindSvmBuffer( *reinterpret_cast<void* const*>(params + desc.offset_)); if (!svmMem) { - Unimplemented(); - //flushCUCaches(); + flushCUCaches(); // Clear memory dependency state const static bool All = true; memoryDependency().clear(!All); diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp index 587dc71c41..de052b77d7 100644 --- a/rocclr/runtime/utils/flags.hpp +++ b/rocclr/runtime/utils/flags.hpp @@ -171,7 +171,7 @@ release(int, AMD_GPU_FORCE_SINGLE_FP_DENORM, -1, \ "Force denorm for single precision: -1 - don't force, 0 - disable, 1 - 
enable") \ debug(bool, OCL_FORCE_CPU_SVM, false, \ "force svm support for CPU") \ -release(uint, OCL_SET_SVM_SIZE, 4096, \ +release(uint, OCL_SET_SVM_SIZE, 16384, \ "set SVM space size for discrete GPU") \ debug(uint, OCL_SYSMEM_REQUIREMENT, 2, \ "Use flag to change the minimum requirement of system memory not to downgrade") \