From 3f79785a96950a80ae8e5432e5eebd89e8d22696 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 8 May 2018 15:47:58 -0400
Subject: [PATCH] P4 to Git Change 1552021 by gandryey@gera-w8 on 2018/05/08
15:00:13
SWDEV-151981 - Removal of CPU support on Windows
- Part 5. Remove runtime logic that considered CPU device support.
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#130 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#92 edit
---
rocclr/runtime/platform/command.cpp | 255 +++++++++++++---------------
rocclr/runtime/platform/context.cpp | 59 +++----
rocclr/runtime/platform/memory.cpp | 15 +-
rocclr/runtime/platform/program.cpp | 2 +-
4 files changed, 140 insertions(+), 191 deletions(-)
diff --git a/rocclr/runtime/platform/command.cpp b/rocclr/runtime/platform/command.cpp
index 6e53636526..eb6dd0580b 100644
--- a/rocclr/runtime/platform/command.cpp
+++ b/rocclr/runtime/platform/command.cpp
@@ -279,28 +279,24 @@ cl_int NativeFnCommand::invoke() {
}
bool OneMemoryArgCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- device::Memory* mem = memory_->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
- return false;
- }
+ device::Memory* mem = memory_->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
+ return false;
}
return true;
}
bool TwoMemoryArgsCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- device::Memory* mem = memory1_->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
- return false;
- }
- mem = memory2_->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
- return false;
- }
+ device::Memory* mem = memory1_->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
+ return false;
+ }
+ mem = memory2_->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
+ return false;
}
return true;
}
@@ -356,74 +352,68 @@ bool MapMemoryCommand::isEntireMemory() const {
}
void UnmapMemoryCommand::releaseResources() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- //! @todo This is a workaround to a deadlock on indirect map release.
- //! Remove this code when CAL will have a refcounter on memory.
- //! decIndMapCount() has to go back to submitUnmapMemory()
- device::Memory* mem = memory_->getDeviceMemory(queue()->device());
- if (NULL != mem) {
- mem->releaseIndirectMap();
- }
+ //! @todo This is a workaround for a deadlock on indirect map release.
+ //! Remove this code once CAL has a refcounter on memory;
+ //! decIndMapCount() then has to go back to submitUnmapMemory().
+ device::Memory* mem = memory_->getDeviceMemory(queue()->device());
+ if (NULL != mem) {
+ mem->releaseIndirectMap();
}
+
OneMemoryArgCommand::releaseResources();
}
bool MigrateMemObjectsCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- for (const auto& it : memObjects_) {
- device::Memory* mem = it->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
- return false;
- }
+ for (const auto& it : memObjects_) {
+ device::Memory* mem = it->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
+ return false;
}
}
-
return true;
}
cl_int NDRangeKernelCommand::validateMemory() {
const amd::Device& device = queue()->device();
- if (device.info().type_ & CL_DEVICE_TYPE_GPU) {
- // Validate the kernel before submission
- if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
- return CL_OUT_OF_RESOURCES;
- }
+ // Validate the kernel before submission
+ if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
+ return CL_OUT_OF_RESOURCES;
+ }
- const amd::KernelSignature& signature = kernel().signature();
- for (uint i = 0; i != signature.numParameters(); ++i) {
- const amd::KernelParameterDescriptor& desc = signature.at(i);
- // Check if it's a memory object
- if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
- amd::Memory* amdMemory;
- if (kernel().parameters().boundToSvmPointer(device, parameters_, i)) {
- // find the real mem object from svm ptr from the list
- amdMemory = amd::SvmManager::FindSvmBuffer(
- *reinterpret_cast(parameters() + desc.offset_));
- } else {
- amdMemory = *reinterpret_cast(parameters() + desc.offset_);
+ const amd::KernelSignature& signature = kernel().signature();
+ for (uint i = 0; i != signature.numParameters(); ++i) {
+ const amd::KernelParameterDescriptor& desc = signature.at(i);
+ // Check if it's a memory object
+ if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
+ amd::Memory* amdMemory;
+ if (kernel().parameters().boundToSvmPointer(device, parameters_, i)) {
+ // find the real mem object from svm ptr from the list
+ amdMemory = amd::SvmManager::FindSvmBuffer(
+ *reinterpret_cast(parameters() + desc.offset_));
+ } else {
+ amdMemory = *reinterpret_cast(parameters() + desc.offset_);
+ }
+ if (amdMemory != NULL) {
+ if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
+ // Make sure argument size isn't bigger than the device limit
+ if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
+ LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
+ return CL_OUT_OF_RESOURCES;
+ }
}
- if (amdMemory != NULL) {
- if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
- // Make sure argument size isn't bigger than the device limit
- if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
- LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
- return CL_OUT_OF_RESOURCES;
- }
- }
- device::Memory* mem = amdMemory->getDeviceMemory(device);
- if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
- if (device.reallocMemory(*amdMemory)) {
- mem = amdMemory->getDeviceMemory(device);
- } else {
- mem = NULL;
- }
- }
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
- return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ device::Memory* mem = amdMemory->getDeviceMemory(device);
+ if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
+ if (device.reallocMemory(*amdMemory)) {
+ mem = amdMemory->getDeviceMemory(device);
+ } else {
+ mem = NULL;
}
}
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
+ return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ }
}
}
}
@@ -432,15 +422,13 @@ cl_int NDRangeKernelCommand::validateMemory() {
bool ExtObjectsCommand::validateMemory() {
bool retVal = true;
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- for (const auto& it : memObjects_) {
- device::Memory* mem = it->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
- return false;
- }
- retVal = processGLResource(mem);
+ for (const auto& it : memObjects_) {
+ device::Memory* mem = it->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
+ return false;
}
+ retVal = processGLResource(mem);
}
return retVal;
}
@@ -454,33 +442,28 @@ bool ReleaseExtObjectsCommand::processGLResource(device::Memory* mem) {
}
bool MakeBuffersResidentCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- for (const auto& it : memObjects_) {
- device::Memory* mem = it->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
- return false;
- }
+ for (const auto& it : memObjects_) {
+ device::Memory* mem = it->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", it->getSize());
+ return false;
}
}
-
return true;
}
+
bool ThreadTraceMemObjectsCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- for (auto it = memObjects_.cbegin(); it != memObjects_.cend(); it++) {
- device::Memory* mem = (*it)->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- for (auto tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) {
- device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device());
- delete tmpMem;
- }
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize());
- return false;
+ for (auto it = memObjects_.cbegin(); it != memObjects_.cend(); it++) {
+ device::Memory* mem = (*it)->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ for (auto tmpIt = memObjects_.cbegin(); tmpIt != it; tmpIt++) {
+ device::Memory* tmpMem = (*tmpIt)->getDeviceMemory(queue()->device());
+ delete tmpMem;
}
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", (*it)->getSize());
+ return false;
}
}
-
return true;
}
@@ -527,56 +510,52 @@ void TransferBufferFileCommand::submit(device::VirtualDevice& device) {
}
bool TransferBufferFileCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- // Check if the destination buffer has direct host access
- if (!(memory_->getMemFlags() &
- (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD))) {
- // Allocate staging buffers
- for (uint i = 0; i < NumStagingBuffers; ++i) {
- staging_[i] = new (memory_->getContext())
- Buffer(memory_->getContext(), StagingBufferMemType, StagingBufferSize);
- if (NULL == staging_[i] || !staging_[i]->create(nullptr)) {
- return false;
- }
- device::Memory* mem = staging_[i]->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate staging buffer - 0x%08X bytes!", staging_[i]->getSize());
- return false;
- }
+ // Check if the destination buffer has direct host access
+ if (!(memory_->getMemFlags() &
+ (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD))) {
+ // Allocate staging buffers
+ for (uint i = 0; i < NumStagingBuffers; ++i) {
+ staging_[i] = new (memory_->getContext())
+ Buffer(memory_->getContext(), StagingBufferMemType, StagingBufferSize);
+ if (NULL == staging_[i] || !staging_[i]->create(nullptr)) {
+ return false;
+ }
+ device::Memory* mem = staging_[i]->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate staging buffer - 0x%08X bytes!", staging_[i]->getSize());
+ return false;
}
}
+ }
- device::Memory* mem = memory_->getDeviceMemory(queue()->device());
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
- return false;
- }
+ device::Memory* mem = memory_->getDeviceMemory(queue()->device());
+ if (NULL == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory_->getSize());
+ return false;
}
return true;
}
bool CopyMemoryP2PCommand::validateMemory() {
- if (queue()->device().info().type_ & CL_DEVICE_TYPE_GPU) {
- const std::vector& devices = memory1_->getContext().devices();
- if (devices.size() != 1) {
- LogError("Can't allocate memory object for P2P extension");
- return false;
- }
- device::Memory* mem = memory1_->getDeviceMemory(*devices[0]);
- if (nullptr == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
- return false;
- }
- const std::vector& devices2 = memory2_->getContext().devices();
- if (devices2.size() != 1) {
- LogError("Can't allocate memory object for P2P extension");
- return false;
- }
- mem = memory2_->getDeviceMemory(*devices2[0]);
- if (nullptr == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
- return false;
- }
+ const std::vector& devices = memory1_->getContext().devices();
+ if (devices.size() != 1) {
+ LogError("Can't allocate memory object for P2P extension");
+ return false;
+ }
+ device::Memory* mem = memory1_->getDeviceMemory(*devices[0]);
+ if (nullptr == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory1_->getSize());
+ return false;
+ }
+ const std::vector& devices2 = memory2_->getContext().devices();
+ if (devices2.size() != 1) {
+ LogError("Can't allocate memory object for P2P extension");
+ return false;
+ }
+ mem = memory2_->getDeviceMemory(*devices2[0]);
+ if (nullptr == mem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memory2_->getSize());
+ return false;
}
return true;
}
diff --git a/rocclr/runtime/platform/context.cpp b/rocclr/runtime/platform/context.cpp
index e05e866c18..17b106e95b 100644
--- a/rocclr/runtime/platform/context.cpp
+++ b/rocclr/runtime/platform/context.cpp
@@ -40,16 +40,10 @@ Context::Context(const std::vector& devices, const Info& info)
}
}
if (svmAllocDevice_.size() > 1) {
- // make sure the CPU is the last device to do allocation.
- if ((svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU)) {
- std::swap(svmAllocDevice_.front(), svmAllocDevice_.back());
- }
-
uint isFirstDeviceFGSEnabled = svmAllocDevice_.front()->isFineGrainedSystem(true);
for (auto& dev : svmAllocDevice_) {
// allocation on fine - grained system incapable device first
- if (isFirstDeviceFGSEnabled && (dev->type() == CL_DEVICE_TYPE_GPU) &&
- (!(dev->isFineGrainedSystem(true)))) {
+ if (isFirstDeviceFGSEnabled && !dev->isFineGrainedSystem(true)) {
std::swap(svmAllocDevice_.front(), dev);
break;
}
@@ -288,42 +282,29 @@ void* Context::svmAlloc(size_t size, size_t alignment, cl_svm_mem_flags flags) {
return NULL;
}
- if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
- return AlignedMemory::allocate(size, alignment);
- } else {
- void* svmPtrAlloced = NULL;
- void* tempPtr = NULL;
-
- amd::ScopedLock lock(&ctxLock_);
- for (const auto& dev : svmAllocDevice_) {
- if (dev->type() == CL_DEVICE_TYPE_GPU) {
- // check if the device support svm platform atomics,
- // skipped allocation for platform atomics if not supported by this device
- if ((flags & CL_MEM_SVM_ATOMICS) &&
- !(dev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) {
- continue;
- }
- svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced);
- if (svmPtrAlloced == NULL) {
- return NULL;
- }
- }
- }
- return svmPtrAlloced;
- }
-}
-
-void Context::svmFree(void* ptr) const {
- if (svmAllocDevice_.front()->type() == CL_DEVICE_TYPE_CPU) {
- AlignedMemory::deallocate(ptr);
- return;
- }
+ void* svmPtrAlloced = NULL;
+ void* tempPtr = NULL;
amd::ScopedLock lock(&ctxLock_);
for (const auto& dev : svmAllocDevice_) {
- if (dev->type() == CL_DEVICE_TYPE_GPU) {
- dev->svmFree(ptr);
+ // Check whether the device supports SVM platform atomics;
+ // skip the platform-atomics allocation on devices that do not support it.
+ if ((flags & CL_MEM_SVM_ATOMICS) &&
+ !(dev->info().svmCapabilities_ & CL_DEVICE_SVM_ATOMICS)) {
+ continue;
}
+ svmPtrAlloced = dev->svmAlloc(*this, size, alignment, flags, svmPtrAlloced);
+ if (svmPtrAlloced == NULL) {
+ return NULL;
+ }
+ }
+ return svmPtrAlloced;
+}
+
+void Context::svmFree(void* ptr) const {
+ amd::ScopedLock lock(&ctxLock_);
+ for (const auto& dev : svmAllocDevice_) {
+ dev->svmFree(ptr);
}
return;
}
diff --git a/rocclr/runtime/platform/memory.cpp b/rocclr/runtime/platform/memory.cpp
index 90a2432212..c3a5b119b4 100644
--- a/rocclr/runtime/platform/memory.cpp
+++ b/rocclr/runtime/platform/memory.cpp
@@ -156,14 +156,6 @@ bool Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
const std::vector& devices = context_().devices();
- // Find if a non GPU device was created with the context
- for (size_t i = 0; i < devices.size(); i++) {
- if (!(devices[i]->info().type_ & CL_DEVICE_TYPE_GPU)) {
- allocHostMem = true;
- break;
- }
- }
-
// This allocation is necessary to use coherency mechanism
// for the initialization
if (getMemFlags() & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) {
@@ -243,11 +235,8 @@ bool Memory::create(void* initFrom, bool sysMemAlloc) {
for (size_t i = 0; i < devices.size(); i++) {
deviceAlloced_[devices[i]] = AllocInit;
- // Only GPU devices have device memory objects
- if (devices[i]->info().type_ & CL_DEVICE_TYPE_GPU) {
- deviceMemories_[i].ref_ = devices[i];
- deviceMemories_[i].value_ = NULL;
- }
+ deviceMemories_[i].ref_ = devices[i];
+ deviceMemories_[i].value_ = NULL;
if (DISABLE_DEFERRED_ALLOC) {
device::Memory* mem = getDeviceMemory(*devices[i]);
diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp
index ffcd627b3e..6338f444ad 100644
--- a/rocclr/runtime/platform/program.cpp
+++ b/rocclr/runtime/platform/program.cpp
@@ -605,7 +605,7 @@ bool Program::ParseAllOptions(const std::string& options, option::Options& parse
bool Symbol::setDeviceKernel(const Device& device, const device::Kernel* func, bool noAlias) {
// FIXME_lmoriche: check that the signatures are compatible
- if (deviceKernels_.size() == 0 || device.type() == CL_DEVICE_TYPE_CPU) {
+ if (deviceKernels_.size() == 0) {
signature_ = func->signature();
}