From fd627bea6d394ace3df6de65faec7df726dab1f0 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 24 May 2018 18:12:49 -0400
Subject: [PATCH] P4 to Git Change 1559366 by gandryey@gera-w8 on 2018/05/24
18:06:45
SWDEV-79445 - OCL generic changes and code clean-up
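- Combine validateMemory() and argument capture() into a single function: rename NDRangeKernelCommand::validateMemory() to captureAndValidate().
- KernelParameters::capture() now takes a cl_int* error out-parameter and reports CL_MEM_OBJECT_ALLOCATION_FAILURE or CL_OUT_OF_HOST_MEMORY through it.
- The multi-device constant-buffer size check and the reallocMemory() retry previously done in validateMemory() are removed.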
http://ocltc.amd.com/reviews/r/14964/
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#22 edit
[ROCm/clr commit: 73dd22dba8e10e58727dbd996d58f30e720593d5]
---
.../clr/rocclr/runtime/platform/command.cpp | 48 ++-----------------
.../clr/rocclr/runtime/platform/command.hpp | 2 +-
.../clr/rocclr/runtime/platform/kernel.cpp | 14 +++++-
.../clr/rocclr/runtime/platform/kernel.hpp | 2 +-
4 files changed, 18 insertions(+), 48 deletions(-)
diff --git a/projects/clr/rocclr/runtime/platform/command.cpp b/projects/clr/rocclr/runtime/platform/command.cpp
index 7f61b09b5e..7e5a620ac2 100644
--- a/projects/clr/rocclr/runtime/platform/command.cpp
+++ b/projects/clr/rocclr/runtime/platform/command.cpp
@@ -387,56 +387,16 @@ bool MigrateMemObjectsCommand::validateMemory() {
return true;
}
-cl_int NDRangeKernelCommand::validateMemory() {
+cl_int NDRangeKernelCommand::captureAndValidate() {
const amd::Device& device = queue()->device();
// Validate the kernel before submission
if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
return CL_OUT_OF_RESOURCES;
}
- // Runtime disables deferred memory allocation for single device.
- // Hence ignore memory validations
- if (queue()->context().devices().size() > 1) {
- amd::Memory* const* memories = reinterpret_cast(
- kernel().parameters().values() + kernel().parameters().memoryObjOffset());
-
- const amd::KernelSignature& signature = kernel().signature();
- for (uint i = 0; i != signature.numParameters(); ++i) {
- const amd::KernelParameterDescriptor& desc = signature.at(i);
- // Check if it's a memory object
- if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
- amd::Memory* amdMemory = memories[desc.info_.arrayIndex_];
- if (amdMemory != NULL) {
- if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
- // Make sure argument size isn't bigger than the device limit
- if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
- LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
- return CL_OUT_OF_RESOURCES;
- }
- }
- device::Memory* mem = amdMemory->getDeviceMemory(device);
- if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
- if (device.reallocMemory(*amdMemory)) {
- mem = amdMemory->getDeviceMemory(device);
- } else {
- mem = NULL;
- }
- }
- if (NULL == mem) {
- LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
- return CL_MEM_OBJECT_ALLOCATION_FAILURE;
- }
- }
- }
- }
- }
-
- parameters_ = kernel().parameters().capture(device);
- if (nullptr == parameters_) {
- return CL_OUT_OF_HOST_MEMORY;
- }
-
- return CL_SUCCESS;
+ cl_int error;
+ parameters_ = kernel().parameters().capture(device, &error);
+ return error;
}
bool ExtObjectsCommand::validateMemory() {
diff --git a/projects/clr/rocclr/runtime/platform/command.hpp b/projects/clr/rocclr/runtime/platform/command.hpp
index 424340e3c0..e979112ada 100644
--- a/projects/clr/rocclr/runtime/platform/command.hpp
+++ b/projects/clr/rocclr/runtime/platform/command.hpp
@@ -775,7 +775,7 @@ class NDRangeKernelCommand : public Command {
//! Set the local work size.
void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; }
- cl_int validateMemory();
+ cl_int captureAndValidate();
};
class NativeFnCommand : public Command {
diff --git a/projects/clr/rocclr/runtime/platform/kernel.cpp b/projects/clr/rocclr/runtime/platform/kernel.cpp
index 8d0005a727..17d093b516 100644
--- a/projects/clr/rocclr/runtime/platform/kernel.cpp
+++ b/projects/clr/rocclr/runtime/platform/kernel.cpp
@@ -132,7 +132,8 @@ void KernelParameters::set(size_t index, size_t size, const void* value, bool sv
desc.info_.defined_ = true;
}
-address KernelParameters::capture(const Device& device) {
+address KernelParameters::capture(const Device& device, cl_int* error) {
+ *error = CL_SUCCESS;
//! Information about which arguments are SVM pointers is stored after
// the actual parameters, but only if the device has any SVM capability
const size_t execInfoSize = getNumberOfSvmPtr() * sizeof(void*);
@@ -149,10 +150,17 @@ address KernelParameters::capture(const Device& device) {
Memory* memArg = memoryObjects_[desc.info_.arrayIndex_];
if (memArg != nullptr) {
memArg->retain();
+ device::Memory* devMem = memArg->getDeviceMemory(device);
+ if (nullptr == devMem) {
+ LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memArg->getSize());
+ *error = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+ AlignedMemory::deallocate(mem);
+ return nullptr;
+ }
// Write GPU VA addreess to the arguments
if (!desc.info_.rawPointer_) {
*reinterpret_cast(mem + desc.offset_) = static_cast
- (memArg->getDeviceMemory(device)->virtualAddress());
+ (devMem->virtualAddress());
}
} else if (desc.info_.rawPointer_) {
if (!device.isFineGrainedSystem(true)) {
@@ -181,6 +189,8 @@ address KernelParameters::capture(const Device& device) {
if (0 != execInfoSize) {
::memcpy(last, &execSvmPtr_[0], execInfoSize);
}
+ } else {
+ *error = CL_OUT_OF_HOST_MEMORY;
}
return mem;
diff --git a/projects/clr/rocclr/runtime/platform/kernel.hpp b/projects/clr/rocclr/runtime/platform/kernel.hpp
index d18187784d..838c5d7198 100644
--- a/projects/clr/rocclr/runtime/platform/kernel.hpp
+++ b/projects/clr/rocclr/runtime/platform/kernel.hpp
@@ -171,7 +171,7 @@ class KernelParameters : protected HeapObject {
size_t localMemSize(size_t minDataTypeAlignment) const;
//! Capture the state of the parameters and return the stack base pointer.
- address capture(const Device& device);
+ address capture(const Device& device, cl_int* error);
//! Release the captured state of the parameters.
void release(address parameters, const amd::Device& device) const;