P4 to Git Change 1559366 by gandryey@gera-w8 on 2018/05/24 18:06:45

SWDEV-79445 - OCL generic changes and code clean-up

- Combine validateMemory() and arguments capture() under a single function.
- Rename validateMemory() in the NDRangeKernelCommand class to captureAndValidate().

Review: http://ocltc.amd.com/reviews/r/14964/

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#22 edit

[ROCm/clr commit: 73dd22dba8]
2018-05-24 18:12:49 -04:00
@@ -387,56 +387,16 @@ bool MigrateMemObjectsCommand::validateMemory() {
  return true;
 }

-cl_int NDRangeKernelCommand::validateMemory() {
+cl_int NDRangeKernelCommand::captureAndValidate() {
  const amd::Device& device = queue()->device();
  // Validate the kernel before submission
  if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
    return CL_OUT_OF_RESOURCES;
  }

-  // Runtime disables deferred memory allocation for single device.
-  // Hence ignore memory validations
-  if (queue()->context().devices().size() > 1) {
-    amd::Memory* const* memories = reinterpret_cast<amd::Memory* const*>(
-      kernel().parameters().values() + kernel().parameters().memoryObjOffset());
-
-    const amd::KernelSignature& signature = kernel().signature();
-    for (uint i = 0; i != signature.numParameters(); ++i) {
-      const amd::KernelParameterDescriptor& desc = signature.at(i);
-      // Check if it's a memory object
-      if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
-        amd::Memory* amdMemory = memories[desc.info_.arrayIndex_];
-        if (amdMemory != NULL) {
-          if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
-            // Make sure argument size isn't bigger than the device limit
-            if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
-              LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
-              return CL_OUT_OF_RESOURCES;
-            }
-          }
-          device::Memory* mem = amdMemory->getDeviceMemory(device);
-          if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
-            if (device.reallocMemory(*amdMemory)) {
-              mem = amdMemory->getDeviceMemory(device);
-            } else {
-              mem = NULL;
-            }
-          }
-          if (NULL == mem) {
-            LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
-            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
-          }
-        }
-      }
-    }
-  }
-
-  parameters_ = kernel().parameters().capture(device);
-  if (nullptr == parameters_) {
-    return CL_OUT_OF_HOST_MEMORY;
-  }
-
-  return CL_SUCCESS;
+  cl_int error;
+  parameters_ = kernel().parameters().capture(device, &error);
+  return error;
 }

 bool ExtObjectsCommand::validateMemory() {
@@ -775,7 +775,7 @@ class NDRangeKernelCommand : public Command {
  //! Set the local work size.
  void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; }

-  cl_int validateMemory();
+  cl_int captureAndValidate();
 };

 class NativeFnCommand : public Command {
@@ -132,7 +132,8 @@ void KernelParameters::set(size_t index, size_t size, const void* value, bool sv
  desc.info_.defined_ = true;
 }

-address KernelParameters::capture(const Device& device) {
+address KernelParameters::capture(const Device& device, cl_int* error) {
+  *error = CL_SUCCESS;
  //! Information about which arguments are SVM pointers is stored after
  // the actual parameters, but only if the device has any SVM capability
  const size_t execInfoSize = getNumberOfSvmPtr() * sizeof(void*);
@@ -149,10 +150,17 @@ address KernelParameters::capture(const Device& device) {
        Memory* memArg = memoryObjects_[desc.info_.arrayIndex_];
        if (memArg != nullptr) {
          memArg->retain();
+          device::Memory* devMem = memArg->getDeviceMemory(device);
+          if (nullptr == devMem) {
+            LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memArg->getSize());
+            *error = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+            AlignedMemory::deallocate(mem);
+            return nullptr;
+          }
          // Write GPU VA addreess to the arguments
          if (!desc.info_.rawPointer_) {
            *reinterpret_cast<uintptr_t*>(mem + desc.offset_) = static_cast<uintptr_t>
-              (memArg->getDeviceMemory(device)->virtualAddress());
+              (devMem->virtualAddress());
          }
        } else if (desc.info_.rawPointer_) {
          if (!device.isFineGrainedSystem(true)) {
@@ -181,6 +189,8 @@ address KernelParameters::capture(const Device& device) {
    if (0 != execInfoSize) {
      ::memcpy(last, &execSvmPtr_[0], execInfoSize);
    }
+  } else {
+    *error = CL_OUT_OF_HOST_MEMORY;
  }

  return mem;
@@ -171,7 +171,7 @@ class KernelParameters : protected HeapObject {
  size_t localMemSize(size_t minDataTypeAlignment) const;

  //! Capture the state of the parameters and return the stack base pointer.
-  address capture(const Device& device);
+  address capture(const Device& device, cl_int* error);
  //! Release the captured state of the parameters.
  void release(address parameters, const amd::Device& device) const;