Move kernel-argument memory validation from NDRangeKernelCommand into
KernelParameters::capture(), which now reports its status through a cl_int
out-parameter. NDRangeKernelCommand::validateMemory() is renamed to
captureAndValidate().

NOTE(review): this patch was recovered from text whose line breaks were
collapsed and whose template arguments were missing. The arguments of
reinterpret_cast/static_cast and the indentation of context lines below are
reconstructed -- confirm against the upstream commit before applying.
NOTE(review): in KernelParameters::capture(), memArg->retain() runs before the
new null check, and the early-return path shown here has no matching release
for this or previously retained objects -- looks like a reference leak on
failure; confirm against the lines not shown in the hunks.
NOTE(review): "0x%08X" is used with memArg->getSize(); presumably size_t, in
which case the specifier is too narrow on 64-bit targets -- verify.
NOTE(review): captureAndValidate() returns an uninitialised local unless
capture() writes *error on every path; the hunks show it doing so first thing.

diff --git a/projects/clr/rocclr/runtime/platform/command.cpp b/projects/clr/rocclr/runtime/platform/command.cpp
index 7f61b09b5e..7e5a620ac2 100644
--- a/projects/clr/rocclr/runtime/platform/command.cpp
+++ b/projects/clr/rocclr/runtime/platform/command.cpp
@@ -387,56 +387,16 @@ bool MigrateMemObjectsCommand::validateMemory() {
   return true;
 }
 
-cl_int NDRangeKernelCommand::validateMemory() {
+cl_int NDRangeKernelCommand::captureAndValidate() {
   const amd::Device& device = queue()->device();
   // Validate the kernel before submission
   if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
     return CL_OUT_OF_RESOURCES;
   }
 
-  // Runtime disables deferred memory allocation for single device.
-  // Hence ignore memory validations
-  if (queue()->context().devices().size() > 1) {
-    amd::Memory* const* memories = reinterpret_cast<amd::Memory* const*>(
-        kernel().parameters().values() + kernel().parameters().memoryObjOffset());
-
-    const amd::KernelSignature& signature = kernel().signature();
-    for (uint i = 0; i != signature.numParameters(); ++i) {
-      const amd::KernelParameterDescriptor& desc = signature.at(i);
-      // Check if it's a memory object
-      if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
-        amd::Memory* amdMemory = memories[desc.info_.arrayIndex_];
-        if (amdMemory != NULL) {
-          if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
-            // Make sure argument size isn't bigger than the device limit
-            if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
-              LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
-              return CL_OUT_OF_RESOURCES;
-            }
-          }
-          device::Memory* mem = amdMemory->getDeviceMemory(device);
-          if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
-            if (device.reallocMemory(*amdMemory)) {
-              mem = amdMemory->getDeviceMemory(device);
-            } else {
-              mem = NULL;
-            }
-          }
-          if (NULL == mem) {
-            LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
-            return CL_MEM_OBJECT_ALLOCATION_FAILURE;
-          }
-        }
-      }
-    }
-  }
-
-  parameters_ = kernel().parameters().capture(device);
-  if (nullptr == parameters_) {
-    return CL_OUT_OF_HOST_MEMORY;
-  }
-
-  return CL_SUCCESS;
+  cl_int error;
+  parameters_ = kernel().parameters().capture(device, &error);
+  return error;
 }
 
 bool ExtObjectsCommand::validateMemory() {
diff --git a/projects/clr/rocclr/runtime/platform/command.hpp b/projects/clr/rocclr/runtime/platform/command.hpp
index 424340e3c0..e979112ada 100644
--- a/projects/clr/rocclr/runtime/platform/command.hpp
+++ b/projects/clr/rocclr/runtime/platform/command.hpp
@@ -775,7 +775,7 @@ class NDRangeKernelCommand : public Command {
   //! Set the local work size.
   void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; }
 
-  cl_int validateMemory();
+  cl_int captureAndValidate();
 };
 
 class NativeFnCommand : public Command {
diff --git a/projects/clr/rocclr/runtime/platform/kernel.cpp b/projects/clr/rocclr/runtime/platform/kernel.cpp
index 8d0005a727..17d093b516 100644
--- a/projects/clr/rocclr/runtime/platform/kernel.cpp
+++ b/projects/clr/rocclr/runtime/platform/kernel.cpp
@@ -132,7 +132,8 @@ void KernelParameters::set(size_t index, size_t size, const void* value, bool sv
   desc.info_.defined_ = true;
 }
 
-address KernelParameters::capture(const Device& device) {
+address KernelParameters::capture(const Device& device, cl_int* error) {
+  *error = CL_SUCCESS;
   //! Information about which arguments are SVM pointers is stored after
   // the actual parameters, but only if the device has any SVM capability
   const size_t execInfoSize = getNumberOfSvmPtr() * sizeof(void*);
@@ -149,10 +150,17 @@ address KernelParameters::capture(const Device& device) {
         Memory* memArg = memoryObjects_[desc.info_.arrayIndex_];
         if (memArg != nullptr) {
           memArg->retain();
+          device::Memory* devMem = memArg->getDeviceMemory(device);
+          if (nullptr == devMem) {
+            LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memArg->getSize());
+            *error = CL_MEM_OBJECT_ALLOCATION_FAILURE;
+            AlignedMemory::deallocate(mem);
+            return nullptr;
+          }
           // Write GPU VA addreess to the arguments
           if (!desc.info_.rawPointer_) {
             *reinterpret_cast<uintptr_t*>(mem + desc.offset_) = static_cast<uintptr_t>
-              (memArg->getDeviceMemory(device)->virtualAddress());
+              (devMem->virtualAddress());
           }
         } else if (desc.info_.rawPointer_) {
           if (!device.isFineGrainedSystem(true)) {
@@ -181,6 +189,8 @@ address KernelParameters::capture(const Device& device) {
     if (0 != execInfoSize) {
       ::memcpy(last, &execSvmPtr_[0], execInfoSize);
     }
+  } else {
+    *error = CL_OUT_OF_HOST_MEMORY;
   }
 
   return mem;
diff --git a/projects/clr/rocclr/runtime/platform/kernel.hpp b/projects/clr/rocclr/runtime/platform/kernel.hpp
index d18187784d..838c5d7198 100644
--- a/projects/clr/rocclr/runtime/platform/kernel.hpp
+++ b/projects/clr/rocclr/runtime/platform/kernel.hpp
@@ -171,7 +171,7 @@ class KernelParameters : protected HeapObject {
   size_t localMemSize(size_t minDataTypeAlignment) const;
 
   //! Capture the state of the parameters and return the stack base pointer.
-  address capture(const Device& device);
+  address capture(const Device& device, cl_int* error);
 
   //! Release the captured state of the parameters.
   void release(address parameters, const amd::Device& device) const;