P4 to Git Change 1559366 by gandryey@gera-w8 on 2018/05/24 18:06:45
SWDEV-79445 - OCL generic changes and code clean-up
- Combine validateMemory() and argument capture() into a single function. Rename validateMemory() in the NDRangeKernelCommand class to captureAndValidate().
http://ocltc.amd.com/reviews/r/14964/
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#22 edit
[ROCm/clr commit: 73dd22dba8]
このコミットが含まれているのは:
@@ -387,56 +387,16 @@ bool MigrateMemObjectsCommand::validateMemory() {
|
||||
return true;
|
||||
}
|
||||
|
||||
cl_int NDRangeKernelCommand::validateMemory() {
|
||||
cl_int NDRangeKernelCommand::captureAndValidate() {
|
||||
const amd::Device& device = queue()->device();
|
||||
// Validate the kernel before submission
|
||||
if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
// Runtime disables deferred memory allocation for single device.
|
||||
// Hence ignore memory validations
|
||||
if (queue()->context().devices().size() > 1) {
|
||||
amd::Memory* const* memories = reinterpret_cast<amd::Memory* const*>(
|
||||
kernel().parameters().values() + kernel().parameters().memoryObjOffset());
|
||||
|
||||
const amd::KernelSignature& signature = kernel().signature();
|
||||
for (uint i = 0; i != signature.numParameters(); ++i) {
|
||||
const amd::KernelParameterDescriptor& desc = signature.at(i);
|
||||
// Check if it's a memory object
|
||||
if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
|
||||
amd::Memory* amdMemory = memories[desc.info_.arrayIndex_];
|
||||
if (amdMemory != NULL) {
|
||||
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
|
||||
// Make sure argument size isn't bigger than the device limit
|
||||
if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
|
||||
LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
device::Memory* mem = amdMemory->getDeviceMemory(device);
|
||||
if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
|
||||
if (device.reallocMemory(*amdMemory)) {
|
||||
mem = amdMemory->getDeviceMemory(device);
|
||||
} else {
|
||||
mem = NULL;
|
||||
}
|
||||
}
|
||||
if (NULL == mem) {
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
|
||||
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parameters_ = kernel().parameters().capture(device);
|
||||
if (nullptr == parameters_) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
cl_int error;
|
||||
parameters_ = kernel().parameters().capture(device, &error);
|
||||
return error;
|
||||
}
|
||||
|
||||
bool ExtObjectsCommand::validateMemory() {
|
||||
|
||||
@@ -775,7 +775,7 @@ class NDRangeKernelCommand : public Command {
|
||||
//! Set the local work size.
|
||||
void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; }
|
||||
|
||||
cl_int validateMemory();
|
||||
cl_int captureAndValidate();
|
||||
};
|
||||
|
||||
class NativeFnCommand : public Command {
|
||||
|
||||
@@ -132,7 +132,8 @@ void KernelParameters::set(size_t index, size_t size, const void* value, bool sv
|
||||
desc.info_.defined_ = true;
|
||||
}
|
||||
|
||||
address KernelParameters::capture(const Device& device) {
|
||||
address KernelParameters::capture(const Device& device, cl_int* error) {
|
||||
*error = CL_SUCCESS;
|
||||
//! Information about which arguments are SVM pointers is stored after
|
||||
// the actual parameters, but only if the device has any SVM capability
|
||||
const size_t execInfoSize = getNumberOfSvmPtr() * sizeof(void*);
|
||||
@@ -149,10 +150,17 @@ address KernelParameters::capture(const Device& device) {
|
||||
Memory* memArg = memoryObjects_[desc.info_.arrayIndex_];
|
||||
if (memArg != nullptr) {
|
||||
memArg->retain();
|
||||
device::Memory* devMem = memArg->getDeviceMemory(device);
|
||||
if (nullptr == devMem) {
|
||||
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memArg->getSize());
|
||||
*error = CL_MEM_OBJECT_ALLOCATION_FAILURE;
|
||||
AlignedMemory::deallocate(mem);
|
||||
return nullptr;
|
||||
}
|
||||
// Write GPU VA address to the arguments
|
||||
if (!desc.info_.rawPointer_) {
|
||||
*reinterpret_cast<uintptr_t*>(mem + desc.offset_) = static_cast<uintptr_t>
|
||||
(memArg->getDeviceMemory(device)->virtualAddress());
|
||||
(devMem->virtualAddress());
|
||||
}
|
||||
} else if (desc.info_.rawPointer_) {
|
||||
if (!device.isFineGrainedSystem(true)) {
|
||||
@@ -181,6 +189,8 @@ address KernelParameters::capture(const Device& device) {
|
||||
if (0 != execInfoSize) {
|
||||
::memcpy(last, &execSvmPtr_[0], execInfoSize);
|
||||
}
|
||||
} else {
|
||||
*error = CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
return mem;
|
||||
|
||||
@@ -171,7 +171,7 @@ class KernelParameters : protected HeapObject {
|
||||
size_t localMemSize(size_t minDataTypeAlignment) const;
|
||||
|
||||
//! Capture the state of the parameters and return the stack base pointer.
|
||||
address capture(const Device& device);
|
||||
address capture(const Device& device, cl_int* error);
|
||||
//! Release the captured state of the parameters.
|
||||
void release(address parameters, const amd::Device& device) const;
|
||||
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする