P4 to Git Change 1559366 by gandryey@gera-w8 on 2018/05/24 18:06:45

SWDEV-79445 - OCL generic changes and code clean-up
	- Combine validateMemory() and the argument capture() under a single function. Rename validateMemory() in the NDRangeKernelCommand class to captureAndValidate().

	http://ocltc.amd.com/reviews/r/14964/

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#90 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#22 edit


[ROCm/clr commit: 73dd22dba8]
このコミットが含まれているのは:
foreman
2018-05-24 18:12:49 -04:00
コミット fd627bea6d
4個のファイルの変更18行の追加48行の削除
+4 -44
ファイルの表示
@@ -387,56 +387,16 @@ bool MigrateMemObjectsCommand::validateMemory() {
return true;
}
cl_int NDRangeKernelCommand::validateMemory() {
cl_int NDRangeKernelCommand::captureAndValidate() {
const amd::Device& device = queue()->device();
// Validate the kernel before submission
if (!queue()->device().validateKernel(kernel(), queue()->vdev())) {
return CL_OUT_OF_RESOURCES;
}
// Runtime disables deferred memory allocation for single device.
// Hence ignore memory validations
if (queue()->context().devices().size() > 1) {
amd::Memory* const* memories = reinterpret_cast<amd::Memory* const*>(
kernel().parameters().values() + kernel().parameters().memoryObjOffset());
const amd::KernelSignature& signature = kernel().signature();
for (uint i = 0; i != signature.numParameters(); ++i) {
const amd::KernelParameterDescriptor& desc = signature.at(i);
// Check if it's a memory object
if ((desc.type_ == T_POINTER) && (desc.size_ != 0)) {
amd::Memory* amdMemory = memories[desc.info_.arrayIndex_];
if (amdMemory != NULL) {
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_CONSTANT) {
// Make sure argument size isn't bigger than the device limit
if (amdMemory->getSize() > device.info().maxConstantBufferSize_) {
LogPrintfError("HW constant buffer is too big (0x%X bytes)!", amdMemory->getSize());
return CL_OUT_OF_RESOURCES;
}
}
device::Memory* mem = amdMemory->getDeviceMemory(device);
if (!kernel().getDeviceKernel(device)->validateMemory(i, amdMemory)) {
if (device.reallocMemory(*amdMemory)) {
mem = amdMemory->getDeviceMemory(device);
} else {
mem = NULL;
}
}
if (NULL == mem) {
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", amdMemory->getSize());
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
}
}
}
}
}
parameters_ = kernel().parameters().capture(device);
if (nullptr == parameters_) {
return CL_OUT_OF_HOST_MEMORY;
}
return CL_SUCCESS;
cl_int error;
parameters_ = kernel().parameters().capture(device, &error);
return error;
}
bool ExtObjectsCommand::validateMemory() {
+1 -1
ファイルの表示
@@ -775,7 +775,7 @@ class NDRangeKernelCommand : public Command {
//! Set the local work size.
void setLocalWorkSize(const NDRange& local) { sizes_.local() = local; }
cl_int validateMemory();
cl_int captureAndValidate();
};
class NativeFnCommand : public Command {
+12 -2
ファイルの表示
@@ -132,7 +132,8 @@ void KernelParameters::set(size_t index, size_t size, const void* value, bool sv
desc.info_.defined_ = true;
}
address KernelParameters::capture(const Device& device) {
address KernelParameters::capture(const Device& device, cl_int* error) {
*error = CL_SUCCESS;
//! Information about which arguments are SVM pointers is stored after
// the actual parameters, but only if the device has any SVM capability
const size_t execInfoSize = getNumberOfSvmPtr() * sizeof(void*);
@@ -149,10 +150,17 @@ address KernelParameters::capture(const Device& device) {
Memory* memArg = memoryObjects_[desc.info_.arrayIndex_];
if (memArg != nullptr) {
memArg->retain();
device::Memory* devMem = memArg->getDeviceMemory(device);
if (nullptr == devMem) {
LogPrintfError("Can't allocate memory size - 0x%08X bytes!", memArg->getSize());
*error = CL_MEM_OBJECT_ALLOCATION_FAILURE;
AlignedMemory::deallocate(mem);
return nullptr;
}
// Write GPU VA address to the arguments
if (!desc.info_.rawPointer_) {
*reinterpret_cast<uintptr_t*>(mem + desc.offset_) = static_cast<uintptr_t>
(memArg->getDeviceMemory(device)->virtualAddress());
(devMem->virtualAddress());
}
} else if (desc.info_.rawPointer_) {
if (!device.isFineGrainedSystem(true)) {
@@ -181,6 +189,8 @@ address KernelParameters::capture(const Device& device) {
if (0 != execInfoSize) {
::memcpy(last, &execSvmPtr_[0], execInfoSize);
}
} else {
*error = CL_OUT_OF_HOST_MEMORY;
}
return mem;
+1 -1
ファイルの表示
@@ -171,7 +171,7 @@ class KernelParameters : protected HeapObject {
size_t localMemSize(size_t minDataTypeAlignment) const;
//! Capture the state of the parameters and return the stack base pointer.
address capture(const Device& device);
address capture(const Device& device, cl_int* error);
//! Release the captured state of the parameters.
void release(address parameters, const amd::Device& device) const;