SWDEV-460948 - Changes to alloc, set, capture under single function.
Change-Id: I7b2d40e99e812b97c53535c5e63c41ad64a8f543
This commit is contained in:
committed by
Karthik Jayaprakash
parent
b8c2ac4de4
commit
892071aeb2
@@ -274,31 +274,6 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
return hipErrorLaunchFailure;
|
||||
}
|
||||
}
|
||||
address kernargs = nullptr;
|
||||
// 'extra' is a struct that contains the following info: {
|
||||
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
|
||||
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
|
||||
// HIP_LAUNCH_PARAM_END }
|
||||
if (extra != nullptr) {
|
||||
if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE ||
|
||||
extra[4] != HIP_LAUNCH_PARAM_END) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
kernargs = reinterpret_cast<address>(extra[1]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < signature.numParameters(); ++i) {
|
||||
const amd::KernelParameterDescriptor& desc = signature.at(i);
|
||||
if (kernelParams == nullptr) {
|
||||
assert(kernargs != nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
} else {
|
||||
assert(extra == nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernelParams[i],
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
}
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -319,7 +294,6 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
size_t localWorkSize[3] = {blockDimX, blockDimY, blockDimZ};
|
||||
amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
amd::Command::EventWaitList waitList;
|
||||
address kernargs = nullptr;
|
||||
|
||||
bool profileNDRange = (startEvent != nullptr || stopEvent != nullptr);
|
||||
|
||||
@@ -335,10 +309,44 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
// Capture the kernel arguments
|
||||
if (CL_SUCCESS != kernelCommand->captureAndValidate()) {
|
||||
kernelCommand->release();
|
||||
return hipErrorOutOfMemory;
|
||||
address kernargs = nullptr;
|
||||
// 'extra' is a struct that contains the following info: {
|
||||
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
|
||||
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
|
||||
// HIP_LAUNCH_PARAM_END }
|
||||
if (extra != nullptr) {
|
||||
assert(kernelParams == nullptr);
|
||||
if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE ||
|
||||
extra[4] != HIP_LAUNCH_PARAM_END) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
kernargs = reinterpret_cast<address>(extra[1]);
|
||||
}
|
||||
|
||||
if (DEBUG_HIP_KERNARG_COPY_OPT) {
|
||||
if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs)) {
|
||||
kernelCommand->release();
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
} else {
|
||||
for (size_t i = 0; i < kernel->signature().numParameters(); ++i) {
|
||||
const amd::KernelParameterDescriptor& desc = kernel->signature().at(i);
|
||||
if (kernelParams == nullptr) {
|
||||
assert(kernargs != nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
} else {
|
||||
kernel->parameters().set(i, desc.size_, kernelParams[i],
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
}
|
||||
}
|
||||
|
||||
// Capture the kernel arguments
|
||||
if (CL_SUCCESS != kernelCommand->captureAndValidate()) {
|
||||
kernelCommand->release();
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
}
|
||||
|
||||
command = kernelCommand;
|
||||
|
||||
@@ -643,6 +643,27 @@ bool MigrateMemObjectsCommand::validateMemory() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs) {
|
||||
const amd::Device& device = queue()->device();
|
||||
// Validate the kernel before submission
|
||||
if (!queue()->device().validateKernel(kernel(), queue()->vdev(), cooperativeGroups())) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
parameters_ = kernel().parameters().alloc(*queue()->vdev());
|
||||
if (parameters_ == nullptr) {
|
||||
LogError("Cannot allocate memory for parameters_");
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, parameters_)) {
|
||||
LogError("Cannot capture and set the kernel parameters");
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t NDRangeKernelCommand::captureAndValidate() {
|
||||
const amd::Device& device = queue()->device();
|
||||
// Validate the kernel before submission
|
||||
|
||||
@@ -1177,7 +1177,11 @@ class NDRangeKernelCommand : public Command {
|
||||
numWorkgroups_ = numWorkgroups;
|
||||
}
|
||||
|
||||
// Capture kernel parameters and validate
|
||||
int32_t captureAndValidate();
|
||||
|
||||
// Allocate, capture and set kernel parameters
|
||||
int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs);
|
||||
};
|
||||
|
||||
class NativeFnCommand : public Command {
|
||||
|
||||
@@ -85,6 +85,91 @@ size_t KernelParameters::localMemSize(size_t minDataTypeAlignment) const {
|
||||
return memSize;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
//! Allocates the buffer that receives the kernel arguments of one launch.
//!
//! The virtual device is asked for the buffer first; if it returns nullptr the buffer is taken
//! from AlignedMemory instead.
//!
//! \param vDev  Virtual device asked for the argument buffer
//! \return The allocated buffer, or whatever the AlignedMemory fallback returned
address KernelParameters::alloc(device::VirtualDevice& vDev) {
  //! Information about which arguments are SVM pointers is stored after
  // the actual parameters, so one pointer-sized slot per SVM pointer is appended
  const size_t allocSize = totalSize_ + getNumberOfSvmPtr() * sizeof(void*);

  address deviceMem = vDev.allocKernelArguments(allocSize, 128);
  if (deviceMem != nullptr) {
    // NOTE(review): the flag is only ever set here, never cleared when the fallback below is
    // taken - confirm that a stale 'true' from an earlier call cannot occur.
    deviceKernelArgs_ = true;
    return deviceMem;
  }

  // The virtual device could not provide the buffer
  return reinterpret_cast<address>(AlignedMemory::allocate(allocSize, PARAMETERS_MIN_ALIGNMENT));
}
|
||||
|
||||
// =================================================================================================
|
||||
bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, address mem) {
|
||||
|
||||
for (size_t idx = 0; idx < signature_.numParameters(); ++idx) {
|
||||
KernelParameterDescriptor& desc = signature_.params()[idx];
|
||||
void* value = nullptr;
|
||||
if (kernelParams != nullptr) {
|
||||
value = kernelParams[idx];
|
||||
} else {
|
||||
value = kernArgs + desc.offset_;
|
||||
}
|
||||
void* param = mem + desc.offset_;
|
||||
uint32_t uint32_value = 0;
|
||||
uint64_t uint64_value = 0;
|
||||
Memory* memArg = nullptr;
|
||||
amd::Memory** memories = reinterpret_cast<amd::Memory**>(mem + memoryObjOffset());
|
||||
if (desc.type_ == T_POINTER && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
|
||||
LP64_SWITCH(uint32_value, uint64_value) = *(LP64_SWITCH(uint32_t*, uint64_t*))value;
|
||||
memArg = amd::MemObjMap::FindMemObj(*reinterpret_cast<const void* const*>(value));
|
||||
memories[desc.info_.arrayIndex_] = memArg;
|
||||
if (memArg != nullptr) {
|
||||
memArg->retain();
|
||||
}
|
||||
desc.info_.rawPointer_ = true;
|
||||
} else if (desc.type_ == T_SAMPLER) {
|
||||
LogError("Cannot handle Sampler now");
|
||||
return false;
|
||||
} else if (desc.type_ == T_QUEUE) {
|
||||
LogError("Cannot handle Queue now");
|
||||
return false;
|
||||
} else {
|
||||
switch (desc.size_) {
|
||||
case 4:
|
||||
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
uint32_value = desc.size_;
|
||||
} else {
|
||||
uint32_value = *(static_cast<const uint32_t*>(value));
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
uint64_value = desc.size_;
|
||||
} else {
|
||||
uint64_value = *(static_cast<const uint64_t*>(value));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (desc.size_) {
|
||||
case sizeof(uint32_t):
|
||||
*static_cast<uint32_t*>(param) = uint32_value;
|
||||
break;
|
||||
case sizeof(uint64_t):
|
||||
*static_cast<uint64_t*>(param) = uint64_value;
|
||||
break;
|
||||
default:
|
||||
::memcpy(param, value, desc.size_);
|
||||
break;
|
||||
}
|
||||
desc.info_.defined_ = true;
|
||||
}
|
||||
|
||||
execInfoOffset_ = totalSize_;
|
||||
return true;
|
||||
}
|
||||
|
||||
void KernelParameters::set(size_t index, size_t size, const void* value, bool svmBound) {
|
||||
KernelParameterDescriptor& desc = signature_.params()[index];
|
||||
|
||||
|
||||
@@ -284,6 +284,12 @@ class KernelParameters : protected HeapObject {
|
||||
|
||||
//! Returns true if arguments were allocated on device
|
||||
bool deviceKernelArgs() const { return (deviceKernelArgs_ == 1); }
|
||||
|
||||
//! Allocate memory for kernel arguments to be set.
|
||||
address alloc(device::VirtualDevice& vDev);
|
||||
|
||||
//! Capture the arguments from signature and set.
|
||||
bool captureAndSet(void** kernelParams, address kernArgs, address mem);
|
||||
};
|
||||
|
||||
/*! \brief Encapsulates a __kernel function and the argument values
|
||||
|
||||
@@ -251,6 +251,8 @@ release(bool, DEBUG_HIP_GRAPH_DOT_PRINT, false, \
|
||||
"Enable/Disable graph debug dot print dump") \
|
||||
release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \
|
||||
"Force to always use new comgr unbundling action") \
|
||||
release(bool, DEBUG_HIP_KERNARG_COPY_OPT, true, \
|
||||
"Enable/Disable multiple kern arg copies") \
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user