SWDEV-460948 - Changes to alloc, set, capture under single function.

Change-Id: I7b2d40e99e812b97c53535c5e63c41ad64a8f543
This commit is contained in:
kjayapra-amd
2024-05-15 20:27:08 -04:00
committed by Karthik Jayaprakash
orang tua b8c2ac4de4
melakukan 892071aeb2
6 mengubah file dengan 156 tambahan dan 30 penghapusan
+38 -30
Melihat File
@@ -274,31 +274,6 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
return hipErrorLaunchFailure;
}
}
address kernargs = nullptr;
// 'extra' is a struct that contains the following info: {
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
// HIP_LAUNCH_PARAM_END }
if (extra != nullptr) {
if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE ||
extra[4] != HIP_LAUNCH_PARAM_END) {
return hipErrorInvalidValue;
}
kernargs = reinterpret_cast<address>(extra[1]);
}
for (size_t i = 0; i < signature.numParameters(); ++i) {
const amd::KernelParameterDescriptor& desc = signature.at(i);
if (kernelParams == nullptr) {
assert(kernargs != nullptr);
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
desc.type_ == T_POINTER /*svmBound*/);
} else {
assert(extra == nullptr);
kernel->parameters().set(i, desc.size_, kernelParams[i],
desc.type_ == T_POINTER /*svmBound*/);
}
}
return hipSuccess;
}
@@ -319,7 +294,6 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
size_t localWorkSize[3] = {blockDimX, blockDimY, blockDimZ};
amd::NDRangeContainer ndrange(3, globalWorkOffset, globalWorkSize, localWorkSize);
amd::Command::EventWaitList waitList;
address kernargs = nullptr;
bool profileNDRange = (startEvent != nullptr || stopEvent != nullptr);
@@ -335,10 +309,44 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
return hipErrorOutOfMemory;
}
// Capture the kernel arguments
if (CL_SUCCESS != kernelCommand->captureAndValidate()) {
kernelCommand->release();
return hipErrorOutOfMemory;
address kernargs = nullptr;
// 'extra' is a struct that contains the following info: {
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
// HIP_LAUNCH_PARAM_END }
if (extra != nullptr) {
assert(kernelParams == nullptr);
if (extra[0] != HIP_LAUNCH_PARAM_BUFFER_POINTER || extra[2] != HIP_LAUNCH_PARAM_BUFFER_SIZE ||
extra[4] != HIP_LAUNCH_PARAM_END) {
return hipErrorInvalidValue;
}
kernargs = reinterpret_cast<address>(extra[1]);
}
if (DEBUG_HIP_KERNARG_COPY_OPT) {
if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs)) {
kernelCommand->release();
return hipErrorOutOfMemory;
}
} else {
for (size_t i = 0; i < kernel->signature().numParameters(); ++i) {
const amd::KernelParameterDescriptor& desc = kernel->signature().at(i);
if (kernelParams == nullptr) {
assert(kernargs != nullptr);
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
desc.type_ == T_POINTER /*svmBound*/);
} else {
kernel->parameters().set(i, desc.size_, kernelParams[i],
desc.type_ == T_POINTER /*svmBound*/);
}
}
// Capture the kernel arguments
if (CL_SUCCESS != kernelCommand->captureAndValidate()) {
kernelCommand->release();
return hipErrorOutOfMemory;
}
}
command = kernelCommand;
+21
Melihat File
@@ -643,6 +643,27 @@ bool MigrateMemObjectsCommand::validateMemory() {
return true;
}
// =================================================================================================
// Validates the kernel against this command's queue, allocates the kernel-argument buffer and
// captures the launch arguments directly into it.
//
// kernelParams and kernArgs are forwarded unchanged to KernelParameters::captureAndSet().
//
// Returns CL_SUCCESS when every step succeeds. Returns CL_OUT_OF_RESOURCES when the device
// rejects the kernel, when the argument buffer cannot be allocated, or when the arguments
// cannot be captured.
int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs) {
  // Validate the kernel before submission
  if (!queue()->device().validateKernel(kernel(), queue()->vdev(), cooperativeGroups())) {
    return CL_OUT_OF_RESOURCES;
  }

  // The buffer is stored in parameters_ before capture, so it stays owned by this command even
  // if the capture step below fails.
  parameters_ = kernel().parameters().alloc(*queue()->vdev());
  if (parameters_ == nullptr) {
    LogError("Cannot allocate memory for parameters_");
    return CL_OUT_OF_RESOURCES;
  }

  if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, parameters_)) {
    LogError("Cannot capture and set the kernel parameters");
    return CL_OUT_OF_RESOURCES;
  }

  return CL_SUCCESS;
}
int32_t NDRangeKernelCommand::captureAndValidate() {
const amd::Device& device = queue()->device();
// Validate the kernel before submission
+4
Melihat File
@@ -1177,7 +1177,11 @@ class NDRangeKernelCommand : public Command {
numWorkgroups_ = numWorkgroups;
}
// Capture kernel parameters and validate
int32_t captureAndValidate();
// Allocate, capture and set kernel parameters
int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs);
};
class NativeFnCommand : public Command {
+85
Melihat File
@@ -85,6 +85,91 @@ size_t KernelParameters::localMemSize(size_t minDataTypeAlignment) const {
return memSize;
}
// =================================================================================================
//! Allocates the buffer that will receive the kernel arguments.
//
//  The buffer holds totalSize_ bytes of parameters followed by one pointer-sized slot per SVM
//  pointer. The virtual device is asked first; when it returns nullptr the buffer comes from
//  AlignedMemory instead. deviceKernelArgs_ is set only when the virtual device supplied the
//  buffer and is left untouched on the fallback path.
//
//  Returns the buffer, or whatever AlignedMemory::allocate() returns on the fallback path.
address KernelParameters::alloc(device::VirtualDevice& vDev) {
  // Information about which arguments are SVM pointers is stored after
  // the actual parameters, but only if the device has any SVM capability
  const size_t svmInfoBytes = getNumberOfSvmPtr() * sizeof(void*);
  const size_t bufferBytes = totalSize_ + svmInfoBytes;

  address deviceMem = vDev.allocKernelArguments(bufferBytes, 128);
  if (deviceMem != nullptr) {
    deviceKernelArgs_ = true;
    return deviceMem;
  }

  // The virtual device could not provide the buffer
  return reinterpret_cast<address>(AlignedMemory::allocate(bufferBytes, PARAMETERS_MIN_ALIGNMENT));
}
// =================================================================================================
bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, address mem) {
for (size_t idx = 0; idx < signature_.numParameters(); ++idx) {
KernelParameterDescriptor& desc = signature_.params()[idx];
void* value = nullptr;
if (kernelParams != nullptr) {
value = kernelParams[idx];
} else {
value = kernArgs + desc.offset_;
}
void* param = mem + desc.offset_;
uint32_t uint32_value = 0;
uint64_t uint64_value = 0;
Memory* memArg = nullptr;
amd::Memory** memories = reinterpret_cast<amd::Memory**>(mem + memoryObjOffset());
if (desc.type_ == T_POINTER && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
LP64_SWITCH(uint32_value, uint64_value) = *(LP64_SWITCH(uint32_t*, uint64_t*))value;
memArg = amd::MemObjMap::FindMemObj(*reinterpret_cast<const void* const*>(value));
memories[desc.info_.arrayIndex_] = memArg;
if (memArg != nullptr) {
memArg->retain();
}
desc.info_.rawPointer_ = true;
} else if (desc.type_ == T_SAMPLER) {
LogError("Cannot handle Sampler now");
return false;
} else if (desc.type_ == T_QUEUE) {
LogError("Cannot handle Queue now");
return false;
} else {
switch (desc.size_) {
case 4:
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
uint32_value = desc.size_;
} else {
uint32_value = *(static_cast<const uint32_t*>(value));
}
break;
case 8:
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
uint64_value = desc.size_;
} else {
uint64_value = *(static_cast<const uint64_t*>(value));
}
break;
}
}
switch (desc.size_) {
case sizeof(uint32_t):
*static_cast<uint32_t*>(param) = uint32_value;
break;
case sizeof(uint64_t):
*static_cast<uint64_t*>(param) = uint64_value;
break;
default:
::memcpy(param, value, desc.size_);
break;
}
desc.info_.defined_ = true;
}
execInfoOffset_ = totalSize_;
return true;
}
void KernelParameters::set(size_t index, size_t size, const void* value, bool svmBound) {
KernelParameterDescriptor& desc = signature_.params()[index];
+6
Melihat File
@@ -284,6 +284,12 @@ class KernelParameters : protected HeapObject {
//! Returns true if arguments were allocated on device
bool deviceKernelArgs() const { return (deviceKernelArgs_ == 1); }
//! Allocate memory for kernel arguments to be set.
address alloc(device::VirtualDevice& vDev);
//! Capture the arguments from signature and set.
bool captureAndSet(void** kernelParams, address kernArgs, address mem);
};
/*! \brief Encapsulates a __kernel function and the argument values
+2
Melihat File
@@ -251,6 +251,8 @@ release(bool, DEBUG_HIP_GRAPH_DOT_PRINT, false, \
"Enable/Disable graph debug dot print dump") \
release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \
"Force to always use new comgr unbundling action") \
release(bool, DEBUG_HIP_KERNARG_COPY_OPT, true, \
"Enable/Disable multiple kern arg copies") \
namespace amd {