SWDEV-545273 - Respect HIP_LAUNCH_PARAM_BUFFER_SIZE (#770)
Этот коммит содержится в:
Коммиттер: GitHub
Родительский коммит: 1492328894
Коммит: 2a02d2c2f3
@@ -362,6 +362,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
}
|
||||
|
||||
address kernargs = nullptr;
|
||||
size_t kernargs_size = 0;
|
||||
// 'extra' is a struct that contains the following info: {
|
||||
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
|
||||
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
|
||||
@@ -373,10 +374,22 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
kernargs = reinterpret_cast<address>(extra[1]);
|
||||
kernargs_size = *reinterpret_cast<size_t*>(extra[3]);
|
||||
const uint32_t numParams = kernel->signature().numParameters();
|
||||
const bool expectsArgs = (numParams > 0);
|
||||
const bool hasArgs = (kernargs != nullptr && kernargs_size > 0);
|
||||
// we either expected args but got none, or didn’t expect any but got some
|
||||
if (expectsArgs == true && hasArgs == false) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
if (expectsArgs == false && kernargs_size != 0) {
|
||||
return hipErrorLaunchOutOfResources;
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG_HIP_KERNARG_COPY_OPT) {
|
||||
if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs)) {
|
||||
if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs,
|
||||
kernargs_size)) {
|
||||
kernelCommand->release();
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
@@ -386,8 +399,11 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
const amd::KernelParameterDescriptor& desc = kernel->signature().at(i);
|
||||
if (kernelParams == nullptr) {
|
||||
assert(kernargs != nullptr);
|
||||
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
// only copy if this parameter lies fully inside the passed buffer
|
||||
if (desc.offset_ + desc.size_ <= kernargs_size) {
|
||||
kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
}
|
||||
} else {
|
||||
kernel->parameters().set(i, desc.size_, kernelParams[i],
|
||||
desc.type_ == T_POINTER /*svmBound*/);
|
||||
@@ -891,7 +907,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
if (!hip::isValid(launch.stream)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
|
||||
if (launch.stream == nullptr || launch.stream == hipStreamLegacy) {
|
||||
return hipErrorInvalidResourceHandle;
|
||||
}
|
||||
|
||||
@@ -653,9 +653,10 @@ bool MigrateMemObjectsCommand::validateMemory() {
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs) {
|
||||
int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs,
|
||||
size_t kernArgsSize) {
|
||||
const amd::Device& device = queue()->device();
|
||||
// Validate the kernel before submission
|
||||
// Validate the kernel before submission
|
||||
if (!queue()->device().validateKernel(kernel(), queue()->vdev(), cooperativeGroups())) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
@@ -666,7 +667,7 @@ int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, addre
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, parameters_)) {
|
||||
if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, kernArgsSize, parameters_)) {
|
||||
LogError("Cannot capture and set the kernel parameters");
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
@@ -1265,7 +1265,7 @@ class NDRangeKernelCommand : public Command {
|
||||
int32_t captureAndValidate();
|
||||
|
||||
// Allocate, capture and set kernel parameters
|
||||
int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs);
|
||||
int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs, size_t kernArgsSize);
|
||||
};
|
||||
|
||||
class NativeFnCommand : public Command {
|
||||
|
||||
@@ -104,21 +104,20 @@ address KernelParameters::alloc(device::VirtualDevice& vDev) {
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, address mem) {
|
||||
|
||||
bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size_t kernArgsSize,
|
||||
address mem) {
|
||||
amd::Memory** memories = reinterpret_cast<amd::Memory**>(mem + memoryObjOffset());
|
||||
for (size_t idx = 0; idx < signature_.numParameters(); ++idx) {
|
||||
KernelParameterDescriptor& desc = signature_.params()[idx];
|
||||
void* value = nullptr;
|
||||
if (kernelParams != nullptr) {
|
||||
value = kernelParams[idx];
|
||||
} else {
|
||||
value = kernArgs + desc.offset_;
|
||||
}
|
||||
void* value = kernelParams ? kernelParams[idx] : kernArgs + desc.offset_;
|
||||
void* param = mem + desc.offset_;
|
||||
uint32_t uint32_value = 0;
|
||||
uint64_t uint64_value = 0;
|
||||
// if using the 'extra' path and this parameter lies beyond supplied size, write zero
|
||||
if (kernelParams == nullptr && ((desc.offset_ + desc.size_) > kernArgsSize)) {
|
||||
value = &uint64_value;
|
||||
}
|
||||
Memory* memArg = nullptr;
|
||||
amd::Memory** memories = reinterpret_cast<amd::Memory**>(mem + memoryObjOffset());
|
||||
if (desc.type_ == T_POINTER && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
|
||||
LP64_SWITCH(uint32_value, uint64_value) = *(LP64_SWITCH(uint32_t*, uint64_t*))value;
|
||||
memArg = amd::MemObjMap::FindMemObj(*reinterpret_cast<const void* const*>(value));
|
||||
|
||||
@@ -289,7 +289,7 @@ class KernelParameters : protected HeapObject {
|
||||
address alloc(device::VirtualDevice& vDev);
|
||||
|
||||
//! Capture the arguments from signature and set.
|
||||
bool captureAndSet(void** kernelParams, address kernArgs, address mem);
|
||||
bool captureAndSet(void** kernelParams, address kernArgs, size_t kernArgsSize, address mem);
|
||||
};
|
||||
|
||||
/*! \brief Encapsulates a __kernel function and the argument values
|
||||
@@ -412,7 +412,7 @@ class Kernel : public RuntimeObject {
|
||||
static const KernelFieldMapV3Type kKernelFieldMapV3[];
|
||||
static const ArgValueKindV3Type kArgValueKindV3[];
|
||||
static const ArgFieldMapV3Type kArgFieldMapV3[];
|
||||
#endif
|
||||
#endif
|
||||
}; // defined(USE_COMGR_LIBRARY)
|
||||
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user