diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp
index e1091d1dc8..3f818f2579 100644
--- a/hipamd/src/hip_module.cpp
+++ b/hipamd/src/hip_module.cpp
@@ -362,6 +362,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
}
address kernargs = nullptr;
+ size_t kernargs_size = 0;
// 'extra' is a struct that contains the following info: {
// HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs,
// HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size,
@@ -373,10 +374,22 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
return hipErrorInvalidValue;
}
kernargs = reinterpret_cast
(extra[1]);
+ kernargs_size = *reinterpret_cast(extra[3]);
+ const uint32_t numParams = kernel->signature().numParameters();
+ const bool expectsArgs = (numParams > 0);
+ const bool hasArgs = (kernargs != nullptr && kernargs_size > 0);
+ // we either expected args but got none, or didn’t expect any but got some
+ if (expectsArgs == true && hasArgs == false) {
+ return hipErrorInvalidValue;
+ }
+ if (expectsArgs == false && kernargs_size != 0) {
+ return hipErrorLaunchOutOfResources;
+ }
}
if (DEBUG_HIP_KERNARG_COPY_OPT) {
- if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs)) {
+ if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs,
+ kernargs_size)) {
kernelCommand->release();
return hipErrorOutOfMemory;
}
@@ -386,8 +399,11 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
const amd::KernelParameterDescriptor& desc = kernel->signature().at(i);
if (kernelParams == nullptr) {
assert(kernargs != nullptr);
- kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
- desc.type_ == T_POINTER /*svmBound*/);
+ // only copy if this parameter lies fully inside the passed buffer
+ if (desc.offset_ + desc.size_ <= kernargs_size) {
+ kernel->parameters().set(i, desc.size_, kernargs + desc.offset_,
+ desc.type_ == T_POINTER /*svmBound*/);
+ }
} else {
kernel->parameters().set(i, desc.size_, kernelParams[i],
desc.type_ == T_POINTER /*svmBound*/);
@@ -891,7 +907,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
if (!hip::isValid(launch.stream)) {
return hipErrorInvalidValue;
}
-
+
if (launch.stream == nullptr || launch.stream == hipStreamLegacy) {
return hipErrorInvalidResourceHandle;
}
diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp
index 3f4fd6a24c..6a20fe9e26 100644
--- a/rocclr/platform/command.cpp
+++ b/rocclr/platform/command.cpp
@@ -653,9 +653,10 @@ bool MigrateMemObjectsCommand::validateMemory() {
}
// =================================================================================================
-int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs) {
+int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs,
+ size_t kernArgsSize) {
const amd::Device& device = queue()->device();
- // Validate the kernel before submission
+ // Validate the kernel before submission
if (!queue()->device().validateKernel(kernel(), queue()->vdev(), cooperativeGroups())) {
return CL_OUT_OF_RESOURCES;
}
@@ -666,7 +667,7 @@ int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, addre
return CL_OUT_OF_RESOURCES;
}
- if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, parameters_)) {
+ if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, kernArgsSize, parameters_)) {
LogError("Cannot capture and set the kernel parameters");
return CL_OUT_OF_RESOURCES;
}
diff --git a/rocclr/platform/command.hpp b/rocclr/platform/command.hpp
index 0aebbc4b72..3dbc814c54 100644
--- a/rocclr/platform/command.hpp
+++ b/rocclr/platform/command.hpp
@@ -1265,7 +1265,7 @@ class NDRangeKernelCommand : public Command {
int32_t captureAndValidate();
// Allocate, capture and set kernel parameters
- int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs);
+ int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs, size_t kernArgsSize);
};
class NativeFnCommand : public Command {
diff --git a/rocclr/platform/kernel.cpp b/rocclr/platform/kernel.cpp
index 9eb744392b..678e98446d 100644
--- a/rocclr/platform/kernel.cpp
+++ b/rocclr/platform/kernel.cpp
@@ -104,21 +104,20 @@ address KernelParameters::alloc(device::VirtualDevice& vDev) {
}
// =================================================================================================
-bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, address mem) {
-
+bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size_t kernArgsSize,
+ address mem) {
+ amd::Memory** memories = reinterpret_cast(mem + memoryObjOffset());
for (size_t idx = 0; idx < signature_.numParameters(); ++idx) {
KernelParameterDescriptor& desc = signature_.params()[idx];
- void* value = nullptr;
- if (kernelParams != nullptr) {
- value = kernelParams[idx];
- } else {
- value = kernArgs + desc.offset_;
- }
+ void* value = kernelParams ? kernelParams[idx] : kernArgs + desc.offset_;
void* param = mem + desc.offset_;
uint32_t uint32_value = 0;
uint64_t uint64_value = 0;
+ // if using the 'extra' path and this parameter lies beyond supplied size, write zero
+ if (kernelParams == nullptr && ((desc.offset_ + desc.size_) > kernArgsSize)) {
+ value = &uint64_value;
+ }
Memory* memArg = nullptr;
- amd::Memory** memories = reinterpret_cast(mem + memoryObjOffset());
if (desc.type_ == T_POINTER && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
LP64_SWITCH(uint32_value, uint64_value) = *(LP64_SWITCH(uint32_t*, uint64_t*))value;
memArg = amd::MemObjMap::FindMemObj(*reinterpret_cast(value));
diff --git a/rocclr/platform/kernel.hpp b/rocclr/platform/kernel.hpp
index 250b91a717..4b2fd976ba 100644
--- a/rocclr/platform/kernel.hpp
+++ b/rocclr/platform/kernel.hpp
@@ -289,7 +289,7 @@ class KernelParameters : protected HeapObject {
address alloc(device::VirtualDevice& vDev);
//! Capture the arguments from signature and set.
- bool captureAndSet(void** kernelParams, address kernArgs, address mem);
+ bool captureAndSet(void** kernelParams, address kernArgs, size_t kernArgsSize, address mem);
};
/*! \brief Encapsulates a __kernel function and the argument values
@@ -412,7 +412,7 @@ class Kernel : public RuntimeObject {
static const KernelFieldMapV3Type kKernelFieldMapV3[];
static const ArgValueKindV3Type kArgValueKindV3[];
static const ArgFieldMapV3Type kArgFieldMapV3[];
-#endif
+#endif
}; // defined(USE_COMGR_LIBRARY)