From 2a02d2c2f318f80795dea283d5d2d44870f55fd7 Mon Sep 17 00:00:00 2001 From: "Betigeri, Sourabh" Date: Sun, 3 Aug 2025 17:32:52 -0700 Subject: [PATCH] SWDEV-545273 - Respect HIP_LAUNCH_PARAM_BUFFER_SIZE (#770) --- hipamd/src/hip_module.cpp | 24 ++++++++++++++++++++---- rocclr/platform/command.cpp | 7 ++++--- rocclr/platform/command.hpp | 2 +- rocclr/platform/kernel.cpp | 17 ++++++++--------- rocclr/platform/kernel.hpp | 4 ++-- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index e1091d1dc8..3f818f2579 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -362,6 +362,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, } address kernargs = nullptr; + size_t kernargs_size = 0; // 'extra' is a struct that contains the following info: { // HIP_LAUNCH_PARAM_BUFFER_POINTER, kernargs, // HIP_LAUNCH_PARAM_BUFFER_SIZE, &kernargs_size, @@ -373,10 +374,22 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, return hipErrorInvalidValue; } kernargs = reinterpret_cast
(extra[1]); + kernargs_size = *reinterpret_cast(extra[3]); + const uint32_t numParams = kernel->signature().numParameters(); + const bool expectsArgs = (numParams > 0); + const bool hasArgs = (kernargs != nullptr && kernargs_size > 0); + // we either expected args but got none, or didn’t expect any but got some + if (expectsArgs == true && hasArgs == false) { + return hipErrorInvalidValue; + } + if (expectsArgs == false && kernargs_size != 0) { + return hipErrorLaunchOutOfResources; + } } if (DEBUG_HIP_KERNARG_COPY_OPT) { - if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs)) { + if (CL_SUCCESS != kernelCommand->AllocCaptureSetValidate(kernelParams, kernargs, + kernargs_size)) { kernelCommand->release(); return hipErrorOutOfMemory; } @@ -386,8 +399,11 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, const amd::KernelParameterDescriptor& desc = kernel->signature().at(i); if (kernelParams == nullptr) { assert(kernargs != nullptr); - kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, - desc.type_ == T_POINTER /*svmBound*/); + // only copy if this parameter lies fully inside the passed buffer + if (desc.offset_ + desc.size_ <= kernargs_size) { + kernel->parameters().set(i, desc.size_, kernargs + desc.offset_, + desc.type_ == T_POINTER /*svmBound*/); + } } else { kernel->parameters().set(i, desc.size_, kernelParams[i], desc.type_ == T_POINTER /*svmBound*/); @@ -891,7 +907,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL if (!hip::isValid(launch.stream)) { return hipErrorInvalidValue; } - + if (launch.stream == nullptr || launch.stream == hipStreamLegacy) { return hipErrorInvalidResourceHandle; } diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp index 3f4fd6a24c..6a20fe9e26 100644 --- a/rocclr/platform/command.cpp +++ b/rocclr/platform/command.cpp @@ -653,9 +653,10 @@ bool MigrateMemObjectsCommand::validateMemory() { } // ================================================================================================= -int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs) { +int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, address kernArgs, + size_t kernArgsSize) { const amd::Device& device = queue()->device(); - // Validate the kernel before submission + // Validate the kernel before submission if (!queue()->device().validateKernel(kernel(), queue()->vdev(), cooperativeGroups())) { return CL_OUT_OF_RESOURCES; } @@ -666,7 +667,7 @@ int32_t NDRangeKernelCommand::AllocCaptureSetValidate(void** kernelParams, addre return CL_OUT_OF_RESOURCES; } - if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, parameters_)) { + if (!kernel().parameters().captureAndSet(kernelParams, kernArgs, kernArgsSize, parameters_)) { LogError("Cannot capture and set the kernel parameters"); return CL_OUT_OF_RESOURCES; } diff --git a/rocclr/platform/command.hpp b/rocclr/platform/command.hpp index 0aebbc4b72..3dbc814c54 100644 --- a/rocclr/platform/command.hpp +++ b/rocclr/platform/command.hpp @@ -1265,7 +1265,7 @@ class NDRangeKernelCommand : public Command { int32_t captureAndValidate(); // Allocate, capture and set kernel parameters - int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs); + int32_t AllocCaptureSetValidate(void** kernelParams, address kernArgs, size_t kernArgsSize); }; class NativeFnCommand : public Command { diff --git a/rocclr/platform/kernel.cpp b/rocclr/platform/kernel.cpp index 9eb744392b..678e98446d 100644 --- a/rocclr/platform/kernel.cpp +++ b/rocclr/platform/kernel.cpp @@ -104,21 +104,20 @@ address KernelParameters::alloc(device::VirtualDevice& vDev) { } // ================================================================================================= -bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, address mem) { - +bool KernelParameters::captureAndSet(void** kernelParams, address kernArgs, size_t kernArgsSize, + address mem) { + amd::Memory** memories = reinterpret_cast(mem + memoryObjOffset()); for (size_t idx = 0; idx < signature_.numParameters(); ++idx) { KernelParameterDescriptor& desc = signature_.params()[idx]; - void* value = nullptr; - if (kernelParams != nullptr) { - value = kernelParams[idx]; - } else { - value = kernArgs + desc.offset_; - } + void* value = kernelParams ? kernelParams[idx] : kernArgs + desc.offset_; void* param = mem + desc.offset_; uint32_t uint32_value = 0; uint64_t uint64_value = 0; + // if using the 'extra' path and this parameter lies beyond supplied size, write zero + if (kernelParams == nullptr && ((desc.offset_ + desc.size_) > kernArgsSize)) { + value = &uint64_value; + } Memory* memArg = nullptr; - amd::Memory** memories = reinterpret_cast(mem + memoryObjOffset()); if (desc.type_ == T_POINTER && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) { LP64_SWITCH(uint32_value, uint64_value) = *(LP64_SWITCH(uint32_t*, uint64_t*))value; memArg = amd::MemObjMap::FindMemObj(*reinterpret_cast(value)); diff --git a/rocclr/platform/kernel.hpp b/rocclr/platform/kernel.hpp index 250b91a717..4b2fd976ba 100644 --- a/rocclr/platform/kernel.hpp +++ b/rocclr/platform/kernel.hpp @@ -289,7 +289,7 @@ class KernelParameters : protected HeapObject { address alloc(device::VirtualDevice& vDev); //! Capture the arguments from signature and set. - bool captureAndSet(void** kernelParams, address kernArgs, address mem); + bool captureAndSet(void** kernelParams, address kernArgs, size_t kernArgsSize, address mem); }; /*! \brief Encapsulates a __kernel function and the argument values @@ -412,7 +412,7 @@ class Kernel : public RuntimeObject { static const KernelFieldMapV3Type kKernelFieldMapV3[]; static const ArgValueKindV3Type kArgValueKindV3[]; static const ArgFieldMapV3Type kArgFieldMapV3[]; -#endif +#endif }; // defined(USE_COMGR_LIBRARY)