diff --git a/rocclr/device/devkernel.cpp b/rocclr/device/devkernel.cpp index 5747e08258..2b919bf563 100644 --- a/rocclr/device/devkernel.cpp +++ b/rocclr/device/devkernel.cpp @@ -129,6 +129,7 @@ static amd_comgr_status_t populateArgs(const amd_comgr_metadata_node_t key, case amd::KernelParameterDescriptor::HiddenDefaultQueue: case amd::KernelParameterDescriptor::HiddenCompletionAction: case amd::KernelParameterDescriptor::HiddenMultiGridSync: + case amd::KernelParameterDescriptor::HiddenDynamicLdsSize: case amd::KernelParameterDescriptor::HiddenNone: lcArg->info_.hidden_ = true; break; diff --git a/rocclr/device/devkernel.hpp b/rocclr/device/devkernel.hpp index dbdf1e34ff..18f9a28e8f 100644 --- a/rocclr/device/devkernel.hpp +++ b/rocclr/device/devkernel.hpp @@ -64,7 +64,8 @@ struct KernelParameterDescriptor { HiddenPrivateBase = 27, HiddenSharedBase = 28, HiddenQueuePtr = 29, - HiddenLast = 30 + HiddenDynamicLdsSize = 30, + HiddenLast = 31 }; clk_value_type_t type_; //!< The parameter's type size_t offset_; //!< Its offset in the parameter's stack @@ -292,7 +293,8 @@ static const std::map ArgValueKindV3 = { {"hidden_grid_dims", amd::KernelParameterDescriptor::HiddenGridDims}, {"hidden_private_base", amd::KernelParameterDescriptor::HiddenPrivateBase}, {"hidden_shared_base", amd::KernelParameterDescriptor::HiddenSharedBase}, - {"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr} + {"hidden_queue_ptr", amd::KernelParameterDescriptor::HiddenQueuePtr}, + {"hidden_dynamic_lds_size", amd::KernelParameterDescriptor::HiddenDynamicLdsSize} }; static const std::map ArgAccQualV3 = { diff --git a/rocclr/device/pal/palkernel.cpp b/rocclr/device/pal/palkernel.cpp index fe5672e2fc..b0ad8cdc52 100644 --- a/rocclr/device/pal/palkernel.cpp +++ b/rocclr/device/pal/palkernel.cpp @@ -439,6 +439,9 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments( // @note: It's not a real AQL queue WriteAqlArgAt(hidden_arguments, gpu.hsaQueueMem()->vmAddress(), it.size_, it.offset_); break; + case amd::KernelParameterDescriptor::HiddenDynamicLdsSize: + WriteAqlArgAt(hidden_arguments, ldsAddress - ldsSize(), it.size_, it.offset_); + break; } } diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 5905894671..ea14273015 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -3208,6 +3208,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, case amd::KernelParameterDescriptor::HiddenQueuePtr: WriteAqlArgAt(hidden_arguments, gpu_queue_, it.size_, it.offset_); break; + case amd::KernelParameterDescriptor::HiddenDynamicLdsSize: + WriteAqlArgAt(hidden_arguments, sharedMemBytes, it.size_, it.offset_); + break; } }