diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index 9fdffd70e7..5526f26238 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -1343,7 +1343,8 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) { // Allocate the hidden arguments, but abstraction layer will skip them if (desc.info_.hidden_) { - if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) { + if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction && + !amd::IS_HIP) { setDynamicParallelFlag(true); } if (codeObjectVer() == 2) { diff --git a/projects/clr/rocclr/device/pal/palkernel.cpp b/projects/clr/rocclr/device/pal/palkernel.cpp index 035a6288a3..192a3dc109 100644 --- a/projects/clr/rocclr/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/device/pal/palkernel.cpp @@ -348,12 +348,12 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const } break; case amd::KernelParameterDescriptor::HiddenDefaultQueue: - if (vmDefQueue != 0) { + if (vmDefQueue != 0 && dynamicParallelism()) { WriteAqlArgAt(hidden_arguments, vmDefQueue, it.size_, it.offset_); } break; case amd::KernelParameterDescriptor::HiddenCompletionAction: - if (*vmParentWrap != 0) { + if (*vmParentWrap != 0 && dynamicParallelism()) { WriteAqlArgAt(hidden_arguments, *vmParentWrap, it.size_, it.offset_); } break; diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 5d87b010f0..9d98ca7f3d 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -2908,7 +2908,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, case amd::KernelParameterDescriptor::HiddenDefaultQueue: { uint64_t vqVA = 0; amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev()); - if (nullptr != defQueue) { + if (nullptr != defQueue && devKernel->dynamicParallelism()) { if (!createVirtualQueue(defQueue->size()) || !createSchedulerParam()) { return false; } @@ -2919,7 +2919,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, } case amd::KernelParameterDescriptor::HiddenCompletionAction: { uint64_t spVA = 0; - if (nullptr != schedulerParam_) { + if (nullptr != schedulerParam_ && devKernel->dynamicParallelism()) { Memory* schedulerMem = dev().getRocMemory(schedulerParam_); AmdAqlWrap* wrap = reinterpret_cast( reinterpret_cast(schedulerParam_->getHostMem()) + sizeof(SchedulerParam));