From a6f2ecd703db9da10fbf4c39414bea7b6d60beee Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 17 Apr 2023 18:41:25 -0400 Subject: [PATCH] SWDEV-231145 - Add a validation for uniform launch Change-Id: I68d61690c50060d9dcca117b248dddf69204a23f [ROCm/clr commit: e82eecf33e8d9d47a77c9b3a84bfa3144bc6cee2] --- projects/clr/hipamd/src/hip_module.cpp | 9 +++++++++ projects/clr/rocclr/device/devkernel.cpp | 3 +++ projects/clr/rocclr/device/devkernel.hpp | 16 +++++++++------- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/projects/clr/hipamd/src/hip_module.cpp b/projects/clr/hipamd/src/hip_module.cpp index b6606fce87..1675baa9f2 100644 --- a/projects/clr/hipamd/src/hip_module.cpp +++ b/projects/clr/hipamd/src/hip_module.cpp @@ -375,6 +375,15 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, if (globalWorkSizeY < blockDimY) blockDimY = globalWorkSizeY; if (globalWorkSizeZ < blockDimZ) blockDimZ = globalWorkSizeZ; + auto device = g_devices[deviceId]->devices()[0]; + // Check if it's a uniform kernel and validate dimensions + if (kernel->getDeviceKernel(*device)->getUniformWorkGroupSize()) { + if (((globalWorkSizeX % blockDimX) != 0) || + ((globalWorkSizeY % blockDimY) != 0) || + ((globalWorkSizeZ % blockDimZ) != 0)) { + return hipErrorInvalidValue; + } + } amd::Command* command = nullptr; hip::Stream* hip_stream = hip::getStream(hStream); status = ihipLaunchKernelCommand(command, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index 14b65047f7..9fdffd70e7 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -569,6 +569,9 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k case KernelField::WgpMode: kernel->SetWGPMode(buf.compare("true") == 0); break; + case KernelField::UniformWrokGroupSize: + kernel->setUniformWorkGroupSize(buf.compare("true") == 0); + break; default: return AMD_COMGR_STATUS_ERROR; } diff --git a/projects/clr/rocclr/device/devkernel.hpp b/projects/clr/rocclr/device/devkernel.hpp index 3c244ff25b..bcc7720ed4 100644 --- a/projects/clr/rocclr/device/devkernel.hpp +++ b/projects/clr/rocclr/device/devkernel.hpp @@ -242,7 +242,8 @@ enum class KernelField : uint8_t { NumSpilledSGPRs = 13, NumSpilledVGPRs = 14, Kind = 15, - WgpMode = 16 + WgpMode = 16, + UniformWrokGroupSize = 17 }; static const std::map ArgFieldMapV3 = @@ -311,8 +312,7 @@ static const std::map ArgAddrSpace {"region", CL_KERNEL_ARG_ADDRESS_PRIVATE} }; -static const std::map KernelFieldMapV3 = -{ +static const std::map KernelFieldMapV3 = { {".symbol", KernelField::SymbolName}, {".reqd_workgroup_size", KernelField::ReqdWorkGroupSize}, {".workgroup_size_hint", KernelField::WorkGroupSizeHint}, @@ -329,7 +329,8 @@ static const std::map KernelFieldMapV3 = {".sgpr_spill_count", KernelField::NumSpilledSGPRs}, {".vgpr_spill_count", KernelField::NumSpilledVGPRs}, {".kind", KernelField::Kind}, - {".workgroup_processor_mode", KernelField::WgpMode} + {".workgroup_processor_mode", KernelField::WgpMode}, + {".uniform_work_group_size", KernelField::UniformWrokGroupSize} }; #endif // defined(USE_COMGR_LIBRARY) @@ -383,12 +384,13 @@ class Kernel : public amd::HeapObject { size_t availableStackSize_; //!< available stack size size_t usedStackSize_; //!< used stack size size_t compileSizeHint_[3]; //!< kernel compiled workgroup size hint - std::string compileVecTypeHint_; //!< kernel compiled vector type hint - bool uniformWorkGroupSize_; //!< uniform work group size option size_t wavesPerSimdHint_; //!< waves per simd hit + size_t constMemSize_; //!< size of user-allocated constant memory + std::string compileVecTypeHint_; //!< kernel compiled vector type hint + int maxOccupancyPerCu_; //!< Max occupancy per compute unit in threads - size_t constMemSize_; //!< size of user-allocated constant memory bool isWGPMode_; //!< kernel compiled in WGP/cumode + bool uniformWorkGroupSize_; //!< uniform work group size option }; //! Default constructor