diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index 74d551b895..c43c9b8c92 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -852,7 +852,7 @@ public: size_t compileSizeHint_[3]; //!< kernel compiled workgroup size hint std::string compileVecTypeHint_; //!< kernel compiled vector type hint bool uniformWorkGroupSize_; //!< uniform work group size option - bool limitWave_; //!< adaptively limit waves per SH + size_t wavesPerSimdHint_; //!< waves per simd hint }; //! Default constructor diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp index 5eded3fe2c..bca6be5280 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp @@ -59,7 +59,7 @@ const MetaDataConst ArgState[ArgStateTotal] = { "printfid:", KernelArg::PrintfBufId, { 0, 0, 0, 0, 0, 0, 0 } }, { "wsh:", KernelArg::GroupingHint, { 0, 0, 0, 0, 0, 0, 0 } }, { "vth:", KernelArg::VecTypeHint, { 0, 0, 0, 0, 0, 0, 0 } }, - { "limitwave:", KernelArg::LimitWave, { 0, 0, 0, 0, 0, 0, 0 } }, + { "WavesPerSimdHint:", KernelArg::WavesPerSimdHint,{ 0, 0, 0, 0, 0, 0, 0 } }, }; const DataTypeConst DataType[] = @@ -854,6 +854,8 @@ Kernel::Kernel( } // Workgroup info private memory size workGroupInfo_.privateMemSize_ = hwPrivateSize_; + // Default wavesPerSimdHint_ + workGroupInfo_.wavesPerSimdHint_ = ~0U; hsa_ = false; } @@ -2512,13 +2514,13 @@ NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount) } // Process next ... 
continue; - case KernelArg::LimitWave: + case KernelArg::WavesPerSimdHint: { uint tmp; if (!getuint(metaData, &pos, &tmp)) { return false; } - workGroupInfo_.limitWave_ = tmp!=0; + workGroupInfo_.wavesPerSimdHint_ = tmp; } continue; default: diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp index f79f019a94..05db79aad9 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.hpp @@ -197,7 +197,7 @@ public: PrintfBufId, GroupingHint, VecTypeHint, - LimitWave, + WavesPerSimdHint, TotalTypes }; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuwavelimiter.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuwavelimiter.cpp index d75005056c..7fcc0140bb 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuwavelimiter.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuwavelimiter.cpp @@ -344,8 +344,21 @@ void WaveLimiterManager::enable() { auto hwInfo = gpuDev->hwInfo(); // Enable it only for CI+, unless GPU_WAVE_LIMIT_ENABLE is set to 1 // Disabled for SI due to bug #10817 - setIfNotDefault(enable_, GPU_WAVE_LIMIT_ENABLE, - owner_->workGroupInfo()->limitWave_ && gpuDev->settings().ciPlus_); + + if (!flagIsDefault(GPU_WAVE_LIMIT_ENABLE)) { + enable_ = GPU_WAVE_LIMIT_ENABLE; + } + else { + if (gpuDev->settings().ciPlus_) { + if (owner_->workGroupInfo()->wavesPerSimdHint_ == 0) { + enable_ = true; + } + else if (owner_->workGroupInfo()->wavesPerSimdHint_ <= GPU_WAVE_LIMIT_MAX_WAVE) { + //Todo: + //fixed_ = owner_->workGroupInfo()->wavesPerSimdHint_; + } + } + } } }