diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index 15d8d631ab..0f53019d22 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -283,10 +283,6 @@ struct Info : public amd::EmbeddedObject { // using the data-parallel execution model. size_t maxWorkGroupSize_; - //! Preferred number of work-items in a work-group executing a kernel - // using the data-parallel execution model. - size_t preferredWorkGroupSize_; - //! Number of shader engines in physical GPU size_t numberOfShaderEngines; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index e33ae73aea..a512375433 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -487,7 +487,6 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.maxWorkItemSizes_[0] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[1] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[2] = info_.maxWorkGroupSize_; - info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_; if (settings().hwLDSSize_ != 0) { info_.localMemType_ = CL_LOCAL; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp b/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp index d0a331fef8..c186a307aa 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp @@ -57,8 +57,8 @@ Settings::Settings() { doublePrecision_ = ::CL_KHR_FP64; // Fill workgroup info size - maxWorkGroupSize_ = 1024; - preferredWorkGroupSize_ = 256; + // @todo: revisit the 256 limitation on workgroup size + maxWorkGroupSize_ = 256; hostMemDirectAccess_ = HostMemDisable; @@ -431,10 +431,6 @@ void Settings::override() { maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE; } - if (GPU_PREFERRED_WORKGROUP_SIZE != 0) { - preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE; - } - // Override blit engine type if (GPU_BLIT_ENGINE_TYPE != BlitEngineDefault) { blitEngine_ = GPU_BLIT_ENGINE_TYPE; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp b/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp index 7c08a5aa5e..8bb3364890 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp @@ -78,7 +78,6 @@ class Settings : public device::Settings { uint maxRenameSize_; //!< Maximum size for all renames uint hwLDSSize_; //!< HW local data store size uint maxWorkGroupSize_; //!< Requested workgroup size for this device - uint preferredWorkGroupSize_; //!< Requested preferred workgroup size for this device uint hostMemDirectAccess_; //!< Enables direct access to the host memory amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler uint workloadSplitSize_; //!< Workload split size diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index 03750a6254..202fd63808 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -445,7 +445,6 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.maxWorkItemSizes_[0] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[1] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[2] = info_.maxWorkGroupSize_; - info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_; info_.localMemType_ = CL_LOCAL; info_.localMemSize_ = settings().hwLDSSize_; diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp index 331a05b90f..de1675eccd 100644 --- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp @@ -57,8 +57,8 @@ Settings::Settings() { doublePrecision_ = ::CL_KHR_FP64; // Fill workgroup info size - maxWorkGroupSize_ = 1024; - preferredWorkGroupSize_ = 256; + // @todo: revisit the 256 limitation on workgroup size + maxWorkGroupSize_ = 256; hostMemDirectAccess_ = HostMemDisable; @@ -431,10 +431,6 @@ void Settings::override() { maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE; } - if (GPU_PREFERRED_WORKGROUP_SIZE != 0) { - preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE; - } - // Override blit engine type if (GPU_BLIT_ENGINE_TYPE != BlitEngineDefault) { blitEngine_ = GPU_BLIT_ENGINE_TYPE; diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp index 39d51967f4..638a13eaef 100644 --- a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp @@ -74,7 +74,6 @@ class Settings : public device::Settings { uint maxRenameSize_; //!< Maximum size for all renames uint hwLDSSize_; //!< HW local data store size uint maxWorkGroupSize_; //!< Requested workgroup size for this device - uint preferredWorkGroupSize_;//!< Requested preferred workgroup size for this device uint workloadSplitSize_; //!< Workload split size uint minWorkloadTime_; //!< Minimal workload time in 0.1 ms uint maxWorkloadTime_; //!< Maximum workload time in 0.1 ms diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp index e719fdd9fb..16db78aa32 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp @@ -916,7 +916,6 @@ bool Device::populateOCLDeviceConstants() { info_.maxWorkItemSizes_[0] = std::min(max_workgroup_size[0], max_work_item_size); info_.maxWorkItemSizes_[1] = std::min(max_workgroup_size[1], max_work_item_size); info_.maxWorkItemSizes_[2] = std::min(max_workgroup_size[2], max_work_item_size); - info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_; info_.nativeVectorWidthChar_ = info_.preferredVectorWidthChar_ = 4; info_.nativeVectorWidthShort_ = info_.preferredVectorWidthShort_ = 2; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp index 0000d14215..a8b9e392b3 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp @@ -22,9 +22,7 @@ Settings::Settings() { enableLocalMemory_ = HSA_LOCAL_MEMORY_ENABLE; enableImageHandle_ = true; - maxWorkGroupSize_ = 1024; - preferredWorkGroupSize_ = 256; - + maxWorkGroupSize_ = 256; maxWorkGroupSize2DX_ = 16; maxWorkGroupSize2DY_ = 16; maxWorkGroupSize3DX_ = 4; @@ -134,10 +132,6 @@ void Settings::override() { maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE; } - if (GPU_PREFERRED_WORKGROUP_SIZE != 0) { - preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE; - } - if (!flagIsDefault(GPU_MAX_COMMAND_QUEUES)) { commandQueues_ = GPU_MAX_COMMAND_QUEUES; } diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp index 3f94c3f783..d09d93044b 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp @@ -37,9 +37,6 @@ class Settings : public device::Settings { //! Default max workgroup size for 1D int maxWorkGroupSize_; - //! Preferred workgroup size - uint preferredWorkGroupSize_; - //! Default max workgroup sizes for 2D int maxWorkGroupSize2DX_; int maxWorkGroupSize2DY_; diff --git a/projects/clr/rocclr/runtime/utils/flags.hpp b/projects/clr/rocclr/runtime/utils/flags.hpp index 88ba4b50aa..892fbc3a53 100644 --- a/projects/clr/rocclr/runtime/utils/flags.hpp +++ b/projects/clr/rocclr/runtime/utils/flags.hpp @@ -24,8 +24,6 @@ debug(bool, CPU_USE_ALIGNMENT_MAP, false, \ "Use flag to enable alignment mapping for parameters for CPU") \ release(int, GPU_MAX_WORKGROUP_SIZE, 0, \ "Maximum number of workitems in a workgroup for GPU, 0 -use default") \ -release(int, GPU_PREFERRED_WORKGROUP_SIZE, 0, \ - "Preferred number of workitems in a workgroup for GPU, 0 -use default") \ release(int, GPU_MAX_WORKGROUP_SIZE_2D_X, 0, \ "Maximum number of workitems in a 2D workgroup for GPU, x component, 0 -use default") \ release(int, GPU_MAX_WORKGROUP_SIZE_2D_Y, 0, \