From d2804df5c4e436a16655aaf023bdaacf5f62ddf7 Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 7 Sep 2017 17:12:29 -0400 Subject: [PATCH] P4 to Git Change 1456420 by wchau@wchau_OCL_boltzmann on 2017/09/07 16:56:26 SWDEV-130808 - Back out changelist 1456363 as it causes AMD SDK 2.9.1 Apps test failure when running OpenCL Sanity tests with Brahma build. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#65 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl1.2/CL/cl.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_ext.h#16 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl.hpp#7 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h#32 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.1/CL/cl.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.1/CL/cl_ext.h#9 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.2/CL/cl.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.2/CL/cl_ext.h#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#289 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#571 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#354 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#57 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#59 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#22 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#10 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#276 edit [ROCm/clr commit: 946984cba0756691123327ccd25272959973ce34] --- projects/clr/rocclr/runtime/device/device.hpp | 4 ---- projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp | 1 - projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp | 8 ++------ projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp | 1 - projects/clr/rocclr/runtime/device/pal/paldevice.cpp | 1 - projects/clr/rocclr/runtime/device/pal/palsettings.cpp | 8 ++------ projects/clr/rocclr/runtime/device/pal/palsettings.hpp | 1 - projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp | 1 - projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp | 8 +------- projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp | 3 --- projects/clr/rocclr/runtime/utils/flags.hpp | 2 -- 11 files changed, 5 insertions(+), 33 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index 15d8d631ab..0f53019d22 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -283,10 +283,6 @@ struct Info : public amd::EmbeddedObject { // using the data-parallel execution model. size_t maxWorkGroupSize_; - //! Preferred number of work-items in a work-group executing a kernel - // using the data-parallel execution model. - size_t preferredWorkGroupSize_; - //! 
Number of shader engines in physical GPU size_t numberOfShaderEngines; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index e33ae73aea..a512375433 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -487,7 +487,6 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.maxWorkItemSizes_[0] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[1] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[2] = info_.maxWorkGroupSize_; - info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_; if (settings().hwLDSSize_ != 0) { info_.localMemType_ = CL_LOCAL; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp b/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp index d0a331fef8..c186a307aa 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp @@ -57,8 +57,8 @@ Settings::Settings() { doublePrecision_ = ::CL_KHR_FP64; // Fill workgroup info size - maxWorkGroupSize_ = 1024; - preferredWorkGroupSize_ = 256; + // @todo: revisit the 256 limitation on workgroup size + maxWorkGroupSize_ = 256; hostMemDirectAccess_ = HostMemDisable; @@ -431,10 +431,6 @@ void Settings::override() { maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE; } - if (GPU_PREFERRED_WORKGROUP_SIZE != 0) { - preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE; - } - // Override blit engine type if (GPU_BLIT_ENGINE_TYPE != BlitEngineDefault) { blitEngine_ = GPU_BLIT_ENGINE_TYPE; diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp b/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp index 7c08a5aa5e..8bb3364890 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp @@ -78,7 +78,6 @@ class Settings : public device::Settings { uint maxRenameSize_; 
//!< Maximum size for all renames uint hwLDSSize_; //!< HW local data store size uint maxWorkGroupSize_; //!< Requested workgroup size for this device - uint preferredWorkGroupSize_; //!< Requested preferred workgroup size for this device uint hostMemDirectAccess_; //!< Enables direct access to the host memory amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler uint workloadSplitSize_; //!< Workload split size diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index 03750a6254..202fd63808 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -445,7 +445,6 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.maxWorkItemSizes_[0] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[1] = info_.maxWorkGroupSize_; info_.maxWorkItemSizes_[2] = info_.maxWorkGroupSize_; - info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_; info_.localMemType_ = CL_LOCAL; info_.localMemSize_ = settings().hwLDSSize_; diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp index 331a05b90f..de1675eccd 100644 --- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp @@ -57,8 +57,8 @@ Settings::Settings() { doublePrecision_ = ::CL_KHR_FP64; // Fill workgroup info size - maxWorkGroupSize_ = 1024; - preferredWorkGroupSize_ = 256; + // @todo: revisit the 256 limitation on workgroup size + maxWorkGroupSize_ = 256; hostMemDirectAccess_ = HostMemDisable; @@ -431,10 +431,6 @@ void Settings::override() { maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE; } - if (GPU_PREFERRED_WORKGROUP_SIZE != 0) { - preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE; - } - // Override blit engine type if (GPU_BLIT_ENGINE_TYPE != BlitEngineDefault) { blitEngine_ = GPU_BLIT_ENGINE_TYPE; 
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp index 39d51967f4..638a13eaef 100644 --- a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp @@ -74,7 +74,6 @@ class Settings : public device::Settings { uint maxRenameSize_; //!< Maximum size for all renames uint hwLDSSize_; //!< HW local data store size uint maxWorkGroupSize_; //!< Requested workgroup size for this device - uint preferredWorkGroupSize_;//!< Requested preferred workgroup size for this device uint workloadSplitSize_; //!< Workload split size uint minWorkloadTime_; //!< Minimal workload time in 0.1 ms uint maxWorkloadTime_; //!< Maximum workload time in 0.1 ms diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp index e719fdd9fb..16db78aa32 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp @@ -916,7 +916,6 @@ bool Device::populateOCLDeviceConstants() { info_.maxWorkItemSizes_[0] = std::min(max_workgroup_size[0], max_work_item_size); info_.maxWorkItemSizes_[1] = std::min(max_workgroup_size[1], max_work_item_size); info_.maxWorkItemSizes_[2] = std::min(max_workgroup_size[2], max_work_item_size); - info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_; info_.nativeVectorWidthChar_ = info_.preferredVectorWidthChar_ = 4; info_.nativeVectorWidthShort_ = info_.preferredVectorWidthShort_ = 2; diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp index 0000d14215..a8b9e392b3 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp @@ -22,9 +22,7 @@ Settings::Settings() { enableLocalMemory_ = HSA_LOCAL_MEMORY_ENABLE; enableImageHandle_ = true; - maxWorkGroupSize_ = 1024; 
- preferredWorkGroupSize_ = 256; - + maxWorkGroupSize_ = 256; maxWorkGroupSize2DX_ = 16; maxWorkGroupSize2DY_ = 16; maxWorkGroupSize3DX_ = 4; @@ -134,10 +132,6 @@ void Settings::override() { maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE; } - if (GPU_PREFERRED_WORKGROUP_SIZE != 0) { - preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE; - } - if (!flagIsDefault(GPU_MAX_COMMAND_QUEUES)) { commandQueues_ = GPU_MAX_COMMAND_QUEUES; } diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp index 3f94c3f783..d09d93044b 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp @@ -37,9 +37,6 @@ class Settings : public device::Settings { //! Default max workgroup size for 1D int maxWorkGroupSize_; - //! Preferred workgroup size - uint preferredWorkGroupSize_; - //! Default max workgroup sizes for 2D int maxWorkGroupSize2DX_; int maxWorkGroupSize2DY_; diff --git a/projects/clr/rocclr/runtime/utils/flags.hpp b/projects/clr/rocclr/runtime/utils/flags.hpp index 88ba4b50aa..892fbc3a53 100644 --- a/projects/clr/rocclr/runtime/utils/flags.hpp +++ b/projects/clr/rocclr/runtime/utils/flags.hpp @@ -24,8 +24,6 @@ debug(bool, CPU_USE_ALIGNMENT_MAP, false, \ "Use flag to enable alignment mapping for parameters for CPU") \ release(int, GPU_MAX_WORKGROUP_SIZE, 0, \ "Maximum number of workitems in a workgroup for GPU, 0 -use default") \ -release(int, GPU_PREFERRED_WORKGROUP_SIZE, 0, \ - "Preferred number of workitems in a workgroup for GPU, 0 -use default") \ release(int, GPU_MAX_WORKGROUP_SIZE_2D_X, 0, \ "Maximum number of workitems in a 2D workgroup for GPU, x component, 0 -use default") \ release(int, GPU_MAX_WORKGROUP_SIZE_2D_Y, 0, \