From d2804df5c4e436a16655aaf023bdaacf5f62ddf7 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 7 Sep 2017 17:12:29 -0400
Subject: [PATCH] P4 to Git Change 1456420 by wchau@wchau_OCL_boltzmann on
2017/09/07 16:56:26
SWDEV-130808 - Back out changelist 1456363, as it caused AMD SDK 2.9.1 Apps test failures when running the OpenCL Sanity tests with the Brahma build.
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#65 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl1.2/CL/cl.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl1.2/CL/cl_ext.h#16 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.0/CL/cl_ext.h#32 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.1/CL/cl.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.1/CL/cl_ext.h#9 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.2/CL/cl.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/khronos/headers/opencl2.2/CL/cl_ext.h#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#289 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#571 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#354 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#57 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#276 edit
[ROCm/clr commit: 946984cba0756691123327ccd25272959973ce34]
---
projects/clr/rocclr/runtime/device/device.hpp | 4 ----
projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp | 1 -
projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp | 8 ++------
projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp | 1 -
projects/clr/rocclr/runtime/device/pal/paldevice.cpp | 1 -
projects/clr/rocclr/runtime/device/pal/palsettings.cpp | 8 ++------
projects/clr/rocclr/runtime/device/pal/palsettings.hpp | 1 -
projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp | 1 -
projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp | 8 +-------
projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp | 3 ---
projects/clr/rocclr/runtime/utils/flags.hpp | 2 --
11 files changed, 5 insertions(+), 33 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp
index 15d8d631ab..0f53019d22 100644
--- a/projects/clr/rocclr/runtime/device/device.hpp
+++ b/projects/clr/rocclr/runtime/device/device.hpp
@@ -283,10 +283,6 @@ struct Info : public amd::EmbeddedObject {
// using the data-parallel execution model.
size_t maxWorkGroupSize_;
- //! Preferred number of work-items in a work-group executing a kernel
- // using the data-parallel execution model.
- size_t preferredWorkGroupSize_;
-
//! Number of shader engines in physical GPU
size_t numberOfShaderEngines;
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
index e33ae73aea..a512375433 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -487,7 +487,6 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
info_.maxWorkItemSizes_[0] = info_.maxWorkGroupSize_;
info_.maxWorkItemSizes_[1] = info_.maxWorkGroupSize_;
info_.maxWorkItemSizes_[2] = info_.maxWorkGroupSize_;
- info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_;
if (settings().hwLDSSize_ != 0) {
info_.localMemType_ = CL_LOCAL;
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp b/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp
index d0a331fef8..c186a307aa 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpusettings.cpp
@@ -57,8 +57,8 @@ Settings::Settings() {
doublePrecision_ = ::CL_KHR_FP64;
// Fill workgroup info size
- maxWorkGroupSize_ = 1024;
- preferredWorkGroupSize_ = 256;
+ // @todo: revisit the 256 limitation on workgroup size
+ maxWorkGroupSize_ = 256;
hostMemDirectAccess_ = HostMemDisable;
@@ -431,10 +431,6 @@ void Settings::override() {
maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE;
}
- if (GPU_PREFERRED_WORKGROUP_SIZE != 0) {
- preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE;
- }
-
// Override blit engine type
if (GPU_BLIT_ENGINE_TYPE != BlitEngineDefault) {
blitEngine_ = GPU_BLIT_ENGINE_TYPE;
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp b/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp
index 7c08a5aa5e..8bb3364890 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpusettings.hpp
@@ -78,7 +78,6 @@ class Settings : public device::Settings {
uint maxRenameSize_; //!< Maximum size for all renames
uint hwLDSSize_; //!< HW local data store size
uint maxWorkGroupSize_; //!< Requested workgroup size for this device
- uint preferredWorkGroupSize_; //!< Requested preferred workgroup size for this device
uint hostMemDirectAccess_; //!< Enables direct access to the host memory
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
uint workloadSplitSize_; //!< Workload split size
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index 03750a6254..202fd63808 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -445,7 +445,6 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.maxWorkItemSizes_[0] = info_.maxWorkGroupSize_;
info_.maxWorkItemSizes_[1] = info_.maxWorkGroupSize_;
info_.maxWorkItemSizes_[2] = info_.maxWorkGroupSize_;
- info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_;
info_.localMemType_ = CL_LOCAL;
info_.localMemSize_ = settings().hwLDSSize_;
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
index 331a05b90f..de1675eccd 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
@@ -57,8 +57,8 @@ Settings::Settings() {
doublePrecision_ = ::CL_KHR_FP64;
// Fill workgroup info size
- maxWorkGroupSize_ = 1024;
- preferredWorkGroupSize_ = 256;
+ // @todo: revisit the 256 limitation on workgroup size
+ maxWorkGroupSize_ = 256;
hostMemDirectAccess_ = HostMemDisable;
@@ -431,10 +431,6 @@ void Settings::override() {
maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE;
}
- if (GPU_PREFERRED_WORKGROUP_SIZE != 0) {
- preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE;
- }
-
// Override blit engine type
if (GPU_BLIT_ENGINE_TYPE != BlitEngineDefault) {
blitEngine_ = GPU_BLIT_ENGINE_TYPE;
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
index 39d51967f4..638a13eaef 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
@@ -74,7 +74,6 @@ class Settings : public device::Settings {
uint maxRenameSize_; //!< Maximum size for all renames
uint hwLDSSize_; //!< HW local data store size
uint maxWorkGroupSize_; //!< Requested workgroup size for this device
- uint preferredWorkGroupSize_;//!< Requested preferred workgroup size for this device
uint workloadSplitSize_; //!< Workload split size
uint minWorkloadTime_; //!< Minimal workload time in 0.1 ms
uint maxWorkloadTime_; //!< Maximum workload time in 0.1 ms
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
index e719fdd9fb..16db78aa32 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -916,7 +916,6 @@ bool Device::populateOCLDeviceConstants() {
info_.maxWorkItemSizes_[0] = std::min(max_workgroup_size[0], max_work_item_size);
info_.maxWorkItemSizes_[1] = std::min(max_workgroup_size[1], max_work_item_size);
info_.maxWorkItemSizes_[2] = std::min(max_workgroup_size[2], max_work_item_size);
- info_.preferredWorkGroupSize_ = settings().preferredWorkGroupSize_;
info_.nativeVectorWidthChar_ = info_.preferredVectorWidthChar_ = 4;
info_.nativeVectorWidthShort_ = info_.preferredVectorWidthShort_ = 2;
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp
index 0000d14215..a8b9e392b3 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp
@@ -22,9 +22,7 @@ Settings::Settings() {
enableLocalMemory_ = HSA_LOCAL_MEMORY_ENABLE;
enableImageHandle_ = true;
- maxWorkGroupSize_ = 1024;
- preferredWorkGroupSize_ = 256;
-
+ maxWorkGroupSize_ = 256;
maxWorkGroupSize2DX_ = 16;
maxWorkGroupSize2DY_ = 16;
maxWorkGroupSize3DX_ = 4;
@@ -134,10 +132,6 @@ void Settings::override() {
maxWorkGroupSize_ = GPU_MAX_WORKGROUP_SIZE;
}
- if (GPU_PREFERRED_WORKGROUP_SIZE != 0) {
- preferredWorkGroupSize_ = GPU_PREFERRED_WORKGROUP_SIZE;
- }
-
if (!flagIsDefault(GPU_MAX_COMMAND_QUEUES)) {
commandQueues_ = GPU_MAX_COMMAND_QUEUES;
}
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp
index 3f94c3f783..d09d93044b 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.hpp
@@ -37,9 +37,6 @@ class Settings : public device::Settings {
//! Default max workgroup size for 1D
int maxWorkGroupSize_;
- //! Preferred workgroup size
- uint preferredWorkGroupSize_;
-
//! Default max workgroup sizes for 2D
int maxWorkGroupSize2DX_;
int maxWorkGroupSize2DY_;
diff --git a/projects/clr/rocclr/runtime/utils/flags.hpp b/projects/clr/rocclr/runtime/utils/flags.hpp
index 88ba4b50aa..892fbc3a53 100644
--- a/projects/clr/rocclr/runtime/utils/flags.hpp
+++ b/projects/clr/rocclr/runtime/utils/flags.hpp
@@ -24,8 +24,6 @@ debug(bool, CPU_USE_ALIGNMENT_MAP, false, \
"Use flag to enable alignment mapping for parameters for CPU") \
release(int, GPU_MAX_WORKGROUP_SIZE, 0, \
"Maximum number of workitems in a workgroup for GPU, 0 -use default") \
-release(int, GPU_PREFERRED_WORKGROUP_SIZE, 0, \
- "Preferred number of workitems in a workgroup for GPU, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_2D_X, 0, \
"Maximum number of workitems in a 2D workgroup for GPU, x component, 0 -use default") \
release(int, GPU_MAX_WORKGROUP_SIZE_2D_Y, 0, \