SWDEV-407533 - [ABI Break]Purge unused env vars

Change-Id: I627950e8ebb6299affc602754a20d442dbe42b14 [ROCm/clr commit: 077311153a]
2023-08-23 13:35:56 -04:00
@@ -684,102 +684,82 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize,
  if (workGroupInfo()->compileSize_[0] == 0) {
    // Find the default local workgroup size, if it wasn't specified
    if (lclWorkSize[0] == 0) {
-      if ((device().settings().overrideLclSet & (1 << (workDim - 1))) == 0) {
-        // Find threads per group
-        size_t thrPerGrp = workGroupInfo()->size_;
+      // Find threads per group
+      size_t thrPerGrp = workGroupInfo()->size_;

-        // Check if kernel uses images
-        if (flags_.imageEna_ &&
-          // and thread group is a multiple value of wavefronts
-          ((thrPerGrp % workGroupInfo()->wavefrontSize_) == 0) &&
-          // and it's 2 or 3-dimensional workload
-          (workDim > 1) && (((gblWorkSize[0] % 16) == 0) && ((gblWorkSize[1] % 16) == 0))) {
-          // Use 8x8 workgroup size if kernel has image writes
-          if (flags_.imageWriteEna_ || (thrPerGrp != device().info().preferredWorkGroupSize_)) {
-            lclWorkSize[0] = 8;
-            lclWorkSize[1] = 8;
-          }
-          else {
-            lclWorkSize[0] = 16;
-            lclWorkSize[1] = 16;
-          }
-          if (workDim == 3) {
-            lclWorkSize[2] = 1;
-          }
+      // Check if kernel uses images
+      if (flags_.imageEna_ &&
+        // and thread group is a multiple value of wavefronts
+        ((thrPerGrp % workGroupInfo()->wavefrontSize_) == 0) &&
+        // and it's 2 or 3-dimensional workload
+        (workDim > 1) && (((gblWorkSize[0] % 16) == 0) && ((gblWorkSize[1] % 16) == 0))) {
+        // Use 8x8 workgroup size if kernel has image writes
+        if (flags_.imageWriteEna_ || (thrPerGrp != device().info().preferredWorkGroupSize_)) {
+          lclWorkSize[0] = 8;
+          lclWorkSize[1] = 8;
        }
        else {
-          size_t tmp = thrPerGrp;
-          // Split the local workgroup into the most efficient way
-          for (uint d = 0; d < workDim; ++d) {
-            size_t div = tmp;
-            for (; (gblWorkSize[d] % div) != 0; div--)
-              ;
-            lclWorkSize[d] = div;
-            tmp /= div;
-          }
+          lclWorkSize[0] = 16;
+          lclWorkSize[1] = 16;
+        }
+        if (workDim == 3) {
+          lclWorkSize[2] = 1;
+        }
+      }
+      else {
+        size_t tmp = thrPerGrp;
+        // Split the local workgroup into the most efficient way
+        for (uint d = 0; d < workDim; ++d) {
+          size_t div = tmp;
+          for (; (gblWorkSize[d] % div) != 0; div--)
+            ;
+          lclWorkSize[d] = div;
+          tmp /= div;
+        }

-          if (!workGroupInfo()->uniformWorkGroupSize_) {
-            // Assuming DWORD access
-            const uint cacheLineMatch = device().info().globalMemCacheLineSize_ >> 2;
+        if (!workGroupInfo()->uniformWorkGroupSize_) {
+          // Assuming DWORD access
+          const uint cacheLineMatch = device().info().globalMemCacheLineSize_ >> 2;

-            // Check if we couldn't find optimal workload
-            if (((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
-                // or size is too small for the cache line
-              (lclWorkSize[0] < cacheLineMatch)) {
-              size_t maxSize = 0;
-              size_t maxDim = 0;
+          // Check if we couldn't find optimal workload
+          if (((lclWorkSize.product() % workGroupInfo()->wavefrontSize_) != 0) ||
+              // or size is too small for the cache line
+            (lclWorkSize[0] < cacheLineMatch)) {
+            size_t maxSize = 0;
+            size_t maxDim = 0;
+            for (uint d = 0; d < workDim; ++d) {
+              if (maxSize < gblWorkSize[d]) {
+                maxSize = gblWorkSize[d];
+                maxDim = d;
+              }
+            }
+            // Use X dimension as high priority. Runtime will assume that
+            // X dimension is more important for the address calculation
+            if ((maxDim != 0) && (gblWorkSize[0] >= (cacheLineMatch / 2))) {
+              lclWorkSize[0] = cacheLineMatch;
+              thrPerGrp /= cacheLineMatch;
+              lclWorkSize[maxDim] = thrPerGrp;
+              for (uint d = 1; d < workDim; ++d) {
+                if (d != maxDim) {
+                  lclWorkSize[d] = 1;
+                }
+              }
+            }
+            else {
+              // Check if a local workgroup has the most optimal size
+              if (thrPerGrp > maxSize) {
+                thrPerGrp = maxSize;
+              }
+              lclWorkSize[maxDim] = thrPerGrp;
              for (uint d = 0; d < workDim; ++d) {
-                if (maxSize < gblWorkSize[d]) {
-                  maxSize = gblWorkSize[d];
-                  maxDim = d;
-                }
-              }
-              // Use X dimension as high priority. Runtime will assume that
-              // X dimension is more important for the address calculation
-              if ((maxDim != 0) && (gblWorkSize[0] >= (cacheLineMatch / 2))) {
-                lclWorkSize[0] = cacheLineMatch;
-                thrPerGrp /= cacheLineMatch;
-                lclWorkSize[maxDim] = thrPerGrp;
-                for (uint d = 1; d < workDim; ++d) {
-                  if (d != maxDim) {
-                    lclWorkSize[d] = 1;
-                  }
-                }
-              }
-              else {
-                // Check if a local workgroup has the most optimal size
-                if (thrPerGrp > maxSize) {
-                  thrPerGrp = maxSize;
-                }
-                lclWorkSize[maxDim] = thrPerGrp;
-                for (uint d = 0; d < workDim; ++d) {
-                  if (d != maxDim) {
-                    lclWorkSize[d] = 1;
-                  }
+                if (d != maxDim) {
+                  lclWorkSize[d] = 1;
                }
              }
            }
          }
        }
      }
-      else {
-        // Use overrides when app doesn't provide workgroup dimensions
-        if (workDim == 1) {
-          lclWorkSize[0] = GPU_MAX_WORKGROUP_SIZE;
-        }
-        else if (workDim == 2) {
-          lclWorkSize[0] = GPU_MAX_WORKGROUP_SIZE_2D_X;
-          lclWorkSize[1] = GPU_MAX_WORKGROUP_SIZE_2D_Y;
-        }
-        else if (workDim == 3) {
-          lclWorkSize[0] = GPU_MAX_WORKGROUP_SIZE_3D_X;
-          lclWorkSize[1] = GPU_MAX_WORKGROUP_SIZE_3D_Y;
-          lclWorkSize[2] = GPU_MAX_WORKGROUP_SIZE_3D_Z;
-        }
-        else {
-          assert(0 && "Invalid workDim!");
-        }
-      }
    }
  }
  else {