P4 to Git Change 1594730 by asalmanp@asalmanp-ocl-stg on 2018/08/16 17:32:14

SWDEV-160930 - SPECworkstation 3 benchmark GPU Compute tests fail. Root cause: the Caffe compute benchmark fails within the SPECworkstation app because one of Caffe's OCL kernels is launched with a local_work_size of 1024, causing the clEnqueueNDRangeKernel API to return CL_INVALID_WORK_GROUP_SIZE (the maximum allowable work-group size is 256). Proposed workaround: in order to run a kernel with a local_work_size of 1024, we check the number of VGPRs used by the kernel, and if the kernel is not using all the available VGPRs, we let the kernel use a local_work_size of up to 1024. ReviewURLBoard = http://ocltc.amd.com/reviews/r/15638/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#58 edit
2018-08-16 17:49:03 -04:00
@@ -716,7 +716,10 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
    workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
        workGroupInfo_.compileSize_[2];
  } else {
-    workGroupInfo_.size_ = dev().info().preferredWorkGroupSize_;
+      size_t nItems = (workGroupInfo_.availableVGPRs_ / workGroupInfo_.usedVGPRs_) *
+        dev().hwInfo()->simdPerCU_ * workGroupInfo_.wavefrontSize_;
+      workGroupInfo_.size_ = nItems > dev().info().preferredWorkGroupSize_ ?
+        std::min(size_t(1024) , nItems) : dev().info().preferredWorkGroupSize_;
  }

  // Pull out printf metadata from the ELF
@@ -1435,7 +1438,12 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
  // Copy wavefront size
  workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;

-  workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
+
+  size_t nItems = (workGroupInfo_.availableVGPRs_ / workGroupInfo_.usedVGPRs_) *
+    dev().hwInfo()->simdPerCU_ * workGroupInfo_.wavefrontSize_;
+  workGroupInfo_.size_ = nItems > kernelMD->mCodeProps.mMaxFlatWorkGroupSize ?
+    std::min(size_t(1024), nItems) : kernelMD->mCodeProps.mMaxFlatWorkGroupSize;
+
  if (workGroupInfo_.size_ == 0) {
    return false;
  }