From d5f37b53aecc9edfade6fff26313931d173535be Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 10 Jan 2017 17:46:11 -0500
Subject: [PATCH] P4 to Git Change 1360479 by rerrabol@RocmOnLC on 2017/01/10
17:32:55
SWDEV-108854 - Support initialization of SGPR and VGPR usage from kernel header object. Changes to fix issue reported by SWDEV-108854
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#18 edit
[ROCm/clr commit: a3c0b6525f7f1cd8efa461449465c62e6f38adfa]
---
.../rocclr/runtime/device/rocm/rocdevice.cpp | 4 ++++
.../rocclr/runtime/device/rocm/rockernel.cpp | 18 ++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
index a11471fb4d..0d5deb8788 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -285,6 +285,10 @@ bool NullDevice::init() {
if (!initCompiler(offlineDevice_)){
return false;
}
+
+ // Return without initializing offline device list
+ return true;
+
#if !defined(WITH_LIGHTNING_COMPILER)
//If there is an HSA enabled device online then skip any offline device
std::vector devices;
diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
index d5fe95568c..a92dbb0807 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
@@ -865,9 +865,23 @@ bool Kernel::init()
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
- workGroupInfo_.usedSGPRs_ = 0;
+
+ // Query kernel header object to initialize the number of
+ // SGPR's and VGPR's used by the kernel
+ const void* kernelHostPtr = nullptr;
+ if (Device::loaderQueryHostAddress(
+ reinterpret_cast<const void*>(kernelCodeHandle_), &kernelHostPtr
+ ) == HSA_STATUS_SUCCESS) {
+ auto akc = reinterpret_cast<const amd_kernel_code_t*>(kernelHostPtr);
+ workGroupInfo_.usedSGPRs_ = akc->wavefront_sgpr_count;
+ workGroupInfo_.usedVGPRs_ = akc->workitem_vgpr_count;
+ }
+ else {
+ workGroupInfo_.usedSGPRs_ = 0;
+ workGroupInfo_.usedVGPRs_ = 0;
+ }
+
workGroupInfo_.usedStackSize_ = 0;
- workGroupInfo_.usedVGPRs_ = 0;
workGroupInfo_.wavefrontPerSIMD_ =
program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
workGroupInfo_.wavefrontSize_ = wavefront_size;