From d5f37b53aecc9edfade6fff26313931d173535be Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 10 Jan 2017 17:46:11 -0500 Subject: [PATCH] P4 to Git Change 1360479 by rerrabol@RocmOnLC on 2017/01/10 17:32:55 SWDEV-108854 - Support initialization of SGPR and VGPR usage from kernel header object. Changes to fix issue reported by SWDEV-108854 Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#18 edit [ROCm/clr commit: a3c0b6525f7f1cd8efa461449465c62e6f38adfa] --- .../rocclr/runtime/device/rocm/rocdevice.cpp | 4 ++++ .../rocclr/runtime/device/rocm/rockernel.cpp | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp index a11471fb4d..0d5deb8788 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp @@ -285,6 +285,10 @@ bool NullDevice::init() { if (!initCompiler(offlineDevice_)){ return false; } + + // Return without initializing offline device list + return true; + #if !defined(WITH_LIGHTNING_COMPILER) //If there is an HSA enabled device online then skip any offline device std::vector devices; diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp index d5fe95568c..a92dbb0807 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp @@ -865,9 +865,23 @@ bool Kernel::init() workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_; workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_; workGroupInfo_.preferredSizeMultiple_ = wavefront_size; - workGroupInfo_.usedSGPRs_ = 0; + + // Query kernel header object to initialize the number of + // SGPR's and VGPR's used by the kernel + const void* 
kernelHostPtr = nullptr; + if (Device::loaderQueryHostAddress( + reinterpret_cast<const void*>(kernelCodeHandle_), &kernelHostPtr + ) == HSA_STATUS_SUCCESS) { + auto akc = reinterpret_cast<const amd_kernel_code_t*>(kernelHostPtr); + workGroupInfo_.usedSGPRs_ = akc->wavefront_sgpr_count; + workGroupInfo_.usedVGPRs_ = akc->workitem_vgpr_count; + } + else { + workGroupInfo_.usedSGPRs_ = 0; + workGroupInfo_.usedVGPRs_ = 0; + } + workGroupInfo_.usedStackSize_ = 0; - workGroupInfo_.usedVGPRs_ = 0; workGroupInfo_.wavefrontPerSIMD_ = program_->dev().info().maxWorkItemSizes_[0] / wavefront_size; workGroupInfo_.wavefrontSize_ = wavefront_size;