From d8942b2e3789776fa020e16f135ac94dcecc3a8a Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 5 Nov 2015 18:28:26 -0500
Subject: [PATCH] P4 to Git Change 1208096 by ashi1@ashi1_win50 on 2015/11/05
18:20:58
SWDEV-80864 - HSAIL Metadata Workgroup Size Hint and Vec Type Hint added to HSAIL Runtime
Runtime changes required for the use of these two metadata:
- Runtime's gpukernel.cpp requires new aclQueryInfo calls during HSAILKernel::init
- One for querying WorkGroupSizeHint's array
- Two for VecTypeHint: one for its size and one for fetching its string
- initArgList needs to be moved to the end of HSAILKernel::init to allow createSignature to get non-empty values
- Compiler lib's workgroup hint (wsh) needs to match runtime's type (size_t)
- In the Kernel constructor, explicitly set each of the workGroupInfo struct's variables to its default instead of using memset, which corrupts std::string
Also fixed wavesPerSimdHint to use size_t to match runtime.
Updated CLAssumptionCheck.cpp since aclMetadata structure was modified.
Note: This is the runtime counterpart to submitted CL#1204512. (Post Review#8808, SWDEV-79695)
Affected files ...
... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/include/v0_8/aclStructs.h#5 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclStructs.h#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#260 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#308 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLAssumptionCheck.cpp#48 edit
[ROCm/clr commit: b9fcb50bbcb5535d9622109817e40b23f095d3bd]
---
.../compiler/lib/include/v0_8/aclStructs.h | 4 +-
projects/clr/rocclr/runtime/device/device.hpp | 31 ++++++++++++++-
.../rocclr/runtime/device/gpu/gpukernel.cpp | 39 +++++++++++++++++--
3 files changed, 68 insertions(+), 6 deletions(-)
diff --git a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h
index 42206cb1fd..479f3341b1 100644
--- a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h
+++ b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h
@@ -120,8 +120,8 @@ typedef struct _acl_metadata_0_8 {
bool enqueue_kernel; // RT_DEVICE_ENQUEUE
uint32_t kernel_index; // RT_KERNEL_INDEX
uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS
- uint32_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT
- uint32_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT
+ size_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT
+ size_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT
size_t vecTypeHintSize;
const char *vth; // RT_VEC_TYPE_HINT
} aclMetadata_0_8;
diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp
index d5df880689..a69185b57e 100644
--- a/projects/clr/rocclr/runtime/device/device.hpp
+++ b/projects/clr/rocclr/runtime/device/device.hpp
@@ -860,7 +860,36 @@ public:
//! Default constructor
Kernel(const std::string& name): name_(name), signature_(NULL), hsa_(false)
- { memset(&workGroupInfo_, '\0', sizeof(workGroupInfo_)); }
+ {
+ // Instead of memset(&workGroupInfo_, '\0', sizeof(workGroupInfo_));
+ // Due to std::string not being able to be memset to 0
+ workGroupInfo_.size_ = 0;
+ workGroupInfo_.compileSize_[0] = 0;
+ workGroupInfo_.compileSize_[1] = 0;
+ workGroupInfo_.compileSize_[2] = 0;
+ workGroupInfo_.localMemSize_ = 0;
+ workGroupInfo_.preferredSizeMultiple_ = 0;
+ workGroupInfo_.privateMemSize_ = 0;
+ workGroupInfo_.scratchRegs_ = 0;
+ workGroupInfo_.wavefrontPerSIMD_ = 0;
+ workGroupInfo_.wavefrontSize_ = 0;
+ workGroupInfo_.availableGPRs_ = 0;
+ workGroupInfo_.usedGPRs_ = 0;
+ workGroupInfo_.availableSGPRs_ = 0;
+ workGroupInfo_.usedSGPRs_ = 0;
+ workGroupInfo_.availableVGPRs_ = 0;
+ workGroupInfo_.usedVGPRs_ = 0;
+ workGroupInfo_.availableLDSSize_ = 0;
+ workGroupInfo_.usedLDSSize_ = 0;
+ workGroupInfo_.availableStackSize_ = 0;
+ workGroupInfo_.usedStackSize_ = 0;
+ workGroupInfo_.compileSizeHint_[0] = 0;
+ workGroupInfo_.compileSizeHint_[1] = 0;
+ workGroupInfo_.compileSizeHint_[2] = 0;
+ workGroupInfo_.compileVecTypeHint_ = "";
+ workGroupInfo_.uniformWorkGroupSize_ = false;
+ workGroupInfo_.wavesPerSimdHint_ = 0;
+ }
//! Default destructor
virtual ~Kernel();
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp
index 22b8955de4..7218f67371 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp
@@ -3440,9 +3440,6 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
if (error != ACL_SUCCESS) {
return false;
}
- // Set the argList
- initArgList(reinterpret_cast(aclArgList));
- delete [] aclArgList;
size_t sizeOfWorkGroupSize;
error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(),
@@ -3527,6 +3524,42 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize)
waveLimiter_.enable(dev().settings().ciPlus_);
+ size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_);
+ error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(),
+ RT_WORK_GROUP_SIZE_HINT, openClKernelName.c_str(),
+ workGroupInfo_.compileSizeHint_, &sizeOfWorkGroupSizeHint);
+ if (error != ACL_SUCCESS) {
+ return false;
+ }
+
+ size_t sizeOfVecTypeHint;
+ error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(),
+ RT_VEC_TYPE_HINT, openClKernelName.c_str(),
+ NULL, &sizeOfVecTypeHint);
+ if (error != ACL_SUCCESS) {
+ return false;
+ }
+
+ if (0 != sizeOfVecTypeHint) {
+ char* VecTypeHint = new char[sizeOfVecTypeHint + 1];
+ if (NULL == VecTypeHint) {
+ return false;
+ }
+ error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(),
+ RT_VEC_TYPE_HINT, openClKernelName.c_str(),
+ VecTypeHint, &sizeOfVecTypeHint);
+ if (error != ACL_SUCCESS) {
+ return false;
+ }
+ VecTypeHint[sizeOfVecTypeHint] = '\0';
+ workGroupInfo_.compileVecTypeHint_ = std::string(VecTypeHint);
+ delete [] VecTypeHint;
+ }
+
+ // Set the argList
+ initArgList(reinterpret_cast(aclArgList));
+ delete [] aclArgList;
+
return true;
}