From d8942b2e3789776fa020e16f135ac94dcecc3a8a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 5 Nov 2015 18:28:26 -0500 Subject: [PATCH] P4 to Git Change 1208096 by ashi1@ashi1_win50 on 2015/11/05 18:20:58 SWDEV-80864 - HSAIL Metadata Workgroup Size Hint and Vec Type Hint added to HSAIL Runtime Runtime changes required for the use of these two metadata: - Runtime's gpukernel.cpp requires new aclQueries during HSAILKernel::init - One for querying WorkGroupSizeHint's array - Two for size of VecTypeHint and fetching VecTypeHint's string - initArgList needs to be moved to end of HSAILKernel::init to allow createSignature to get non-empty values - Compiler lib's workgroup hint (wsh) needs to match runtime's type (size_t) - In Kernel constructor, instead of using memset which corrupts std::string, specifically set default workGroupInfo struct's variables Also fixed wavesPerSimdHint to use size_t to match runtime. Updated CLAssumptionCheck.cpp since aclMetadata structure was modified. Note: This is the runtime counterpart to submitted CL#1204512. (Post Review#8808, SWDEV-79695) Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/include/v0_8/aclStructs.h#5 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclStructs.h#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#260 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#308 edit ... 
//depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLAssumptionCheck.cpp#48 edit [ROCm/clr commit: b9fcb50bbcb5535d9622109817e40b23f095d3bd] --- .../compiler/lib/include/v0_8/aclStructs.h | 4 +- projects/clr/rocclr/runtime/device/device.hpp | 31 ++++++++++++++- .../rocclr/runtime/device/gpu/gpukernel.cpp | 39 +++++++++++++++++-- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h index 42206cb1fd..479f3341b1 100644 --- a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h +++ b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h @@ -120,8 +120,8 @@ typedef struct _acl_metadata_0_8 { bool enqueue_kernel; // RT_DEVICE_ENQUEUE uint32_t kernel_index; // RT_KERNEL_INDEX uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS - uint32_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT - uint32_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT + size_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT + size_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT size_t vecTypeHintSize; const char *vth; // RT_VEC_TYPE_HINT } aclMetadata_0_8; diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index d5df880689..a69185b57e 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -860,7 +860,36 @@ public: //! 
Default constructor Kernel(const std::string& name): name_(name), signature_(NULL), hsa_(false) - { memset(&workGroupInfo_, '\0', sizeof(workGroupInfo_)); } + { + // Instead of memset(&workGroupInfo_, '\0', sizeof(workGroupInfo_)); + // Due to std::string not being able to be memset to 0 + workGroupInfo_.size_ = 0; + workGroupInfo_.compileSize_[0] = 0; + workGroupInfo_.compileSize_[1] = 0; + workGroupInfo_.compileSize_[2] = 0; + workGroupInfo_.localMemSize_ = 0; + workGroupInfo_.preferredSizeMultiple_ = 0; + workGroupInfo_.privateMemSize_ = 0; + workGroupInfo_.scratchRegs_ = 0; + workGroupInfo_.wavefrontPerSIMD_ = 0; + workGroupInfo_.wavefrontSize_ = 0; + workGroupInfo_.availableGPRs_ = 0; + workGroupInfo_.usedGPRs_ = 0; + workGroupInfo_.availableSGPRs_ = 0; + workGroupInfo_.usedSGPRs_ = 0; + workGroupInfo_.availableVGPRs_ = 0; + workGroupInfo_.usedVGPRs_ = 0; + workGroupInfo_.availableLDSSize_ = 0; + workGroupInfo_.usedLDSSize_ = 0; + workGroupInfo_.availableStackSize_ = 0; + workGroupInfo_.usedStackSize_ = 0; + workGroupInfo_.compileSizeHint_[0] = 0; + workGroupInfo_.compileSizeHint_[1] = 0; + workGroupInfo_.compileSizeHint_[2] = 0; + workGroupInfo_.compileVecTypeHint_ = ""; + workGroupInfo_.uniformWorkGroupSize_ = false; + workGroupInfo_.wavesPerSimdHint_ = 0; + } //! 
Default destructor virtual ~Kernel(); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp index 22b8955de4..7218f67371 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp @@ -3440,9 +3440,6 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize) if (error != ACL_SUCCESS) { return false; } - // Set the argList - initArgList(reinterpret_cast(aclArgList)); - delete [] aclArgList; size_t sizeOfWorkGroupSize; error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), @@ -3527,6 +3524,42 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize) waveLimiter_.enable(dev().settings().ciPlus_); + size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_); + error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), + RT_WORK_GROUP_SIZE_HINT, openClKernelName.c_str(), + workGroupInfo_.compileSizeHint_, &sizeOfWorkGroupSizeHint); + if (error != ACL_SUCCESS) { + return false; + } + + size_t sizeOfVecTypeHint; + error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), + RT_VEC_TYPE_HINT, openClKernelName.c_str(), + NULL, &sizeOfVecTypeHint); + if (error != ACL_SUCCESS) { + return false; + } + + if (0 != sizeOfVecTypeHint) { + char* VecTypeHint = new char[sizeOfVecTypeHint + 1]; + if (NULL == VecTypeHint) { + return false; + } + error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), + RT_VEC_TYPE_HINT, openClKernelName.c_str(), + VecTypeHint, &sizeOfVecTypeHint); + if (error != ACL_SUCCESS) { + return false; + } + VecTypeHint[sizeOfVecTypeHint] = '\0'; + workGroupInfo_.compileVecTypeHint_ = std::string(VecTypeHint); + delete [] VecTypeHint; + } + + // Set the argList + initArgList(reinterpret_cast(aclArgList)); + delete [] aclArgList; + return true; }