diff --git a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h index 42206cb1fd..479f3341b1 100644 --- a/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h +++ b/projects/clr/rocclr/compiler/lib/include/v0_8/aclStructs.h @@ -120,8 +120,8 @@ typedef struct _acl_metadata_0_8 { bool enqueue_kernel; // RT_DEVICE_ENQUEUE uint32_t kernel_index; // RT_KERNEL_INDEX uint32_t numHiddenKernelArgs; // RT_NUM_KERNEL_HIDDEN_ARGS - uint32_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT - uint32_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT + size_t wavesPerSimdHint; // RT_WAVES_PER_SIMD_HINT + size_t wsh[3]; // RT_WORK_GROUP_SIZE_HINT size_t vecTypeHintSize; const char *vth; // RT_VEC_TYPE_HINT } aclMetadata_0_8; diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index d5df880689..a69185b57e 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -860,7 +860,36 @@ public: //! Default constructor Kernel(const std::string& name): name_(name), signature_(NULL), hsa_(false) - { memset(&workGroupInfo_, '\0', sizeof(workGroupInfo_)); } + { + // Instead of memset(&workGroupInfo_, '\0', sizeof(workGroupInfo_)); + // Due to std::string not being able to be memset to 0 + workGroupInfo_.size_ = 0; + workGroupInfo_.compileSize_[0] = 0; + workGroupInfo_.compileSize_[1] = 0; + workGroupInfo_.compileSize_[2] = 0; + workGroupInfo_.localMemSize_ = 0; + workGroupInfo_.preferredSizeMultiple_ = 0; + workGroupInfo_.privateMemSize_ = 0; + workGroupInfo_.scratchRegs_ = 0; + workGroupInfo_.wavefrontPerSIMD_ = 0; + workGroupInfo_.wavefrontSize_ = 0; + workGroupInfo_.availableGPRs_ = 0; + workGroupInfo_.usedGPRs_ = 0; + workGroupInfo_.availableSGPRs_ = 0; + workGroupInfo_.usedSGPRs_ = 0; + workGroupInfo_.availableVGPRs_ = 0; + workGroupInfo_.usedVGPRs_ = 0; + workGroupInfo_.availableLDSSize_ = 0; + workGroupInfo_.usedLDSSize_ = 0; + workGroupInfo_.availableStackSize_ = 0; + workGroupInfo_.usedStackSize_ = 0; + workGroupInfo_.compileSizeHint_[0] = 0; + workGroupInfo_.compileSizeHint_[1] = 0; + workGroupInfo_.compileSizeHint_[2] = 0; + workGroupInfo_.compileVecTypeHint_ = ""; + workGroupInfo_.uniformWorkGroupSize_ = false; + workGroupInfo_.wavesPerSimdHint_ = 0; + } //! Default destructor virtual ~Kernel(); diff --git a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp index 22b8955de4..7218f67371 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpukernel.cpp @@ -3440,9 +3440,6 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize) if (error != ACL_SUCCESS) { return false; } - // Set the argList - initArgList(reinterpret_cast(aclArgList)); - delete [] aclArgList; size_t sizeOfWorkGroupSize; error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), @@ -3527,6 +3524,42 @@ HSAILKernel::init(amd::hsa::loader::Symbol *sym, bool finalize) waveLimiter_.enable(dev().settings().ciPlus_); + size_t sizeOfWorkGroupSizeHint = sizeof(workGroupInfo_.compileSizeHint_); + error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), + RT_WORK_GROUP_SIZE_HINT, openClKernelName.c_str(), + workGroupInfo_.compileSizeHint_, &sizeOfWorkGroupSizeHint); + if (error != ACL_SUCCESS) { + return false; + } + + size_t sizeOfVecTypeHint; + error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), + RT_VEC_TYPE_HINT, openClKernelName.c_str(), + NULL, &sizeOfVecTypeHint); + if (error != ACL_SUCCESS) { + return false; + } + + if (0 != sizeOfVecTypeHint) { + char* VecTypeHint = new char[sizeOfVecTypeHint + 1]; + if (NULL == VecTypeHint) { + return false; + } + error = aclQueryInfo(dev().hsaCompiler(), prog().binaryElf(), + RT_VEC_TYPE_HINT, openClKernelName.c_str(), + VecTypeHint, &sizeOfVecTypeHint); + if (error != ACL_SUCCESS) { + return false; + } + VecTypeHint[sizeOfVecTypeHint] = '\0'; + workGroupInfo_.compileVecTypeHint_ = std::string(VecTypeHint); + delete [] VecTypeHint; + } + + // Set the argList + initArgList(reinterpret_cast(aclArgList)); + delete [] aclArgList; + return true; }