From ba7dc25ca8ba00b463fbd1718be5ffb6050c8bc1 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 21 Jan 2020 18:24:20 -0500 Subject: [PATCH] P4 to Git Change 2061164 by gandryey@gera-win10 on 2020/01/21 18:19:35 SWDEV-197836 - Drop the use of llvm header files in opencl runtime - Remove llvm::AMDGPU::HSAMD::Kernel::Metadata usage Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#33 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#78 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#40 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#86 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#55 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#28 edit [ROCm/clr commit: 7cb078bf637a8122a35f692a57df729a09c7ef84] --- .../clr/rocclr/runtime/device/devkernel.cpp | 142 +++++++++--------- .../clr/rocclr/runtime/device/devkernel.hpp | 45 ++++-- .../clr/rocclr/runtime/device/devprogram.cpp | 5 +- .../clr/rocclr/runtime/device/devprogram.hpp | 24 +-- .../rocclr/runtime/device/pal/palkernel.cpp | 40 ++--- .../rocclr/runtime/device/pal/palkernel.hpp | 7 +- .../rocclr/runtime/device/rocm/rockernel.cpp | 51 +++---- .../rocclr/runtime/device/rocm/rockernel.hpp | 19 --- 8 files changed, 132 insertions(+), 201 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp index 55429f87f8..3cb05bfc8f 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.cpp +++ b/projects/clr/rocclr/runtime/device/devkernel.cpp @@ -215,22 +215,26 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key, return AMD_COMGR_STATUS_ERROR; } - KernelMD* kernelMD = static_cast(data); + device::Kernel* kernel = static_cast(data); switch (itAttrField->second) { case AttrField::ReqdWorkGroupSize: { status = amd::Comgr::get_metadata_list_size(value, &size); if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) { + std::vector wrkSize; for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) { amd_comgr_metadata_node_t workgroupSize; status = amd::Comgr::index_list_metadata(value, i, &workgroupSize); if (status == AMD_COMGR_STATUS_SUCCESS && getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) { - kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str())); + wrkSize.push_back(atoi(buf.c_str())); } amd::Comgr::destroy_metadata(workgroupSize); } + if (!wrkSize.empty()) { + kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]); + } } } break; @@ -238,31 +242,31 @@ static amd_comgr_status_t populateAttrs(const amd_comgr_metadata_node_t key, { status = amd::Comgr::get_metadata_list_size(value, &size); if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) { + std::vector hintSize; for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) { amd_comgr_metadata_node_t workgroupSizeHint; status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint); if (status == AMD_COMGR_STATUS_SUCCESS && getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) { - kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str())); + hintSize.push_back(atoi(buf.c_str())); } amd::Comgr::destroy_metadata(workgroupSizeHint); } + if (!hintSize.empty()) { + kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]); + } } } break; case AttrField::VecTypeHint: - { - if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) { - kernelMD->mAttrs.mVecTypeHint = buf; - } + if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) { + kernel->setVecTypeHint(buf); } break; case AttrField::RuntimeHandle: - { - if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) { - kernelMD->mAttrs.mRuntimeHandle = buf; - } + if (getMetaBuf(value,&buf) == AMD_COMGR_STATUS_SUCCESS) { + kernel->setRuntimeHandle(buf); } break; default: @@ -299,43 +303,47 @@ static amd_comgr_status_t populateCodeProps(const amd_comgr_metadata_node_t key, status = getMetaBuf(value, &buf); } - KernelMD* kernelMD = static_cast(data); + device::Kernel* kernel = static_cast(data); switch (itCodePropField->second) { case CodePropField::KernargSegmentSize: - kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str()); + kernel->SetKernargSegmentByteSize(atoi(buf.c_str())); break; case CodePropField::GroupSegmentFixedSize: - kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str()); + kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str())); break; case CodePropField::PrivateSegmentFixedSize: - kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str()); + kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str())); break; case CodePropField::KernargSegmentAlign: - kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str()); + kernel->SetKernargSegmentAlignment(atoi(buf.c_str())); break; case CodePropField::WavefrontSize: - kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str()); + kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str()); break; case CodePropField::NumSGPRs: - kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str()); + kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str()); break; case CodePropField::NumVGPRs: - kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str()); + kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str()); break; case CodePropField::MaxFlatWorkGroupSize: - kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str()); + kernel->workGroupInfo()->size_ = atoi(buf.c_str()); break; - case CodePropField::IsDynamicCallStack: - kernelMD->mCodeProps.mIsDynamicCallStack = (buf.compare("true") == 0); + case CodePropField::IsDynamicCallStack: { + size_t mIsDynamicCallStack = (buf.compare("true") == 0); + } break; - case CodePropField::IsXNACKEnabled: - kernelMD->mCodeProps.mIsXNACKEnabled = (buf.compare("true") == 0); + case CodePropField::IsXNACKEnabled: { + size_t mIsXNACKEnabled = (buf.compare("true") == 0); + } break; - case CodePropField::NumSpilledSGPRs: - kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str()); + case CodePropField::NumSpilledSGPRs: { + size_t mNumSpilledSGPRs = atoi(buf.c_str()); + } break; - case CodePropField::NumSpilledVGPRs: - kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str()); + case CodePropField::NumSpilledVGPRs: { + size_t mNumSpilledVGPRs = atoi(buf.c_str()); + } break; default: return AMD_COMGR_STATUS_ERROR; @@ -501,76 +509,86 @@ static amd_comgr_status_t populateKernelMetaV3(const amd_comgr_metadata_node_t k return AMD_COMGR_STATUS_ERROR; } - KernelMD* kernelMD = static_cast(data); + device::Kernel* kernel = static_cast(data); switch (itKernelField->second) { case KernelField::ReqdWorkGroupSize: status = amd::Comgr::get_metadata_list_size(value, &size); if (size == 3 && status == AMD_COMGR_STATUS_SUCCESS) { + std::vector wrkSize; for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) { amd_comgr_metadata_node_t workgroupSize; status = amd::Comgr::index_list_metadata(value, i, &workgroupSize); if (status == AMD_COMGR_STATUS_SUCCESS && getMetaBuf(workgroupSize, &buf) == AMD_COMGR_STATUS_SUCCESS) { - kernelMD->mAttrs.mReqdWorkGroupSize.push_back(atoi(buf.c_str())); + wrkSize.push_back(atoi(buf.c_str())); } amd::Comgr::destroy_metadata(workgroupSize); } + if (!wrkSize.empty()) { + kernel->setReqdWorkGroupSize(wrkSize[0], wrkSize[1], wrkSize[2]); + } } break; case KernelField::WorkGroupSizeHint: status = amd::Comgr::get_metadata_list_size(value, &size); if (status == AMD_COMGR_STATUS_SUCCESS && size == 3) { + std::vector hintSize; for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) { amd_comgr_metadata_node_t workgroupSizeHint; status = amd::Comgr::index_list_metadata(value, i, &workgroupSizeHint); if (status == AMD_COMGR_STATUS_SUCCESS && getMetaBuf(workgroupSizeHint, &buf) == AMD_COMGR_STATUS_SUCCESS) { - kernelMD->mAttrs.mWorkGroupSizeHint.push_back(atoi(buf.c_str())); + hintSize.push_back(atoi(buf.c_str())); } amd::Comgr::destroy_metadata(workgroupSizeHint); } + if (!hintSize.empty()) { + kernel->setWorkGroupSizeHint(hintSize[0], hintSize[1], hintSize[2]); + } } break; case KernelField::VecTypeHint: - kernelMD->mAttrs.mVecTypeHint = buf; + kernel->setVecTypeHint(buf); break; case KernelField::DeviceEnqueueSymbol: - kernelMD->mAttrs.mRuntimeHandle = buf; + kernel->setRuntimeHandle(buf); break; case KernelField::KernargSegmentSize: - kernelMD->mCodeProps.mKernargSegmentSize = atoi(buf.c_str()); + kernel->SetKernargSegmentByteSize(atoi(buf.c_str())); break; case KernelField::GroupSegmentFixedSize: - kernelMD->mCodeProps.mGroupSegmentFixedSize = atoi(buf.c_str()); + kernel->SetWorkgroupGroupSegmentByteSize(atoi(buf.c_str())); break; case KernelField::PrivateSegmentFixedSize: - kernelMD->mCodeProps.mPrivateSegmentFixedSize = atoi(buf.c_str()); + kernel->SetWorkitemPrivateSegmentByteSize(atoi(buf.c_str())); break; case KernelField::KernargSegmentAlign: - kernelMD->mCodeProps.mKernargSegmentAlign = atoi(buf.c_str()); + kernel->SetKernargSegmentAlignment(atoi(buf.c_str())); break; case KernelField::WavefrontSize: - kernelMD->mCodeProps.mWavefrontSize = atoi(buf.c_str()); + kernel->workGroupInfo()->wavefrontSize_ = atoi(buf.c_str()); break; case KernelField::NumSGPRs: - kernelMD->mCodeProps.mNumSGPRs = atoi(buf.c_str()); + kernel->workGroupInfo()->usedSGPRs_ = atoi(buf.c_str()); break; case KernelField::NumVGPRs: - kernelMD->mCodeProps.mNumVGPRs = atoi(buf.c_str()); + kernel->workGroupInfo()->usedVGPRs_ = atoi(buf.c_str()); break; case KernelField::MaxFlatWorkGroupSize: - kernelMD->mCodeProps.mMaxFlatWorkGroupSize = atoi(buf.c_str()); + kernel->workGroupInfo()->size_ = atoi(buf.c_str()); break; - case KernelField::NumSpilledSGPRs: - kernelMD->mCodeProps.mNumSpilledSGPRs = atoi(buf.c_str()); + case KernelField::NumSpilledSGPRs: { + size_t mNumSpilledSGPRs = atoi(buf.c_str()); + } break; - case KernelField::NumSpilledVGPRs: - kernelMD->mCodeProps.mNumSpilledVGPRs = atoi(buf.c_str()); + case KernelField::NumSpilledVGPRs: { + size_t mNumSpilledVGPRs = atoi(buf.c_str()); + } break; case KernelField::SymbolName: - kernelMD->mSymbolName = buf; + kernel->SetSymbolName(buf); break; default: return AMD_COMGR_STATUS_ERROR; @@ -1087,8 +1105,7 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a // ================================================================================================ #if defined(USE_COMGR_LIBRARY) -bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode, - KernelMD* kernelMD) { +bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode) { InitParameters(kernelMetaNode); @@ -1105,8 +1122,10 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta amd_comgr_metadata_node_t symbolName; status = amd::Comgr::metadata_lookup(kernelMetaNode, "SymbolName", &symbolName); if (status == AMD_COMGR_STATUS_SUCCESS) { - status = getMetaBuf(symbolName, &(kernelMD->mSymbolName)); + std::string name; + status = getMetaBuf(symbolName, &name); amd::Comgr::destroy_metadata(symbolName); + SetSymbolName(name); } amd_comgr_metadata_node_t attrMeta; @@ -1114,7 +1133,7 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::iterate_map_metadata(attrMeta, populateAttrs, - static_cast(kernelMD)); + static_cast(this)); amd::Comgr::destroy_metadata(attrMeta); } } @@ -1127,14 +1146,14 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::iterate_map_metadata(codePropsMeta, populateCodeProps, - static_cast(kernelMD)); + static_cast(this)); amd::Comgr::destroy_metadata(codePropsMeta); } } break; case 3: { status = amd::Comgr::iterate_map_metadata(kernelMetaNode, populateKernelMetaV3, - static_cast(kernelMD)); + static_cast(this)); } break; default: @@ -1146,25 +1165,6 @@ bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMeta return false; } - // Setup the workgroup info based on the attributes and code properties - if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) { - const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize; - workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0]; - workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1]; - workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2]; - } - - if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) { - const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint; - workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0]; - workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1]; - workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2]; - } - - if (!kernelMD->mAttrs.mVecTypeHint.empty()) { - workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str(); - } - return true; } diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp index bb8ed716aa..8cbcce8173 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.hpp +++ b/projects/clr/rocclr/runtime/device/devkernel.hpp @@ -64,14 +64,6 @@ struct KernelParameterDescriptor { } #if defined(USE_COMGR_LIBRARY) -namespace llvm { - namespace AMDGPU { - namespace HSAMD { - namespace Kernel { - struct Metadata; -}}}} -typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; - //! Runtime handle structure for device enqueue struct RuntimeHandle { uint64_t kernel_handle; //!< Pointer to amd_kernel_code_s or kernel_descriptor_t @@ -80,7 +72,6 @@ struct RuntimeHandle { }; #include "amd_comgr.h" -#include "llvm/Support/AMDGPUMetadata.h" // for Code Object V3 enum class ArgField : uint8_t { @@ -387,6 +378,8 @@ class Kernel : public amd::HeapObject { //! Returns the kernel info structure const WorkGroupInfo* workGroupInfo() const { return &workGroupInfo_; } + //! Returns the kernel info structure for filling in + WorkGroupInfo* workGroupInfo() { return &workGroupInfo_; } //! Returns the kernel signature const amd::KernelSignature& signature() const { return *signature_; } @@ -438,6 +431,9 @@ class Kernel : public amd::HeapObject { void setPreferredSizeMultiple(size_t size) { workGroupInfo_.preferredSizeMultiple_ = size; } + const std::string& RuntimeHandle() const { return runtimeHandle_; } + void setRuntimeHandle(const std::string& handle) { runtimeHandle_ = handle; } + //! Return the build log const std::string& buildLog() const { return buildLog_; } @@ -476,19 +472,29 @@ class Kernel : public amd::HeapObject { amd::NDRange& lclWorkSize //!< Calculated local work size ) const; + const uint64_t KernelCodeHandle() const { return kernelCodeHandle_; } + + const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; } + void SetWorkgroupGroupSegmentByteSize(uint32_t size) { workgroupGroupSegmentByteSize_ = size; } + + const uint32_t WorkitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; } + void SetWorkitemPrivateSegmentByteSize(uint32_t size) { workitemPrivateSegmentByteSize_ = size; } + + const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; } + void SetKernargSegmentByteSize(uint32_t size) { kernargSegmentByteSize_ = size; } + + const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; } + void SetKernargSegmentAlignment(uint32_t align) { kernargSegmentAlignment_ = align; } + + void SetSymbolName(const std::string& name) { symbolName_ = name; } + protected: //! Initializes the abstraction layer kernel parameters #if defined(USE_COMGR_LIBRARY) void InitParameters(const amd_comgr_metadata_node_t kernelMD); - //! Get ther kernel metadata - bool GetKernelMetadata(const amd_comgr_metadata_node_t programMD, - const std::string& name, - amd_comgr_metadata_node_t* kernelNode); - //! Retrieve kernel attribute and code properties metadata - bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode, - KernelMD* kernelMD); + bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode); //! Retrieve the available SGPRs and VGPRs bool SetAvailableSgprVgpr(const std::string& targetIdent); @@ -524,6 +530,13 @@ class Kernel : public amd::HeapObject { std::string buildLog_; //!< build log std::vector printf_; //!< Format strings for GPU printf support WaveLimiterManager waveLimiter_; //!< adaptively control number of waves + std::string runtimeHandle_; //!< Runtime handle for context loader + + uint64_t kernelCodeHandle_ = 0; //!< Kernel code handle (aka amd_kernel_code_t) + uint32_t workgroupGroupSegmentByteSize_ = 0; + uint32_t workitemPrivateSegmentByteSize_ = 0; + uint32_t kernargSegmentByteSize_ = 0; //!< Size of kernel argument buffer + uint32_t kernargSegmentAlignment_ = 0; union Flags { struct { diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp index 645c84b31c..2e810586e1 100644 --- a/projects/clr/rocclr/runtime/device/devprogram.cpp +++ b/projects/clr/rocclr/runtime/device/devprogram.cpp @@ -66,8 +66,7 @@ Program::Program(amd::Device& device, amd::Program& owner) buildError_(CL_SUCCESS), machineTarget_(nullptr), globalVariableTotalSize_(0), - programOptions_(nullptr), - metadata_{0} + programOptions_(nullptr) { memset(&binOpts_, 0, sizeof(binOpts_)); binOpts_.struct_size = sizeof(binOpts_); @@ -92,8 +91,6 @@ Program::~Program() { amd::Comgr::destroy_metadata(kernelMeta.second); } amd::Comgr::destroy_metadata(metadata_); -#else - delete metadata_; #endif } } diff --git a/projects/clr/rocclr/runtime/device/devprogram.hpp b/projects/clr/rocclr/runtime/device/devprogram.hpp index 33a18b3874..66626472d3 100644 --- a/projects/clr/rocclr/runtime/device/devprogram.hpp +++ b/projects/clr/rocclr/runtime/device/devprogram.hpp @@ -11,24 +11,8 @@ #if defined(USE_COMGR_LIBRARY) #include "amd_comgr.h" - -namespace llvm { - namespace AMDGPU { - namespace HSAMD { - struct Metadata; - namespace Kernel { - struct Metadata; -}}}} - -#define LC_METADATA 1 -typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; -typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; #endif // defined(USE_COMGR_LIBRARY) -#ifndef LC_METADATA -typedef char CodeObjectMD; -#endif - namespace amd { namespace hsa { namespace loader { @@ -124,11 +108,9 @@ class Program : public amd::HeapObject { #if defined(USE_COMGR_LIBRARY) - amd_comgr_metadata_node_t metadata_; //!< COMgr metadata - uint32_t codeObjectVer_; //!< version of code object + amd_comgr_metadata_node_t metadata_ = {}; //!< COMgr metadata + uint32_t codeObjectVer_; //!< version of code object std::map kernelMetadataMap_; //!< Map of kernel metadata -#else - CodeObjectMD* metadata_; //!< Runtime metadata #endif std::vector undef_mem_obj_; @@ -227,8 +209,6 @@ class Program : public amd::HeapObject { } const uint32_t codeObjectVer() const { return codeObjectVer_; } -#else - const CodeObjectMD* metadata() const { return metadata_; } #endif //! Get the machine target for the program diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index 33f9e39f40..198beb8088 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -9,13 +9,6 @@ #include "platform/commandqueue.hpp" #include "utils/options.hpp" #include "acl.h" - -#if defined(USE_COMGR_LIBRARY) -#include "llvm/Support/AMDGPUMetadata.h" - -typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -#endif // defined(USE_COMGR_LIBRARY) - #include #include #include @@ -99,9 +92,7 @@ HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compi compileOptions_(compileOptions), index_(0), code_(0), - codeSize_(0), - workgroupGroupSegmentByteSize_(0), - kernargSegmentByteSize_(0) { + codeSize_(0) { flags_.hsa_ = true; } @@ -414,15 +405,6 @@ const LightningProgram& LightningKernel::prog() const { } #if defined(USE_COMGR_LIBRARY) -static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) { - for (const KernelMD& kernelMD : programMD->mKernels) { - if (kernelMD.mName == name) { - return &kernelMD; - } - } - return nullptr; -} - bool LightningKernel::init() { flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false; @@ -432,15 +414,13 @@ bool LightningKernel::init() { return false; } - KernelMD kernelMD; - if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) { + if (!GetAttrCodePropMetadata(*kernelMetaNode)) { return false; } - symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName; - - workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize; - kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize; + if (codeObjectVer() == 2) { + symbolName_ = name(); + } // Copy codeobject of this kernel from the program CPU segment hsa_agent_t agent; @@ -456,13 +436,13 @@ bool LightningKernel::init() { codeSize_ = prog().codeSegGpu().owner()->getSize(); // handle device enqueue - if (!kernelMD.mAttrs.mRuntimeHandle.empty()) { + if (!RuntimeHandle().empty()) { hsa_agent_t agent; agent.handle = 1; amd::hsa::loader::Symbol* rth_symbol; // Get the runtime handle symbol GPU address - rth_symbol = prog().GetSymbol(const_cast(kernelMD.mAttrs.mRuntimeHandle.c_str()), + rth_symbol = prog().GetSymbol(const_cast(RuntimeHandle().c_str()), const_cast(&agent)); uint64_t symbol_address; rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address); @@ -480,14 +460,12 @@ bool LightningKernel::init() { } // Setup the the workgroup info - setWorkGroupInfo(kernelMD.mCodeProps.mPrivateSegmentFixedSize, - kernelMD.mCodeProps.mGroupSegmentFixedSize, kernelMD.mCodeProps.mNumSGPRs, - kernelMD.mCodeProps.mNumVGPRs); + setWorkGroupInfo(WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize(), + workGroupInfo()->usedSGPRs_, workGroupInfo()->usedVGPRs_); // Copy wavefront size workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_; - workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize; if (workGroupInfo_.size_ == 0) { return false; } diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp index d7beb12fbf..c1a9232734 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp @@ -58,7 +58,7 @@ class HSAILKernel : public device::Kernel { const HSAILProgram& prog() const; //! Returns LDS size used in this kernel - uint32_t ldsSize() const { return workgroupGroupSegmentByteSize_; } + uint32_t ldsSize() const { return WorkgroupGroupSegmentByteSize(); } //! Returns pointer on CPU to AQL code info const amd_kernel_code_t* cpuAqlCode() const { return &akc_; } @@ -114,10 +114,7 @@ class HSAILKernel : public device::Kernel { uint64_t code_; //!< GPU memory pointer to the kernel size_t codeSize_; //!< Size of ISA code - - uint32_t workgroupGroupSegmentByteSize_; //!< LDS size used in the kernel - uint32_t kernargSegmentByteSize_; //!< Size of kernel argument buffer -}; + }; class LightningKernel : public HSAILKernel { public: diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp index 22c047e4b3..03a5b25ae1 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp @@ -9,33 +9,23 @@ #ifndef WITHOUT_HSA_BACKEND -#if defined(USE_COMGR_LIBRARY) -#include "llvm/Support/AMDGPUMetadata.h" - -typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; -typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -#endif // defined(USE_COMGR_LIBRARY) - namespace roc { Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle, const uint32_t workgroupGroupSegmentByteSize, const uint32_t workitemPrivateSegmentByteSize, const uint32_t kernargSegmentByteSize, const uint32_t kernargSegmentAlignment) - : device::Kernel(prog->dev(), name, *prog), - kernelCodeHandle_(kernelCodeHandle), - workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize), - workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize), - kernargSegmentByteSize_(kernargSegmentByteSize), - kernargSegmentAlignment_(kernargSegmentAlignment) {} + : device::Kernel(prog->dev(), name, *prog) { + kernelCodeHandle_ = kernelCodeHandle; + workgroupGroupSegmentByteSize_ = workgroupGroupSegmentByteSize; + workitemPrivateSegmentByteSize_ = workitemPrivateSegmentByteSize; + kernargSegmentByteSize_ = kernargSegmentByteSize; + kernargSegmentAlignment_ = kernargSegmentAlignment; +} Kernel::Kernel(std::string name, Program* prog) - : device::Kernel(prog->dev(), name, *prog), - kernelCodeHandle_(0), - workgroupGroupSegmentByteSize_(0), - workitemPrivateSegmentByteSize_(0), - kernargSegmentByteSize_(0), - kernargSegmentAlignment_(0) {} + : device::Kernel(prog->dev(), name, *prog) { +} #if defined(USE_COMGR_LIBRARY) bool LightningKernel::init() { @@ -48,20 +38,18 @@ bool LightningKernel::init() { return false; } - KernelMD kernelMD; - if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) { + if (!GetAttrCodePropMetadata(*kernelMetaNode)) { return false; } // Set the kernel symbol name and size/alignment based on the kernel metadata // NOTE: kernel name is used to get the kernel code handle in V2, // but kernel symbol name is used in V3 - symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName; - workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize; - workitemPrivateSegmentByteSize_ = kernelMD.mCodeProps.mPrivateSegmentFixedSize; - kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize; - kernargSegmentAlignment_ = amd::alignUp(std::max(kernelMD.mCodeProps.mKernargSegmentAlign, 128u), - dev().info().globalMemCacheLineSize_); + if (codeObjectVer() == 2) { + symbolName_ = name(); + } + kernargSegmentAlignment_ = + amd::alignUp(std::max(kernargSegmentAlignment_, 128u), dev().info().globalMemCacheLineSize_); // Set the workgroup information for the kernel workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_; @@ -95,7 +83,7 @@ bool LightningKernel::init() { return false; } - if (!kernelMD.mAttrs.mRuntimeHandle.empty()) { + if (!RuntimeHandle().empty()) { hsa_executable_symbol_t kernelSymbol; int variable_size; uint64_t variable_address; @@ -106,7 +94,7 @@ bool LightningKernel::init() { // only after the hsa executable is loaded. The below code copies the kernel code object handle to the // address of the variable. hsaStatus = hsa_executable_get_symbol_by_name(program()->hsaExecutable(), - kernelMD.mAttrs.mRuntimeHandle.c_str(), + RuntimeHandle().c_str(), &agent, &kernelSymbol); if (hsaStatus == HSA_STATUS_SUCCESS) { hsaStatus = hsa_executable_symbol_get_info(kernelSymbol, @@ -122,7 +110,7 @@ bool LightningKernel::init() { if (hsaStatus == HSA_STATUS_SUCCESS) { const struct RuntimeHandle runtime_handle = { kernelCodeHandle_, - workitemPrivateSegmentByteSize(), + WorkitemPrivateSegmentByteSize(), WorkgroupGroupSegmentByteSize() }; hsaStatus = hsa_memory_copy(reinterpret_cast(variable_address), @@ -145,12 +133,9 @@ bool LightningKernel::init() { workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_; workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_; workGroupInfo_.preferredSizeMultiple_ = wavefront_size; - workGroupInfo_.usedSGPRs_ = kernelMD.mCodeProps.mNumSGPRs; - workGroupInfo_.usedVGPRs_ = kernelMD.mCodeProps.mNumVGPRs; workGroupInfo_.usedStackSize_ = 0; workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size; workGroupInfo_.wavefrontSize_ = wavefront_size; - workGroupInfo_.size_ = kernelMD.mCodeProps.mMaxFlatWorkGroupSize; if (workGroupInfo_.size_ == 0) { return false; } diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp index 46fc1c2339..724b4b4227 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.hpp @@ -24,31 +24,12 @@ class Kernel : public device::Kernel { Kernel(std::string name, Program* prog); - const uint64_t& KernelCodeHandle() { return kernelCodeHandle_; } - - const uint32_t WorkgroupGroupSegmentByteSize() const { return workgroupGroupSegmentByteSize_; } - - const uint32_t workitemPrivateSegmentByteSize() const { return workitemPrivateSegmentByteSize_; } - - const uint32_t KernargSegmentByteSize() const { return kernargSegmentByteSize_; } - - const uint8_t KernargSegmentAlignment() const { return kernargSegmentAlignment_; } - ~Kernel() {} //! Initializes the metadata required for this kernel virtual bool init() = 0; const Program* program() const { return static_cast(&prog_); } - - protected: -// Program* program_; //!< The roc::Program context - uint64_t kernelCodeHandle_; //!< Kernel code handle (aka amd_kernel_code_t) - uint32_t workgroupGroupSegmentByteSize_; - uint32_t workitemPrivateSegmentByteSize_; - uint32_t kernargSegmentByteSize_; - uint32_t kernargSegmentAlignment_; - size_t kernelDirectiveOffset_; }; class HSAILKernel : public roc::Kernel {