From aa1de227cea3dabd9c194fd77bad9f8f3a257bcb Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 4 Jan 2019 15:06:29 -0500 Subject: [PATCH] P4 to Git Change 1726335 by wchau@wchau_OCL_boltzmann on 2019/01/04 14:53:36 SWDEV-174898 - OCL Runtime kernel metadata lookup requires quadratic time Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#18 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#13 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#26 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#15 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#76 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#49 edit [ROCm/clr commit: 2b9242119405528de699999e67c110c63a05a852] --- .../clr/rocclr/runtime/device/devkernel.cpp | 70 +----------------- .../clr/rocclr/runtime/device/devkernel.hpp | 2 +- .../clr/rocclr/runtime/device/devprogram.cpp | 73 +++++++++++++++++++ .../clr/rocclr/runtime/device/devprogram.hpp | 10 +++ .../rocclr/runtime/device/pal/palkernel.cpp | 11 ++- .../rocclr/runtime/device/rocm/rockernel.cpp | 12 ++- 6 files changed, 105 insertions(+), 73 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp index 4c40b4ca85..661c8d1127 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.cpp +++ b/projects/clr/rocclr/runtime/device/devkernel.cpp @@ -771,16 +771,11 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a // ================================================================================================ #if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) #if defined(USE_COMGR_LIBRARY) -bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, +bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode, const uint32_t kernargSegmentByteSize, KernelMD* kernelMD) { - amd_comgr_metadata_node_t kernelMeta = {0}; - if (!GetKernelMetadata(programMD, name(), &kernelMeta)) { - return false; - } - - InitParameters(kernelMeta, kernargSegmentByteSize); + InitParameters(kernelMetaNode, kernargSegmentByteSize); // Set the workgroup information for the kernel workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_; @@ -791,7 +786,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, // extract the attribute metadata if there is any amd_comgr_metadata_node_t attrMeta; amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS; - if (amd::Comgr::metadata_lookup(kernelMeta, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) { + if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::iterate_map_metadata(attrMeta, device::populateAttrs, static_cast(kernelMD)); amd::Comgr::destroy_metadata(attrMeta); @@ -800,7 +795,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, // extract the code properties metadata amd_comgr_metadata_node_t codePropsMeta; if (status == AMD_COMGR_STATUS_SUCCESS) { - status = amd::Comgr::metadata_lookup(kernelMeta, "CodeProps", &codePropsMeta); + status = amd::Comgr::metadata_lookup(kernelMetaNode, "CodeProps", &codePropsMeta); } if (status == AMD_COMGR_STATUS_SUCCESS) { @@ -809,8 +804,6 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, amd::Comgr::destroy_metadata(codePropsMeta); } - amd::Comgr::destroy_metadata(kernelMeta); - if (status != AMD_COMGR_STATUS_SUCCESS) { return false; } @@ -837,61 +830,6 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, return true; } -bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD, - const std::string& name, - amd_comgr_metadata_node_t* kernelNode) { - amd_comgr_status_t status; - amd_comgr_metadata_node_t kernelsMD; - bool hasKernelMD = false; - size_t size = 0; - - status = amd::Comgr::metadata_lookup(programMD, "Kernels", &kernelsMD); - if (status == AMD_COMGR_STATUS_SUCCESS) { - hasKernelMD = true; - status = amd::Comgr::get_metadata_list_size(kernelsMD, &size); - } - - bool kernelFound = false; - for (size_t i = 0; i < size && !kernelFound && status == AMD_COMGR_STATUS_SUCCESS; i++) { - std::string kernelName; - - amd_comgr_metadata_node_t nameMeta; - bool hasNameMeta = false; - bool hasKernelNode = false; - - status = amd::Comgr::index_list_metadata(kernelsMD, i, kernelNode); - - if (status == AMD_COMGR_STATUS_SUCCESS) { - hasKernelNode = true; - status = amd::Comgr::metadata_lookup(*kernelNode, "Name", &nameMeta); - } - - if (status == AMD_COMGR_STATUS_SUCCESS) { - hasNameMeta = true; - status = getMetaBuf(nameMeta, &kernelName); - } - - if ((status == AMD_COMGR_STATUS_SUCCESS) && (name.compare(kernelName) == 0)) { - kernelFound = true; - } - else { - if (hasKernelNode) { - amd::Comgr::destroy_metadata(*kernelNode); - } - } - - if (hasNameMeta) { - amd::Comgr::destroy_metadata(nameMeta); - } - } - - if (hasKernelMD) { - amd::Comgr::destroy_metadata(kernelsMD); - } - - return kernelFound; -} - bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) { std::string buf; diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp index 3e2554b11e..6d1f289460 100644 --- a/projects/clr/rocclr/runtime/device/devkernel.hpp +++ b/projects/clr/rocclr/runtime/device/devkernel.hpp @@ -380,7 +380,7 @@ class Kernel : public amd::HeapObject { amd_comgr_metadata_node_t* kernelNode); //! Retrieve kernel attribute and code properties metadata - bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, + bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode, const uint32_t kernargSegmentByteSize, KernelMD* kernelMD); diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp index 7c61f12936..6ae2665f0e 100644 --- a/projects/clr/rocclr/runtime/device/devprogram.cpp +++ b/projects/clr/rocclr/runtime/device/devprogram.cpp @@ -72,6 +72,11 @@ Program::Program(amd::Device& device) // ================================================================================================ Program::~Program() { clear(); +#if defined(USE_COMGR_LIBRARY) + for (auto const& kernelMeta : kernelMetadataMap_) { + amd::Comgr::destroy_metadata(kernelMeta.second); + } +#endif delete metadata_; } @@ -2699,6 +2704,66 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options } // ================================================================================================ +#if defined(USE_COMGR_LIBRARY) +bool Program::createKernelMetadataMap() { + + amd_comgr_status_t status; + amd_comgr_metadata_node_t kernelsMD; + bool hasKernelMD = false; + size_t size = 0; + + status = amd::Comgr::metadata_lookup(*metadata_, "Kernels", &kernelsMD); + if (status == AMD_COMGR_STATUS_SUCCESS) { + hasKernelMD = true; + status = amd::Comgr::get_metadata_list_size(kernelsMD, &size); + } + + for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) { + amd_comgr_metadata_node_t nameMeta; + bool hasNameMeta = false; + bool hasKernelNode = false; + + amd_comgr_metadata_node_t kernelNode; + + std::string kernelName; + status = amd::Comgr::index_list_metadata(kernelsMD, i, &kernelNode); + + if (status == AMD_COMGR_STATUS_SUCCESS) { + hasKernelNode = true; + status = amd::Comgr::metadata_lookup(kernelNode, "Name", &nameMeta); + } + + if (status == AMD_COMGR_STATUS_SUCCESS) { + hasNameMeta = true; + status = getMetaBuf(nameMeta, &kernelName); + } + + if (status == AMD_COMGR_STATUS_SUCCESS) { + kernelMetadataMap_[kernelName] = kernelNode; + } + else { + if (hasKernelNode) { + amd::Comgr::destroy_metadata(kernelNode); + } + for (auto const& kernelMeta : kernelMetadataMap_) { + amd::Comgr::destroy_metadata(kernelMeta.second); + } + kernelMetadataMap_.clear(); + } + + if (hasNameMeta) { + amd::Comgr::destroy_metadata(nameMeta); + } + } + + if (hasKernelMD) { + amd::Comgr::destroy_metadata(kernelsMD); + } + + return (status == AMD_COMGR_STATUS_SUCCESS); +} +#endif + bool Program::FindGlobalVarSize(void* binary, size_t binSize) { #if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) size_t progvarsTotalSize = 0; @@ -2801,6 +2866,14 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { return false; } +#if defined(USE_COMGR_LIBRARY) + if (!createKernelMetadataMap()) { + buildLog_ += + "Error: create kernel metadata map using COMgr\n"; + return false; + } +#endif + progvarsTotalSize -= dynamicSize; setGlobalVariableTotalSize(progvarsTotalSize); diff --git a/projects/clr/rocclr/runtime/device/devprogram.hpp b/projects/clr/rocclr/runtime/device/devprogram.hpp index 713270e818..743350ec72 100644 --- a/projects/clr/rocclr/runtime/device/devprogram.hpp +++ b/projects/clr/rocclr/runtime/device/devprogram.hpp @@ -115,6 +115,7 @@ class Program : public amd::HeapObject { #if defined(USE_COMGR_LIBRARY) amd_comgr_metadata_node_t* metadata_; //!< COMgr metadata + std::map kernelMetadataMap_; //!< Map of kernel metadata #else CodeObjectMD* metadata_; //!< Runtime metadata #endif @@ -201,6 +202,12 @@ class Program : public amd::HeapObject { #if defined(USE_COMGR_LIBRARY) const amd_comgr_metadata_node_t* metadata() const { return metadata_; } + + //! Get the kernel metadata + const amd_comgr_metadata_node_t* getKernelMetadata(const std::string name) const { + auto it = kernelMetadataMap_.find(name); + return (it == kernelMetadataMap_.end()) ? nullptr : &(it->second); + } #else const CodeObjectMD* metadata() const { return metadata_; } #endif @@ -347,6 +354,9 @@ class Program : public amd::HeapObject { bool compileAndLinkExecutable(const amd_comgr_data_set_t inputs, const std::string& options, amd::option::Options* amdOptions, char* executable[], size_t* executableSize); + + //! Create the map for the kernel name and its metadata for fast access + bool createKernelMetadataMap(); #endif //! Disable default copy constructor diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index bb4ed8d682..7bbdbfbb5b 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -396,11 +396,13 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { aqlCreateHWInfo(symbol); #if defined(USE_COMGR_LIBRARY) - const amd_comgr_metadata_node_t* programMD = prog().metadata(); - assert(programMD != nullptr); + const amd_comgr_metadata_node_t* kernelMetaNode = prog().getKernelMetadata(name()); + if (kernelMetaNode == nullptr) { + return false; + } KernelMD kernelMD; - if (!GetAttrCodePropMetadata(*programMD, argsBufferSize(), &kernelMD)) { + if (!GetAttrCodePropMetadata(*kernelMetaNode, argsBufferSize(), &kernelMD)) { return false; } @@ -439,6 +441,9 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { } // handle the printf metadata if any + const amd_comgr_metadata_node_t* programMD = prog().metadata(); + assert(programMD != nullptr); + std::vector printfStr; if (!GetPrintfStr(*programMD, &printfStr)) { return false; diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp index ad20af284c..d8bef9c240 100644 --- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp +++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp @@ -37,11 +37,14 @@ bool LightningKernel::init() { hsa_agent_t hsaDevice = program_->hsaDevice(); - const amd_comgr_metadata_node_t* programMD = static_cast(program_)->metadata(); - assert(programMD != nullptr); + const amd_comgr_metadata_node_t* kernelMetaNode = + static_cast(program_)->getKernelMetadata(name()); + if (kernelMetaNode == nullptr) { + return false; + } KernelMD kernelMD; - if (!GetAttrCodePropMetadata(*programMD, KernargSegmentByteSize(), &kernelMD)) { + if (!GetAttrCodePropMetadata(*kernelMetaNode, KernargSegmentByteSize(), &kernelMD)) { return false; } @@ -127,6 +130,9 @@ bool LightningKernel::init() { } // handle the printf metadata if any + const amd_comgr_metadata_node_t* programMD = static_cast(program_)->metadata(); + assert(programMD != nullptr); + std::vector printfStr; if (!GetPrintfStr(*programMD, &printfStr)) { return false;