From aa1de227cea3dabd9c194fd77bad9f8f3a257bcb Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 4 Jan 2019 15:06:29 -0500
Subject: [PATCH] P4 to Git Change 1726335 by wchau@wchau_OCL_boltzmann on
2019/01/04 14:53:36
SWDEV-174898 - OCL Runtime kernel metadata lookup requires quadratic time
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#49 edit
[ROCm/clr commit: 2b9242119405528de699999e67c110c63a05a852]
---
.../clr/rocclr/runtime/device/devkernel.cpp | 70 +-----------------
.../clr/rocclr/runtime/device/devkernel.hpp | 2 +-
.../clr/rocclr/runtime/device/devprogram.cpp | 73 +++++++++++++++++++
.../clr/rocclr/runtime/device/devprogram.hpp | 10 +++
.../rocclr/runtime/device/pal/palkernel.cpp | 11 ++-
.../rocclr/runtime/device/rocm/rockernel.cpp | 12 ++-
6 files changed, 105 insertions(+), 73 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp
index 4c40b4ca85..661c8d1127 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.cpp
@@ -771,16 +771,11 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
// ================================================================================================
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
#if defined(USE_COMGR_LIBRARY)
-bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
+bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
const uint32_t kernargSegmentByteSize,
KernelMD* kernelMD) {
- amd_comgr_metadata_node_t kernelMeta = {0};
- if (!GetKernelMetadata(programMD, name(), &kernelMeta)) {
- return false;
- }
-
- InitParameters(kernelMeta, kernargSegmentByteSize);
+ InitParameters(kernelMetaNode, kernargSegmentByteSize);
// Set the workgroup information for the kernel
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
@@ -791,7 +786,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
// extract the attribute metadata if there is any
amd_comgr_metadata_node_t attrMeta;
amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
- if (amd::Comgr::metadata_lookup(kernelMeta, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) {
+ if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::iterate_map_metadata(attrMeta, device::populateAttrs,
static_cast<void*>(kernelMD));
amd::Comgr::destroy_metadata(attrMeta);
@@ -800,7 +795,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
// extract the code properties metadata
amd_comgr_metadata_node_t codePropsMeta;
if (status == AMD_COMGR_STATUS_SUCCESS) {
- status = amd::Comgr::metadata_lookup(kernelMeta, "CodeProps", &codePropsMeta);
+ status = amd::Comgr::metadata_lookup(kernelMetaNode, "CodeProps", &codePropsMeta);
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
@@ -809,8 +804,6 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
amd::Comgr::destroy_metadata(codePropsMeta);
}
- amd::Comgr::destroy_metadata(kernelMeta);
-
if (status != AMD_COMGR_STATUS_SUCCESS) {
return false;
}
@@ -837,61 +830,6 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
return true;
}
-bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
- const std::string& name,
- amd_comgr_metadata_node_t* kernelNode) {
- amd_comgr_status_t status;
- amd_comgr_metadata_node_t kernelsMD;
- bool hasKernelMD = false;
- size_t size = 0;
-
- status = amd::Comgr::metadata_lookup(programMD, "Kernels", &kernelsMD);
- if (status == AMD_COMGR_STATUS_SUCCESS) {
- hasKernelMD = true;
- status = amd::Comgr::get_metadata_list_size(kernelsMD, &size);
- }
-
- bool kernelFound = false;
- for (size_t i = 0; i < size && !kernelFound && status == AMD_COMGR_STATUS_SUCCESS; i++) {
- std::string kernelName;
-
- amd_comgr_metadata_node_t nameMeta;
- bool hasNameMeta = false;
- bool hasKernelNode = false;
-
- status = amd::Comgr::index_list_metadata(kernelsMD, i, kernelNode);
-
- if (status == AMD_COMGR_STATUS_SUCCESS) {
- hasKernelNode = true;
- status = amd::Comgr::metadata_lookup(*kernelNode, "Name", &nameMeta);
- }
-
- if (status == AMD_COMGR_STATUS_SUCCESS) {
- hasNameMeta = true;
- status = getMetaBuf(nameMeta, &kernelName);
- }
-
- if ((status == AMD_COMGR_STATUS_SUCCESS) && (name.compare(kernelName) == 0)) {
- kernelFound = true;
- }
- else {
- if (hasKernelNode) {
- amd::Comgr::destroy_metadata(*kernelNode);
- }
- }
-
- if (hasNameMeta) {
- amd::Comgr::destroy_metadata(nameMeta);
- }
- }
-
- if (hasKernelMD) {
- amd::Comgr::destroy_metadata(kernelsMD);
- }
-
- return kernelFound;
-}
-
bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) {
std::string buf;
diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp
index 3e2554b11e..6d1f289460 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.hpp
@@ -380,7 +380,7 @@ class Kernel : public amd::HeapObject {
amd_comgr_metadata_node_t* kernelNode);
//! Retrieve kernel attribute and code properties metadata
- bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
+ bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
const uint32_t kernargSegmentByteSize,
KernelMD* kernelMD);
diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp
index 7c61f12936..6ae2665f0e 100644
--- a/projects/clr/rocclr/runtime/device/devprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/devprogram.cpp
@@ -72,6 +72,11 @@ Program::Program(amd::Device& device)
// ================================================================================================
Program::~Program() {
clear();
+#if defined(USE_COMGR_LIBRARY)
+ for (auto const& kernelMeta : kernelMetadataMap_) {
+ amd::Comgr::destroy_metadata(kernelMeta.second);
+ }
+#endif
delete metadata_;
}
@@ -2699,6 +2704,66 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options
}
// ================================================================================================
+#if defined(USE_COMGR_LIBRARY)
+bool Program::createKernelMetadataMap() {
+
+ amd_comgr_status_t status;
+ amd_comgr_metadata_node_t kernelsMD;
+ bool hasKernelMD = false;
+ size_t size = 0;
+
+ status = amd::Comgr::metadata_lookup(*metadata_, "Kernels", &kernelsMD);
+ if (status == AMD_COMGR_STATUS_SUCCESS) {
+ hasKernelMD = true;
+ status = amd::Comgr::get_metadata_list_size(kernelsMD, &size);
+ }
+
+ for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
+ amd_comgr_metadata_node_t nameMeta;
+ bool hasNameMeta = false;
+ bool hasKernelNode = false;
+
+ amd_comgr_metadata_node_t kernelNode;
+
+ std::string kernelName;
+ status = amd::Comgr::index_list_metadata(kernelsMD, i, &kernelNode);
+
+ if (status == AMD_COMGR_STATUS_SUCCESS) {
+ hasKernelNode = true;
+ status = amd::Comgr::metadata_lookup(kernelNode, "Name", &nameMeta);
+ }
+
+ if (status == AMD_COMGR_STATUS_SUCCESS) {
+ hasNameMeta = true;
+ status = getMetaBuf(nameMeta, &kernelName);
+ }
+
+ if (status == AMD_COMGR_STATUS_SUCCESS) {
+ kernelMetadataMap_[kernelName] = kernelNode;
+ }
+ else {
+ if (hasKernelNode) {
+ amd::Comgr::destroy_metadata(kernelNode);
+ }
+ for (auto const& kernelMeta : kernelMetadataMap_) {
+ amd::Comgr::destroy_metadata(kernelMeta.second);
+ }
+ kernelMetadataMap_.clear();
+ }
+
+ if (hasNameMeta) {
+ amd::Comgr::destroy_metadata(nameMeta);
+ }
+ }
+
+ if (hasKernelMD) {
+ amd::Comgr::destroy_metadata(kernelsMD);
+ }
+
+ return (status == AMD_COMGR_STATUS_SUCCESS);
+}
+#endif
+
bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
size_t progvarsTotalSize = 0;
@@ -2801,6 +2866,14 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
return false;
}
+#if defined(USE_COMGR_LIBRARY)
+ if (!createKernelMetadataMap()) {
+ buildLog_ +=
+ "Error: create kernel metadata map using COMgr\n";
+ return false;
+ }
+#endif
+
progvarsTotalSize -= dynamicSize;
setGlobalVariableTotalSize(progvarsTotalSize);
diff --git a/projects/clr/rocclr/runtime/device/devprogram.hpp b/projects/clr/rocclr/runtime/device/devprogram.hpp
index 713270e818..743350ec72 100644
--- a/projects/clr/rocclr/runtime/device/devprogram.hpp
+++ b/projects/clr/rocclr/runtime/device/devprogram.hpp
@@ -115,6 +115,7 @@ class Program : public amd::HeapObject {
#if defined(USE_COMGR_LIBRARY)
amd_comgr_metadata_node_t* metadata_; //!< COMgr metadata
+ std::map<std::string, amd_comgr_metadata_node_t> kernelMetadataMap_; //!< Map of kernel metadata
#else
CodeObjectMD* metadata_; //!< Runtime metadata
#endif
@@ -201,6 +202,12 @@ class Program : public amd::HeapObject {
#if defined(USE_COMGR_LIBRARY)
const amd_comgr_metadata_node_t* metadata() const { return metadata_; }
+
+ //! Get the kernel metadata
+ const amd_comgr_metadata_node_t* getKernelMetadata(const std::string name) const {
+ auto it = kernelMetadataMap_.find(name);
+ return (it == kernelMetadataMap_.end()) ? nullptr : &(it->second);
+ }
#else
const CodeObjectMD* metadata() const { return metadata_; }
#endif
@@ -347,6 +354,9 @@ class Program : public amd::HeapObject {
bool compileAndLinkExecutable(const amd_comgr_data_set_t inputs,
const std::string& options, amd::option::Options* amdOptions,
char* executable[], size_t* executableSize);
+
+ //! Create the map for the kernel name and its metadata for fast access
+ bool createKernelMetadataMap();
#endif
//! Disable default copy constructor
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index bb4ed8d682..7bbdbfbb5b 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -396,11 +396,13 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
aqlCreateHWInfo(symbol);
#if defined(USE_COMGR_LIBRARY)
- const amd_comgr_metadata_node_t* programMD = prog().metadata();
- assert(programMD != nullptr);
+ const amd_comgr_metadata_node_t* kernelMetaNode = prog().getKernelMetadata(name());
+ if (kernelMetaNode == nullptr) {
+ return false;
+ }
KernelMD kernelMD;
- if (!GetAttrCodePropMetadata(*programMD, argsBufferSize(), &kernelMD)) {
+ if (!GetAttrCodePropMetadata(*kernelMetaNode, argsBufferSize(), &kernelMD)) {
return false;
}
@@ -439,6 +441,9 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
}
// handle the printf metadata if any
+ const amd_comgr_metadata_node_t* programMD = prog().metadata();
+ assert(programMD != nullptr);
+
std::vector<std::string> printfStr;
if (!GetPrintfStr(*programMD, &printfStr)) {
return false;
diff --git a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
index ad20af284c..d8bef9c240 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rockernel.cpp
@@ -37,11 +37,14 @@ bool LightningKernel::init() {
hsa_agent_t hsaDevice = program_->hsaDevice();
- const amd_comgr_metadata_node_t* programMD = static_cast<const LightningProgram*>(program_)->metadata();
- assert(programMD != nullptr);
+ const amd_comgr_metadata_node_t* kernelMetaNode =
+ static_cast<const LightningProgram*>(program_)->getKernelMetadata(name());
+ if (kernelMetaNode == nullptr) {
+ return false;
+ }
KernelMD kernelMD;
- if (!GetAttrCodePropMetadata(*programMD, KernargSegmentByteSize(), &kernelMD)) {
+ if (!GetAttrCodePropMetadata(*kernelMetaNode, KernargSegmentByteSize(), &kernelMD)) {
return false;
}
@@ -127,6 +130,9 @@ bool LightningKernel::init() {
}
// handle the printf metadata if any
+ const amd_comgr_metadata_node_t* programMD = static_cast<const LightningProgram*>(program_)->metadata();
+ assert(programMD != nullptr);
+
std::vector<std::string> printfStr;
if (!GetPrintfStr(*programMD, &printfStr)) {
return false;