P4 to Git Change 1726335 by wchau@wchau_OCL_boltzmann on 2019/01/04 14:53:36
SWDEV-174898 - OCL Runtime kernel metadata lookup requires quadratic time
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#49 edit
[ROCm/clr commit: 2b92421194]
Этот коммит содержится в:
@@ -771,16 +771,11 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a
|
||||
// ================================================================================================
|
||||
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
KernelMD* kernelMD) {
|
||||
|
||||
amd_comgr_metadata_node_t kernelMeta = {0};
|
||||
if (!GetKernelMetadata(programMD, name(), &kernelMeta)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
InitParameters(kernelMeta, kernargSegmentByteSize);
|
||||
InitParameters(kernelMetaNode, kernargSegmentByteSize);
|
||||
|
||||
// Set the workgroup information for the kernel
|
||||
workGroupInfo_.availableLDSSize_ = dev().info().localMemSizePerCU_;
|
||||
@@ -791,7 +786,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
// extract the attribute metadata if there is any
|
||||
amd_comgr_metadata_node_t attrMeta;
|
||||
amd_comgr_status_t status = AMD_COMGR_STATUS_SUCCESS;
|
||||
if (amd::Comgr::metadata_lookup(kernelMeta, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) {
|
||||
if (amd::Comgr::metadata_lookup(kernelMetaNode, "Attrs", &attrMeta) == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd::Comgr::iterate_map_metadata(attrMeta, device::populateAttrs,
|
||||
static_cast<void*>(kernelMD));
|
||||
amd::Comgr::destroy_metadata(attrMeta);
|
||||
@@ -800,7 +795,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
// extract the code properties metadata
|
||||
amd_comgr_metadata_node_t codePropsMeta;
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd::Comgr::metadata_lookup(kernelMeta, "CodeProps", &codePropsMeta);
|
||||
status = amd::Comgr::metadata_lookup(kernelMetaNode, "CodeProps", &codePropsMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
@@ -809,8 +804,6 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
amd::Comgr::destroy_metadata(codePropsMeta);
|
||||
}
|
||||
|
||||
amd::Comgr::destroy_metadata(kernelMeta);
|
||||
|
||||
if (status != AMD_COMGR_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
@@ -837,61 +830,6 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
const std::string& name,
|
||||
amd_comgr_metadata_node_t* kernelNode) {
|
||||
amd_comgr_status_t status;
|
||||
amd_comgr_metadata_node_t kernelsMD;
|
||||
bool hasKernelMD = false;
|
||||
size_t size = 0;
|
||||
|
||||
status = amd::Comgr::metadata_lookup(programMD, "Kernels", &kernelsMD);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasKernelMD = true;
|
||||
status = amd::Comgr::get_metadata_list_size(kernelsMD, &size);
|
||||
}
|
||||
|
||||
bool kernelFound = false;
|
||||
for (size_t i = 0; i < size && !kernelFound && status == AMD_COMGR_STATUS_SUCCESS; i++) {
|
||||
std::string kernelName;
|
||||
|
||||
amd_comgr_metadata_node_t nameMeta;
|
||||
bool hasNameMeta = false;
|
||||
bool hasKernelNode = false;
|
||||
|
||||
status = amd::Comgr::index_list_metadata(kernelsMD, i, kernelNode);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasKernelNode = true;
|
||||
status = amd::Comgr::metadata_lookup(*kernelNode, "Name", &nameMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasNameMeta = true;
|
||||
status = getMetaBuf(nameMeta, &kernelName);
|
||||
}
|
||||
|
||||
if ((status == AMD_COMGR_STATUS_SUCCESS) && (name.compare(kernelName) == 0)) {
|
||||
kernelFound = true;
|
||||
}
|
||||
else {
|
||||
if (hasKernelNode) {
|
||||
amd::Comgr::destroy_metadata(*kernelNode);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasNameMeta) {
|
||||
amd::Comgr::destroy_metadata(nameMeta);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasKernelMD) {
|
||||
amd::Comgr::destroy_metadata(kernelsMD);
|
||||
}
|
||||
|
||||
return kernelFound;
|
||||
}
|
||||
|
||||
bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) {
|
||||
std::string buf;
|
||||
|
||||
|
||||
@@ -380,7 +380,7 @@ class Kernel : public amd::HeapObject {
|
||||
amd_comgr_metadata_node_t* kernelNode);
|
||||
|
||||
//! Retrieve kernel attribute and code properties metadata
|
||||
bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD,
|
||||
bool GetAttrCodePropMetadata(const amd_comgr_metadata_node_t kernelMetaNode,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
KernelMD* kernelMD);
|
||||
|
||||
|
||||
@@ -72,6 +72,11 @@ Program::Program(amd::Device& device)
|
||||
// ================================================================================================
|
||||
//! Tear down the program: run clear(), release every cached per-kernel
//! metadata handle (COMGR builds only), then free the program metadata object.
Program::~Program() {
  clear();

#if defined(USE_COMGR_LIBRARY)
  // The map holds the kernel metadata handles; destroy each one of them.
  for (auto entry = kernelMetadataMap_.begin(); entry != kernelMetadataMap_.end(); ++entry) {
    amd::Comgr::destroy_metadata(entry->second);
  }
#endif

  delete metadata_;
}
|
||||
|
||||
@@ -2699,6 +2704,66 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
bool Program::createKernelMetadataMap() {
|
||||
|
||||
amd_comgr_status_t status;
|
||||
amd_comgr_metadata_node_t kernelsMD;
|
||||
bool hasKernelMD = false;
|
||||
size_t size = 0;
|
||||
|
||||
status = amd::Comgr::metadata_lookup(*metadata_, "Kernels", &kernelsMD);
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasKernelMD = true;
|
||||
status = amd::Comgr::get_metadata_list_size(kernelsMD, &size);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < size && status == AMD_COMGR_STATUS_SUCCESS; i++) {
|
||||
amd_comgr_metadata_node_t nameMeta;
|
||||
bool hasNameMeta = false;
|
||||
bool hasKernelNode = false;
|
||||
|
||||
amd_comgr_metadata_node_t kernelNode;
|
||||
|
||||
std::string kernelName;
|
||||
status = amd::Comgr::index_list_metadata(kernelsMD, i, &kernelNode);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasKernelNode = true;
|
||||
status = amd::Comgr::metadata_lookup(kernelNode, "Name", &nameMeta);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasNameMeta = true;
|
||||
status = getMetaBuf(nameMeta, &kernelName);
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
kernelMetadataMap_[kernelName] = kernelNode;
|
||||
}
|
||||
else {
|
||||
if (hasKernelNode) {
|
||||
amd::Comgr::destroy_metadata(kernelNode);
|
||||
}
|
||||
for (auto const& kernelMeta : kernelMetadataMap_) {
|
||||
amd::Comgr::destroy_metadata(kernelMeta.second);
|
||||
}
|
||||
kernelMetadataMap_.clear();
|
||||
}
|
||||
|
||||
if (hasNameMeta) {
|
||||
amd::Comgr::destroy_metadata(nameMeta);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasKernelMD) {
|
||||
amd::Comgr::destroy_metadata(kernelsMD);
|
||||
}
|
||||
|
||||
return (status == AMD_COMGR_STATUS_SUCCESS);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
|
||||
#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
|
||||
size_t progvarsTotalSize = 0;
|
||||
@@ -2801,6 +2866,14 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
if (!createKernelMetadataMap()) {
|
||||
buildLog_ +=
|
||||
"Error: create kernel metadata map using COMgr\n";
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
progvarsTotalSize -= dynamicSize;
|
||||
setGlobalVariableTotalSize(progvarsTotalSize);
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ class Program : public amd::HeapObject {
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
amd_comgr_metadata_node_t* metadata_; //!< COMgr metadata
|
||||
std::map<std::string,amd_comgr_metadata_node_t> kernelMetadataMap_; //!< Map of kernel metadata
|
||||
#else
|
||||
CodeObjectMD* metadata_; //!< Runtime metadata
|
||||
#endif
|
||||
@@ -201,6 +202,12 @@ class Program : public amd::HeapObject {
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
const amd_comgr_metadata_node_t* metadata() const { return metadata_; }
|
||||
|
||||
//! Get the kernel metadata
|
||||
const amd_comgr_metadata_node_t* getKernelMetadata(const std::string name) const {
|
||||
auto it = kernelMetadataMap_.find(name);
|
||||
return (it == kernelMetadataMap_.end()) ? nullptr : &(it->second);
|
||||
}
|
||||
#else
|
||||
const CodeObjectMD* metadata() const { return metadata_; }
|
||||
#endif
|
||||
@@ -347,6 +354,9 @@ class Program : public amd::HeapObject {
|
||||
bool compileAndLinkExecutable(const amd_comgr_data_set_t inputs,
|
||||
const std::string& options, amd::option::Options* amdOptions,
|
||||
char* executable[], size_t* executableSize);
|
||||
|
||||
//! Create the map for the kernel name and its metadata for fast access
|
||||
bool createKernelMetadataMap();
|
||||
#endif
|
||||
|
||||
//! Disable default copy constructor
|
||||
|
||||
@@ -396,11 +396,13 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
|
||||
aqlCreateHWInfo(symbol);
|
||||
|
||||
#if defined(USE_COMGR_LIBRARY)
|
||||
const amd_comgr_metadata_node_t* programMD = prog().metadata();
|
||||
assert(programMD != nullptr);
|
||||
const amd_comgr_metadata_node_t* kernelMetaNode = prog().getKernelMetadata(name());
|
||||
if (kernelMetaNode == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelMD kernelMD;
|
||||
if (!GetAttrCodePropMetadata(*programMD, argsBufferSize(), &kernelMD)) {
|
||||
if (!GetAttrCodePropMetadata(*kernelMetaNode, argsBufferSize(), &kernelMD)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -439,6 +441,9 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
|
||||
}
|
||||
|
||||
// handle the printf metadata if any
|
||||
const amd_comgr_metadata_node_t* programMD = prog().metadata();
|
||||
assert(programMD != nullptr);
|
||||
|
||||
std::vector<std::string> printfStr;
|
||||
if (!GetPrintfStr(*programMD, &printfStr)) {
|
||||
return false;
|
||||
|
||||
@@ -37,11 +37,14 @@ bool LightningKernel::init() {
|
||||
|
||||
hsa_agent_t hsaDevice = program_->hsaDevice();
|
||||
|
||||
const amd_comgr_metadata_node_t* programMD = static_cast<LightningProgram*>(program_)->metadata();
|
||||
assert(programMD != nullptr);
|
||||
const amd_comgr_metadata_node_t* kernelMetaNode =
|
||||
static_cast<LightningProgram*>(program_)->getKernelMetadata(name());
|
||||
if (kernelMetaNode == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
KernelMD kernelMD;
|
||||
if (!GetAttrCodePropMetadata(*programMD, KernargSegmentByteSize(), &kernelMD)) {
|
||||
if (!GetAttrCodePropMetadata(*kernelMetaNode, KernargSegmentByteSize(), &kernelMD)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -127,6 +130,9 @@ bool LightningKernel::init() {
|
||||
}
|
||||
|
||||
// handle the printf metadata if any
|
||||
const amd_comgr_metadata_node_t* programMD = static_cast<LightningProgram*>(program_)->metadata();
|
||||
assert(programMD != nullptr);
|
||||
|
||||
std::vector<std::string> printfStr;
|
||||
if (!GetPrintfStr(*programMD, &printfStr)) {
|
||||
return false;
|
||||
|
||||
Ссылка в новой задаче
Block a user