From 2664d8cf9eb140eef3be08eb2224c8c265577e4c Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 23 Feb 2022 21:18:35 -0500 Subject: [PATCH] SWDEV-319375 - Avoid global var size query Currently COMGR doesn't provide the global variable size, so the runtime parses the ELF binary directly. Avoid that parsing for HIP; this can save 5% in hipModuleLoad() time. Change-Id: I47540d1e957bdb0c2406b6b848222de2920b2504 --- rocclr/device/devprogram.cpp | 64 +++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/rocclr/device/devprogram.cpp b/rocclr/device/devprogram.cpp index eb61f7456e..f5f3249bdd 100644 --- a/rocclr/device/devprogram.cpp +++ b/rocclr/device/devprogram.cpp @@ -2743,36 +2743,47 @@ bool Program::createKernelMetadataMap(void* binary, size_t binSize) { bool Program::FindGlobalVarSize(void* binary, size_t binSize) { #if defined(USE_COMGR_LIBRARY) - size_t progvarsTotalSize = 0; - size_t dynamicSize = 0; - size_t progvarsWriteSize = 0; + // HIP doesn't need information about global variable size. 
+ // Hence runtime can skip expensive Elf object creation for parsing + if (!amd::IS_HIP) { + size_t progvarsTotalSize = 0; + size_t dynamicSize = 0; + size_t progvarsWriteSize = 0; - amd::Elf elfIn(ELFCLASSNONE, reinterpret_cast(binary), binSize, - nullptr, amd::Elf::ELF_C_READ); + amd::Elf elfIn(ELFCLASSNONE, reinterpret_cast(binary), binSize, + nullptr, amd::Elf::ELF_C_READ); - if (!elfIn.isSuccessful()) { - buildLog_ += "Creating input amd::Elf object failed\n"; - return false; - } - - auto numpHdrs = elfIn.getSegmentNum(); - for (unsigned int i = 0; i < numpHdrs; ++i) { - amd::ELFIO::segment* seg = nullptr; - if (!elfIn.getSegment(i, seg)) { - continue; + if (!elfIn.isSuccessful()) { + buildLog_ += "Creating input amd::Elf object failed\n"; + return false; } - // Accumulate the size of R & !X loadable segments - if (seg->get_type() == PT_LOAD && !(seg->get_flags() & PF_X)) { - if (seg->get_flags() & PF_R) { - progvarsTotalSize += seg->get_memory_size(); + auto numpHdrs = elfIn.getSegmentNum(); + for (unsigned int i = 0; i < numpHdrs; ++i) { + amd::ELFIO::segment* seg = nullptr; + if (!elfIn.getSegment(i, seg)) { + continue; } - if (seg->get_flags() & PF_W) { - progvarsWriteSize += seg->get_memory_size(); + + // Accumulate the size of R & !X loadable segments + if (seg->get_type() == PT_LOAD && !(seg->get_flags() & PF_X)) { + if (seg->get_flags() & PF_R) { + progvarsTotalSize += seg->get_memory_size(); + } + if (seg->get_flags() & PF_W) { + progvarsWriteSize += seg->get_memory_size(); + } + } + else if (seg->get_type() == PT_DYNAMIC) { + dynamicSize += seg->get_memory_size(); } } - else if (seg->get_type() == PT_DYNAMIC) { - dynamicSize += seg->get_memory_size(); + + progvarsTotalSize -= dynamicSize; + setGlobalVariableTotalSize(progvarsTotalSize); + + if (progvarsWriteSize != dynamicSize) { + hasGlobalStores_ = true; } } @@ -2780,13 +2791,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { buildLog_ += "Error: create kernel metadata map using 
COMgr\n"; return false; } - - progvarsTotalSize -= dynamicSize; - setGlobalVariableTotalSize(progvarsTotalSize); - - if (progvarsWriteSize != dynamicSize) { - hasGlobalStores_ = true; - } #endif // defined(USE_COMGR_LIBRARY) return true; }