From 511c6a5de8b8fcf0af9cbab45cea2083cbd90bb9 Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 29 Mar 2016 07:35:41 -0400 Subject: [PATCH] P4 to Git Change 1252154 by emankov@em-hsa-amd on 2016/03/29 07:23:56 SWDEV-79309 - Compiler Lib memory consumption decreasing by aclQueryInfo refactoring. [Problem] A memcpy is performed on every aclQueryInfo call for metadata. As a result, there is unreasonable memory consumption for every compiled binary (at runtime). [Solution] Once extracted, kernel Metadata is cached, so a second extraction doesn't happen. Blits are also affected. [Testing] precheckin http://ocltc.amd.com:8111/viewModification.html?modId=69016&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true [Reviewers] Stanislav Mekhanoshin, Nikolay Haustov Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/include/v0_8/aclStructs.h#6 edit ... //depot/stg/opencl/drivers/opencl/compiler/legacy-lib/include/v0_8/aclTypes.h#3 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/api/v0_8/acl.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#92 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.cpp#66 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclStructs.h#23 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/include/v0_8/aclTypes.h#8 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.cpp#15 edit ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#24 edit ... 
//depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/CLAssumptionCheck.cpp#49 edit --- .../lib/backends/common/v0_8/if_acl.cpp | 16 +++------ rocclr/compiler/lib/include/v0_8/aclStructs.h | 1 + rocclr/compiler/lib/include/v0_8/aclTypes.h | 1 + rocclr/compiler/lib/utils/v0_8/libUtils.cpp | 36 +++++++++++++++++++ rocclr/compiler/lib/utils/v0_8/libUtils.h | 24 +++++++++++-- 5 files changed, 65 insertions(+), 13 deletions(-) diff --git a/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp b/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp index 172214cb74..3c3d5df104 100644 --- a/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp +++ b/rocclr/compiler/lib/backends/common/v0_8/if_acl.cpp @@ -2542,7 +2542,7 @@ if_aclQueryInfo(aclCompiler *cl, return ACL_INVALID_ARG; } std::string symbol = pre + std::string(kernel) + post; - const void* roSec = cl->clAPI.extSym(cl, binary, &roSize, secID, symbol.c_str(), &error_code); + const void* roSec = aclutGetKernelMetadata(cl, binary, &roSize, symbol.c_str(), &error_code); if (error_code != ACL_SUCCESS) return error_code; if (roSec == NULL || roSize == 0) { return ACL_ELF_ERROR; @@ -2876,8 +2876,7 @@ if_aclDbgAddArgument(aclCompiler *cl, acl_error error_code; aclMetadata *md = NULL; { - const char* roSec = reinterpret_cast(cl->clAPI.extSym( - cl, bin, &roSize, sym->sections[0], symbol.c_str(), &error_code)); + const char* roSec = reinterpret_cast(aclutGetKernelMetadata(cl, bin, &roSize, symbol.c_str(), &error_code)); if (error_code != ACL_SUCCESS) return error_code; if (roSec == NULL || roSize == 0) { return ACL_ELF_ERROR; @@ -2998,9 +2997,7 @@ if_aclDbgAddArgument(aclCompiler *cl, newMD->data_size = newSize; memcpy(tmp_ptr, reinterpret_cast(md) + printf_offset, roSize - printf_offset); tmp_ptr += (roSize - printf_offset); - cl->clAPI.remSym(cl, bin, aclRODATA, symbol.c_str()); - error_code = cl->clAPI.insSym(cl, bin, newMDptr, newSize, - aclRODATA, symbol.c_str()); + error_code = aclutSetKernelMetadata(cl, bin, 
newMDptr, newSize, symbol.c_str()); assert((size_t)(tmp_ptr - newMDptr) == newSize && "allocated memory does not equal the amount of memory copied!"); free(md); delete [] newMDptr; @@ -3023,8 +3020,7 @@ if_aclDbgRemoveArgument(aclCompiler *cl, acl_error error_code; aclMetadata *md = NULL; { - const char* roSec = reinterpret_cast(cl->clAPI.extSym(cl, bin, &roSize, - sym->sections[0], symbol.c_str(), &error_code)); + const char* roSec = reinterpret_cast(aclutGetKernelMetadata(cl, bin, &roSize, symbol.c_str(), &error_code)); if (error_code != ACL_SUCCESS) return error_code; if (roSec == NULL || roSize == 0) { return ACL_ELF_ERROR; @@ -3130,9 +3126,7 @@ if_aclDbgRemoveArgument(aclCompiler *cl, memcpy(tmp_ptr, reinterpret_cast(md) + printf_offset, roSize - printf_offset); tmp_ptr += (roSize - printf_offset); newMD->data_size = newSize; - cl->clAPI.remSym(cl, bin, aclRODATA, symbol.c_str()); - error_code = cl->clAPI.insSym(cl, bin, newMDptr, newSize, - aclRODATA, symbol.c_str()); + error_code = aclutSetKernelMetadata(cl, bin, newMDptr, newSize, symbol.c_str()); assert((size_t)(tmp_ptr - newMDptr) == newSize && "allocated memory does not equal the amount of memory copied!"); free(md); delete [] newMDptr; diff --git a/rocclr/compiler/lib/include/v0_8/aclStructs.h b/rocclr/compiler/lib/include/v0_8/aclStructs.h index 479f3341b1..80240fc1da 100644 --- a/rocclr/compiler/lib/include/v0_8/aclStructs.h +++ b/rocclr/compiler/lib/include/v0_8/aclStructs.h @@ -180,6 +180,7 @@ typedef struct _acl_bif_rec_0_8_1 { aclOptions* options; // Pointer to acl options. aclBinaryOptions_0_8_1 binOpts; // Pointer to the binary options. aclDevCaps_0_8 caps; // Capabilities of the BIF. + aclBinaryData *binData; // Pointer to the binary's temporary data. 
} aclBinary_0_8_1; #define ACL_LOADER_COMMON\ diff --git a/rocclr/compiler/lib/include/v0_8/aclTypes.h b/rocclr/compiler/lib/include/v0_8/aclTypes.h index f6cb0f9598..aef5582fce 100644 --- a/rocclr/compiler/lib/include/v0_8/aclTypes.h +++ b/rocclr/compiler/lib/include/v0_8/aclTypes.h @@ -30,6 +30,7 @@ typedef struct _acl_be_loader_rec_0_8 aclBELoader; typedef struct _acl_llvm_module_0_8* aclModule; // Opaque pointer to llvm::Module typedef struct _acl_llvm_context_0_8* aclContext; // Opaque pointer to llvm::Context typedef struct _acl_loader_data_0_8* aclLoaderData; // Opaque pointer to loader data +typedef struct _acl_binary_data_0_8_2 aclBinaryData; #include "aclEnums.h" // Typedefs for enumerations diff --git a/rocclr/compiler/lib/utils/v0_8/libUtils.cpp b/rocclr/compiler/lib/utils/v0_8/libUtils.cpp index 2418ed843e..6c31452315 100644 --- a/rocclr/compiler/lib/utils/v0_8/libUtils.cpp +++ b/rocclr/compiler/lib/utils/v0_8/libUtils.cpp @@ -1052,3 +1052,39 @@ void dump(aclBinary *bin) { bifbase *elfBin = reinterpret_cast(bin->bin); elfBin->dump(); } + +const void* +aclutGetKernelMetadata(aclCompiler *cl, const aclBinary *bin, size_t *symbolSize, const char *symbol, acl_error *error_code) { + aclutKernelMetadata* md = bin->binData->kernelMetadata; + std::string s(symbol); + aclutKernelMetadata::iterator it = md->find(s); + if (md->end() == it) { + const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta); + assert(sym && "symbol not found"); + const void* symbolData = cl->clAPI.extSym(cl, bin, symbolSize, sym->sections[0], symbol, error_code); + if (symbolData && *symbolSize != 0) { + md->insert(std::make_pair(s, std::make_pair(symbolData, *symbolSize))); + return symbolData; + } + } else { + aclutDataSizePair ds = it->second; + *symbolSize = ds.second; + *error_code = ACL_SUCCESS; + return ds.first; + } + return NULL; +} + +acl_error +aclutSetKernelMetadata(aclCompiler *cl, aclBinary *bin, const void *symbolData, size_t symbolSize, const char *symbol) { + 
const oclBIFSymbolStruct* sym = findBIF30SymStruct(symOpenclMeta); + assert(sym && "symbol not found"); + acl_error err = cl->clAPI.insSym(cl, bin, symbolData, symbolSize, sym->sections[0], symbol); + if (err != ACL_SUCCESS) return err; + aclutKernelMetadata* md = bin->binData->kernelMetadata; + std::string s(symbol); + if (md->end() != md->find(s)) { + md->erase(s); + } + return ACL_SUCCESS; +} diff --git a/rocclr/compiler/lib/utils/v0_8/libUtils.h b/rocclr/compiler/lib/utils/v0_8/libUtils.h index 06ed8a6105..5bb11dfade 100644 --- a/rocclr/compiler/lib/utils/v0_8/libUtils.h +++ b/rocclr/compiler/lib/utils/v0_8/libUtils.h @@ -4,15 +4,23 @@ #ifndef _CL_LIB_UTILS_0_8_H_ #define _CL_LIB_UTILS_0_8_H_ #include "acl.h" -#include #include -#include +#include #include #include #include "library.hpp" #include "utils/bif_section_labels.hpp" using namespace bif; +typedef std::pair aclutDataSizePair; +typedef std::map aclutKernelMetadata; + +//! Structure for storing temporary aclBinary information (non-serializable). +typedef struct _acl_binary_data_0_8_2 { + ACL_STRUCT_HEADER; + aclutKernelMetadata* kernelMetadata; // Opaque pointer to the cached kernel's Metadata. +} aclBinaryData_0_8_2; + // Utility function to set a flag in option structure // of the aclDevCaps. void @@ -178,6 +186,18 @@ aclutAlloc(const aclCompilerOptions *bin); FreeFunc aclutFree(const aclCompilerOptions *bin); +// Wrapper function for extracting the symbol with a particular kernel's Metadata from the binary bin; +// once extracted, the kernel Metadata is cached, so a second extraction doesn't happen. +// This function has to be used instead of the extSym function for symOpenclMeta. +const void* +aclutGetKernelMetadata(aclCompiler *cl, const aclBinary *bin, size_t *symbolSize, const char *symbol, acl_error *error_code); + +// Wrapper function for inserting the symbol with a particular kernel's Metadata into the binary bin; +// on success, any cached copy of that kernel's Metadata is erased rather than refreshed. 
+// This function has to be used instead of the insSym function for symOpenclMeta. +acl_error +aclutSetKernelMetadata(aclCompiler *cl, aclBinary *bin, const void *symbolData, size_t symbolSize, const char *symbol); + inline std::vector splitSpaceSeparatedString(char *str) { std::string s(str);