diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp index 66c1687074..e80711bfc0 100644 --- a/rocclr/runtime/device/device.cpp +++ b/rocclr/runtime/device/device.cpp @@ -426,174 +426,6 @@ char* Device::getExtensionString() { return result; } -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) -CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, - bool resetCache) - : codeCache_(targetStr, 0, AMD_PLATFORM_BUILD_NUMBER, postfix), - isCodeCacheEnabled_(enableCache) { - if (resetCache) { - // clean up the cached data of the target device - StringCache emptyCache(targetStr, 0, 0, postfix); - } -} - -bool CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C, - std::vector& inputs, - amd::opencl_driver::Buffer* output, - std::vector& options, std::string& buildLog) { - std::string cacheOpt; - cacheOpt = std::accumulate(begin(options), end(options), cacheOpt); - - bool ret = false; - bool cachedCodeExist = false; - std::vector bcSet; - if (isCodeCacheEnabled_) { - using namespace amd::opencl_driver; - - for (auto& input : inputs) { - assert(input->Type() == DT_LLVM_BC); - - BufferReference* bc = reinterpret_cast(input); - StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()}; - bcSet.push_back(cachedData); - } - - std::string dstData = ""; - if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData, - "Link LLVM Bitcodes")) { - std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf())); - cachedCodeExist = true; - } - } - - if (!cachedCodeExist) { - if (!C->LinkLLVMBitcode(inputs, output, options)) { - return false; - } - - if (isCodeCacheEnabled_) { - std::string dstData(output->Buf().data(), output->Buf().size()); - if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) { - buildLog += "Warning: Failed to caching codes.\n"; - LogWarning("Caching codes failed!"); - } - } - } - - return true; -} - -bool CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C, - std::vector& inputs, - amd::opencl_driver::Buffer* output, - std::vector& options, - std::string& buildLog) { - std::string cacheOpt; - for (uint i = 0; i < options.size(); i++) { - // skip the header file option, which is associated with the -cl-std= option - if (options[i].compare("-include-pch") == 0) { - i++; - continue; - } - cacheOpt += options[i]; - } - - bool ret = false; - bool cachedCodeExist = false; - std::vector bcSet; - if (isCodeCacheEnabled_) { - using namespace amd::opencl_driver; - - bool checkCache = true; - for (auto& input : inputs) { - if (input->Type() == DT_CL) { - BufferReference* bc = reinterpret_cast(input); - StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()}; - bcSet.push_back(cachedData); - } else if (input->Type() == DT_CL_HEADER) { - FileReference* bcFile = reinterpret_cast(input); - std::string bc; - bcFile->ReadToString(bc); - StringCache::CachedData cachedData = {bc.c_str(), bc.size()}; - bcSet.push_back(cachedData); - } else { - buildLog += "Error: unsupported bitcode type for checking cache.\n"; - checkCache = false; - break; - } - } - - std::string dstData = ""; - if (checkCache && - codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData, - "Compile to LLVM Bitcodes")) { - std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf())); - cachedCodeExist = true; - } - } - - if (!cachedCodeExist) { - if (!C->CompileToLLVMBitcode(inputs, output, options)) { - return false; - } - - if (isCodeCacheEnabled_) { - std::string dstData(output->Buf().data(), output->Buf().size()); - if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) { - buildLog += "Warning: Failed to caching codes.\n"; - LogWarning("Caching codes failed!"); - } - } - } - - return true; -} - -bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C, - std::vector& inputs, - amd::opencl_driver::Buffer* output, - std::vector& options, - std::string& buildLog) { - std::string cacheOpt; - cacheOpt = std::accumulate(begin(options), end(options), cacheOpt); - - bool ret = false; - bool cachedCodeExist = false; - std::vector bcSet; - if (isCodeCacheEnabled_) { - for (auto& input : inputs) { - assert(input->Type() == amd::opencl_driver::DT_LLVM_BC); - - amd::opencl_driver::Buffer* bc = (amd::opencl_driver::Buffer*)input; - StringCache::CachedData cachedData = {bc->Buf().data(), bc->Size()}; - bcSet.push_back(cachedData); - } - - std::string dstData = ""; - if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData, - "Compile and Link Executable")) { - std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf())); - cachedCodeExist = true; - } - } - - if (!cachedCodeExist) { - if (!C->CompileAndLinkExecutable(inputs, output, options)) { - return false; - } - - if (isCodeCacheEnabled_) { - std::string dstData(output->Buf().data(), output->Buf().size()); - if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) { - buildLog += "Warning: Failed to caching codes.\n"; - LogWarning("Caching codes failed!"); - } - } - } - - return true; -} -#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY) } // namespace amd diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp index 10d8244250..ce6b3bef6b 100644 --- a/rocclr/runtime/device/device.hpp +++ b/rocclr/runtime/device/device.hpp @@ -18,13 +18,7 @@ #include "devprogram.hpp" #include "devkernel.hpp" #include "amdocl/cl_profile_amd.h" - -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) -#include "caching/cache.hpp" -#include "driver/AmdCompiler.h" -#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY) #include "acl.h" - #include "hwdebug.hpp" #include @@ -69,9 +63,6 @@ class SvmUnmapMemoryCommand; class TransferBufferFileCommand; class HwDebugManager; class Device; -#ifndef USE_COMGR_LIBRARY -class CacheCompilation; -#endif struct KernelParameterDescriptor; struct Coord3D; @@ -1378,10 +1369,6 @@ class Device : public RuntimeObject { // current device std::vector p2p_access_devices_; -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) - amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); } -#endif - //! Checks if OCL runtime can use code object manager for compilation bool ValidateComgr(); @@ -1427,10 +1414,6 @@ class Device : public RuntimeObject { BlitProgram* blitProgram_; //!< Blit program info static AppProfile appProfile_; //!< application profile HwDebugManager* hwDebugMgr_; //!< Hardware Debug manager -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) - //! Compilation with cache support - std::unique_ptr cacheCompilation_; -#endif static amd::Context* glb_ctx_; //!< Global context with all devices static amd::Monitor p2p_stage_ops_; //!< Lock to serialise cache for the P2P resources @@ -1450,45 +1433,6 @@ class Device : public RuntimeObject { uint32_t index_; //!< Unique device index }; -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) -//! Compilation process with cache support. -class CacheCompilation : public amd::HeapObject { - public: - enum COMPILER_OPERATION { LINK_LLVM_BITCODES = 0, COMPILE_TO_LLVM, COMPILE_AND_LINK_EXEC }; - - //! Constructor - CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, bool resetCache); - - //! NB, the cacheOpt argument is used for specifying the operation - //! condition, normally would be the same as the options argument. - //! However, the cacheOpt argument should not include any option - //! that would be modified each time but not affect the operation, - //! e.g. output file name. - - //! Link LLVM bitcode - bool linkLLVMBitcode(amd::opencl_driver::Compiler* C, - std::vector& inputs, - amd::opencl_driver::Buffer* output, std::vector& options, - std::string& buildLog); - - //! Compile to LLVM bitcode - bool compileToLLVMBitcode(amd::opencl_driver::Compiler* C, - std::vector& inputs, - amd::opencl_driver::Buffer* output, std::vector& options, - std::string& buildLog); - - //! Compile and link executable - bool compileAndLinkExecutable(amd::opencl_driver::Compiler* C, - std::vector& inputs, - amd::opencl_driver::Buffer* output, - std::vector& options, std::string& buildLog); - - private: - StringCache codeCache_; //! Cached codes - const bool isCodeCacheEnabled_; //! Code cache enable -}; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - /*! @} * @} */ diff --git a/rocclr/runtime/device/devkernel.cpp b/rocclr/runtime/device/devkernel.cpp index e55c082a35..aa40224fa3 100644 --- a/rocclr/runtime/device/devkernel.cpp +++ b/rocclr/runtime/device/devkernel.cpp @@ -17,7 +17,7 @@ #include "acl.h" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) #include "llvm/Support/AMDGPUMetadata.h" typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; @@ -26,7 +26,7 @@ using llvm::AMDGPU::HSAMD::AccessQualifier; using llvm::AMDGPU::HSAMD::AddressSpaceQualifier; using llvm::AMDGPU::HSAMD::ValueKind; using llvm::AMDGPU::HSAMD::ValueType; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) namespace device { @@ -719,7 +719,7 @@ void Kernel::FindLocalWorkSize(size_t workDim, const amd::NDRange& gblWorkSize, } } // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isHidden) { switch (lcArg.mValueKind) { case ValueKind::GlobalBuffer: @@ -824,7 +824,7 @@ static const clk_value_type_t ClkValueMapType[6][6] = { }; // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline clk_value_type_t GetOclTypeOCL(const KernelArgMD& lcArg, size_t size = 0) { uint sizeType; uint numElements; @@ -981,7 +981,7 @@ static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t s #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline size_t GetArgOffsetOCL(const KernelArgMD& lcArg) { return lcArg.mOffset; } static inline size_t GetArgAlignmentOCL(const KernelArgMD& lcArg) { return lcArg.mAlign; } @@ -1027,7 +1027,7 @@ static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) { #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline size_t GetArgPointeeAlignmentOCL(const KernelArgMD& lcArg) { if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) { uint32_t align = lcArg.mPointeeAlign; @@ -1052,7 +1052,7 @@ static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) { #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline bool GetReadOnlyOCL(const KernelArgMD& lcArg) { if ((lcArg.mValueKind == ValueKind::GlobalBuffer) || (lcArg.mValueKind == ValueKind::Image)) { switch (lcArg.mAccQual) { @@ -1082,7 +1082,7 @@ static inline bool GetReadOnlyOCL(const aclArgData* argInfo) { #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline int GetArgSizeOCL(const KernelArgMD& lcArg) { return lcArg.mSize; } #endif @@ -1125,7 +1125,7 @@ inline static int GetArgSizeOCL(const aclArgData* argInfo) { #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const KernelArgMD& lcArg) { if (lcArg.mValueKind == ValueKind::DynamicSharedPointer) { return CL_KERNEL_ARG_ADDRESS_LOCAL; @@ -1181,7 +1181,7 @@ static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const KernelArgMD& lcArg) { if (lcArg.mValueKind == ValueKind::Image) { switch (lcArg.mAccQual) { @@ -1216,7 +1216,7 @@ static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgDat #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const KernelArgMD& lcArg) { cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE; if (lcArg.mValueKind == ValueKind::GlobalBuffer || @@ -1271,7 +1271,6 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* a #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) #if defined(USE_COMGR_LIBRARY) bool Kernel::GetAttrCodePropMetadata( const amd_comgr_metadata_node_t kernelMetaNode, KernelMD* kernelMD) { @@ -1564,83 +1563,7 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD) { params.insert(params.end(), hiddenParams.begin(), hiddenParams.end()); createSignature(params, numParams, amd::KernelSignature::ABIVersion_2); } -#else // not define USE_COMGR_LIBRARY -void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) { - // Iterate through the arguments and insert into parameterList - device::Kernel::parameters_t params; - device::Kernel::parameters_t hiddenParams; - amd::KernelParameterDescriptor desc; - size_t offset = 0; - size_t offsetStruct = argBufferSize; - - for (size_t i = 0; i < kernelMD.mArgs.size(); ++i) { - const KernelArgMD& lcArg = kernelMD.mArgs[i]; - - size_t size = GetArgSizeOCL(lcArg); - size_t alignment = GetArgAlignmentOCL(lcArg); - bool isHidden = false; - desc.info_.oclObject_ = GetOclArgumentTypeOCL(lcArg, &isHidden); - - // Allocate the hidden arguments, but abstraction layer will skip them - if (isHidden) { - - if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::HiddenCompletionAction) { - setDynamicParallelFlag(true); - } - - offset = amd::alignUp(offset, alignment); - desc.offset_ = offset; - desc.size_ = size; - offset += size; - hiddenParams.push_back(desc); - continue; - } - - desc.name_ = lcArg.mName.c_str(); - desc.type_ = GetOclTypeOCL(lcArg, size); - desc.typeName_ = lcArg.mTypeName.c_str(); - - desc.addressQualifier_ = GetOclAddrQualOCL(lcArg); - desc.accessQualifier_ = GetOclAccessQualOCL(lcArg); - desc.typeQualifier_ = GetOclTypeQualOCL(lcArg); - desc.info_.arrayIndex_ = GetArgPointeeAlignmentOCL(lcArg); - desc.size_ = size; - - // These objects have forced data size to uint64_t - if ((desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) || - (desc.info_.oclObject_ == amd::KernelParameterDescriptor::SamplerObject) || - (desc.info_.oclObject_ == amd::KernelParameterDescriptor::QueueObject)) { - offset = amd::alignUp(offset, sizeof(uint64_t)); - desc.offset_ = offset; - offset += sizeof(uint64_t); - } - else { - offset = amd::alignUp(offset, alignment); - desc.offset_ = offset; - offset += size; - } - - // Update read only flag - desc.info_.readOnly_ = GetReadOnlyOCL(lcArg); - - params.push_back(desc); - - if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject) { - flags_.imageEna_ = true; - if (desc.accessQualifier_ != CL_KERNEL_ARG_ACCESS_READ_ONLY) { - flags_.imageWriteEna_ = true; - } - } - } - - // Save the number of OCL arguments - uint32_t numParams = params.size(); - // Append the hidden arguments to the OCL arguments - params.insert(params.end(), hiddenParams.begin(), hiddenParams.end()); - createSignature(params, numParams, amd::KernelSignature::ABIVersion_2); -} #endif // defined(USE_COMGR_LIBRARY) -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) // ================================================================================================ #if defined(WITH_COMPILER_LIB) @@ -1724,7 +1647,7 @@ void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) { #endif // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) void Kernel::InitPrintf(const std::vector& printfInfoStrings) { for (auto str : printfInfoStrings) { std::vector tokens; @@ -1814,7 +1737,7 @@ void Kernel::InitPrintf(const std::vector& printfInfoStrings) { // ] } } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) // ================================================================================================ #if defined(WITH_COMPILER_LIB) diff --git a/rocclr/runtime/device/devkernel.hpp b/rocclr/runtime/device/devkernel.hpp index 0980537333..c9139762d7 100644 --- a/rocclr/runtime/device/devkernel.hpp +++ b/rocclr/runtime/device/devkernel.hpp @@ -9,7 +9,7 @@ #include "platform/memory.hpp" #include "devwavelimiter.hpp" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) namespace llvm { namespace AMDGPU { namespace HSAMD { @@ -25,7 +25,6 @@ struct RuntimeHandle { uint32_t group_segment_size; //!< From GROUP_SEGMENT_FIXED_SIZE }; -#if defined(USE_COMGR_LIBRARY) #include "amd_comgr.h" #include "llvm/Support/AMDGPUMetadata.h" typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; @@ -285,7 +284,6 @@ static const std::map KernelFieldMapV3 = #endif // defined(USE_COMGR_LIBRARY) -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) namespace amd { namespace hsa { @@ -492,7 +490,6 @@ class Kernel : public amd::HeapObject { protected: //! Initializes the abstraction layer kernel parameters -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) #if defined(USE_COMGR_LIBRARY) void InitParameters(const amd_comgr_metadata_node_t kernelMD); @@ -516,9 +513,6 @@ class Kernel : public amd::HeapObject { //! Returns the kernel code object version const uint32_t codeObjectVer() const { return prog().codeObjectVer(); } -#else - void InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize); -#endif //! Initializes HSAIL Printf metadata and info for LC void InitPrintf(const std::vector& printfInfoStrings); #endif diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp index 961f80b043..5a08b9e8eb 100644 --- a/rocclr/runtime/device/devprogram.cpp +++ b/rocclr/runtime/device/devprogram.cpp @@ -12,15 +12,6 @@ #include "utils/libUtils.h" #include "comgrctx.hpp" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -#ifndef USE_COMGR_LIBRARY -#include "driver/AmdCompiler.h" -#include "libraries.amdgcn.inc" -#include "opencl1.2-c.amdgcn.inc" -#include "opencl2.0-c.amdgcn.inc" -#endif -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - #include #include #include @@ -37,11 +28,11 @@ #include "spirv/spirvUtils.h" #include "acl.h" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) #include "llvm/Support/AMDGPUMetadata.h" typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) #ifdef EARLY_INLINE #define AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-early-inline-all" @@ -134,7 +125,7 @@ bool Program::compileImpl(const std::string& sourceCode, } // ================================================================================================ -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static std::string llvmBin_(amd::Os::getEnvironment("LLVM_BIN")); #if defined(ATI_OS_LINUX) @@ -174,25 +165,7 @@ static void checkLLVM_BIN() { } #endif // defined(ATI_OS_LINUX) -#if !defined(USE_COMGR_LIBRARY) -std::unique_ptr Program::newCompilerInstance() { -#if defined(ATI_OS_LINUX) - pthread_once(&once, checkLLVM_BIN); -#endif // defined(ATI_OS_LINUX) -#if defined(DEBUG) - std::string clangExe(llvmBin_ + LINUX_SWITCH("/clang", "\\clang.exe")); - struct stat buf; - if (stat(clangExe.c_str(), &buf)) { - std::string msg("Could not find the Clang binary in " + llvmBin_); - LogWarning(msg.c_str()); - } -#endif // defined(DEBUG) - - return std::unique_ptr( - amd::opencl_driver::CompilerFactory().CreateAMDGPUCompiler(llvmBin_)); -} -#endif // !defined(USE_COMGR_LIBRARY) -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) // ================================================================================================ @@ -777,214 +750,6 @@ bool Program::compileImplLC(const std::string& sourceCode, amd::Comgr::destroy_data_set(inputs); return ret; } -#else // not using COMgr -bool Program::compileImplLC(const std::string& sourceCode, - const std::vector& headers, - const char** headerIncludeNames, amd::option::Options* options) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - const char* xLang = options->oVariables->XLang; - if (xLang != nullptr) { - if (strcmp(xLang, "asm") == 0) { - clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size()); - return true; - } - else if (!strcmp(xLang, "cl")) { - buildLog_ += "Unsupported language: \"" + std::string(xLang) + "\".\n"; - return false; - } - } - - using namespace amd::opencl_driver; - std::unique_ptr C(newCompilerInstance()); - std::vector inputs; - - Data* input = C->NewBufferReference(DT_CL, sourceCode.c_str(), sourceCode.length()); - if (input == nullptr) { - buildLog_ += "Error while creating data from source code"; - return false; - } - - inputs.push_back(input); - - amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC); - if (output == nullptr) { - buildLog_ += "Error while creating buffer for the LLVM bitcode"; - return false; - } - - // Set the options for the compiler - // Some options are set in Clang AMDGPUToolChain (like -m64) - std::ostringstream ostrstr; - std::copy(options->clangOptions.begin(), options->clangOptions.end(), - std::ostream_iterator(ostrstr, " ")); - - std::string driverOptions(ostrstr.str()); - - // Setting the language - driverOptions.append(" -cl-std=").append(options->oVariables->CLStd); - - // Set the -O# - std::ostringstream optLevel; - optLevel << " -O" << options->oVariables->OptLevel; - driverOptions.append(optLevel.str()); - - // Set the machine target - driverOptions.append(" -mcpu="); - driverOptions.append(machineTarget_); - - // Set xnack option if needed - if (xnackEnabled_) { - driverOptions.append(" -mxnack"); - } - - // Set SRAM ECC option if needed - if (sramEccEnabled_) { - driverOptions.append(" -msram-ecc"); - } - else { - driverOptions.append(" -mno-sram-ecc"); - } - - driverOptions.append(options->llvmOptions); - - driverOptions.append(ProcessOptionsFlattened(options)); - - // Set whole program mode - driverOptions.append(AMDGPU_EARLY_INLINE_ALL_OPTION " -mllvm -amdgpu-prelink"); - - // Find the temp folder for the OS - std::string tempFolder = amd::Os::getTempPath(); - - // Iterate through each source code and dump it into tmp - std::fstream f; - std::vector headerFileNames(headers.size()); - std::vector newDirs; - for (size_t i = 0; i < headers.size(); ++i) { - std::string headerPath = tempFolder; - std::string headerIncludeName(headerIncludeNames[i]); - // replace / in path with current os's file separator - if (amd::Os::fileSeparator() != '/') { - for (auto& it : headerIncludeName) { - if (it == '/') it = amd::Os::fileSeparator(); - } - } - size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator()); - if (pos != std::string::npos) { - headerPath += amd::Os::fileSeparator(); - headerPath += headerIncludeName.substr(0, pos); - headerIncludeName = headerIncludeName.substr(pos + 1); - } - if (!amd::Os::pathExists(headerPath)) { - bool ret = amd::Os::createPath(headerPath); - assert(ret && "failed creating path!"); - newDirs.push_back(headerPath); - } - std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName; - headerFileNames[i] = headerFullName; - f.open(headerFullName.c_str(), std::fstream::out); - // Should we allow asserts - assert(!f.fail() && "failed creating header file!"); - f.write(headers[i]->c_str(), headers[i]->length()); - f.close(); - - Data* inc = C->NewFileReference(DT_CL_HEADER, headerFileNames[i]); - if (inc == nullptr) { - buildLog_ += "Error while creating data from headers"; - return false; - } - inputs.push_back(inc); - } - - // Set the include path for the temp folder that contains the includes - if (!headers.empty()) { - driverOptions.append(" -I"); - driverOptions.append(tempFolder); - } - - if (options->isDumpFlagSet(amd::option::DUMP_CL)) { - std::ofstream f(options->getDumpFileName(".cl").c_str(), std::ios::trunc); - if (f.is_open()) { - f << "/* Compiler options:\n" - "-c -emit-llvm -target amdgcn-amd-amdhsa -x cl " - << driverOptions << " -include opencl-c.h " - << "\n*/\n\n" - << sourceCode; - f.close(); - } - else { - buildLog_ += "Warning: opening the file to dump the OpenCL source failed.\n"; - } - } - - uint clcStd = - (options->oVariables->CLStd[2] - '0') * 100 + (options->oVariables->CLStd[4] - '0') * 10; - - std::pair hdr; - switch (clcStd) { - case 100: - case 110: - case 120: - hdr = { opencl1_2_c, opencl1_2_c_size }; - break; - case 200: - hdr = { opencl2_0_c, opencl2_0_c_size }; - break; - default: - buildLog_ += "Unsupported requested OpenCL C version (-cl-std).\n"; - return false; - } - - File* pch = C->NewTempFile(DT_CL_HEADER); - if (pch == nullptr || !pch->WriteData((const char*)hdr.first, hdr.second)) { - buildLog_ += "Error while opening the opencl-c header "; - return false; - } - - driverOptions.append(" -include-pch " + pch->Name()); - driverOptions.append(" -Xclang -fno-validate-pch"); - driverOptions.append(" -Xclang -target-feature -Xclang -code-object-v3"); - - // Tokenize the options string into a vector of strings - std::istringstream istrstr(driverOptions); - std::istream_iterator sit(istrstr), end; - std::vector params(sit, end); - - // Compile source to IR - bool ret = - device().cacheCompilation()->compileToLLVMBitcode(C.get(), inputs, output, params, buildLog_); - buildLog_ += C->Output(); - if (!ret) { - buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n"; - return false; - } - - llvmBinary_.assign(output->Buf().data(), output->Size()); - elfSectionType_ = amd::OclElf::LLVMIR; - - if (options->isDumpFlagSet(amd::option::DUMP_BC_ORIGINAL)) { - std::ofstream f(options->getDumpFileName("_original.bc").c_str(), - std::ios::binary | std::ios::trunc); - if (f.is_open()) { - f.write(llvmBinary_.data(), llvmBinary_.size()); - f.close(); - } - else { - buildLog_ += "Warning: opening the file to dump the compiled IR failed.\n"; - } - } - - if (clBinary()->saveSOURCE()) { - clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.size()); - } - if (clBinary()->saveLLVMIR()) { - clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(), - false); - // store the original compile options - clBinary()->storeCompileOptions(compileOptions_); - } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - return true; -} #endif // defined(USE_COMGR_LIBRARY) // ================================================================================================ @@ -1208,101 +973,6 @@ bool Program::linkImplLC(const std::vector& inputPrograms, return linkImpl(options); } - -#else // not using COMgr -bool Program::linkImplLC(const std::vector& inputPrograms, - amd::option::Options* options, bool createLibrary) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - using namespace amd::opencl_driver; - std::unique_ptr C(newCompilerInstance()); - - std::vector inputs; - for (auto program : inputPrograms) { - if (program->llvmBinary_.empty()) { - if (program->clBinary() == NULL) { - buildLog_ += "Internal error: Input program not compiled!\n"; - return false; - } - - // We are using CL binary directly. - // Setup elfIn() and try to load llvmIR from binary - // This elfIn() will be released at the end of build by finiBuild(). - if (!program->clBinary()->setElfIn()) { - buildLog_ += "Internal error: Setting input OCL binary failed!\n"; - return false; - } - if (!program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_)) { - buildLog_ += "Internal error: Failed loading compiled binary!\n"; - return false; - } - } - - if (program->elfSectionType_ != amd::OclElf::LLVMIR) { - buildLog_ += "Error: Input binary format is not supported\n."; - return false; - } - - Data* input = C->NewBufferReference(DT_LLVM_BC, (const char*)program->llvmBinary_.data(), - program->llvmBinary_.size()); - - if (!input) { - buildLog_ += "Internal error: Failed to open the compiled programs.\n"; - return false; - } - - // release elfIn() for the program - program->clBinary()->resetElfIn(); - - inputs.push_back(input); - } - - // open the linked output - amd::opencl_driver::Buffer* output = C->NewBuffer(DT_LLVM_BC); - - if (!output) { - buildLog_ += "Error: Failed to open the linked program.\n"; - return false; - } - - std::vector linkOptions; - - // NOTE: The params is also used to identy cached code object. This parameter - // should not contain any dyanamically generated filename. - bool ret = device().cacheCompilation()->linkLLVMBitcode( - C.get(), inputs, output, linkOptions, buildLog_); - buildLog_ += C->Output(); - if (!ret) { - buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n"; - return false; - } - - llvmBinary_.assign(output->Buf().data(), output->Size()); - elfSectionType_ = amd::OclElf::LLVMIR; - - if (clBinary()->saveLLVMIR()) { - clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(), - false); - // store the original link options - clBinary()->storeLinkOptions(linkOptions_); - // store the original compile options - clBinary()->storeCompileOptions(compileOptions_); - } - - // skip the rest if we are building an opencl library - if (createLibrary) { - setType(TYPE_LIBRARY); - if (!createBinary(options)) { - buildLog_ += "Internal error: creating OpenCL binary failed\n"; - return false; - } - return true; - } - - return linkImpl(options); -#else - return false; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -} #endif // defined(USE_COMGR_LIBRARY) // ================================================================================================ @@ -1596,264 +1266,6 @@ bool Program::linkImplLC(amd::option::Options* options) { return true; } -#else // not using COMgr -bool Program::linkImplLC(amd::option::Options* options) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - using namespace amd::opencl_driver; - internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? - true : false; - std::vector inputs; - std::unique_ptr C(newCompilerInstance()); - bool bLinkLLVMBitcode = true; - aclType continueCompileFrom = llvmBinary_.empty() ? - getNextCompilationStageFromBinary(options) : ACL_TYPE_LLVMIR_BINARY; - - switch (continueCompileFrom) { - case ACL_TYPE_CG: - case ACL_TYPE_LLVMIR_BINARY: { - break; - } - case ACL_TYPE_ASM_TEXT: { - char* section; - size_t sz; - clBinary()->elfOut()->getSection(amd::OclElf::SOURCE, §ion, &sz); - Data* input = C->NewBufferReference(DT_ASSEMBLY, section, sz); - if (!input) { - buildLog_ += "Error: Failed to open the assembler text.\n"; - return false; - } - inputs.push_back(input); - bLinkLLVMBitcode = false; - break; - } - case ACL_TYPE_ISA: { - binary_t isaBinary = binary(); - return setKernels(options, (void*)isaBinary.first, isaBinary.second); - break; - } - default: - buildLog_ += "Error while Codegen phase: the binary is incomplete \n"; - return false; - } - - // call LinkLLVMBitcode - if (bLinkLLVMBitcode) { - // open the input IR source - Data* input = C->NewBufferReference(DT_LLVM_BC, llvmBinary_.data(), llvmBinary_.size()); - - if (!input) { - buildLog_ += "Error: Failed to open the compiled program.\n"; - return false; - } - - inputs.push_back(input); // must be the first input - // open the bitcode libraries - Data* opencl_bc = - C->NewBufferReference(DT_LLVM_BC, (const char*)opencl_lib, opencl_lib_size); - Data* ocml_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ocml_lib, ocml_lib_size); - Data* ockl_bc = C->NewBufferReference(DT_LLVM_BC, (const char*)ockl_lib, ockl_lib_size); - - if (!opencl_bc || !ocml_bc || !ockl_bc) { - buildLog_ += "Error: Failed to open the bitcode library.\n"; - return false; - } - - inputs.push_back(opencl_bc); // depends on oclm & ockl - inputs.push_back(ockl_bc); - inputs.push_back(ocml_bc); - - // open the control functions - auto isa_version = get_oclc_isa_version(device().info().gfxipVersion_); - if (!std::get<1>(isa_version)) { - buildLog_ += "Error: Linking for this device is not supported\n"; - return false; - } - - Data* isa_version_bc = - C->NewBufferReference(DT_LLVM_BC, (const char*)std::get<1>(isa_version), std::get<2>(isa_version)); - - if (!isa_version_bc) { - buildLog_ += "Error: Failed to open the control functions.\n"; - return false; - } - - inputs.push_back(isa_version_bc); - - auto correctly_rounded_sqrt = - get_oclc_correctly_rounded_sqrt(options->oVariables->FP32RoundDivideSqrt); - Data* correctly_rounded_sqrt_bc = C->NewBufferReference(DT_LLVM_BC, - reinterpret_cast(std::get<1>(correctly_rounded_sqrt)), - std::get<2>(correctly_rounded_sqrt)); - - auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero || - AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 || - (device().info().gfxipVersion_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)); - Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC, - reinterpret_cast(std::get<1>(daz_opt)), std::get<2>(daz_opt)); - - auto finite_only = get_oclc_finite_only(options->oVariables->FiniteMathOnly || - options->oVariables->FastRelaxedMath); - Data* finite_only_bc = C->NewBufferReference(DT_LLVM_BC, - reinterpret_cast(std::get<1>(finite_only)), std::get<2>(finite_only)); - - auto unsafe_math = get_oclc_unsafe_math(options->oVariables->UnsafeMathOpt || - options->oVariables->FastRelaxedMath); - Data* unsafe_math_bc = C->NewBufferReference(DT_LLVM_BC, - reinterpret_cast(std::get<1>(unsafe_math)), std::get<2>(unsafe_math)); - - auto wavefrontsize64 = get_oclc_wavefrontsize64(device().settings().lcWavefrontSize64_); - Data* wavefrontsize64_bc = C->NewBufferReference(DT_LLVM_BC, - reinterpret_cast(std::get<1>(wavefrontsize64)), std::get<2>(wavefrontsize64)); - - if (!correctly_rounded_sqrt_bc || !daz_opt_bc || !finite_only_bc || !unsafe_math_bc || - !wavefrontsize64_bc) { - buildLog_ += "Error: Failed to open the control functions.\n"; - return false; - } - - inputs.push_back(correctly_rounded_sqrt_bc); - inputs.push_back(daz_opt_bc); - inputs.push_back(finite_only_bc); - inputs.push_back(unsafe_math_bc); - inputs.push_back(wavefrontsize64_bc); - - // open the linked output - std::vector linkOptions; - Buffer* linked_bc = C->NewBuffer(DT_LLVM_BC); - - if (!linked_bc) { - buildLog_ += "Error: Failed to open the linked program.\n"; - return false; - } - - // NOTE: The linkOptions parameter is also used to identy cached code object. This parameter - // should not contain any dyanamically generated filename. - bool ret = device().cacheCompilation()->linkLLVMBitcode( - C.get(), inputs, linked_bc, linkOptions, buildLog_); - buildLog_ += C->Output(); - if (!ret) { - buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n"; - return false; - } - - if (options->isDumpFlagSet(amd::option::DUMP_BC_LINKED)) { - std::ofstream f(options->getDumpFileName("_linked.bc").c_str(), - std::ios::binary | std::ios::trunc); - if (f.is_open()) { - f.write(linked_bc->Buf().data(), linked_bc->Size()); - f.close(); - } - else { - buildLog_ += "Warning: opening the file to dump the linked IR failed.\n"; - } - } - - inputs.clear(); - inputs.push_back(linked_bc); - } - - Buffer* out_exec = C->NewBuffer(DT_EXECUTABLE); - if (!out_exec) { - buildLog_ += "Error: Failed to create the linked executable.\n"; - return false; - } - - std::string codegenOptions(options->llvmOptions); - - // Set the machine target - codegenOptions.append(" -mcpu="); - codegenOptions.append(machineTarget_); - - // Set xnack option if needed - if (xnackEnabled_) { - codegenOptions.append(" -mxnack"); - } - - // Set SRAM ECC option if needed - if (sramEccEnabled_) { - codegenOptions.append(" -msram-ecc"); - } - else { - codegenOptions.append(" -mno-sram-ecc"); - } - - // Set the -O# - std::ostringstream optLevel; - optLevel << "-O" << options->oVariables->OptLevel; - codegenOptions.append(" ").append(optLevel.str()); - - // Pass clang options - std::ostringstream ostrstr; - std::copy(options->clangOptions.begin(), options->clangOptions.end(), - std::ostream_iterator(ostrstr, " ")); - codegenOptions.append(" ").append(ostrstr.str()); - - // Force object code v2. - codegenOptions.append(" -mno-code-object-v3"); - // Set whole program mode - codegenOptions.append(" -mllvm -amdgpu-internalize-symbols" AMDGPU_EARLY_INLINE_ALL_OPTION); - - if (!device().settings().enableWgpMode_) { - codegenOptions.append(" -mcumode"); - } - - if (device().settings().lcWavefrontSize64_) { - codegenOptions.append(" -mwavefrontsize64"); - } - - // Tokenize the options string into a vector of strings - std::istringstream strstr(codegenOptions); - std::istream_iterator sit(strstr), end; - std::vector params(sit, end); - - // NOTE: The params is also used to identy cached code object. This parameter - // should not contain any dyanamically generated filename. - bool ret = device().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params, - buildLog_); - buildLog_ += C->Output(); - if (!ret) { - if (continueCompileFrom == ACL_TYPE_ASM_TEXT) { - buildLog_ += "Error: Creating the executable from ISA assembly text failed.\n"; - } - else { - buildLog_ += "Error: Creating the executable from LLVM IRs failed.\n"; - } - return false; - } - - if (options->isDumpFlagSet(amd::option::DUMP_O)) { - std::ofstream f(options->getDumpFileName(".so").c_str(), std::ios::binary | std::ios::trunc); - if (f.is_open()) { - f.write(out_exec->Buf().data(), out_exec->Size()); - f.close(); - } - else { - buildLog_ += "Warning: opening the file to dump the code object failed.\n"; - } - } - - if (options->isDumpFlagSet(amd::option::DUMP_ISA)) { - std::string name = options->getDumpFileName(".s"); - File* dump = C->NewFile(DT_INTERNAL, name); - if (!C->DumpExecutableAsText(out_exec, dump)) { - buildLog_ += "Warning: failed to dump code object.\n"; - } - } - - // Call the device layer to setup all available kernels on the actual device - if (!setKernels(options, out_exec->Buf().data(), out_exec->Size())) { - return false; - } - - // Save the binary and type - clBinary()->saveBIFBinary(reinterpret_cast(out_exec->Buf().data()), out_exec->Size()); - setType(TYPE_EXECUTABLE); - - return true; -#else - return false; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -} #endif // defined(USE_COMGR_LIBRARY) @@ -2640,7 +2052,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector& completeSt bool& needOptionsCheck) { aclType from = ACL_TYPE_DEFAULT; if (isLC()) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) completeStages.clear(); needOptionsCheck = true; //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT? @@ -2678,7 +2090,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector& completeSt default: break; } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) } else { #if defined(WITH_COMPILER_LIB) acl_error errorCode; @@ -3001,7 +2413,7 @@ bool Program::createKernelMetadataMap() { #endif bool Program::FindGlobalVarSize(void* binary, size_t binSize) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) size_t progvarsTotalSize = 0; size_t dynamicSize = 0; size_t progvarsWriteSize = 0; @@ -3044,7 +2456,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { note->n_namesz == sizeof "AMD" && !memcmp(name, "AMD", note->n_namesz)) || (note->n_type == 32 /* NT_AMD_AMDGPU_HSA_METADATA V3 */ && note->n_namesz == sizeof "AMDGPU" && !memcmp(name, "AMDGPU", note->n_namesz))) { -#if defined(USE_COMGR_LIBRARY) amd_comgr_status_t status; amd_comgr_data_t binaryData; @@ -3064,16 +2475,6 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { buildLog_ += "Error: COMGR fails to get the metadata.\n"; return false; } -#else - std::string metadataStr((const char*)desc, (size_t)note->n_descsz); - metadata_ = new CodeObjectMD(); - if (llvm::AMDGPU::HSAMD::fromString(metadataStr, *metadata_)) { - buildLog_ += "Error: failed to process metadata\n"; - return false; - } - // We've found and loaded the runtime metadata, exit the - // note record loop now. -#endif metadata_found = true; break; } @@ -3102,13 +2503,11 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { return false; } -#if defined(USE_COMGR_LIBRARY) if (!createKernelMetadataMap()) { buildLog_ += "Error: create kernel metadata map using COMgr\n"; return false; } -#endif progvarsTotalSize -= dynamicSize; setGlobalVariableTotalSize(progvarsTotalSize); @@ -3116,7 +2515,7 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { if (progvarsWriteSize != dynamicSize) { hasGlobalStores_ = true; } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) return true; } diff --git a/rocclr/runtime/device/devprogram.hpp b/rocclr/runtime/device/devprogram.hpp index a4bf47986c..33a18b3874 100644 --- a/rocclr/runtime/device/devprogram.hpp +++ b/rocclr/runtime/device/devprogram.hpp @@ -9,13 +9,8 @@ #include "platform/memory.hpp" #include "devwavelimiter.hpp" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -#ifndef USE_COMGR_LIBRARY -#include "driver/AmdCompiler.h" -#else +#if defined(USE_COMGR_LIBRARY) #include "amd_comgr.h" -#endif -//#include "llvm/Support/AMDGPUMetadata.h" namespace llvm { namespace AMDGPU { @@ -28,8 +23,7 @@ namespace llvm { #define LC_METADATA 1 typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -//typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) #ifndef LC_METADATA typedef char CodeObjectMD; @@ -319,11 +313,6 @@ class Program : public amd::HeapObject { void setType(type_t newType) { type_ = newType; } -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) - //! Return a new transient compiler instance. - static std::unique_ptr newCompilerInstance(); -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) - /* \brief Returns the next stage to compile from, based on sections in binary, * also returns completeStages in a vector, which contains at least ACL_TYPE_DEFAULT, * sets needOptionsCheck to true if options check is needed to decide whether or not to recompile diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp index b2c9a3947e..933b011bba 100644 --- a/rocclr/runtime/device/pal/paldevice.cpp +++ b/rocclr/runtime/device/pal/paldevice.cpp @@ -307,35 +307,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64; - if (settings().useLightning_) { -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) - // create compilation object with cache support - int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100; - int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10; - int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10; - - // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) - // with dash as delimiter to be compatible with Windows directory name - std::ostringstream cacheTarget; - cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; - if (hwInfo_->xnackEnabled_) { - cacheTarget << "+xnack"; - } - if (info_.sramEccEnabled_) { - cacheTarget << "+sram-ecc"; - } - - // Create CacheCompilation for the offline device - amd::CacheCompilation* compObj = new amd::CacheCompilation( - cacheTarget.str(), "_null_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); - if (!compObj) { - LogError("Unable to create cache compilation object!"); - return false; - } - - cacheCompilation_.reset(compObj); -#endif - } else { + if (!settings().useLightning_) { #if defined(WITH_COMPILER_LIB) const char* library = getenv("HSA_COMPILER_LIBRARY"); aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8), @@ -1029,34 +1001,7 @@ bool Device::create(Pal::IDevice* device) { allocedMem[i] = 0; } - if (settings().useLightning_) { -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) - // create compilation object with cache support - int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100; - int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10; - int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10; - - // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) - // with dash as delimiter to be compatible with Windows directory name - std::ostringstream cacheTarget; - cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; - if (isXNACKSupported) { - cacheTarget << "+xnack"; - } - if (info_.sramEccEnabled_) { - cacheTarget << "+sram-ecc"; - } - - amd::CacheCompilation* compObj = new amd::CacheCompilation( - cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); - if (!compObj) { - LogError("Unable to create cache compilation object!"); - return false; - } - - cacheCompilation_.reset(compObj); -#endif - } else { + if (!settings().useLightning_) { #if defined(WITH_COMPILER_LIB) const char* library = getenv("HSA_COMPILER_LIBRARY"); aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8), diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp index dc4977bd3d..33f9e39f40 100644 --- a/rocclr/runtime/device/pal/palkernel.cpp +++ b/rocclr/runtime/device/pal/palkernel.cpp @@ -10,11 +10,11 @@ #include "utils/options.hpp" #include "acl.h" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) #include "llvm/Support/AMDGPUMetadata.h" typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) #include #include @@ -413,7 +413,7 @@ const LightningProgram& LightningKernel::prog() const { return reinterpret_cast(prog_); } -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) { for (const KernelMD& kernelMD : programMD->mKernels) { if (kernelMD.mName == name) { @@ -422,9 +422,7 @@ static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const s } return nullptr; } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -#if defined(USE_COMGR_LIBRARY) bool LightningKernel::init() { flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false; @@ -508,87 +506,4 @@ bool LightningKernel::init() { } #endif // defined(USE_COMGR_LIBRARY) -bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { -#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY) - flags_.internalKernel_ = - (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false; - - aqlCreateHWInfo(symbol); - - const CodeObjectMD* programMD = prog().metadata(); - assert(programMD != nullptr); - - const KernelMD* kernelMD = FindKernelMetadata(programMD, name()); - - if (kernelMD == nullptr) { - return false; - } - - // Set the argList - InitParameters(*kernelMD, argsBufferSize()); - - if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) { - const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize; - workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0]; - workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1]; - workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2]; - } - - if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) { - const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint; - workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0]; - workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1]; - workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2]; - } - - if (!kernelMD->mAttrs.mVecTypeHint.empty()) { - workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str(); - } - - if (!kernelMD->mAttrs.mRuntimeHandle.empty()) { - hsa_agent_t agent; - agent.handle = 1; - amd::hsa::loader::Symbol* rth_symbol; - - // Get the runtime handle symbol GPU address - rth_symbol = prog().GetSymbol(const_cast(kernelMD->mAttrs.mRuntimeHandle.c_str()), - const_cast(&agent)); - uint64_t symbol_address; - rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address); - - // Copy the kernel_object pointer to the runtime handle symbol GPU address - const Memory& codeSegGpu = prog().codeSegGpu(); - uint64_t offset = symbol_address - codeSegGpu.vmAddress(); - VirtualGPU* gpu = codeSegGpu.dev().xferQueue(); - - const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()}; - - codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true); - } - - // Copy wavefront size - workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_; - - workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize; - if (workGroupInfo_.size_ == 0) { - return false; - } - - InitPrintf(programMD->mPrintf); - - /*FIXME_lmoriche: - size_t sizeOfWavesPerSimdHint = sizeof(workGroupInfo_.wavesPerSimdHint_); - error = aclQueryInfo(dev().compiler(), prog().binaryElf(), - RT_WAVES_PER_SIMD_HINT, openClKernelName.c_str(), - &workGroupInfo_.wavesPerSimdHint_, &sizeOfWavesPerSimdHint); - if (error != ACL_SUCCESS) { - return false; - } - - waveLimiter_.enable(); - */ -#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY) - return true; -} - } // namespace pal diff --git a/rocclr/runtime/device/pal/palmemory.cpp b/rocclr/runtime/device/pal/palmemory.cpp index 555aa8a550..cf69049165 100644 --- a/rocclr/runtime/device/pal/palmemory.cpp +++ b/rocclr/runtime/device/pal/palmemory.cpp @@ -173,6 +173,9 @@ bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params if ((params != nullptr) && (memoryType() == Pinned)) { memRef()->gpu_ = params->gpu_; } + if (memRef() != nullptr) { +// printf("VM:%llx\n", iMem()->Desc().gpuVirtAddr); + } } return result; diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp index 1813c42bb0..c32a692bd3 100644 --- a/rocclr/runtime/device/pal/palprogram.cpp +++ b/rocclr/runtime/device/pal/palprogram.cpp @@ -17,14 +17,10 @@ #include "hsa.h" #include "hsa_ext_image.h" #include "amd_hsa_loader.hpp" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -#ifndef USE_COMGR_LIBRARY -#include "driver/AmdCompiler.h" -#include "libraries.amdgcn.inc" -#endif +#if defined(USE_COMGR_LIBRARY) #include "llvm/Support/AMDGPUMetadata.h" #include "gelf.h" -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) namespace pal { @@ -691,7 +687,7 @@ hsa_status_t PALHSALoaderContext::SamplerDestroy(hsa_agent_t agent, return HSA_STATUS_SUCCESS; } -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executable_symbol_t hSymbol, void* data) { @@ -720,20 +716,20 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl return HSA_STATUS_SUCCESS; } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) bool LightningProgram::createBinary(amd::option::Options* options) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) { LogError("Failed to create ELF binary image!"); return false; } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) return true; } bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) hsa_agent_t agent; agent.handle = 1; @@ -767,7 +763,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s return false; } -#if defined(USE_COMGR_LIBRARY) for (const auto& kernelMeta : kernelMetadataMap_) { auto kernelName = kernelMeta.first; auto kernel = @@ -785,44 +780,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s maxScratchRegs_ = std::max(static_cast(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_); } -#else - // Get the list of kernels - std::vector kernelNameList; - status = executable_->IterateSymbols(GetKernelNamesCallback, &kernelNameList); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get kernel names\n"; - return false; - } - - for (const auto& kernelName : kernelNameList) { - auto kernel = - new LightningKernel(kernelName, this, options->origOptionStr + ProcessOptionsFlattened(options)); - - kernels()[kernelName] = kernel; - - auto symbol = executable_->GetSymbol(kernelName.c_str(), &agent); - if (!symbol) { - buildLog_ += "Error: Getting kernel symbol '" + kernelName + - "' from AMD HSA Code Object failed. " - "Kernel initialization failed.\n"; - return false; - } - if (!kernel->init(symbol)) { - buildLog_ += "Error: Kernel '" + kernelName + "' initialization failed.\n"; - return false; - } - buildLog_ += kernel->buildLog(); - - kernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize); - - // Find max scratch regs used in the program. It's used for scratch buffer preallocation - // with dynamic parallelism, since runtime doesn't know which child kernel will be called - maxScratchRegs_ = - std::max(static_cast(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_); - } -#endif // defined(USE_COMGR_LIBRARY) DestroySegmentCpuAccess(); -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) return true; } diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp index 17b205f81c..dce32bce70 100644 --- a/rocclr/runtime/device/rocm/rocdevice.cpp +++ b/rocclr/runtime/device/rocm/rocdevice.cpp @@ -19,9 +19,6 @@ #include "device/rocm/rocblit.hpp" #include "device/rocm/rocvirtual.hpp" #include "device/rocm/rocprogram.hpp" -#if defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY) -#include "driver/AmdCompiler.h" -#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY) #include "device/rocm/rocmemory.hpp" #include "device/rocm/rocglinterop.hpp" #ifdef WITH_AMDGPU_PRO @@ -659,7 +656,7 @@ bool Device::create(bool sramEccEnabled) { const char* scheduler = nullptr; -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) std::string sch = SchedulerSourceCode; if (settings().useLightning_) { if (info().cooperativeGroups_) { @@ -667,33 +664,7 @@ bool Device::create(bool sramEccEnabled) { } scheduler = sch.c_str(); } -#ifndef USE_COMGR_LIBRARY - // create compilation object with cache support - int gfxipMajor = deviceInfo_.gfxipVersion_ / 100; - int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10; - int gfxipStepping = deviceInfo_.gfxipVersion_ % 10; - - // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) - // with dash as delimiter to be compatible with Windows directory name - std::ostringstream cacheTarget; - cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; - if (settings().enableXNACK_) { - cacheTarget << "+xnack"; - } - if (info_.sramEccEnabled_) { - cacheTarget << "+sram-ecc"; - } - - amd::CacheCompilation* compObj = new amd::CacheCompilation( - cacheTarget.str(), "_rocm", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); - if (!compObj) { - LogError("Unable to create cache compilation object!"); - return false; - } - - cacheCompilation_.reset(compObj); #endif // USE_COMGR_LIBRARY -#endif amd::Context::Info info = {0}; std::vector devices; diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp index 43cc5532c7..22c047e4b3 100644 --- a/rocclr/runtime/device/rocm/rockernel.cpp +++ b/rocclr/runtime/device/rocm/rockernel.cpp @@ -9,15 +9,12 @@ #ifndef WITHOUT_HSA_BACKEND -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) -#ifndef USE_COMGR_LIBRARY -#include "driver/AmdCompiler.h" -#endif +#if defined(USE_COMGR_LIBRARY) #include "llvm/Support/AMDGPUMetadata.h" typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) namespace roc { @@ -40,7 +37,6 @@ Kernel::Kernel(std::string name, Program* prog) kernargSegmentByteSize_(0), kernargSegmentAlignment_(0) {} -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) #if defined(USE_COMGR_LIBRARY) bool LightningKernel::init() { @@ -170,131 +166,7 @@ bool LightningKernel::init() { } return true; } -#else -static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) { - for (const KernelMD& kernelMD : programMD->mKernels) { - if (kernelMD.mName == name) { - return &kernelMD; - } - } - return nullptr; -} - -bool LightningKernel::init() { - hsa_agent_t hsaDevice = program()->hsaDevice(); - - // Pull out metadata from the ELF - const CodeObjectMD* programMD = static_cast(program())->metadata(); - assert(programMD != nullptr); - - const KernelMD* kernelMD = FindKernelMetadata(programMD, name()); - if (kernelMD == nullptr) { - return false; - } - InitParameters(*kernelMD, KernargSegmentByteSize()); - - // Set the workgroup information for the kernel - workGroupInfo_.availableLDSSize_ = program()->dev().info().localMemSizePerCU_; - assert(workGroupInfo_.availableLDSSize_ > 0); - workGroupInfo_.availableSGPRs_ = 104; - workGroupInfo_.availableVGPRs_ = 256; - - if (!kernelMD->mAttrs.mReqdWorkGroupSize.empty()) { - const auto& requiredWorkgroupSize = kernelMD->mAttrs.mReqdWorkGroupSize; - workGroupInfo_.compileSize_[0] = requiredWorkgroupSize[0]; - workGroupInfo_.compileSize_[1] = requiredWorkgroupSize[1]; - workGroupInfo_.compileSize_[2] = requiredWorkgroupSize[2]; - } - - if (!kernelMD->mAttrs.mWorkGroupSizeHint.empty()) { - const auto& workgroupSizeHint = kernelMD->mAttrs.mWorkGroupSizeHint; - workGroupInfo_.compileSizeHint_[0] = workgroupSizeHint[0]; - workGroupInfo_.compileSizeHint_[1] = workgroupSizeHint[1]; - workGroupInfo_.compileSizeHint_[2] = workgroupSizeHint[2]; - } - - if (!kernelMD->mAttrs.mVecTypeHint.empty()) { - workGroupInfo_.compileVecTypeHint_ = kernelMD->mAttrs.mVecTypeHint.c_str(); - } - - if (!kernelMD->mAttrs.mRuntimeHandle.empty()) { - hsa_agent_t agent = program()->hsaDevice(); - hsa_executable_symbol_t kernelSymbol; - hsa_status_t status; - int variable_size; - uint64_t variable_address; - - // Only kernels that could be enqueued by another kernel has the RuntimeHandle metadata. The RuntimeHandle - // metadata is a string that represents a variable from which the library code can retrieve the kernel code - // object handle of such a kernel. The address of the variable and the kernel code object handle are known - // only after the hsa executable is loaded. The below code copies the kernel code object handle to the - // address of the variable. - - status = hsa_executable_get_symbol_by_name(program()->hsaExecutable(), kernelMD->mAttrs.mRuntimeHandle.c_str(), - &agent, &kernelSymbol); - if (status != HSA_STATUS_SUCCESS) { - return false; - } - - status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, - &variable_size); - if (status != HSA_STATUS_SUCCESS) { - return false; - } - - status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, - &variable_address); - if (status != HSA_STATUS_SUCCESS) { - return false; - } - - const struct RuntimeHandle runtime_handle = { - kernelCodeHandle_, - workitemPrivateSegmentByteSize(), - WorkgroupGroupSegmentByteSize() - }; - - status = hsa_memory_copy(reinterpret_cast(variable_address), &runtime_handle, variable_size); - if (status != HSA_STATUS_SUCCESS) { - return false; - } - } - - uint32_t wavefront_size = 0; - if (hsa_agent_get_info(program()->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size) != - HSA_STATUS_SUCCESS) { - return false; - } - assert(wavefront_size > 0); - - workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_; - workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_; - workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_; - - workGroupInfo_.preferredSizeMultiple_ = wavefront_size; - - /// TODO: Are there any other fields that are getting queried from akc? - /// If so, code properties metadata should be used instead. - workGroupInfo_.usedSGPRs_ = kernelMD->mCodeProps.mNumSGPRs; - workGroupInfo_.usedVGPRs_ = kernelMD->mCodeProps.mNumVGPRs; - - workGroupInfo_.usedStackSize_ = 0; - - workGroupInfo_.wavefrontPerSIMD_ = program()->dev().info().maxWorkItemSizes_[0] / wavefront_size; - - workGroupInfo_.wavefrontSize_ = wavefront_size; - - workGroupInfo_.size_ = kernelMD->mCodeProps.mMaxFlatWorkGroupSize; - if (workGroupInfo_.size_ == 0) { - return false; - } - - InitPrintf(programMD->mPrintf); - - return true; -} #endif // defined(USE_COMGR_LIBRARY) -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) #if defined(WITH_COMPILER_LIB) bool HSAILKernel::init() { diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp index b36fcdcbea..66e48bbfbb 100644 --- a/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/rocclr/runtime/device/rocm/rocprogram.cpp @@ -7,13 +7,9 @@ #include "utils/options.hpp" #include "rockernel.hpp" -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) #include -#ifndef USE_COMGR_LIBRARY -#include "driver/AmdCompiler.h" -#include "libraries.amdgcn.inc" -#endif -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) #include "utils/bif_section_labels.hpp" #include "amd_hsa_kernel_code.h" @@ -428,17 +424,17 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner) } bool LightningProgram::createBinary(amd::option::Options* options) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) { LogError("Failed to create ELF binary image!"); return false; } -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) return true; } bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t size) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) // Write binary to memory if (type == TYPE_EXECUTABLE) { // handle code object binary assert(rawBinary != nullptr && size != 0 && "must pass in the binary"); @@ -455,12 +451,12 @@ bool LightningProgram::saveBinaryAndSetType(type_t type, void* rawBinary, size_t // Set the type of binary setType(type); -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) return true; } bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) { -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) // Find the size of global variables from the binary if (!FindGlobalVarSize(binary, binSize)) { return false; @@ -509,7 +505,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s return false; } -#if defined(USE_COMGR_LIBRARY) for (const auto &kernelMeta : kernelMetadataMap_) { const std::string kernelName = kernelMeta.first; Kernel* aKernel = new roc::LightningKernel(kernelName, this); @@ -521,103 +516,7 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s std::string::npos); kernels()[kernelName] = aKernel; } -#else - // Get the list of kernels - std::vector kernelNameList; - status = hsa_executable_iterate_agent_symbols(hsaExecutable_, agent, GetKernelNamesCallback, - (void*)&kernelNameList); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get kernel names: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - for (auto& kernelName : kernelNameList) { - hsa_executable_symbol_t kernelSymbol; - - status = hsa_executable_get_symbol_by_name(hsaExecutable_, kernelName.c_str(), &agent, - &kernelSymbol); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get the symbol: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - uint64_t kernelCodeHandle; - status = hsa_executable_symbol_get_info(kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &kernelCodeHandle); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get the kernel code: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - uint32_t workgroupGroupSegmentByteSize; - status = hsa_executable_symbol_get_info(kernelSymbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &workgroupGroupSegmentByteSize); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get group segment size info: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - uint32_t workitemPrivateSegmentByteSize; - status = hsa_executable_symbol_get_info(kernelSymbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &workitemPrivateSegmentByteSize); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get private segment size info: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - uint32_t kernargSegmentByteSize; - status = hsa_executable_symbol_get_info(kernelSymbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, - &kernargSegmentByteSize); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get kernarg segment size info: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - uint32_t kernargSegmentAlignment; - status = hsa_executable_symbol_get_info( - kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, - &kernargSegmentAlignment); - if (status != HSA_STATUS_SUCCESS) { - buildLog_ += "Error: Failed to get kernarg segment alignment info: "; - buildLog_ += hsa_strerror(status); - buildLog_ += "\n"; - return false; - } - - // FIME_lmoriche: the compiler should set the kernarg alignment based - // on the alignment requirement of the parameters. For now, bump it to - // the worse case: 128byte aligned. - kernargSegmentAlignment = std::max(kernargSegmentAlignment, 128u); - - Kernel* aKernel = new roc::LightningKernel( - kernelName, this, kernelCodeHandle, workgroupGroupSegmentByteSize, - workitemPrivateSegmentByteSize, kernargSegmentByteSize, - amd::alignUp(kernargSegmentAlignment, device().info().globalMemCacheLineSize_)); - if (!aKernel->init()) { - return false; - } - aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize); - aKernel->setInternalKernelFlag(compileOptions_.find("-cl-internal-kernel") != - std::string::npos); - kernels()[kernelName] = aKernel; - } -#endif // defined(USE_COMGR_LIBRARY) -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) return true; } diff --git a/rocclr/runtime/device/rocm/rocvirtual.cpp b/rocclr/runtime/device/rocm/rocvirtual.cpp index 1b1de74087..a22ebd189d 100644 --- a/rocclr/runtime/device/rocm/rocvirtual.cpp +++ b/rocclr/runtime/device/rocm/rocvirtual.cpp @@ -315,9 +315,9 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para "Unsupported address qualifier"); const bool readOnly = -#if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#if defined(USE_COMGR_LIBRARY) desc.typeQualifier_ == CL_KERNEL_ARG_TYPE_CONST || -#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY) +#endif // defined(USE_COMGR_LIBRARY) (mem->getMemFlags() & CL_MEM_READ_ONLY) != 0; if (!readOnly) { diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp index b6f22d5960..f8251077c8 100644 --- a/rocclr/runtime/platform/program.cpp +++ b/rocclr/runtime/platform/program.cpp @@ -150,19 +150,6 @@ cl_int Program::addDeviceProgram(Device& device, const void* image, size_t lengt delete program; return CL_INVALID_BINARY; } - -#if 0 && defined(WITH_LIGHTNING_COMPILER) - // load the compiler options from the binary if it is not provided - std::string sBinOptions = program->compileOptions(); - if (!sBinOptions.empty() && emptyOptions) { - if (!amd::option::parseAllOptions(sBinOptions, *options, false, - device.settings().useLightning_)) { - programLog_ = options->optionsLog(); - LogError("Parsing compilation options from binary failed."); - return CL_INVALID_COMPILER_OPTIONS; - } - } -#endif // defined(WITH_LIGHTNING_COMPILER) } devicePrograms_[&rootDev] = program;