From b9916d35f4e020feab3dca5821fdd78054fb9510 Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 3 Jan 2017 16:56:06 -0500
Subject: [PATCH] P4 to Git Change 1358063 by wchau@wchau_OCL_boltzmann on 2017/01/03 16:44:42

SWDEV-102698 - [OCL-LC-ROCm] Add code caching support to OpenCL program manager

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#146 edit
... //depot/stg/opencl/drivers/opencl/compiler/tools/Makefile#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/build/Makefile.runtime#65 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#205 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#49 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#261 edit
---
 rocclr/runtime/device/device.cpp           | 227 +++++++++++++++++++++
 rocclr/runtime/device/device.hpp           |  68 ++++++-
 rocclr/runtime/device/rocm/roccompiler.cpp |  13 +-
 rocclr/runtime/device/rocm/rocdevice.cpp   |  27 ++-
 rocclr/runtime/device/rocm/rocdevice.hpp   |  12 +-
 rocclr/runtime/device/rocm/rocprogram.cpp  |  19 +-
 rocclr/runtime/device/rocm/rocsettings.cpp |   8 +
 rocclr/runtime/device/rocm/rocsettings.hpp |   4 +-
 rocclr/runtime/utils/flags.hpp             |   7 +-
 9 files changed, 359 insertions(+), 26 deletions(-)

diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index b05b09691c..75f73ebb20 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -7,6 +7,9 @@
 #include "thread/monitor.hpp"
 
 #if defined(WITH_HSA_DEVICE)
+#if defined(WITH_LIGHTNING_COMPILER)
+#include "SCLib_Ver.h"
+#endif
 #include "device/rocm/rocdevice.hpp"
 extern amd::AppProfile* rocCreateAppProfile();
 #endif
@@ -614,6 +617,230 @@ Device::allocMapTarget(
     return devMem->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
 }
 
+
+#if defined(WITH_LIGHTNING_COMPILER)
+//! Construct the cache-aware compilation helper for one target device.
+//! When resetCache is set, a temporary StringCache with zeroed build numbers
+//! is created to clean up the cached data of the target device.
+CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, bool resetCache)
+    : codeCache_ ( targetStr, SC_BUILD_NUMBER, AMD_PLATFORM_BUILD_NUMBER, postfix )
+    , isCodeCacheEnabled_ (enableCache)
+{
+    if (resetCache) {
+        // clean up the cached data of the target device
+        StringCache emptyCache(targetStr, 0, 0, postfix);
+    }
+}
+
+//! Run one compiler operation through the code cache.
+//!
+//! The inputs (plus cacheOpt) form the cache key. On a hit the cached result
+//! is appended to output; on a miss the compiler is invoked and its result is
+//! stored in the cache. Returns false only when the compiler itself fails.
+bool
+CacheCompilation::cacheProcess(
+    amd::opencl_driver::Compiler* C,
+    std::vector<amd::opencl_driver::Data*> inputs,
+    amd::opencl_driver::Buffer* output,
+    std::vector<std::string> options,
+    std::string cacheOpt,
+    COMPILER_OPERATION operation)
+{
+    using namespace amd::opencl_driver;
+
+    std::vector<StringCache::CachedData> bcSet;
+    bool cachedCodeExist = false;
+    std::string cacheMsg;
+
+    // Header contents read from files must outlive bcSet, which only stores
+    // raw pointers. Reserving up front keeps the strings from being moved.
+    std::vector<std::string> headerSources;
+    headerSources.reserve(inputs.size());
+
+    bool checkCache = true;
+    switch (operation) { // collect the data that forms the cache key
+    case LINK_LLVM_BITCODES:
+        cacheMsg = "Link LLVM Bitcodes";
+        for (auto &input : inputs) {
+            assert(input->Type() == DT_LLVM_BC);
+
+            BufferReference* bc = reinterpret_cast<BufferReference*>(input);
+            StringCache::CachedData cachedData = { bc->Ptr(), bc->Size() };
+            bcSet.push_back(cachedData);
+        }
+        break;
+    case COMPILE_TO_LLVM:
+        cacheMsg = "Compile to LLVM Bitcodes";
+        for (auto &input : inputs) {
+            if (input->Type() == DT_CL) {
+                BufferReference* bc = reinterpret_cast<BufferReference*>(input);
+                StringCache::CachedData cachedData = { bc->Ptr(), bc->Size() };
+                bcSet.push_back(cachedData);
+            }
+            else if (input->Type() == DT_CL_HEADER) {
+                FileReference* bcFile = reinterpret_cast<FileReference*>(input);
+                headerSources.emplace_back();
+                std::string& bc = headerSources.back();
+                bcFile->ReadToString(bc);
+                StringCache::CachedData cachedData = { bc.c_str(), bc.size() };
+                bcSet.push_back(cachedData);
+            }
+            else {
+                buildLog_ += "Error: unsupported bitcode type for checking cache.\n";
+                checkCache = false;
+                break;
+            }
+        }
+        break;
+    case COMPILE_AND_LINK_EXEC:
+        cacheMsg = "Compile and Link Executable";
+        for (auto &input : inputs) {
+            assert(input->Type() == DT_LLVM_BC);
+
+            amd::opencl_driver::Buffer* bc = (amd::opencl_driver::Buffer*) input;
+            StringCache::CachedData cachedData = { bc->Buf().data(), bc->Size() };
+            bcSet.push_back(cachedData);
+        }
+        break;
+    default:
+        assert(!"Unknown compiler operation");
+        checkCache = false;
+        break;
+    }
+
+    std::string dstData = "";
+    if (checkCache &&
+        codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(),
+                                 cacheOpt, dstData, cacheMsg)) {
+        std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
+        cachedCodeExist = true;
+    }
+
+    if (!cachedCodeExist) { // bitcodes not found in cache
+        bool ret = false;
+        switch (operation) { // invoke the requested compiler operation
+        case LINK_LLVM_BITCODES:
+            ret = C->LinkLLVMBitcode(inputs, output, options);
+            break;
+        case COMPILE_TO_LLVM:
+            ret = C->CompileToLLVMBitcode(inputs, output, options);
+            break;
+        case COMPILE_AND_LINK_EXEC:
+            ret = C->CompileAndLinkExecutable(inputs, output, options);
+            break;
+        }
+
+        if (!ret) {
+            return false;
+        }
+
+        // Failing to store the result is not fatal: output already holds the
+        // compiled code, so reporting failure would only make the caller run
+        // the same operation again into a buffer that is already populated.
+        std::string dstData(output->Buf().data(), output->Buf().size());
+        if (checkCache &&
+            !codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
+            buildLog_ += "Warning: Failed to cache the compiled code.\n";
+        }
+    }
+
+    return true;
+}
+
+//! Link LLVM bitcodes, going through the code cache when it is enabled;
+//! falls back to a direct compiler invocation if the cached path fails.
+bool
+CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C,
+                                  std::vector<amd::opencl_driver::Data*>& inputs,
+                                  amd::opencl_driver::Buffer* output,
+                                  std::vector<std::string>& options,
+                                  std::string cacheOpt)
+{
+    buildLog_.clear();
+
+    bool ret = false;
+    if (isCodeCacheEnabled_) {
+        ret = cacheProcess(C, inputs, output, options, cacheOpt, LINK_LLVM_BITCODES);
+        if (!ret) {
+            LogWarning("Cache look-up failed!");
+        }
+    }
+
+    if (!ret) {
+        ret = C->LinkLLVMBitcode(inputs, output, options);
+        buildLog_ += C->Output();
+    }
+
+    if (!ret) {
+        buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
+    }
+
+    return ret;
+}
+
+//! Compile CL source to LLVM bitcode, going through the code cache when it is
+//! enabled; falls back to a direct compiler invocation if that path fails.
+bool
+CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
+                                       std::vector<amd::opencl_driver::Data*>& inputs,
+                                       amd::opencl_driver::Buffer* output,
+                                       std::vector<std::string>& options,
+                                       std::string cacheOpt)
+{
+    buildLog_.clear();
+
+    bool ret = false;
+    if (isCodeCacheEnabled_) {
+        ret = cacheProcess(C, inputs, output, options, cacheOpt, COMPILE_TO_LLVM);
+        if (!ret) {
+            LogWarning("Cache look-up failed!");
+        }
+    }
+
+    if (!ret) {
+        ret = C->CompileToLLVMBitcode(inputs, output, options);
+        buildLog_ += C->Output();
+    }
+
+    if (!ret) {
+        buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
+    }
+
+    return ret;
+}
+
+//! Compile and link LLVM bitcode into an executable, going through the code
+//! cache when it is enabled; falls back to a direct compiler invocation.
+bool
+CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
+                                           std::vector<amd::opencl_driver::Data*>& inputs,
+                                           amd::opencl_driver::Buffer* output,
+                                           std::vector<std::string>& options,
+                                           std::string cacheOpt)
+{
+    buildLog_.clear();
+
+    bool ret = false;
+    if (isCodeCacheEnabled_) {
+        ret = cacheProcess(C, inputs, output, options, cacheOpt, COMPILE_AND_LINK_EXEC);
+        if (!ret) {
+            LogWarning("Cache look-up failed!");
+        }
+    }
+
+    if (!ret) {
+        ret = C->CompileAndLinkExecutable(inputs, output, options);
+        buildLog_ += C->Output();
+    }
+
+    if (!ret) {
+        buildLog_ += "Error: Creating the executable failed: Compiling LLVM IRs to executable\n";
+    }
+
+    return ret;
+}
+#endif
+
 } // namespace amd
 
 namespace device {
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index a31d7672b7..818cce3c9b 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -17,6 +17,7 @@
 #include "appprofile.hpp"
 
 #if defined(WITH_LIGHTNING_COMPILER)
+#include "caching/cache.hpp"
 #include "driver/AmdCompiler.h"
 #endif // defined(WITH_LIGHTNING_COMPILER)
 #include "acl.h"
@@ -1632,7 +1633,7 @@ public:
     inline bool isFineGrainedSystem(bool FGSOPT = false) const {
         return FGSOPT && (info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) != 0 ? true : false;
     }
-    
+
     //! Return this device's type.
     cl_device_type type() const {
         return info().type_ & ~(CL_DEVICE_TYPE_DEFAULT | CL_HSA_ENABLED_AMD
@@ -1843,6 +1844,71 @@ struct KernelParameterDescriptor
     const char* typeName_;  //!< Argument's type name
 };
 
+#if defined(WITH_LIGHTNING_COMPILER)
+
+//! Compilation process with cache support.
+class CacheCompilation : public amd::HeapObject
+{
+public:
+
+    enum COMPILER_OPERATION {
+        LINK_LLVM_BITCODES = 0,
+        COMPILE_TO_LLVM,
+        COMPILE_AND_LINK_EXEC
+    };
+
+    //! Constructor
+    CacheCompilation(std::string targetStr,
+                     std::string postfix,
+                     bool enableCache,
+                     bool resetCache);
+
+    //! Return the log string of the last operation
+    std::string buildLog() const { return buildLog_; }
+
+    //! NB, the cacheOpt argument is used for specifying the operation
+    //! condition, normally would be the same as the options argument.
+    //! However, the cacheOpt argument should not include any option
+    //! that would be modified each time but not affect the operation,
+    //! e.g. output file name.
+
+    //! Link LLVM bitcode
+    bool linkLLVMBitcode(amd::opencl_driver::Compiler* C,
+                         std::vector<amd::opencl_driver::Data*>& inputs,
+                         amd::opencl_driver::Buffer* output,
+                         std::vector<std::string>& options,
+                         std::string cacheOpt);
+
+    //! Compile to LLVM bitcode
+    bool compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
+                              std::vector<amd::opencl_driver::Data*>& inputs,
+                              amd::opencl_driver::Buffer* output,
+                              std::vector<std::string>& options,
+                              std::string cacheOpt);
+
+    //! Compile and link executable
+    bool compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
+                                  std::vector<amd::opencl_driver::Data*>& inputs,
+                                  amd::opencl_driver::Buffer* output,
+                                  std::vector<std::string>& options,
+                                  std::string cacheOpt);
+
+private:
+    //! Invoke operations with cache support
+    bool cacheProcess(amd::opencl_driver::Compiler* C,
+                      std::vector<amd::opencl_driver::Data*> inputs,
+                      amd::opencl_driver::Buffer* output,
+                      std::vector<std::string> options,
+                      std::string cacheOpt,
+                      COMPILER_OPERATION operation);
+
+    StringCache     codeCache_;             //!< Cached codes
+    const bool      isCodeCacheEnabled_;    //!< Code cache enable
+    std::string     buildLog_;              //!< Log of the last operation
+};
+
+#endif
+
 /*! @}
  *  @}
  */
diff --git a/rocclr/runtime/device/rocm/roccompiler.cpp b/rocclr/runtime/device/rocm/roccompiler.cpp
index 87ab556a24..cad1f22f4e 100644
--- a/rocclr/runtime/device/rocm/roccompiler.cpp
+++ b/rocclr/runtime/device/rocm/roccompiler.cpp
@@ -195,19 +195,20 @@ HSAILProgram::compileImpl_LC(
         return false;
     }
 
-    driverOptions.append(" -include-pch " + pch->Name());
     driverOptions.append(" -Xclang -fno-validate-pch");
 
+    // save the options for caching before including the temporary header file for amdgcn
+    std::string cacheOpts = driverOptions + std::to_string(clcStd);
+
+    driverOptions.append(" -include-pch " + pch->Name());
+
     // Tokenize the options string into a vector of strings
     std::istringstream istrstr(driverOptions);
     std::istream_iterator<std::string> sit(istrstr), end;
     std::vector<std::string> params(sit, end);
 
-    // Compile source to IR
-    bool ret = C->CompileToLLVMBitcode(inputs, output, params);
-    buildLog_ += C->Output();
-    if (!ret) {
-        buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
+    if (!dev().cacheCompilation()->compileToLLVMBitcode(C.get(), inputs, output, params, cacheOpts)) {
+        buildLog_ += dev().cacheCompilation()->buildLog();
         return false;
     }
 
diff --git a/rocclr/runtime/device/rocm/rocdevice.cpp b/rocclr/runtime/device/rocm/rocdevice.cpp
index ca33e9cead..cbcd9852fc 100644
--- a/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -296,7 +296,7 @@ bool NullDevice::init() {
         bool isOnline = false;
         //Check if the particular device is online
         for (unsigned int i=0; i< devices.size(); i++) {
-            if (static_cast<NullDevice*>(devices[i])->deviceInfo_.hsaDeviceId_ == 
+            if (static_cast<NullDevice*>(devices[i])->deviceInfo_.hsaDeviceId_ ==
                 DeviceInfo[id].hsaDeviceId_){
                 isOnline = true;
             }
@@ -588,6 +588,29 @@ Device::mapHSADeviceToOpenCLDevice(hsa_agent_t dev)
         }
     }
 
+#if defined(WITH_LIGHTNING_COMPILER)
+    // create compilation object with cache support
+    int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
+    int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
+    int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
+
+    // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
+    // with dash as delimiter to be compatible with Windows directory name
+    std::ostringstream cacheTarget;
+    cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
+
+    amd::CacheCompilation* compObj = new amd::CacheCompilation(cacheTarget.str(),
+                                                               "_rocm",
+                                                               hsaSettings->enableCodeCache_,
+                                                               hsaSettings->resetCodeCache_);
+    if (!compObj) {
+        LogError("Unable to create cache compilation object!");
+        return false;
+    }
+
+    cacheCompilation_.reset(compObj);
+#endif
+
     return true;
 }
 
@@ -1076,7 +1099,7 @@ Device::bindExternalDevice(
 #else
     if((flags&amd::Context::GLDeviceKhr)==0)
         return false;
-    
+
     MesaInterop::MESA_INTEROP_KIND kind=MesaInterop::MESA_INTEROP_NONE;
     MesaInterop::DisplayHandle display;
     MesaInterop::ContextHandle context;
diff --git a/rocclr/runtime/device/rocm/rocdevice.hpp b/rocclr/runtime/device/rocm/rocdevice.hpp
index 32213f1fb5..cd6b4505dd 100644
--- a/rocclr/runtime/device/rocm/rocdevice.hpp
+++ b/rocclr/runtime/device/rocm/rocdevice.hpp
@@ -80,6 +80,8 @@ public:
 
     Compiler* compiler() const { return compilerHandle_; }
 
+    const Settings &settings() const { return reinterpret_cast<const Settings &>(*settings_); }
+
     //! Construct an HSAIL program object from the ELF assuming it is valid
     virtual device::Program *createProgram(amd::option::Options* options = NULL);
     const AMDDeviceInfo& deviceInfo() const {
@@ -193,6 +195,10 @@ public:
         return false;
     }
 
+#if defined(WITH_LIGHTNING_COMPILER)
+    amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
+#endif
+
 protected:
     //! Initialize compiler instance and handle
     static bool initCompiler(bool isOffline);
@@ -202,6 +208,10 @@ protected:
     static Compiler* compilerHandle_;
     //! Device Id for an HsaDevice
     AMDDeviceInfo deviceInfo_;
+#if defined(WITH_LIGHTNING_COMPILER)
+    //! Compilation with cache support
+    std::unique_ptr<amd::CacheCompilation> cacheCompilation_;
+#endif
 private:
     static const bool offlineDevice_;
 };
@@ -329,8 +339,6 @@ public:
 
     virtual void svmFree(void* ptr) const;
 
-    const Settings &settings() const { return reinterpret_cast<const Settings &>(*settings_); }
-
     //! Returns transfer engine object
     const device::BlitManager& xferMgr() const { return xferQueue()->blitMgr(); }
 
diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp
index c09c5faa80..02e8c772f9 100644
--- a/rocclr/runtime/device/rocm/rocprogram.cpp
+++ b/rocclr/runtime/device/rocm/rocprogram.cpp
@@ -531,10 +531,8 @@ HSAILProgram::linkImpl_LC(
     }
 
     std::vector<std::string> linkOptions;
-    bool ret = C->LinkLLVMBitcode(inputs, output, linkOptions);
-    buildLog_ += C->Output();
-    if (!ret) {
-        buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
+    if (!dev().cacheCompilation()->linkLLVMBitcode(C.get(), inputs, output, linkOptions, "")) {
+        buildLog_ += dev().cacheCompilation()->buildLog();
         return false;
     }
 
@@ -770,10 +768,8 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
         return false;
     }
 
-    bool ret = C->LinkLLVMBitcode(inputs, linked_bc, linkOptions);
-    buildLog_ += C->Output();
-    if (!ret) {
-        buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
+    if (!dev().cacheCompilation()->linkLLVMBitcode(C.get(), inputs, linked_bc, linkOptions, "")) {
+        buildLog_ += dev().cacheCompilation()->buildLog();
         return false;
     }
 
@@ -812,10 +808,9 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
     std::istream_iterator<std::string> sit(strstr), end;
     std::vector<std::string> params(sit, end);
 
-    ret = C->CompileAndLinkExecutable(inputs, out_exec, params);
-    buildLog_ += C->Output();
-    if (!ret) {
-        buildLog_ += "Error: Creating the executable failed: Compiling LLVM IRs to exe.\n";
+    if (!dev().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params,
+                                                            codegenOptions)) {
+        buildLog_ += dev().cacheCompilation()->buildLog();
         return false;
     }
 
diff --git a/rocclr/runtime/device/rocm/rocsettings.cpp b/rocclr/runtime/device/rocm/rocsettings.cpp
index 0d1bdebcd5..453cc78e2c 100644
--- a/rocclr/runtime/device/rocm/rocsettings.cpp
+++ b/rocclr/runtime/device/rocm/rocsettings.cpp
@@ -54,6 +54,14 @@ Settings::Settings()
     partialDispatch_ = (partialDispatch) ? false : true;
     commandQueues_ = 100;  //!< Field value set to maximum number
                            //!< concurrent Virtual GPUs for ROCm backend
+
+    // Determine if user is requesting code caching for
+    // compiling and linking when using Lightning Compiler
+    enableCodeCache_ = OCL_CODE_CACHE_ENABLE;
+
+    // Determine if user is requesting reset the code cache
+    // storage (note that code cache must be enabled)
+    resetCodeCache_ = OCL_CODE_CACHE_RESET;
 }
 
 bool
diff --git a/rocclr/runtime/device/rocm/rocsettings.hpp b/rocclr/runtime/device/rocm/rocsettings.hpp
index fc716584ad..ef679d1f93 100644
--- a/rocclr/runtime/device/rocm/rocsettings.hpp
+++ b/rocclr/runtime/device/rocm/rocsettings.hpp
@@ -26,7 +26,9 @@ public:
             uint enableImageHandle_: 1;     //!< Use HSAIL image/sampler pointer
             uint enableNCMode_: 1;          //!< Enable Non Coherent mode for system memory
             uint enablePartialDispatch_: 1; //!< Enable support for Partial Dispatch
-            uint reserved_: 26;
+            uint enableCodeCache_: 1;       //!< Enable support for compiler code cache
+            uint resetCodeCache_: 1;        //!< Reset the compiler code cache storage
+            uint reserved_: 24;
         };
         uint    value_;
     };
diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp
index 5d4801f766..803046cb01 100644
--- a/rocclr/runtime/utils/flags.hpp
+++ b/rocclr/runtime/utils/flags.hpp
@@ -198,8 +198,11 @@ release_on_stg(uint, GPU_WAVE_LIMIT_DSC_THRESH, 10,                           \
 release_on_stg(cstring, GPU_WAVE_LIMIT_DUMP, "",                              \
         "File path prefix for dumping wave limiter output")                   \
 release_on_stg(cstring, GPU_WAVE_LIMIT_TRACE, "",                             \
-        "File path prefix for tracing wave limiter")
-
+        "File path prefix for tracing wave limiter")                          \
+release(bool, OCL_CODE_CACHE_ENABLE, false,                                   \
+        "1 = Enable compiler code cache")                                     \
+release(bool, OCL_CODE_CACHE_RESET, false,                                    \
+        "1 = Reset the compiler code cache storage")
 
 namespace amd {
 