P4 to Git Change 1358063 by wchau@wchau_OCL_boltzmann on 2017/01/03 16:44:42

SWDEV-102698 - [OCL-LC-ROCm] Add code caching support to OpenCL program manager

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#146 edit
... //depot/stg/opencl/drivers/opencl/compiler/tools/Makefile#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/build/Makefile.runtime#65 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#205 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#49 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#261 edit
This commit is contained in:
foreman
2017-01-03 16:56:06 -05:00
rodzic b48cac624d
commit b9916d35f4
9 zmienionych plików z 336 dodań i 26 usunięć
+204
Wyświetl plik
@@ -7,6 +7,9 @@
#include "thread/monitor.hpp"
#if defined(WITH_HSA_DEVICE)
#if defined(WITH_LIGHTNING_COMPILER)
#include "SCLib_Ver.h"
#endif
#include "device/rocm/rocdevice.hpp"
extern amd::AppProfile* rocCreateAppProfile();
#endif
@@ -614,6 +617,207 @@ Device::allocMapTarget(
return devMem->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
}
#if defined(WITH_LIGHTNING_COMPILER)
//! Build the compilation helper and its backing code cache.
//!
//! \param targetStr    target identifier handed to the StringCache
//! \param postfix      postfix handed to the StringCache
//! \param enableCache  stored as the "code cache enabled" switch
//! \param resetCache   when true, a temporary StringCache is created for the
//!                     same target/postfix with both version numbers set to 0
CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, bool resetCache)
    : codeCache_(targetStr, SC_BUILD_NUMBER, AMD_PLATFORM_BUILD_NUMBER, postfix),
      isCodeCacheEnabled_(enableCache)
{
    if (!resetCache) {
        return;
    }

    // Clean up the cached data of the target device.
    // NOTE(review): presumably constructing a StringCache with version 0/0
    // invalidates the entries stored under the real build numbers; confirm
    // against the StringCache implementation.
    StringCache scratchCache(targetStr, 0, 0, postfix);
}
//! Run one compiler operation through the code cache.
//!
//! The contents of \p inputs together with \p cacheOpt form the cache key.
//! On a cache hit the cached bytes are appended to \p output and the compiler
//! is not invoked. On a miss the compiler operation selected by \p operation
//! is executed and, if it succeeds, its output is stored in the cache.
//!
//! \param C         compiler used when the result is not cached
//! \param inputs    inputs of the operation (sources, headers or bitcode)
//! \param output    buffer receiving the cached or freshly produced result
//! \param options   options forwarded to the compiler
//! \param cacheOpt  option string that takes part in the cache key
//! \param operation which compiler operation to perform
//! \return true when \p output holds a valid result, false when the compiler
//!         operation failed or \p operation is unknown
//!
//! NOTE(review): when the compiler itself fails, this returns false and the
//! public wrappers run the same compiler operation a second time; that cannot
//! be avoided here without changing the return contract.
bool
CacheCompilation::cacheProcess(
    amd::opencl_driver::Compiler* C,
    std::vector<amd::opencl_driver::Data*> inputs,
    amd::opencl_driver::Buffer* output,
    std::vector<std::string> options,
    std::string cacheOpt,
    COMPILER_OPERATION operation)
{
    using namespace amd::opencl_driver;

    std::vector<StringCache::CachedData> bcSet;

    // Owns the text of every header read from file. bcSet only stores raw
    // pointers, so the strings must stay alive (and must not move) until the
    // cache calls at the end of this function. Reserving one slot per input
    // guarantees that emplace_back never reallocates.
    std::vector<std::string> headerTexts;
    headerTexts.reserve(inputs.size());

    std::string cacheMsg;
    bool checkCache = true;

    // Collect the data that identifies this operation in the cache.
    switch (operation) {
    case LINK_LLVM_BITCODES:
        cacheMsg = "Link LLVM Bitcodes";
        for (auto &input : inputs) {
            assert(input->Type() == DT_LLVM_BC);
            BufferReference* bc = reinterpret_cast<BufferReference*>(input);
            StringCache::CachedData cachedData = { bc->Ptr(), bc->Size() };
            bcSet.push_back(cachedData);
        }
        break;
    case COMPILE_TO_LLVM:
        cacheMsg = "Compile to LLVM Bitcodes";
        for (auto &input : inputs) {
            if (input->Type() == DT_CL) {
                BufferReference* bc = reinterpret_cast<BufferReference*>(input);
                StringCache::CachedData cachedData = { bc->Ptr(), bc->Size() };
                bcSet.push_back(cachedData);
            }
            else if (input->Type() == DT_CL_HEADER) {
                FileReference* bcFile = reinterpret_cast<FileReference*>(input);
                // Read into storage that outlives this loop iteration; a
                // block-local string would leave a dangling pointer in bcSet.
                headerTexts.emplace_back();
                std::string& headerText = headerTexts.back();
                bcFile->ReadToString(headerText);
                StringCache::CachedData cachedData = { headerText.c_str(), headerText.size() };
                bcSet.push_back(cachedData);
            }
            else {
                buildLog_ += "Error: unsupported bitcode type for checking cache.\n";
                checkCache = false;
                break;  // leaves the for loop only
            }
        }
        break;
    case COMPILE_AND_LINK_EXEC:
        cacheMsg = "Compile and Link Executable";
        for (auto &input : inputs) {
            assert(input->Type() == DT_LLVM_BC);
            amd::opencl_driver::Buffer* bc = (amd::opencl_driver::Buffer*) input;
            StringCache::CachedData cachedData = { bc->Buf().data(), bc->Size() };
            bcSet.push_back(cachedData);
        }
        break;
    default:
        assert(!"Unknown compiler operation");
        checkCache = false;
        break;
    }

    // Cache hit: hand the cached bytes to the caller and skip the compiler.
    std::string dstData = "";
    if (checkCache &&
        codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(),
                                 cacheOpt, dstData, cacheMsg)) {
        std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
        return true;
    }

    // Cache miss (or cache key unavailable): run the real compiler operation.
    bool ret = false;
    switch (operation) {
    case LINK_LLVM_BITCODES:
        ret = C->LinkLLVMBitcode(inputs, output, options);
        break;
    case COMPILE_TO_LLVM:
        ret = C->CompileToLLVMBitcode(inputs, output, options);
        break;
    case COMPILE_AND_LINK_EXEC:
        ret = C->CompileAndLinkExecutable(inputs, output, options);
        break;
    default:
        // Unknown operation: nothing was run, ret stays false.
        break;
    }
    if (!ret) {
        return false;
    }

    // Only store the result when the key covers every input. With an
    // incomplete key a later, different build could hit this entry.
    if (checkCache) {
        std::string compiledData(output->Buf().data(), output->Buf().size());
        if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, compiledData)) {
            // The output buffer already holds a valid result, so a failure
            // to store it must not fail the build (returning false here
            // would make the caller run the compiler again into the same
            // output buffer).
            buildLog_ += "Warning: Failed to cache the compiled code.\n";
        }
    }
    return true;
}
//! Link LLVM bitcode, going through the code cache when it is enabled.
//!
//! The build log is cleared first. If the cache is enabled and the cached
//! path succeeds, nothing else is done. Otherwise the compiler is invoked
//! directly and its output text is appended to the build log.
//!
//! \param C        compiler performing the link
//! \param inputs   bitcode inputs to link
//! \param output   buffer receiving the linked bitcode
//! \param options  options forwarded to the compiler
//! \param cacheOpt option string forwarded to the cached path
//! \return true on success, false when linking failed
bool
CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C,
                                  std::vector<amd::opencl_driver::Data*>& inputs,
                                  amd::opencl_driver::Buffer* output,
                                  std::vector<std::string>& options,
                                  std::string cacheOpt)
{
    buildLog_.clear();

    // Cached path: done as soon as it succeeds.
    if (isCodeCacheEnabled_) {
        if (cacheProcess(C, inputs, output, options, cacheOpt, LINK_LLVM_BITCODES)) {
            return true;
        }
        LogWarning("Cache look-up failed!");
    }

    // Direct path: cache disabled, or the cached path reported failure.
    const bool linked = C->LinkLLVMBitcode(inputs, output, options);
    buildLog_ += C->Output();
    if (!linked) {
        buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
    }
    return linked;
}
//! Compile OpenCL source to LLVM bitcode, going through the code cache when
//! it is enabled.
//!
//! The build log is cleared first. If the cache is enabled and the cached
//! path succeeds, nothing else is done. Otherwise the compiler is invoked
//! directly and its output text is appended to the build log.
//!
//! \param C        compiler performing the compilation
//! \param inputs   sources and headers to compile
//! \param output   buffer receiving the bitcode
//! \param options  options forwarded to the compiler
//! \param cacheOpt option string forwarded to the cached path
//! \return true on success, false when compilation failed
bool
CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
                                       std::vector<amd::opencl_driver::Data*>& inputs,
                                       amd::opencl_driver::Buffer* output,
                                       std::vector<std::string>& options,
                                       std::string cacheOpt)
{
    buildLog_.clear();

    // Cached path: done as soon as it succeeds.
    if (isCodeCacheEnabled_) {
        if (cacheProcess(C, inputs, output, options, cacheOpt, COMPILE_TO_LLVM)) {
            return true;
        }
        LogWarning("Cache look-up failed!");
    }

    // Direct path: cache disabled, or the cached path reported failure.
    const bool compiled = C->CompileToLLVMBitcode(inputs, output, options);
    buildLog_ += C->Output();
    if (!compiled) {
        buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
    }
    return compiled;
}
//! Compile and link LLVM bitcode into an executable, going through the code
//! cache when it is enabled.
//!
//! The build log is cleared first. If the cache is enabled and the cached
//! path succeeds, nothing else is done. Otherwise the compiler is invoked
//! directly and its output text is appended to the build log.
//!
//! \param C        compiler performing the code generation
//! \param inputs   bitcode inputs
//! \param output   buffer receiving the executable
//! \param options  options forwarded to the compiler
//! \param cacheOpt option string forwarded to the cached path
//! \return true on success, false when creating the executable failed
bool
CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
                                           std::vector<amd::opencl_driver::Data*>& inputs,
                                           amd::opencl_driver::Buffer* output,
                                           std::vector<std::string>& options,
                                           std::string cacheOpt)
{
    buildLog_.clear();

    // Cached path: done as soon as it succeeds.
    if (isCodeCacheEnabled_) {
        if (cacheProcess(C, inputs, output, options, cacheOpt, COMPILE_AND_LINK_EXEC)) {
            return true;
        }
        LogWarning("Cache look-up failed!");
    }

    // Direct path: cache disabled, or the cached path reported failure.
    const bool built = C->CompileAndLinkExecutable(inputs, output, options);
    buildLog_ += C->Output();
    if (!built) {
        // Message text corrected: "exeutable" -> "executable", with the
        // trailing period used by the other build-log errors.
        buildLog_ += "Error: Creating the executable failed: Compiling LLVM IRs to executable.\n";
    }
    return built;
}
#endif
} // namespace amd
namespace device {
+67 -1
Wyświetl plik
@@ -17,6 +17,7 @@
#include "appprofile.hpp"
#if defined(WITH_LIGHTNING_COMPILER)
#include "caching/cache.hpp"
#include "driver/AmdCompiler.h"
#endif // defined(WITH_LIGHTNING_COMPILER)
#include "acl.h"
@@ -1632,7 +1633,7 @@ public:
inline bool isFineGrainedSystem(bool FGSOPT = false) const {
return FGSOPT && (info().svmCapabilities_ & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) != 0 ? true : false;
}
//! Return this device's type.
cl_device_type type() const {
return info().type_ & ~(CL_DEVICE_TYPE_DEFAULT | CL_HSA_ENABLED_AMD
@@ -1843,6 +1844,71 @@ struct KernelParameterDescriptor
const char* typeName_; //!< Argument's type name
};
#if defined(WITH_LIGHTNING_COMPILER)
//! Compilation process with cache support.
//!
//! Wraps the three compiler operations (link bitcode, compile to bitcode,
//! compile and link executable) so that each one first tries the code cache
//! when caching is enabled and otherwise calls the compiler directly.
class CacheCompilation : public amd::HeapObject
{
public:
//! Compiler operation selector used by cacheProcess()
enum COMPILER_OPERATION {
LINK_LLVM_BITCODES = 0,
COMPILE_TO_LLVM,
COMPILE_AND_LINK_EXEC
};
//! Constructor
//! \param targetStr    target identifier passed to the code cache
//! \param postfix      postfix passed to the code cache
//! \param enableCache  enables the cached path of the operations below
//! \param resetCache   requests a clean-up of the cached data of the target
CacheCompilation(std::string targetStr,
std::string postfix,
bool enableCache,
bool resetCache);
//! Return the log string of the last operation (returned by value, i.e. a copy)
std::string buildLog() const { return buildLog_; }
//! NB, the cacheOpt argument is used for specifying the operation
//! condition, normally would be the same as the options argument.
//! However, the cacheOpt argument should not include any option
//! that would be modified each time but not affect the operation,
//! e.g. output file name.
//! Link LLVM bitcode; returns true on success, otherwise see buildLog()
bool linkLLVMBitcode(amd::opencl_driver::Compiler* C,
std::vector<amd::opencl_driver::Data*>& inputs,
amd::opencl_driver::Buffer* output,
std::vector<std::string>& options,
std::string cacheOpt);
//! Compile to LLVM bitcode; returns true on success, otherwise see buildLog()
bool compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
std::vector<amd::opencl_driver::Data*>& inputs,
amd::opencl_driver::Buffer* output,
std::vector<std::string>& options,
std::string cacheOpt);
//! Compile and link executable; returns true on success, otherwise see buildLog()
bool compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
std::vector<amd::opencl_driver::Data*>& inputs,
amd::opencl_driver::Buffer* output,
std::vector<std::string>& options,
std::string cacheOpt);
private:
//! Invoke operations with cache support: look the result up in the cache
//! and, on a miss, run the operation and store its output.
//! Note that inputs and options are taken by value (copied) here.
bool cacheProcess(amd::opencl_driver::Compiler* C,
std::vector<amd::opencl_driver::Data*> inputs,
amd::opencl_driver::Buffer* output,
std::vector<std::string> options,
std::string cacheOpt,
COMPILER_OPERATION operation);
StringCache codeCache_; //!< Cached codes
const bool isCodeCacheEnabled_; //!< Code cache enable
std::string buildLog_; //!< Log of the operation
};
#endif
/*! @}
* @}
*/
@@ -195,19 +195,20 @@ HSAILProgram::compileImpl_LC(
return false;
}
driverOptions.append(" -include-pch " + pch->Name());
driverOptions.append(" -Xclang -fno-validate-pch");
// save the options for caching before including the temporary header file for amdgcn
std::string cacheOpts = driverOptions + std::to_string(clcStd);
driverOptions.append(" -include-pch " + pch->Name());
// Tokenize the options string into a vector of strings
std::istringstream istrstr(driverOptions);
std::istream_iterator<std::string> sit(istrstr), end;
std::vector<std::string> params(sit, end);
// Compile source to IR
bool ret = C->CompileToLLVMBitcode(inputs, output, params);
buildLog_ += C->Output();
if (!ret) {
buildLog_ += "Error: Failed to compile opencl source (from CL to LLVM IR).\n";
if (!dev().cacheCompilation()->compileToLLVMBitcode(C.get(), inputs, output, params, cacheOpts)) {
buildLog_ += dev().cacheCompilation()->buildLog();
return false;
}
+25 -2
Wyświetl plik
@@ -296,7 +296,7 @@ bool NullDevice::init() {
bool isOnline = false;
//Check if the particular device is online
for (unsigned int i=0; i< devices.size(); i++) {
if (static_cast<NullDevice*>(devices[i])->deviceInfo_.hsaDeviceId_ ==
if (static_cast<NullDevice*>(devices[i])->deviceInfo_.hsaDeviceId_ ==
DeviceInfo[id].hsaDeviceId_){
isOnline = true;
}
@@ -588,6 +588,29 @@ Device::mapHSADeviceToOpenCLDevice(hsa_agent_t dev)
}
}
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
std::ostringstream cacheTarget;
cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
amd::CacheCompilation* compObj = new amd::CacheCompilation(cacheTarget.str(),
"_rocm",
hsaSettings->enableCodeCache_,
hsaSettings->resetCodeCache_);
if (!compObj) {
LogError("Unable to create cache compilation object!");
return false;
}
cacheCompilation_.reset(compObj);
#endif
return true;
}
@@ -1076,7 +1099,7 @@ Device::bindExternalDevice(
#else
if((flags&amd::Context::GLDeviceKhr)==0)
return false;
MesaInterop::MESA_INTEROP_KIND kind=MesaInterop::MESA_INTEROP_NONE;
MesaInterop::DisplayHandle display;
MesaInterop::ContextHandle context;
+10 -2
Wyświetl plik
@@ -80,6 +80,8 @@ public:
Compiler* compiler() const { return compilerHandle_; }
const Settings &settings() const { return reinterpret_cast<Settings &>(*settings_); }
//! Construct an HSAIL program object from the ELF assuming it is valid
virtual device::Program *createProgram(amd::option::Options* options = NULL);
const AMDDeviceInfo& deviceInfo() const {
@@ -193,6 +195,10 @@ public:
return false;
}
#if defined(WITH_LIGHTNING_COMPILER)
amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
#endif
protected:
//! Initialize compiler instance and handle
static bool initCompiler(bool isOffline);
@@ -202,6 +208,10 @@ protected:
static Compiler* compilerHandle_;
//! Device Id for an HsaDevice
AMDDeviceInfo deviceInfo_;
#if defined(WITH_LIGHTNING_COMPILER)
//! Compilation with cache support
std::unique_ptr<amd::CacheCompilation> cacheCompilation_;
#endif
private:
static const bool offlineDevice_;
};
@@ -329,8 +339,6 @@ public:
virtual void svmFree(void* ptr) const;
const Settings &settings() const { return reinterpret_cast<Settings &>(*settings_); }
//! Returns transfer engine object
const device::BlitManager& xferMgr() const { return xferQueue()->blitMgr(); }
+7 -12
Wyświetl plik
@@ -531,10 +531,8 @@ HSAILProgram::linkImpl_LC(
}
std::vector<std::string> linkOptions;
bool ret = C->LinkLLVMBitcode(inputs, output, linkOptions);
buildLog_ += C->Output();
if (!ret) {
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
if (!dev().cacheCompilation()->linkLLVMBitcode(C.get(), inputs, output, linkOptions, "")) {
buildLog_ += dev().cacheCompilation()->buildLog();
return false;
}
@@ -770,10 +768,8 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
return false;
}
bool ret = C->LinkLLVMBitcode(inputs, linked_bc, linkOptions);
buildLog_ += C->Output();
if (!ret) {
buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n";
if (!dev().cacheCompilation()->linkLLVMBitcode(C.get(), inputs, linked_bc, linkOptions, "")) {
buildLog_ += dev().cacheCompilation()->buildLog();
return false;
}
@@ -812,10 +808,9 @@ HSAILProgram::linkImpl_LC(amd::option::Options *options)
std::istream_iterator<std::string> sit(strstr), end;
std::vector<std::string> params(sit, end);
ret = C->CompileAndLinkExecutable(inputs, out_exec, params);
buildLog_ += C->Output();
if (!ret) {
buildLog_ += "Error: Creating the executable failed: Compiling LLVM IRs to exe.\n";
if (!dev().cacheCompilation()->compileAndLinkExecutable(C.get(), inputs, out_exec, params,
codegenOptions)) {
buildLog_ += dev().cacheCompilation()->buildLog();
return false;
}
@@ -54,6 +54,14 @@ Settings::Settings()
partialDispatch_ = (partialDispatch) ? false : true;
commandQueues_ = 100; //!< Field value set to maximum number
//!< concurrent Virtual GPUs for ROCm backend
// Determine if the user is requesting code caching for
// compiling and linking when using the Lightning Compiler
enableCodeCache_ = OCL_CODE_CACHE_ENABLE;
// Determine if the user is requesting a reset of the code cache
// storage (note that the code cache must be enabled)
resetCodeCache_ = OCL_CODE_CACHE_RESET;
}
bool
@@ -26,7 +26,9 @@ public:
uint enableImageHandle_: 1; //!< Use HSAIL image/sampler pointer
uint enableNCMode_: 1; //!< Enable Non Coherent mode for system memory
uint enablePartialDispatch_: 1; //!< Enable support for Partial Dispatch
uint reserved_: 26;
uint enableCodeCache_: 1; //!< Enable support for compiler code cache
uint resetCodeCache_: 1; //!< Reset the compiler code cache storage
uint reserved_: 24;
};
uint value_;
};
+5 -2
Wyświetl plik
@@ -198,8 +198,11 @@ release_on_stg(uint, GPU_WAVE_LIMIT_DSC_THRESH, 10, \
release_on_stg(cstring, GPU_WAVE_LIMIT_DUMP, "", \
"File path prefix for dumping wave limiter output") \
release_on_stg(cstring, GPU_WAVE_LIMIT_TRACE, "", \
"File path prefix for tracing wave limiter")
"File path prefix for tracing wave limiter") \
release(bool, OCL_CODE_CACHE_ENABLE, false, \
"1 = Enable compiler code cache") \
release(bool, OCL_CODE_CACHE_RESET, false, \
"1 = Reset the compiler code cache storage")
namespace amd {