From d219d45f7da907a832be52050c9e81671d20a7ac Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 2 Oct 2018 11:52:23 -0400
Subject: [PATCH] P4 to Git Change 1613522 by gandryey@gera-ocl-lc on
2018/10/02 11:44:38
SWDEV-79445 - OCL generic changes and code clean-up
Program compilation clean-up. Step#7:
- Introduce a new key to control the compilation path dynamically
- InitBuild/finiBuild clean-up
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#73 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#231 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#321 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#245 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#111 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#67 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#80 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#36 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#93 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#37 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#96 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#299 edit
---
rocclr/runtime/device/device.cpp | 2 +-
rocclr/runtime/device/device.hpp | 3 +-
rocclr/runtime/device/devprogram.cpp | 33 ++++-
rocclr/runtime/device/devprogram.hpp | 8 +-
rocclr/runtime/device/gpu/gpuprogram.cpp | 80 ----------
rocclr/runtime/device/gpu/gpuprogram.hpp | 19 ---
rocclr/runtime/device/pal/paldevice.cpp | 164 ++++++++++++---------
rocclr/runtime/device/pal/palkernel.cpp | 22 +--
rocclr/runtime/device/pal/palkernel.hpp | 9 --
rocclr/runtime/device/pal/palprogram.cpp | 64 ++------
rocclr/runtime/device/pal/palprogram.hpp | 18 ---
rocclr/runtime/device/pal/palsettings.cpp | 1 +
rocclr/runtime/device/rocm/rockernel.cpp | 8 +
rocclr/runtime/device/rocm/rocprogram.cpp | 47 +-----
rocclr/runtime/device/rocm/rocprogram.hpp | 21 ---
rocclr/runtime/device/rocm/rocsettings.cpp | 2 +
rocclr/runtime/platform/program.cpp | 21 +--
rocclr/runtime/utils/flags.hpp | 2 +
18 files changed, 179 insertions(+), 345 deletions(-)
diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index 392d492572..e19f4f3a6b 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -573,7 +573,7 @@ bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
namespace device {
-Settings::Settings() {
+Settings::Settings() : value_(0) {
assert((ClExtTotal < (8 * sizeof(extensions_))) && "Too many extensions!");
extensions_ = 0;
partialDispatch_ = false;
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index b2b82adb50..e03c49931c 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -519,7 +519,8 @@ class Settings : public amd::HeapObject {
uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program
uint singleFpDenorm_ : 1; //!< Support Single FP Denorm
uint gfx10Hsail_ : 1 ; //!< GFX10 HSAIL path
- uint reserved_ : 21;
+ uint useLightning_ : 1; //!< Enable LC path for this device
+ uint reserved_ : 20;
};
uint value_;
};
diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp
index 1d8dca35fb..e531993025 100644
--- a/rocclr/runtime/device/devprogram.cpp
+++ b/rocclr/runtime/device/devprogram.cpp
@@ -1044,6 +1044,7 @@ void Program::releaseClBinary() {
// ================================================================================================
bool Program::initBuild(amd::option::Options* options) {
+ compileOptions_ = options->origOptionStr;
programOptions_ = options;
if (options->oVariables->DumpFlags > 0) {
@@ -1054,11 +1055,41 @@ bool Program::initBuild(amd::option::Options* options) {
if (!initClBinary()) {
return false;
}
+
+ const char* devName = machineTarget_;
+ options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
+ clBinary()->getEncryptCode(), true);
+
+ // Elf Binary setup
+ std::string outFileName;
+
+ // true means hsail required
+ clBinary()->init(options, true);
+ if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
+ outFileName = options->getDumpFileName(".bin");
+ }
+
+ if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64),
+ (outFileName.size() > 0) ? outFileName.c_str() : nullptr)) {
+ LogError("Setup elf out for gpu failed");
+ return false;
+ }
+
return true;
}
// ================================================================================================
-bool Program::finiBuild(bool isBuildGood) { return true; }
+bool Program::finiBuild(bool isBuildGood) {
+ clBinary()->resetElfOut();
+ clBinary()->resetElfIn();
+
+ if (!isBuildGood) {
+ // Prevent the encrypted binary from leaking out
+ clBinary()->setBinary(nullptr, 0);
+ }
+
+ return true;
+}
// ================================================================================================
cl_int Program::compile(const std::string& sourceCode,
diff --git a/rocclr/runtime/device/devprogram.hpp b/rocclr/runtime/device/devprogram.hpp
index 179b56f720..332d0b0c5c 100644
--- a/rocclr/runtime/device/devprogram.hpp
+++ b/rocclr/runtime/device/devprogram.hpp
@@ -197,10 +197,10 @@ class Program : public amd::HeapObject {
protected:
//! pre-compile setup
- virtual bool initBuild(amd::option::Options* options);
+ bool initBuild(amd::option::Options* options);
//! post-compile cleanup
- virtual bool finiBuild(bool isBuildGood);
+ bool finiBuild(bool isBuildGood);
/*! \brief Compiles GPU CL program to LLVM binary (compiler frontend)
*
@@ -236,8 +236,6 @@ class Program : public amd::HeapObject {
//! return target info
virtual const aclTargetInfo& info(const char* str = "") = 0;
- virtual bool isElf(const char* bin) const = 0;
-
virtual bool setKernels(
amd::option::Options* options, void* binary, size_t binSize) { return true; }
@@ -271,6 +269,8 @@ class Program : public amd::HeapObject {
//! Finds the total size of all global variables in the program
bool FindGlobalVarSize(void* binary, size_t binSize);
+ bool isElf(const char* bin) const { return amd::isElfMagic(bin); }
+
private:
//! Compile the device program with LC path
bool compileImplLC(const std::string& sourceCode,
diff --git a/rocclr/runtime/device/gpu/gpuprogram.cpp b/rocclr/runtime/device/gpu/gpuprogram.cpp
index e808e9e434..482909b6ea 100644
--- a/rocclr/runtime/device/gpu/gpuprogram.cpp
+++ b/rocclr/runtime/device/gpu/gpuprogram.cpp
@@ -23,48 +23,6 @@
namespace gpu {
-bool NullProgram::initBuild(amd::option::Options* options) {
- if (!device::Program::initBuild(options)) {
- return false;
- }
-
- const char* devname = dev().hwInfo()->machineTarget_;
- options->setPerBuildInfo((devname && (devname[0] != '\0')) ? devname : "gpu",
- clBinary()->getEncryptCode(),
- true // FIXME: the dev ptr is used to query the wavefront size.
- );
-
- // Elf Binary setup
- std::string outFileName;
-
- // Recompile from IL may happen (invoking Kernel::recompil()) to generate correct
- // isa code for 7xx. Because of this, force saving AMDIL into the binary.
- clBinary()->init(options, (dev().calTarget() <= CAL_TARGET_730));
- if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
- outFileName = options->getDumpFileName(".bin");
- }
-
- bool useELF64 = dev().settings().use64BitPtr_;
- if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
- (outFileName.size() > 0) ? outFileName.c_str() : NULL)) {
- LogError("Setup elf out for gpu failed");
- return false;
- }
- return true;
-}
-
-bool NullProgram::finiBuild(bool isBuildGood) {
- clBinary()->resetElfOut();
- clBinary()->resetElfIn();
-
- if (!isBuildGood) {
- // Prevent the encrypted binary form leaking out
- clBinary()->setBinary(NULL, 0);
- }
-
- return device::Program::finiBuild(isBuildGood);
-}
-
const aclTargetInfo& NullProgram::info(const char* str) {
acl_error err;
std::string arch = GPU_TARGET_INFO_ARCH;
@@ -1572,44 +1530,6 @@ HSAILProgram::~HSAILProgram() {
amd::hsa::loader::Loader::Destroy(loader_);
}
-bool HSAILProgram::initBuild(amd::option::Options* options) {
- if (!device::Program::initBuild(options)) {
- return false;
- }
-
- const char* devName = dev().hwInfo()->machineTarget_;
- options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
- clBinary()->getEncryptCode(), true);
-
- // Elf Binary setup
- std::string outFileName;
-
- // true means fsail required
- clBinary()->init(options, true);
- if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
- outFileName = options->getDumpFileName(".bin");
- }
-
- if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64),
- (outFileName.size() > 0) ? outFileName.c_str() : NULL)) {
- LogError("Setup elf out for gpu failed");
- return false;
- }
- return true;
-}
-
-bool HSAILProgram::finiBuild(bool isBuildGood) {
- clBinary()->resetElfOut();
- clBinary()->resetElfIn();
-
- if (!isBuildGood) {
- // Prevent the encrypted binary form leaking out
- clBinary()->setBinary(NULL, 0);
- }
-
- return device::Program::finiBuild(isBuildGood);
-}
-
inline static std::vector splitSpaceSeparatedString(char* str) {
std::string s(str);
std::stringstream ss(s);
diff --git a/rocclr/runtime/device/gpu/gpuprogram.hpp b/rocclr/runtime/device/gpu/gpuprogram.hpp
index 102d2c035a..ead578304a 100644
--- a/rocclr/runtime/device/gpu/gpuprogram.hpp
+++ b/rocclr/runtime/device/gpu/gpuprogram.hpp
@@ -119,12 +119,6 @@ class NullProgram : public device::Program {
const std::vector& glbCb() const { return glbCb_; }
protected:
- //! pre-compile setup for GPU
- virtual bool initBuild(amd::option::Options* options);
-
- //! post-compile setup for GPU
- virtual bool finiBuild(bool isBuildGood);
-
/*! \brief Compiles GPU CL program to LLVM binary (compiler frontend)
*
* \return True if we successefully compiled a GPU program
@@ -261,8 +255,6 @@ class NullProgram : public device::Program {
std::vector printf_; //!< Format strings for GPU printf support
std::vector glbCb_; //!< Global constant buffers
- virtual bool isElf(const char* bin) const { return amd::isElfMagic(bin); }
-
virtual const aclTargetInfo& info(const char* str = "");
virtual bool saveBinaryAndSetType(type_t type) { return true; }
@@ -477,12 +469,6 @@ class HSAILProgram : public device::Program {
bool isStaticSampler() const { return (staticSamplers_.size() != 0); }
protected:
- //! pre-compile setup for GPU
- virtual bool initBuild(amd::option::Options* options);
-
- //! post-compile setup for GPU
- virtual bool finiBuild(bool isBuildGood);
-
bool saveBinaryAndSetType(type_t type);
virtual bool linkImpl(amd::option::Options* options);
@@ -491,11 +477,6 @@ class HSAILProgram : public device::Program {
virtual const aclTargetInfo& info(const char* str = "");
- virtual bool isElf(const char* bin) const {
- return amd::isElfMagic(bin);
- // return false;
- }
-
private:
//! Disable default copy constructor
HSAILProgram(const HSAILProgram&);
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index 49ba683736..6d067b51e7 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -74,7 +74,7 @@ bool NullDevice::init() {
// Comment out this section for SWDEV-146950 since Kalindi and Mullins
// does not works for LC offline compilation without knowing which GFXIP
// should be used for them.
-#ifndef WITH_LIGHTNING_COMPILER
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
// Loop through all supported devices and create each of them
for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
@@ -110,7 +110,7 @@ bool NullDevice::init() {
}
}
}
-#endif
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
// Loop through all supported devices and create each of them
for (uint id = 0;
@@ -272,6 +272,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
info_.wavefrontWidth_ = (ipLevel >= Pal::GfxIpLevel::GfxIp10) ? 32 : 64;
+ if (settings().useLightning_) {
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
@@ -296,17 +297,37 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
cacheCompilation_.reset(compObj);
#endif
+ } else {
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+ const char* library = getenv("HSA_COMPILER_LIBRARY");
+ aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8),
+ library,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ AMD_OCL_SC_LIB };
+ // Initialize the compiler handle
+ acl_error error;
+ compiler_ = aclCompilerInit(&opts, &error);
+ if (error != ACL_SUCCESS) {
+ LogError("Error initializing the compiler");
+ return false;
+ }
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+ }
return true;
}
device::Program* NullDevice::createProgram(amd::option::Options* options) {
device::Program* program;
-#if defined(WITH_LIGHTNING_COMPILER)
- program = new LightningProgram(*this);
-#else // !defined(WITH_LIGHTNING_COMPILER)
- program = new HSAILProgram(*this);
-#endif // defined(WITH_LIGHTNING_COMPILER)
+ if (settings().useLightning_) {
+ program = new LightningProgram(*this);
+ } else {
+ program = new HSAILProgram(*this);
+ }
if (program == nullptr) {
LogError("Memory allocation has failed!");
@@ -506,12 +527,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
}
::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)",
-#if defined(WITH_LIGHTNING_COMPILER)
- ",LC"
-#else // ! defined(WITH_LIGHTNING_COMPILER)
- ",HSAIL"
-#endif // ! defined(WITH_LIGHTNING_COMPILER)
- );
+ settings().useLightning_ ? ",LC" : ",HSAIL");
info_.profile_ = "FULL_PROFILE";
if (settings().oclVersion_ >= OpenCL20) {
@@ -922,6 +938,52 @@ bool Device::create(Pal::IDevice* device) {
allocedMem[i] = 0;
}
+ if (settings().useLightning_) {
+#if defined(WITH_LIGHTNING_COMPILER)
+ // create compilation object with cache support
+ int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
+ int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
+ int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
+
+ // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
+ // with dash as delimiter to be compatible with Windows directory name
+ std::ostringstream cacheTarget;
+ cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
+ if (isXNACKSupported) {
+ cacheTarget << "-xnack";
+ }
+
+ amd::CacheCompilation* compObj = new amd::CacheCompilation(
+ cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
+ if (!compObj) {
+ LogError("Unable to create cache compilation object!");
+ return false;
+ }
+
+ cacheCompilation_.reset(compObj);
+#endif
+ }
+ else {
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+ const char* library = getenv("HSA_COMPILER_LIBRARY");
+ aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8),
+ library,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ AMD_OCL_SC_LIB };
+ // Initialize the compiler handle
+ acl_error error;
+ compiler_ = aclCompilerInit(&opts, &error);
+ if (error != ACL_SUCCESS) {
+ LogError("Error initializing the compiler");
+ return false;
+ }
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+ }
+
// Allocate SRD manager
srdManager_ = new SrdManager(*this, std::max(HsaImageObjectSize, HsaSamplerObjectSize), 64 * Ki);
if (srdManager_ == nullptr) {
@@ -933,30 +995,6 @@ bool Device::create(Pal::IDevice* device) {
hwDebugMgr_ = new GpuDebugManager(this);
}
-#if defined(WITH_LIGHTNING_COMPILER)
- // create compilation object with cache support
- int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
- int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
- int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
-
- // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
- // with dash as delimiter to be compatible with Windows directory name
- std::ostringstream cacheTarget;
- cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
- if (isXNACKSupported) {
- cacheTarget << "-xnack";
- }
-
- amd::CacheCompilation* compObj = new amd::CacheCompilation(
- cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
- if (!compObj) {
- LogError("Unable to create cache compilation object!");
- return false;
- }
-
- cacheCompilation_.reset(compObj);
-#endif
-
return true;
}
@@ -1090,11 +1128,12 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) {
device::Program* Device::createProgram(amd::option::Options* options) {
device::Program* program;
-#if defined(WITH_LIGHTNING_COMPILER)
- program = new LightningProgram(*this);
-#else // !defined(WITH_LIGHTNING_COMPILER)
- program = new HSAILProgram(*this);
-#endif // defined(WITH_LIGHTNING_COMPILER)
+ if (settings().useLightning_) {
+ program = new LightningProgram(*this);
+ }
+ else {
+ program = new HSAILProgram(*this);
+ }
if (program == nullptr) {
LogError("We failed memory allocation for program!");
}
@@ -1154,25 +1193,6 @@ bool Device::init() {
bool useDeviceList = false;
requestedDevices_t requestedDevices;
-#if !defined(WITH_LIGHTNING_COMPILER)
- const char* library = getenv("HSA_COMPILER_LIBRARY");
- aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
- library,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- AMD_OCL_SC_LIB};
- // Initialize the compiler handle
- acl_error error;
- compiler_ = aclCompilerInit(&opts, &error);
- if (error != ACL_SUCCESS) {
- LogError("Error initializing the compiler");
- return false;
- }
-#endif // !defined(WITH_LIGHTNING_COMPILER)
-
size_t size = Pal::GetPlatformSize();
platformObj_ = new char[size];
Pal::PlatformCreateInfo info = {};
@@ -1242,12 +1262,12 @@ void Device::tearDown() {
platform_ = nullptr;
}
-#if !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
if (compiler_ != nullptr) {
aclCompilerFini(compiler_);
compiler_ = nullptr;
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
}
Memory* Device::getGpuMemory(amd::Memory* mem) const {
@@ -2187,15 +2207,15 @@ bool Device::createBlitProgram() {
if (settings().oclVersion_ >= OpenCL20) {
size_t loc = sch.find("%s");
sch.replace(loc, 2, iDev()->GetDispatchKernelSource());
-#if defined(WITH_LIGHTNING_COMPILER)
- // For LC, replace "amd_scheduler" with "amd_scheduler_pal"
- static const char AmdScheduler[] = "amd_scheduler";
- static const char AmdSchedulerPal[] = "amd_scheduler_pal";
- loc = sch.find(AmdScheduler);
- sch.replace(loc, strlen(AmdScheduler), AmdSchedulerPal);
- loc = sch.find(AmdScheduler, (loc + strlen(AmdSchedulerPal)));
- sch.replace(loc, strlen(AmdScheduler), AmdSchedulerPal);
-#endif
+ if (settings().useLightning_) {
+ // For LC, replace "amd_scheduler" with "amd_scheduler_pal"
+ static const char AmdScheduler[] = "amd_scheduler";
+ static const char AmdSchedulerPal[] = "amd_scheduler_pal";
+ loc = sch.find(AmdScheduler);
+ sch.replace(loc, sizeof(AmdScheduler) - 1, AmdSchedulerPal);
+ loc = sch.find(AmdScheduler, (loc + sizeof(AmdSchedulerPal) - 1));
+ sch.replace(loc, sizeof(AmdScheduler) - 1, AmdSchedulerPal);
+ }
scheduler = sch.c_str();
ocl20 = "-cl-std=CL2.0";
}
diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp
index 504285f256..fc483934e1 100644
--- a/rocclr/runtime/device/pal/palkernel.cpp
+++ b/rocclr/runtime/device/pal/palkernel.cpp
@@ -8,9 +8,14 @@
#include "device/pal/palsched.hpp"
#include "platform/commandqueue.hpp"
#include "utils/options.hpp"
-
#include "acl.h"
+#if defined(WITH_LIGHTNING_COMPILER)
+#include "llvm/Support/AMDGPUMetadata.h"
+
+typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
+#endif // defined(WITH_LIGHTNING_COMPILER)
+
#include
#include
#include
@@ -85,9 +90,7 @@ HSAILKernel::~HSAILKernel() {
}
bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
-#if defined(WITH_LIGHTNING_COMPILER)
- assert(!"Should not reach here");
-#else // !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
acl_error error = ACL_SUCCESS;
std::string openClKernelName = openclMangledName(name());
flags_.internalKernel_ =
@@ -240,7 +243,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
delete[] VecTypeHint;
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
return true;
}
@@ -370,12 +373,11 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
return hsaDisp;
}
-#if defined(WITH_LIGHTNING_COMPILER)
-
const LightningProgram& LightningKernel::prog() const {
return reinterpret_cast(prog_);
}
+#if defined(WITH_LIGHTNING_COMPILER)
static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) {
for (const KernelMD& kernelMD : programMD->mKernels) {
if (kernelMD.mName == name) {
@@ -384,8 +386,10 @@ static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const s
}
return nullptr;
}
+#endif // defined(WITH_LIGHTNING_COMPILER)
bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
+#if defined(WITH_LIGHTNING_COMPILER)
flags_.internalKernel_ =
(compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -462,10 +466,8 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
waveLimiter_.enable();
*/
-
+#endif // defined(WITH_LIGHTNING_COMPILER)
return true;
}
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
} // namespace pal
diff --git a/rocclr/runtime/device/pal/palkernel.hpp b/rocclr/runtime/device/pal/palkernel.hpp
index d94ae849d3..77f6106c8b 100644
--- a/rocclr/runtime/device/pal/palkernel.hpp
+++ b/rocclr/runtime/device/pal/palkernel.hpp
@@ -16,13 +16,6 @@
#include "device/devwavelimiter.hpp"
#include "hsa.h"
-#if defined(WITH_LIGHTNING_COMPILER)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
namespace amd {
namespace hsa {
namespace loader {
@@ -118,7 +111,6 @@ class HSAILKernel : public device::Kernel {
size_t codeSize_; //!< Size of ISA code
};
-#if defined(WITH_LIGHTNING_COMPILER)
class LightningKernel : public HSAILKernel {
public:
LightningKernel(const std::string& name, HSAILProgram* prog, const std::string& compileOptions)
@@ -130,6 +122,5 @@ class LightningKernel : public HSAILKernel {
//! Initializes the metadata required for this kernel,
bool init(amd::hsa::loader::Symbol* symbol);
};
-#endif // defined(WITH_LIGHTNING_COMPILER)
/*@}*/} // namespace pal
diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp
index 024766be4c..d90865e558 100644
--- a/rocclr/runtime/device/pal/palprogram.cpp
+++ b/rocclr/runtime/device/pal/palprogram.cpp
@@ -198,43 +198,6 @@ HSAILProgram::~HSAILProgram() {
amd::hsa::loader::Loader::Destroy(loader_);
}
-bool HSAILProgram::initBuild(amd::option::Options* options) {
- if (!device::Program::initBuild(options)) {
- return false;
- }
-
- const char* devName = dev().hwInfo()->machineTarget_;
- options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
- clBinary()->getEncryptCode(), true);
-
- // Elf Binary setup
- std::string outFileName;
-
- // true means fsail required
- clBinary()->init(options, true);
- if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
- outFileName = options->getDumpFileName(".bin");
- }
-
- if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64),
- (outFileName.size() > 0) ? outFileName.c_str() : nullptr)) {
- LogError("Setup elf out for gpu failed");
- return false;
- }
- return true;
-}
-
-bool HSAILProgram::finiBuild(bool isBuildGood) {
- clBinary()->resetElfOut();
- clBinary()->resetElfIn();
-
- if (!isBuildGood) {
- // Prevent the encrypted binary form leaking out
- clBinary()->setBinary(nullptr, 0);
- }
-
- return device::Program::finiBuild(isBuildGood);
-}
inline static std::vector splitSpaceSeparatedString(char* str) {
std::string s(str);
@@ -245,10 +208,7 @@ inline static std::vector splitSpaceSeparatedString(char* str) {
}
bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if defined(WITH_LIGHTNING_COMPILER)
- assert(!"Should not reach here");
- return false;
-#else // !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
// ACL_TYPE_CG stage is not performed for offline compilation
hsa_agent_t agent;
agent.handle = 1;
@@ -324,8 +284,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
}
DestroySegmentCpuAccess();
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
return true;
-#endif // !defined(WITH_LIGHTNING_COMPILER)
}
bool HSAILProgram::createBinary(amd::option::Options* options) { return true; }
@@ -354,9 +314,7 @@ void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const {
}
const aclTargetInfo& HSAILProgram::info(const char* str) {
-#if defined(WITH_LIGHTNING_COMPILER)
- assert(!"Should not reach here");
-#else // !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
acl_error err;
std::string arch = "hsail";
if (dev().settings().use64BitPtr_) {
@@ -367,14 +325,12 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
return info_;
}
bool HSAILProgram::saveBinaryAndSetType(type_t type) {
-#if defined(WITH_LIGHTNING_COMPILER)
- assert(!"Should not reach here");
-#else // !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
// Write binary to memory
if (rawBinary_ != nullptr) {
// Free memory containing rawBinary
@@ -389,7 +345,7 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) {
setBinary(static_cast(rawBinary_), size);
// Set the type of binary
setType(type);
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
return true;
}
@@ -647,16 +603,20 @@ static hsa_status_t GetGlobalVarNamesCallback(
}
return HSA_STATUS_SUCCESS;
}
+#endif // defined(WITH_LIGHTNING_COMPILER)
bool LightningProgram::createBinary(amd::option::Options* options) {
+#if defined(WITH_LIGHTNING_COMPILER)
if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) {
LogError("Failed to create ELF binary image!");
return false;
}
+#endif // defined(WITH_LIGHTNING_COMPILER)
return true;
}
bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
+#if defined(WITH_LIGHTNING_COMPILER)
hsa_agent_t agent;
agent.handle = 1;
@@ -731,10 +691,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
hasGlobalStores_ = (glbVarNames.size() != 0) ? true : false;
DestroySegmentCpuAccess();
-
+#endif // defined(WITH_LIGHTNING_COMPILER)
return true;
}
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
} // namespace pal
diff --git a/rocclr/runtime/device/pal/palprogram.hpp b/rocclr/runtime/device/pal/palprogram.hpp
index c1daa131cb..d463a09615 100644
--- a/rocclr/runtime/device/pal/palprogram.hpp
+++ b/rocclr/runtime/device/pal/palprogram.hpp
@@ -6,12 +6,6 @@
#include "device/pal/palkernel.hpp"
#include "amd_hsa_loader.hpp"
-#if defined(WITH_LIGHTNING_COMPILER)
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
namespace amd {
namespace option {
class Options;
@@ -175,22 +169,12 @@ class HSAILProgram : public device::Program {
}
protected:
- //! pre-compile setup for GPU
- virtual bool initBuild(amd::option::Options* options);
-
- //! post-compile setup for GPU
- virtual bool finiBuild(bool isBuildGood);
-
bool saveBinaryAndSetType(type_t type);
virtual bool createBinary(amd::option::Options* options);
virtual const aclTargetInfo& info(const char* str = "");
- virtual bool isElf(const char* bin) const {
- return amd::isElfMagic(bin);
- }
-
virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize) override;
//! Destroys CPU allocations in the code segment
@@ -222,7 +206,6 @@ class HSAILProgram : public device::Program {
PALHSALoaderContext loaderContext_; //!< Context for HSA Loader
};
-#if defined(WITH_LIGHTNING_COMPILER)
//! \class Lightning Compiler Program
class LightningProgram : public HSAILProgram {
public:
@@ -246,6 +229,5 @@ class LightningProgram : public HSAILProgram {
virtual bool createBinary(amd::option::Options* options) override;
};
-#endif // defined(WITH_LIGHTNING_COMPILER)
/*@}*/} // namespace pal
diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp
index 7493e91618..cb5528e070 100644
--- a/rocclr/runtime/device/pal/palsettings.cpp
+++ b/rocclr/runtime/device/pal/palsettings.cpp
@@ -143,6 +143,7 @@ Settings::Settings() {
std::min(static_cast(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
maxCmdBuffers_ = 12;
+ useLightning_ = GPU_ENABLE_LC;
}
bool Settings::create(const Pal::DeviceProperties& palProp,
diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp
index a63c9f9767..93fba7bc5b 100644
--- a/rocclr/runtime/device/rocm/rockernel.cpp
+++ b/rocclr/runtime/device/rocm/rockernel.cpp
@@ -9,6 +9,14 @@
#ifndef WITHOUT_HSA_BACKEND
+#if defined(WITH_LIGHTNING_COMPILER)
+#include "driver/AmdCompiler.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+
+typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
+typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
+#endif // defined(WITH_LIGHTNING_COMPILER)
+
namespace roc {
Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle,
diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp
index 7999590bb2..be1e6f7003 100644
--- a/rocclr/runtime/device/rocm/rocprogram.cpp
+++ b/rocclr/runtime/device/rocm/rocprogram.cpp
@@ -1,8 +1,6 @@
//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
-
-
#ifndef WITHOUT_HSA_BACKEND
#include "rocprogram.hpp"
@@ -14,8 +12,8 @@
#include "driver/AmdCompiler.h"
#include "libraries.amdgcn.inc"
#endif // defined(WITH_LIGHTNING_COMPILER)
-#include "utils/bif_section_labels.hpp"
+#include "utils/bif_section_labels.hpp"
#include "amd_hsa_kernel_code.h"
#include
@@ -113,49 +111,6 @@ bool Program::initClBinary(char* binaryIn, size_t size) {
return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr));
}
-
-bool Program::initBuild(amd::option::Options* options) {
- compileOptions_ = options->origOptionStr;
-
- if (!device::Program::initBuild(options)) {
- return false;
- }
-
- const char* devName = dev().deviceInfo().machineTarget_;
- options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
- clBinary()->getEncryptCode(), true);
-
- // Elf Binary setup
- std::string outFileName;
-
- // true means hsail required
- clBinary()->init(options, true);
- if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
- outFileName = options->getDumpFileName(".bin");
- }
-
- bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
- if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32,
- (outFileName.size() > 0) ? outFileName.c_str() : nullptr)) {
- LogError("Setup elf out for gpu failed");
- return false;
- }
- return true;
-}
-
-// ! post-compile setup for GPU
-bool Program::finiBuild(bool isBuildGood) {
- clBinary()->resetElfOut();
- clBinary()->resetElfIn();
-
- if (!isBuildGood) {
- // Prevent the encrypted binary form leaking out
- clBinary()->setBinary(nullptr, 0);
- }
-
- return device::Program::finiBuild(isBuildGood);
-}
-
#if defined(WITH_COMPILER_LIB)
HSAILProgram::HSAILProgram(roc::NullDevice& device) : roc::Program(device) {
xnackEnabled_ = dev().deviceInfo().xnackEnabled_;
diff --git a/rocclr/runtime/device/rocm/rocprogram.hpp b/rocclr/runtime/device/rocm/rocprogram.hpp
index 89f2f88d2b..178011ba4a 100644
--- a/rocclr/runtime/device/rocm/rocprogram.hpp
+++ b/rocclr/runtime/device/rocm/rocprogram.hpp
@@ -12,16 +12,6 @@
#include
#include "rocdevice.hpp"
-#if defined(WITH_LIGHTNING_COMPILER)
-#include "driver/AmdCompiler.h"
-#include "llvm/Support/AMDGPUMetadata.h"
-
-typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-
-#endif // defined(WITH_LIGHTNING_COMPILER)
-
//! \namespace roc HSA Device Implementation
namespace roc {
@@ -50,12 +40,6 @@ class Program : public device::Program {
hsa_executable_t hsaExecutable() const { return hsaExecutable_; }
protected:
- //! pre-compile setup for GPU
- virtual bool initBuild(amd::option::Options* options);
-
- //! post-compile setup for GPU
- virtual bool finiBuild(bool isBuildGood);
-
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
*
* \return The build error code
@@ -66,11 +50,6 @@ class Program : public device::Program {
virtual const aclTargetInfo& info(const char* str = "") { return info_; }
- virtual bool isElf(const char* bin) const {
- return amd::isElfMagic(bin);
- // return false;
- }
-
protected:
//! Disable default copy constructor
Program(const Program&) = delete;
diff --git a/rocclr/runtime/device/rocm/rocsettings.cpp b/rocclr/runtime/device/rocm/rocsettings.cpp
index 31e235eeb1..a691d72606 100644
--- a/rocclr/runtime/device/rocm/rocsettings.cpp
+++ b/rocclr/runtime/device/rocm/rocsettings.cpp
@@ -69,6 +69,8 @@ Settings::Settings() {
// Device enqueuing settings
numDeviceEvents_ = 1024;
numWaitEvents_ = 8;
+
+ useLightning_ = GPU_ENABLE_LC;
}
bool Settings::create(bool fullProfile, int gfxipVersion) {
diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp
index 9b68004437..336c95a805 100644
--- a/rocclr/runtime/platform/program.cpp
+++ b/rocclr/runtime/platform/program.cpp
@@ -48,13 +48,16 @@ const Symbol* Program::findSymbol(const char* kernelName) const {
cl_int Program::addDeviceProgram(Device& device, const void* image, size_t length,
amd::option::Options* options) {
- if (image != NULL && !amd::isElfMagic((const char*)image)
-#if !defined(WITH_LIGHTNING_COMPILER)
- && !aclValidateBinaryImage(
- image, length, language_ == SPIRV ? BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM)
-#endif // !defined(WITH_LIGHTNING_COMPILER)
- ) {
- return CL_INVALID_BINARY;
+ if (image != NULL && !amd::isElfMagic((const char*)image)) {
+ if (device.settings().useLightning_) {
+ return CL_INVALID_BINARY;
+ }
+#if defined(WITH_COMPILER_LIB)
+ else if (!aclValidateBinaryImage(
+ image, length, language_ == SPIRV ? BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM)) {
+ return CL_INVALID_BINARY;
+ }
+#endif // defined(WITH_COMPILER_LIB)
}
// Check if the device is already associated with this program
@@ -307,9 +310,7 @@ cl_int Program::link(const std::vector& devices, size_t numInputs,
}
if (isHSAILTarget(*aclutGetTargetInfo(aclBin))) {
parsedOptions.oVariables->Frontend = "clang";
-#if defined(WITH_LIGHTNING_COMPILER)
- parsedOptions.oVariables->Legacy = true;
-#endif // defined(WITH_LIGHTNING_COMPILER)
+ parsedOptions.oVariables->Legacy = it->settings().useLightning_;
} else if (isAMDILTarget(*aclutGetTargetInfo(aclBin))) {
parsedOptions.oVariables->Frontend = "edg";
}
diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp
index be54d11a72..12ac115f83 100644
--- a/rocclr/runtime/utils/flags.hpp
+++ b/rocclr/runtime/utils/flags.hpp
@@ -203,6 +203,8 @@ release(uint, PAL_RGP_DISP_COUNT, 50, \
"The number of dispatches for RGP capture with SQTT") \
release(bool, GPU_FORCE_WAVE_SIZE_32, false, \
"Forces WaveSize32 compilation in SC") \
+release(bool, GPU_ENABLE_LC, IS_LIGHTNING, \
+ "Enables LC path") \
release(uint, GPU_MAX_COMMAND_BUFFERS, 8, \
"The maximum number of command buffers allocated per queue") \
release(cstring, HIP_VISIBLE_DEVICES, "", \