From 5787f039930c0deb0697d5cf99515810eba58c4a Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 2 Oct 2018 18:50:36 -0400
Subject: [PATCH] P4 to Git Change 1613773 by gandryey@gera-w8 on 2018/10/02
18:37:59
SWDEV-79445 - OCL generic changes and code clean-up
Program compilation clean-up:
- Remove !defined(WITH_LIGHTNING_COMPILER), but appprofiler still requires more changes.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#232 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devkernel.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#112 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#68 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#101 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#38 edit
[ROCm/clr commit: 8ee6142811a516d28e74280180c21c2a7b6de376]
---
projects/clr/rocclr/runtime/device/device.cpp | 11 ++-
.../clr/rocclr/runtime/device/devkernel.cpp | 22 +++---
.../clr/rocclr/runtime/device/devkernel.hpp | 2 +-
.../clr/rocclr/runtime/device/devprogram.cpp | 70 ++++++++++++-------
.../rocclr/runtime/device/pal/paldevice.cpp | 54 +++++++-------
.../rocclr/runtime/device/pal/palkernel.cpp | 4 +-
.../rocclr/runtime/device/pal/palprogram.cpp | 18 ++---
.../rocclr/runtime/device/pal/palsettings.cpp | 20 +++---
.../rocclr/runtime/device/rocm/rocdevice.cpp | 23 +++---
.../runtime/device/rocm/rocsettings.cpp | 17 +++--
10 files changed, 125 insertions(+), 116 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/device.cpp b/projects/clr/rocclr/runtime/device/device.cpp
index e19f4f3a6b..fadd8fe272 100644
--- a/projects/clr/rocclr/runtime/device/device.cpp
+++ b/projects/clr/rocclr/runtime/device/device.cpp
@@ -119,13 +119,10 @@ bool Device::BlitProgram::create(amd::Device* device, const char* extraKernels,
}
// Build all kernels
- std::string opt =
- "-cl-internal-kernel "
-#if !defined(WITH_LIGHTNING_COMPILER)
- "-Wf,--force_disable_spir -fno-lib-no-inline "
- "-fno-sc-keep-calls "
-#endif // !defined(WITH_LIGHTNING_COMPILER)
- ;
+ std::string opt = "-cl-internal-kernel ";
+ if (!device->settings().useLightning_) {
+ opt += "-Wf,--force_disable_spir -fno-lib-no-inline -fno-sc-keep-calls ";
+ }
if (extraOptions != nullptr) {
opt += extraOptions;
diff --git a/projects/clr/rocclr/runtime/device/devkernel.cpp b/projects/clr/rocclr/runtime/device/devkernel.cpp
index 33438dfc8f..8c0aa82d7f 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.cpp
@@ -280,7 +280,7 @@ static inline uint32_t GetOclArgumentTypeOCL(const KernelArgMD& lcArg, bool* isH
}
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline uint32_t GetOclArgumentTypeOCL(const aclArgData* argInfo, bool* isHidden) {
if (argInfo->argStr[0] == '_' && argInfo->argStr[1] == '.') {
*isHidden = true;
@@ -414,7 +414,7 @@ static inline clk_value_type_t GetOclTypeOCL(const KernelArgMD& lcArg, size_t si
}
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline clk_value_type_t GetOclTypeOCL(const aclArgData* argInfo, size_t size = 0) {
uint sizeType;
uint numElements;
@@ -497,7 +497,7 @@ static inline size_t GetArgAlignmentOCL(const KernelArgMD& lcArg) { return lcArg
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline size_t GetArgAlignmentOCL(const aclArgData* argInfo) {
switch (argInfo->type) {
case ARG_TYPE_POINTER:
@@ -551,7 +551,7 @@ static inline size_t GetArgPointeeAlignmentOCL(const KernelArgMD& lcArg) {
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline size_t GetArgPointeeAlignmentOCL(const aclArgData* argInfo) {
if (argInfo->type == ARG_TYPE_POINTER) {
return argInfo->arg.pointer.align;
@@ -578,7 +578,7 @@ static inline bool GetReadOnlyOCL(const KernelArgMD& lcArg) {
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline bool GetReadOnlyOCL(const aclArgData* argInfo) {
if (argInfo->type == ARG_TYPE_POINTER) {
return (argInfo->arg.pointer.type == ACCESS_TYPE_RO) ? true : false;
@@ -596,7 +596,7 @@ static inline int GetArgSizeOCL(const KernelArgMD& lcArg) { return lcArg.mSize;
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
inline static int GetArgSizeOCL(const aclArgData* argInfo) {
switch (argInfo->type) {
case ARG_TYPE_POINTER:
@@ -659,7 +659,7 @@ static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const KernelArgM
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline cl_kernel_arg_address_qualifier GetOclAddrQualOCL(const aclArgData* argInfo) {
if (argInfo->type == ARG_TYPE_POINTER) {
switch (argInfo->arg.pointer.memory) {
@@ -708,7 +708,7 @@ static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const KernelArg
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline cl_kernel_arg_access_qualifier GetOclAccessQualOCL(const aclArgData* argInfo) {
if (argInfo->type == ARG_TYPE_IMAGE) {
switch (argInfo->arg.image.type) {
@@ -749,7 +749,7 @@ static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const KernelArgMD&
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
static inline cl_kernel_arg_type_qualifier GetOclTypeQualOCL(const aclArgData* argInfo) {
cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
if (argInfo->type == ARG_TYPE_POINTER) {
@@ -858,7 +858,7 @@ void Kernel::InitParameters(const KernelMD& kernelMD, uint32_t argBufferSize) {
#endif
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
void Kernel::InitParameters(const aclArgData* aclArg, uint32_t argBufferSize) {
// Iterate through the arguments and insert into parameterList
device::Kernel::parameters_t params;
@@ -1032,7 +1032,7 @@ void Kernel::InitPrintf(const std::vector& printfInfoStrings) {
#endif // defined(WITH_LIGHTNING_COMPILER)
// ================================================================================================
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
void Kernel::InitPrintf(const aclPrintfFmt* aclPrintf) {
PrintfInfo info;
uint index = 0;
diff --git a/projects/clr/rocclr/runtime/device/devkernel.hpp b/projects/clr/rocclr/runtime/device/devkernel.hpp
index 59f7733fa9..d3ec08dcae 100644
--- a/projects/clr/rocclr/runtime/device/devkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/devkernel.hpp
@@ -224,7 +224,7 @@ class Kernel : public amd::HeapObject {
//! Initializes HSAIL Printf metadata and info for LC
void InitPrintf(const std::vector& printfInfoStrings);
#endif
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
void InitParameters(
const aclArgData* aclArg, //!< List of ACL arguments
uint32_t argBufferSize
diff --git a/projects/clr/rocclr/runtime/device/devprogram.cpp b/projects/clr/rocclr/runtime/device/devprogram.cpp
index e531993025..88d0abf88d 100644
--- a/projects/clr/rocclr/runtime/device/devprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/devprogram.cpp
@@ -16,7 +16,7 @@
#include "libraries.amdgcn.inc"
#include "opencl1.2-c.amdgcn.inc"
#include "opencl2.0-c.amdgcn.inc"
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_LIGHTNING_COMPILER)
#include
#include
@@ -387,7 +387,7 @@ static void logFunction(const char* msg, size_t size) {
bool Program::compileImplHSAIL(const std::string& sourceCode,
const std::vector& headers,
const char** headerIncludeNames, amd::option::Options* options) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
acl_error errorCode;
aclTargetInfo target;
@@ -479,7 +479,7 @@ bool Program::compileImplHSAIL(const std::string& sourceCode,
// Save the binary in the interface class
saveBinaryAndSetType(TYPE_COMPILED);
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
return true;
}
@@ -592,7 +592,7 @@ bool Program::linkImplLC(const std::vector& inputPrograms,
// ================================================================================================
bool Program::linkImplHSAIL(const std::vector& inputPrograms,
amd::option::Options* options, bool createLibrary) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
acl_error errorCode;
// For each program we need to extract the LLVMIR and create
@@ -678,7 +678,7 @@ bool Program::linkImplHSAIL(const std::vector& inputPrograms,
return linkImpl(options);
#else
return false;
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
}
// ================================================================================================
@@ -931,7 +931,7 @@ bool Program::linkImplLC(amd::option::Options* options) {
// ================================================================================================
bool Program::linkImplHSAIL(amd::option::Options* options) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
acl_error errorCode;
bool finalize = true;
internal_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
@@ -1020,7 +1020,7 @@ bool Program::linkImplHSAIL(amd::option::Options* options) {
return true;
#else
return false;
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
}
// ================================================================================================
@@ -1420,14 +1420,14 @@ std::string Program::ProcessOptions(amd::option::Options* options) {
optionsStr.append(opts.str());
}
-#if !defined(WITH_LIGHTNING_COMPILER)
- if (!device().settings().singleFpDenorm_) {
- optionsStr.append(" -cl-denorms-are-zero");
- }
+ if (!device().settings().useLightning_) {
+ if (!device().settings().singleFpDenorm_) {
+ optionsStr.append(" -cl-denorms-are-zero");
+ }
- // Check if the host is 64 bit or 32 bit
- LP64_ONLY(optionsStr.append(" -m64"));
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+ // Check if the host is 64 bit or 32 bit
+ LP64_ONLY(optionsStr.append(" -m64"));
+ }
// Tokenize the extensions string into a vector of strings
std::istringstream istrstr(device().info().extensions_);
@@ -1518,6 +1518,15 @@ bool Program::getCompileOptionsAtLinking(const std::vector& inputProgr
return true;
}
+// ================================================================================================
+bool isSPIRVMagicL(const void* Image, size_t Length) {  // true if Image begins with the SPIR-V magic
+  const unsigned SPRVMagicNumber = 0x07230203;
+  if (Image == nullptr || Length < sizeof(unsigned))  // too short to hold a full magic word
+    return false;
+  auto Magic = static_cast<const unsigned*>(Image);  // view the leading bytes as one word
+  return *Magic == SPRVMagicNumber;
+}
+
// ================================================================================================
bool Program::initClBinary(const char* binaryIn, size_t size) {
if (!initClBinary()) {
@@ -1533,10 +1542,18 @@ bool Program::initClBinary(const char* binaryIn, size_t size) {
// unencrypted
int encryptCode = 0;
char* decryptedBin = nullptr;
+ bool isSPIRV = false;
+ bool isBc = false;
-#if !defined(WITH_LIGHTNING_COMPILER)
- bool isSPIRV = isSPIRVMagic(binaryIn, size);
- if (isSPIRV || isBcMagic(binaryIn)) {
+#if defined(WITH_COMPILER_LIB)
+ if (!device().settings().useLightning_) {
+ isSPIRV = isSPIRVMagicL(binaryIn, size);
+ isBc = isBcMagic(binaryIn);
+ }
+#endif // defined(WITH_COMPILER_LIB)
+
+ if (isSPIRV || isBc) {
+#if defined(WITH_COMPILER_LIB)
acl_error err = ACL_SUCCESS;
aclBinaryOptions binOpts = {0};
binOpts.struct_size = sizeof(binOpts);
@@ -1580,9 +1597,8 @@ bool Program::initClBinary(const char* binaryIn, size_t size) {
aclBinaryFini(aclbin_v30);
aclBinaryFini(aclbin_v21);
}
- } else
-#endif // !defined(WITH_LIGHTNING_COMPILER)
- {
+#endif // defined(WITH_COMPILER_LIB)
+ } else {
size_t decryptedSize;
if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) {
return false;
@@ -1705,9 +1721,9 @@ aclType Program::getCompilationStagesFromBinary(std::vector& completeSt
default:
break;
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_LIGHTNING_COMPILER)
} else {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
acl_error errorCode;
size_t secSize = 0;
completeStages.clear();
@@ -1830,7 +1846,7 @@ aclType Program::getCompilationStagesFromBinary(std::vector& completeSt
default:
break;
}
-#endif // #if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // #if defined(WITH_COMPILER_LIB)
}
return from;
}
@@ -1841,7 +1857,7 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options
binary_t binary = this->binary();
// If the binary already exists
if ((binary.first != nullptr) && (binary.second > 0)) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
if (aclValidateBinaryImage(binary.first, binary.second, BINARY_TYPE_ELF)) {
acl_error errorCode;
binaryElf_ = aclReadFromMem(binary.first, binary.second, &errorCode);
@@ -1850,7 +1866,7 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options
return continueCompileFrom;
}
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
// save the current options
std::string sCurCompileOptions = compileOptions_;
@@ -1880,7 +1896,7 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options
if (compileOptions_.empty()) break;
std::string sBinOptions;
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
if (binaryElf_ != nullptr) {
const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
assert(symbol && "symbol not found");
@@ -1898,7 +1914,7 @@ aclType Program::getNextCompilationStageFromBinary(amd::option::Options* options
sBinOptions = std::string((char*)opts, symSize);
}
else
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
{
sBinOptions = sCurOptions;
}
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index 6d067b51e7..d904fb49af 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -74,7 +74,7 @@ bool NullDevice::init() {
// Comment out this section for SWDEV-146950 since Kalindi and Mullins
// does not works for LC offline compilation without knowing which GFXIP
// should be used for them.
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
// Loop through all supported devices and create each of them
for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
@@ -110,7 +110,7 @@ bool NullDevice::init() {
}
}
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
// Loop through all supported devices and create each of them
for (uint id = 0;
@@ -274,31 +274,31 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
if (settings().useLightning_) {
#if defined(WITH_LIGHTNING_COMPILER)
- // create compilation object with cache support
- int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
- int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
- int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
+ // create compilation object with cache support
+ int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
+ int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
+ int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
- // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
- // with dash as delimiter to be compatible with Windows directory name
- std::ostringstream cacheTarget;
- cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
- if (hwInfo_->xnackEnabled_) {
- cacheTarget << "-xnack";
- }
+ // Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
+ // with dash as delimiter to be compatible with Windows directory name
+ std::ostringstream cacheTarget;
+ cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping;
+ if (hwInfo_->xnackEnabled_) {
+ cacheTarget << "-xnack";
+ }
- // Create CacheCompilation for the offline device
- amd::CacheCompilation* compObj = new amd::CacheCompilation(
- cacheTarget.str(), "_null_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
- if (!compObj) {
- LogError("Unable to create cache compilation object!");
- return false;
- }
+ // Create CacheCompilation for the offline device
+ amd::CacheCompilation* compObj = new amd::CacheCompilation(
+ cacheTarget.str(), "_null_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
+ if (!compObj) {
+ LogError("Unable to create cache compilation object!");
+ return false;
+ }
- cacheCompilation_.reset(compObj);
+ cacheCompilation_.reset(compObj);
#endif
} else {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
const char* library = getenv("HSA_COMPILER_LIBRARY");
aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8),
library,
@@ -315,7 +315,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
LogError("Error initializing the compiler");
return false;
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
}
return true;
@@ -964,7 +964,7 @@ bool Device::create(Pal::IDevice* device) {
#endif
}
else {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
const char* library = getenv("HSA_COMPILER_LIBRARY");
aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8),
library,
@@ -981,7 +981,7 @@ bool Device::create(Pal::IDevice* device) {
LogError("Error initializing the compiler");
return false;
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
}
// Allocate SRD manager
@@ -1262,12 +1262,12 @@ void Device::tearDown() {
platform_ = nullptr;
}
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
if (compiler_ != nullptr) {
aclCompilerFini(compiler_);
compiler_ = nullptr;
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
}
Memory* Device::getGpuMemory(amd::Memory* mem) const {
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index fc483934e1..3ee42beaf3 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -90,7 +90,7 @@ HSAILKernel::~HSAILKernel() {
}
bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
acl_error error = ACL_SUCCESS;
std::string openClKernelName = openclMangledName(name());
flags_.internalKernel_ =
@@ -243,7 +243,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
delete[] VecTypeHint;
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
return true;
}
diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
index d90865e558..f0c28ddc1d 100644
--- a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
@@ -24,7 +24,7 @@
#include "driver/AmdCompiler.h"
#include "libraries.amdgcn.inc"
#include "gelf.h"
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_LIGHTNING_COMPILER)
namespace pal {
@@ -177,7 +177,7 @@ HSAILProgram::~HSAILProgram() {
for (auto& it : staticSamplers_) {
delete it;
}
-#if !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
if (rawBinary_ != nullptr) {
aclFreeMem(binaryElf_, rawBinary_);
}
@@ -189,7 +189,7 @@ HSAILProgram::~HSAILProgram() {
LogWarning("Error while destroying the acl binary \n");
}
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
releaseClBinary();
if (executable_ != nullptr) {
loader_->DestroyExecutable(executable_);
@@ -208,7 +208,7 @@ inline static std::vector splitSpaceSeparatedString(char* str) {
}
bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
// ACL_TYPE_CG stage is not performed for offline compilation
hsa_agent_t agent;
agent.handle = 1;
@@ -284,7 +284,7 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
}
DestroySegmentCpuAccess();
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
return true;
}
@@ -314,7 +314,7 @@ void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const {
}
const aclTargetInfo& HSAILProgram::info(const char* str) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
acl_error err;
std::string arch = "hsail";
if (dev().settings().use64BitPtr_) {
@@ -325,12 +325,12 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
return info_;
}
bool HSAILProgram::saveBinaryAndSetType(type_t type) {
-#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
// Write binary to memory
if (rawBinary_ != nullptr) {
// Free memory containing rawBinary
@@ -345,7 +345,7 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) {
setBinary(static_cast(rawBinary_), size);
// Set the type of binary
setType(type);
-#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
return true;
}
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
index cb5528e070..e43f18b42f 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
@@ -340,16 +340,16 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
enableExtension(ClKhrImage2dFromBuffer);
enableExtension(ClAmdMediaOps);
enableExtension(ClAmdMediaOps2);
-#if !defined(WITH_LIGHTNING_COMPILER)
- enableExtension(ClAmdPopcnt);
- enableExtension(ClAmdVec3);
- enableExtension(ClAmdPrintf);
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+
+ if (!useLightning_) {
+ enableExtension(ClAmdPopcnt);
+ enableExtension(ClAmdVec3);
+ enableExtension(ClAmdPrintf);
+ enableExtension(ClKhrSpir);
+ }
// Enable some platform extensions
enableExtension(ClAmdDeviceAttributeQuery);
-#if !defined(WITH_LIGHTNING_COMPILER)
- enableExtension(ClKhrSpir);
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+
#ifdef ATI_OS_LINUX
if (palProp.gpuMemoryProperties.busAddressableMemSize > 0)
@@ -376,13 +376,11 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
enableExtension(ClKhrFp64);
}
-#if !defined(WITH_LIGHTNING_COMPILER)
- if (doublePrecision) {
+ if (!useLightning_ && doublePrecision) {
// Enable AMD double precision extension
doublePrecision_ = true;
enableExtension(ClAmdFp64);
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
if (palProp.gpuMemoryProperties.busAddressableMemSize > 0) {
// Enable bus addressable memory extension
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
index 6707c1c689..bcba8d3018 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocdevice.cpp
@@ -211,11 +211,9 @@ bool NullDevice::initCompiler(bool isOffline) {
NULL, NULL, NULL, NULL, NULL, NULL
};
compilerHandle_ = aclCompilerInit(&opts, &error);
- if (error != ACL_SUCCESS) {
-#if !defined(WITH_LIGHTNING_COMPILER)
+ if (!GPU_ENABLE_LC && error != ACL_SUCCESS) {
LogError("Error initializing the compiler handle");
return false;
-#endif // !defined(WITH_LIGHTNING_COMPILER)
}
}
#endif // defined(WITH_COMPILER_LIB)
@@ -245,7 +243,7 @@ bool NullDevice::init() {
// Return without initializing offline device list
return true;
-#if !defined(WITH_LIGHTNING_COMPILER)
+#if defined(WITH_COMPILER_LIB)
// If there is an HSA enabled device online then skip any offline device
std::vector devices;
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
@@ -272,9 +270,10 @@ bool NullDevice::init() {
}
nullDevice->registerDevice();
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
+#endif // defined(WITH_COMPILER_LIB)
return true;
}
+
NullDevice::~NullDevice() {
if (info_.extensions_) {
delete[] info_.extensions_;
@@ -1181,14 +1180,14 @@ bool Device::populateOCLDeviceConstants() {
if (agent_profile_ == HSA_PROFILE_FULL) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
}
-#if !defined(WITH_LIGHTNING_COMPILER)
- // Report atomics capability based on GFX IP, control on Hawaii
- // and Vega10.
- if (info_.hostUnifiedMemory_ ||
- ((deviceInfo_.gfxipVersion_ >= 800) && (deviceInfo_.gfxipVersion_ < 900))) {
- info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
+ if (!settings().useLightning_) {
+ // Report atomics capability based on GFX IP, control on Hawaii
+ // and Vega10.
+ if (info_.hostUnifiedMemory_ ||
+ ((deviceInfo_.gfxipVersion_ >= 800) && (deviceInfo_.gfxipVersion_ < 900))) {
+ info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
+ }
}
-#endif // !defined(WITH_LIGHTNING_COMPILER)
}
if (settings().checkExtension(ClAmdDeviceAttributeQuery)) {
diff --git a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp
index a691d72606..f0a33021b1 100644
--- a/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/rocm/rocsettings.cpp
@@ -104,23 +104,22 @@ bool Settings::create(bool fullProfile, int gfxipVersion) {
// Enable KHR double precision extension
enableExtension(ClKhrFp64);
-#if !defined(WITH_LIGHTNING_COMPILER)
- // Also enable AMD double precision extension?
- enableExtension(ClAmdFp64);
-#endif // !defined(WITH_LIGHTNING_COMPILER)
enableExtension(ClKhrSubGroups);
enableExtension(ClKhrDepthImages);
enableExtension(ClAmdCopyBufferP2P);
enableExtension(ClKhrFp16);
supportDepthsRGB_ = true;
-#if defined(WITH_LIGHTNING_COMPILER)
- enableExtension(ClAmdAssemblyProgram);
- // enable subnormals for gfx900 and later
- if (gfxipVersion >= 900) {
+ if (useLightning_) {
+ enableExtension(ClAmdAssemblyProgram);
+ // enable subnormals for gfx900 and later
+ if (gfxipVersion >= 900) {
singleFpDenorm_ = true;
+ }
+ } else {
+ // Also enable AMD double precision extension?
+ enableExtension(ClAmdFp64);
}
-#endif // WITH_LIGHTNING_COMPILER
if (gfxipVersion == 902) {
apuSystem_ = true;