diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp index 392d492572..e19f4f3a6b 100644 --- a/rocclr/runtime/device/device.cpp +++ b/rocclr/runtime/device/device.cpp @@ -573,7 +573,7 @@ bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C, namespace device { -Settings::Settings() { +Settings::Settings() : value_(0) { assert((ClExtTotal < (8 * sizeof(extensions_))) && "Too many extensions!"); extensions_ = 0; partialDispatch_ = false; diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp index b2b82adb50..e03c49931c 100644 --- a/rocclr/runtime/device/device.hpp +++ b/rocclr/runtime/device/device.hpp @@ -519,7 +519,8 @@ class Settings : public amd::HeapObject { uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program uint singleFpDenorm_ : 1; //!< Support Single FP Denorm uint gfx10Hsail_ : 1 ; //!< GFX10 HSAIL path - uint reserved_ : 21; + uint useLightning_ : 1; //!< Enable LC path for this device + uint reserved_ : 20; }; uint value_; }; diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp index 1d8dca35fb..e531993025 100644 --- a/rocclr/runtime/device/devprogram.cpp +++ b/rocclr/runtime/device/devprogram.cpp @@ -1044,6 +1044,7 @@ void Program::releaseClBinary() { // ================================================================================================ bool Program::initBuild(amd::option::Options* options) { + compileOptions_ = options->origOptionStr; programOptions_ = options; if (options->oVariables->DumpFlags > 0) { @@ -1054,11 +1055,41 @@ bool Program::initBuild(amd::option::Options* options) { if (!initClBinary()) { return false; } + + const char* devName = machineTarget_; + options->setPerBuildInfo((devName && (devName[0] != '\0')) ? 
devName : "gpu", + clBinary()->getEncryptCode(), true); + + // Elf Binary setup + std::string outFileName; + + // true means hsail required + clBinary()->init(options, true); + if (options->isDumpFlagSet(amd::option::DUMP_BIF)) { + outFileName = options->getDumpFileName(".bin"); + } + + if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), + (outFileName.size() > 0) ? outFileName.c_str() : nullptr)) { + LogError("Setup elf out for gpu failed"); + return false; + } + return true; } // ================================================================================================ -bool Program::finiBuild(bool isBuildGood) { return true; } +bool Program::finiBuild(bool isBuildGood) { + clBinary()->resetElfOut(); + clBinary()->resetElfIn(); + + if (!isBuildGood) { + // Prevent the encrypted binary from leaking out + clBinary()->setBinary(nullptr, 0); + } + + return true; +} // ================================================================================================ cl_int Program::compile(const std::string& sourceCode, diff --git a/rocclr/runtime/device/devprogram.hpp b/rocclr/runtime/device/devprogram.hpp index 179b56f720..332d0b0c5c 100644 --- a/rocclr/runtime/device/devprogram.hpp +++ b/rocclr/runtime/device/devprogram.hpp @@ -197,10 +197,10 @@ class Program : public amd::HeapObject { protected: //! pre-compile setup - virtual bool initBuild(amd::option::Options* options); + bool initBuild(amd::option::Options* options); //! post-compile cleanup - virtual bool finiBuild(bool isBuildGood); + bool finiBuild(bool isBuildGood); /*! \brief Compiles GPU CL program to LLVM binary (compiler frontend) * @@ -236,8 +236,6 @@ class Program : public amd::HeapObject { //! 
return target info virtual const aclTargetInfo& info(const char* str = "") = 0; - virtual bool isElf(const char* bin) const = 0; - virtual bool setKernels( amd::option::Options* options, void* binary, size_t binSize) { return true; } @@ -271,6 +269,8 @@ class Program : public amd::HeapObject { //! Finds the total size of all global variables in the program bool FindGlobalVarSize(void* binary, size_t binSize); + bool isElf(const char* bin) const { return amd::isElfMagic(bin); } + private: //! Compile the device program with LC path bool compileImplLC(const std::string& sourceCode, diff --git a/rocclr/runtime/device/gpu/gpuprogram.cpp b/rocclr/runtime/device/gpu/gpuprogram.cpp index e808e9e434..482909b6ea 100644 --- a/rocclr/runtime/device/gpu/gpuprogram.cpp +++ b/rocclr/runtime/device/gpu/gpuprogram.cpp @@ -23,48 +23,6 @@ namespace gpu { -bool NullProgram::initBuild(amd::option::Options* options) { - if (!device::Program::initBuild(options)) { - return false; - } - - const char* devname = dev().hwInfo()->machineTarget_; - options->setPerBuildInfo((devname && (devname[0] != '\0')) ? devname : "gpu", - clBinary()->getEncryptCode(), - true // FIXME: the dev ptr is used to query the wavefront size. - ); - - // Elf Binary setup - std::string outFileName; - - // Recompile from IL may happen (invoking Kernel::recompil()) to generate correct - // isa code for 7xx. Because of this, force saving AMDIL into the binary. - clBinary()->init(options, (dev().calTarget() <= CAL_TARGET_730)); - if (options->isDumpFlagSet(amd::option::DUMP_BIF)) { - outFileName = options->getDumpFileName(".bin"); - } - - bool useELF64 = dev().settings().use64BitPtr_; - if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32, - (outFileName.size() > 0) ? 
outFileName.c_str() : NULL)) { - LogError("Setup elf out for gpu failed"); - return false; - } - return true; -} - -bool NullProgram::finiBuild(bool isBuildGood) { - clBinary()->resetElfOut(); - clBinary()->resetElfIn(); - - if (!isBuildGood) { - // Prevent the encrypted binary form leaking out - clBinary()->setBinary(NULL, 0); - } - - return device::Program::finiBuild(isBuildGood); -} - const aclTargetInfo& NullProgram::info(const char* str) { acl_error err; std::string arch = GPU_TARGET_INFO_ARCH; @@ -1572,44 +1530,6 @@ HSAILProgram::~HSAILProgram() { amd::hsa::loader::Loader::Destroy(loader_); } -bool HSAILProgram::initBuild(amd::option::Options* options) { - if (!device::Program::initBuild(options)) { - return false; - } - - const char* devName = dev().hwInfo()->machineTarget_; - options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu", - clBinary()->getEncryptCode(), true); - - // Elf Binary setup - std::string outFileName; - - // true means fsail required - clBinary()->init(options, true); - if (options->isDumpFlagSet(amd::option::DUMP_BIF)) { - outFileName = options->getDumpFileName(".bin"); - } - - if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), - (outFileName.size() > 0) ? 
outFileName.c_str() : NULL)) { - LogError("Setup elf out for gpu failed"); - return false; - } - return true; -} - -bool HSAILProgram::finiBuild(bool isBuildGood) { - clBinary()->resetElfOut(); - clBinary()->resetElfIn(); - - if (!isBuildGood) { - // Prevent the encrypted binary form leaking out - clBinary()->setBinary(NULL, 0); - } - - return device::Program::finiBuild(isBuildGood); -} - inline static std::vector splitSpaceSeparatedString(char* str) { std::string s(str); std::stringstream ss(s); diff --git a/rocclr/runtime/device/gpu/gpuprogram.hpp b/rocclr/runtime/device/gpu/gpuprogram.hpp index 102d2c035a..ead578304a 100644 --- a/rocclr/runtime/device/gpu/gpuprogram.hpp +++ b/rocclr/runtime/device/gpu/gpuprogram.hpp @@ -119,12 +119,6 @@ class NullProgram : public device::Program { const std::vector& glbCb() const { return glbCb_; } protected: - //! pre-compile setup for GPU - virtual bool initBuild(amd::option::Options* options); - - //! post-compile setup for GPU - virtual bool finiBuild(bool isBuildGood); - /*! \brief Compiles GPU CL program to LLVM binary (compiler frontend) * * \return True if we successefully compiled a GPU program @@ -261,8 +255,6 @@ class NullProgram : public device::Program { std::vector printf_; //!< Format strings for GPU printf support std::vector glbCb_; //!< Global constant buffers - virtual bool isElf(const char* bin) const { return amd::isElfMagic(bin); } - virtual const aclTargetInfo& info(const char* str = ""); virtual bool saveBinaryAndSetType(type_t type) { return true; } @@ -477,12 +469,6 @@ class HSAILProgram : public device::Program { bool isStaticSampler() const { return (staticSamplers_.size() != 0); } protected: - //! pre-compile setup for GPU - virtual bool initBuild(amd::option::Options* options); - - //! 
post-compile setup for GPU - virtual bool finiBuild(bool isBuildGood); - bool saveBinaryAndSetType(type_t type); virtual bool linkImpl(amd::option::Options* options); @@ -491,11 +477,6 @@ class HSAILProgram : public device::Program { virtual const aclTargetInfo& info(const char* str = ""); - virtual bool isElf(const char* bin) const { - return amd::isElfMagic(bin); - // return false; - } - private: //! Disable default copy constructor HSAILProgram(const HSAILProgram&); diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp index 49ba683736..6d067b51e7 100644 --- a/rocclr/runtime/device/pal/paldevice.cpp +++ b/rocclr/runtime/device/pal/paldevice.cpp @@ -74,7 +74,7 @@ bool NullDevice::init() { // Comment out this section for SWDEV-146950 since Kalindi and Mullins // does not works for LC offline compilation without knowing which GFXIP // should be used for them. -#ifndef WITH_LIGHTNING_COMPILER +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) // Loop through all supported devices and create each of them for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++id) { @@ -110,7 +110,7 @@ bool NullDevice::init() { } } } -#endif +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) // Loop through all supported devices and create each of them for (uint id = 0; @@ -272,6 +272,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, info_.wavefrontWidth_ = (ipLevel >= Pal::GfxIpLevel::GfxIp10) ? 
32 : 64; + if (settings().useLightning_) { #if defined(WITH_LIGHTNING_COMPILER) // create compilation object with cache support int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100; @@ -296,17 +297,37 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, cacheCompilation_.reset(compObj); #endif + } else { +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) + const char* library = getenv("HSA_COMPILER_LIBRARY"); + aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8), + library, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + AMD_OCL_SC_LIB }; + // Initialize the compiler handle + acl_error error; + compiler_ = aclCompilerInit(&opts, &error); + if (error != ACL_SUCCESS) { + LogError("Error initializing the compiler"); + return false; + } +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) + } return true; } device::Program* NullDevice::createProgram(amd::option::Options* options) { device::Program* program; -#if defined(WITH_LIGHTNING_COMPILER) - program = new LightningProgram(*this); -#else // !defined(WITH_LIGHTNING_COMPILER) - program = new HSAILProgram(*this); -#endif // defined(WITH_LIGHTNING_COMPILER) + if (settings().useLightning_) { + program = new LightningProgram(*this); + } else { + program = new HSAILProgram(*this); + } if (program == nullptr) { LogError("Memory allocation has failed!"); @@ -506,12 +527,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, } ::strcpy(info_.vendor_, "Advanced Micro Devices, Inc."); ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)", -#if defined(WITH_LIGHTNING_COMPILER) - ",LC" -#else // ! defined(WITH_LIGHTNING_COMPILER) - ",HSAIL" -#endif // ! defined(WITH_LIGHTNING_COMPILER) - ); + settings().useLightning_ ? 
",LC" : ",HSAIL"); info_.profile_ = "FULL_PROFILE"; if (settings().oclVersion_ >= OpenCL20) { @@ -922,6 +938,52 @@ bool Device::create(Pal::IDevice* device) { allocedMem[i] = 0; } + if (settings().useLightning_) { +#if defined(WITH_LIGHTNING_COMPILER) + // create compilation object with cache support + int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100; + int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10; + int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10; + + // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) + // with dash as delimiter to be compatible with Windows directory name + std::ostringstream cacheTarget; + cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; + if (isXNACKSupported) { + cacheTarget << "-xnack"; + } + + amd::CacheCompilation* compObj = new amd::CacheCompilation( + cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); + if (!compObj) { + LogError("Unable to create cache compilation object!"); + return false; + } + + cacheCompilation_.reset(compObj); +#endif + } + else { +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) + const char* library = getenv("HSA_COMPILER_LIBRARY"); + aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8), + library, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + AMD_OCL_SC_LIB }; + // Initialize the compiler handle + acl_error error; + compiler_ = aclCompilerInit(&opts, &error); + if (error != ACL_SUCCESS) { + LogError("Error initializing the compiler"); + return false; + } +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) + } + // Allocate SRD manager srdManager_ = new SrdManager(*this, std::max(HsaImageObjectSize, HsaSamplerObjectSize), 64 * Ki); if (srdManager_ == nullptr) { @@ -933,30 +995,6 @@ bool Device::create(Pal::IDevice* device) { hwDebugMgr_ = new GpuDebugManager(this); } -#if defined(WITH_LIGHTNING_COMPILER) - // create compilation object with cache support - int 
gfxipMajor = hwInfo()->gfxipVersionLC_ / 100; - int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10; - int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10; - - // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) - // with dash as delimiter to be compatible with Windows directory name - std::ostringstream cacheTarget; - cacheTarget << "AMD-AMDGPU-" << gfxipMajor << "-" << gfxipMinor << "-" << gfxipStepping; - if (isXNACKSupported) { - cacheTarget << "-xnack"; - } - - amd::CacheCompilation* compObj = new amd::CacheCompilation( - cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET); - if (!compObj) { - LogError("Unable to create cache compilation object!"); - return false; - } - - cacheCompilation_.reset(compObj); -#endif - return true; } @@ -1090,11 +1128,12 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) { device::Program* Device::createProgram(amd::option::Options* options) { device::Program* program; -#if defined(WITH_LIGHTNING_COMPILER) - program = new LightningProgram(*this); -#else // !defined(WITH_LIGHTNING_COMPILER) - program = new HSAILProgram(*this); -#endif // defined(WITH_LIGHTNING_COMPILER) + if (settings().useLightning_) { + program = new LightningProgram(*this); + } + else { + program = new HSAILProgram(*this); + } if (program == nullptr) { LogError("We failed memory allocation for program!"); } @@ -1154,25 +1193,6 @@ bool Device::init() { bool useDeviceList = false; requestedDevices_t requestedDevices; -#if !defined(WITH_LIGHTNING_COMPILER) - const char* library = getenv("HSA_COMPILER_LIBRARY"); - aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8), - library, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - AMD_OCL_SC_LIB}; - // Initialize the compiler handle - acl_error error; - compiler_ = aclCompilerInit(&opts, &error); - if (error != ACL_SUCCESS) { - LogError("Error initializing the compiler"); - return false; - } -#endif // !defined(WITH_LIGHTNING_COMPILER) - 
size_t size = Pal::GetPlatformSize(); platformObj_ = new char[size]; Pal::PlatformCreateInfo info = {}; @@ -1242,12 +1262,12 @@ void Device::tearDown() { platform_ = nullptr; } -#if !defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) if (compiler_ != nullptr) { aclCompilerFini(compiler_); compiler_ = nullptr; } -#endif // !defined(WITH_LIGHTNING_COMPILER) +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) } Memory* Device::getGpuMemory(amd::Memory* mem) const { @@ -2187,15 +2207,15 @@ bool Device::createBlitProgram() { if (settings().oclVersion_ >= OpenCL20) { size_t loc = sch.find("%s"); sch.replace(loc, 2, iDev()->GetDispatchKernelSource()); -#if defined(WITH_LIGHTNING_COMPILER) - // For LC, replace "amd_scheduler" with "amd_scheduler_pal" - static const char AmdScheduler[] = "amd_scheduler"; - static const char AmdSchedulerPal[] = "amd_scheduler_pal"; - loc = sch.find(AmdScheduler); - sch.replace(loc, strlen(AmdScheduler), AmdSchedulerPal); - loc = sch.find(AmdScheduler, (loc + strlen(AmdSchedulerPal))); - sch.replace(loc, strlen(AmdScheduler), AmdSchedulerPal); -#endif + if (settings().useLightning_) { + // For LC, replace "amd_scheduler" with "amd_scheduler_pal" + static const char AmdScheduler[] = "amd_scheduler"; + static const char AmdSchedulerPal[] = "amd_scheduler_pal"; + loc = sch.find(AmdScheduler); + sch.replace(loc, sizeof(AmdScheduler) - 1, AmdSchedulerPal); + loc = sch.find(AmdScheduler, (loc + sizeof(AmdSchedulerPal) - 1)); + sch.replace(loc, sizeof(AmdScheduler) - 1, AmdSchedulerPal); + } scheduler = sch.c_str(); ocl20 = "-cl-std=CL2.0"; } diff --git a/rocclr/runtime/device/pal/palkernel.cpp b/rocclr/runtime/device/pal/palkernel.cpp index 504285f256..fc483934e1 100644 --- a/rocclr/runtime/device/pal/palkernel.cpp +++ b/rocclr/runtime/device/pal/palkernel.cpp @@ -8,9 +8,14 @@ #include "device/pal/palsched.hpp" #include "platform/commandqueue.hpp" #include "utils/options.hpp" 
- #include "acl.h" +#if defined(WITH_LIGHTNING_COMPILER) +#include "llvm/Support/AMDGPUMetadata.h" + +typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; +#endif // defined(WITH_LIGHTNING_COMPILER) + #include #include #include @@ -85,9 +90,7 @@ HSAILKernel::~HSAILKernel() { } bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"Should not reach here"); -#else // !defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) acl_error error = ACL_SUCCESS; std::string openClKernelName = openclMangledName(name()); flags_.internalKernel_ = @@ -240,7 +243,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) { delete[] VecTypeHint; } -#endif // !defined(WITH_LIGHTNING_COMPILER) +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) return true; } @@ -370,12 +373,11 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments( return hsaDisp; } -#if defined(WITH_LIGHTNING_COMPILER) - const LightningProgram& LightningKernel::prog() const { return reinterpret_cast(prog_); } +#if defined(WITH_LIGHTNING_COMPILER) static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const std::string& name) { for (const KernelMD& kernelMD : programMD->mKernels) { if (kernelMD.mName == name) { @@ -384,8 +386,10 @@ static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const s } return nullptr; } +#endif // defined(WITH_LIGHTNING_COMPILER) bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { +#if defined(WITH_LIGHTNING_COMPILER) flags_.internalKernel_ = (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? 
true : false; @@ -462,10 +466,8 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) { waveLimiter_.enable(); */ - +#endif // defined(WITH_LIGHTNING_COMPILER) return true; } -#endif // defined(WITH_LIGHTNING_COMPILER) - } // namespace pal diff --git a/rocclr/runtime/device/pal/palkernel.hpp b/rocclr/runtime/device/pal/palkernel.hpp index d94ae849d3..77f6106c8b 100644 --- a/rocclr/runtime/device/pal/palkernel.hpp +++ b/rocclr/runtime/device/pal/palkernel.hpp @@ -16,13 +16,6 @@ #include "device/devwavelimiter.hpp" #include "hsa.h" -#if defined(WITH_LIGHTNING_COMPILER) -#include "llvm/Support/AMDGPUMetadata.h" - -typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; -#endif // defined(WITH_LIGHTNING_COMPILER) - namespace amd { namespace hsa { namespace loader { @@ -118,7 +111,6 @@ class HSAILKernel : public device::Kernel { size_t codeSize_; //!< Size of ISA code }; -#if defined(WITH_LIGHTNING_COMPILER) class LightningKernel : public HSAILKernel { public: LightningKernel(const std::string& name, HSAILProgram* prog, const std::string& compileOptions) @@ -130,6 +122,5 @@ class LightningKernel : public HSAILKernel { //! Initializes the metadata required for this kernel, bool init(amd::hsa::loader::Symbol* symbol); }; -#endif // defined(WITH_LIGHTNING_COMPILER) /*@}*/} // namespace pal diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp index 024766be4c..d90865e558 100644 --- a/rocclr/runtime/device/pal/palprogram.cpp +++ b/rocclr/runtime/device/pal/palprogram.cpp @@ -198,43 +198,6 @@ HSAILProgram::~HSAILProgram() { amd::hsa::loader::Loader::Destroy(loader_); } -bool HSAILProgram::initBuild(amd::option::Options* options) { - if (!device::Program::initBuild(options)) { - return false; - } - - const char* devName = dev().hwInfo()->machineTarget_; - options->setPerBuildInfo((devName && (devName[0] != '\0')) ? 
devName : "gpu", - clBinary()->getEncryptCode(), true); - - // Elf Binary setup - std::string outFileName; - - // true means fsail required - clBinary()->init(options, true); - if (options->isDumpFlagSet(amd::option::DUMP_BIF)) { - outFileName = options->getDumpFileName(".bin"); - } - - if (!clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), - (outFileName.size() > 0) ? outFileName.c_str() : nullptr)) { - LogError("Setup elf out for gpu failed"); - return false; - } - return true; -} - -bool HSAILProgram::finiBuild(bool isBuildGood) { - clBinary()->resetElfOut(); - clBinary()->resetElfIn(); - - if (!isBuildGood) { - // Prevent the encrypted binary form leaking out - clBinary()->setBinary(nullptr, 0); - } - - return device::Program::finiBuild(isBuildGood); -} inline static std::vector splitSpaceSeparatedString(char* str) { std::string s(str); @@ -245,10 +208,7 @@ inline static std::vector splitSpaceSeparatedString(char* str) { } bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) { -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"Should not reach here"); - return false; -#else // !defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) // ACL_TYPE_CG stage is not performed for offline compilation hsa_agent_t agent; agent.handle = 1; @@ -324,8 +284,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_ } DestroySegmentCpuAccess(); +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) return true; -#endif // !defined(WITH_LIGHTNING_COMPILER) } bool HSAILProgram::createBinary(amd::option::Options* options) { return true; } @@ -354,9 +314,7 @@ void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { } const aclTargetInfo& HSAILProgram::info(const char* str) { -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"Should not reach here"); -#else // !defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_COMPILER_LIB) || 
!defined(WITH_LIGHTNING_COMPILER) acl_error err; std::string arch = "hsail"; if (dev().settings().use64BitPtr_) { @@ -367,14 +325,12 @@ const aclTargetInfo& HSAILProgram::info(const char* str) { if (err != ACL_SUCCESS) { LogWarning("aclGetTargetInfo failed"); } -#endif // !defined(WITH_LIGHTNING_COMPILER) +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) return info_; } bool HSAILProgram::saveBinaryAndSetType(type_t type) { -#if defined(WITH_LIGHTNING_COMPILER) - assert(!"Should not reach here"); -#else // !defined(WITH_LIGHTNING_COMPILER) +#if defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) // Write binary to memory if (rawBinary_ != nullptr) { // Free memory containing rawBinary @@ -389,7 +345,7 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) { setBinary(static_cast(rawBinary_), size); // Set the type of binary setType(type); -#endif // !defined(WITH_LIGHTNING_COMPILER) +#endif // defined(WITH_COMPILER_LIB) || !defined(WITH_LIGHTNING_COMPILER) return true; } @@ -647,16 +603,20 @@ static hsa_status_t GetGlobalVarNamesCallback( } return HSA_STATUS_SUCCESS; } +#endif // defined(WITH_LIGHTNING_COMPILER) bool LightningProgram::createBinary(amd::option::Options* options) { +#if defined(WITH_LIGHTNING_COMPILER) if (!clBinary()->createElfBinary(options->oVariables->BinEncrypt, type())) { LogError("Failed to create ELF binary image!"); return false; } +#endif // defined(WITH_LIGHTNING_COMPILER) return true; } bool LightningProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) { +#if defined(WITH_LIGHTNING_COMPILER) hsa_agent_t agent; agent.handle = 1; @@ -731,10 +691,8 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s hasGlobalStores_ = (glbVarNames.size() != 0) ? 
true : false; DestroySegmentCpuAccess(); - +#endif // defined(WITH_LIGHTNING_COMPILER) return true; } -#endif // defined(WITH_LIGHTNING_COMPILER) - } // namespace pal diff --git a/rocclr/runtime/device/pal/palprogram.hpp b/rocclr/runtime/device/pal/palprogram.hpp index c1daa131cb..d463a09615 100644 --- a/rocclr/runtime/device/pal/palprogram.hpp +++ b/rocclr/runtime/device/pal/palprogram.hpp @@ -6,12 +6,6 @@ #include "device/pal/palkernel.hpp" #include "amd_hsa_loader.hpp" -#if defined(WITH_LIGHTNING_COMPILER) -#include "llvm/Support/AMDGPUMetadata.h" - -typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; -#endif // defined(WITH_LIGHTNING_COMPILER) - namespace amd { namespace option { class Options; @@ -175,22 +169,12 @@ class HSAILProgram : public device::Program { } protected: - //! pre-compile setup for GPU - virtual bool initBuild(amd::option::Options* options); - - //! post-compile setup for GPU - virtual bool finiBuild(bool isBuildGood); - bool saveBinaryAndSetType(type_t type); virtual bool createBinary(amd::option::Options* options); virtual const aclTargetInfo& info(const char* str = ""); - virtual bool isElf(const char* bin) const { - return amd::isElfMagic(bin); - } - virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize) override; //! Destroys CPU allocations in the code segment @@ -222,7 +206,6 @@ class HSAILProgram : public device::Program { PALHSALoaderContext loaderContext_; //!< Context for HSA Loader }; -#if defined(WITH_LIGHTNING_COMPILER) //! 
\class Lightning Compiler Program class LightningProgram : public HSAILProgram { public: @@ -246,6 +229,5 @@ class LightningProgram : public HSAILProgram { virtual bool createBinary(amd::option::Options* options) override; }; -#endif // defined(WITH_LIGHTNING_COMPILER) /*@}*/} // namespace pal diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp index 7493e91618..cb5528e070 100644 --- a/rocclr/runtime/device/pal/palsettings.cpp +++ b/rocclr/runtime/device/pal/palsettings.cpp @@ -143,6 +143,7 @@ Settings::Settings() { std::min(static_cast(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_); maxCmdBuffers_ = 12; + useLightning_ = GPU_ENABLE_LC; } bool Settings::create(const Pal::DeviceProperties& palProp, diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp index a63c9f9767..93fba7bc5b 100644 --- a/rocclr/runtime/device/rocm/rockernel.cpp +++ b/rocclr/runtime/device/rocm/rockernel.cpp @@ -9,6 +9,14 @@ #ifndef WITHOUT_HSA_BACKEND +#if defined(WITH_LIGHTNING_COMPILER) +#include "driver/AmdCompiler.h" +#include "llvm/Support/AMDGPUMetadata.h" + +typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; +typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; +#endif // defined(WITH_LIGHTNING_COMPILER) + namespace roc { Kernel::Kernel(std::string name, Program* prog, const uint64_t& kernelCodeHandle, diff --git a/rocclr/runtime/device/rocm/rocprogram.cpp b/rocclr/runtime/device/rocm/rocprogram.cpp index 7999590bb2..be1e6f7003 100644 --- a/rocclr/runtime/device/rocm/rocprogram.cpp +++ b/rocclr/runtime/device/rocm/rocprogram.cpp @@ -1,8 +1,6 @@ // // Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. 
// - - #ifndef WITHOUT_HSA_BACKEND #include "rocprogram.hpp" @@ -14,8 +12,8 @@ #include "driver/AmdCompiler.h" #include "libraries.amdgcn.inc" #endif // defined(WITH_LIGHTNING_COMPILER) -#include "utils/bif_section_labels.hpp" +#include "utils/bif_section_labels.hpp" #include "amd_hsa_kernel_code.h" #include @@ -113,49 +111,6 @@ bool Program::initClBinary(char* binaryIn, size_t size) { return clBinary()->setBinary(bin, sz, (decryptedBin != nullptr)); } - -bool Program::initBuild(amd::option::Options* options) { - compileOptions_ = options->origOptionStr; - - if (!device::Program::initBuild(options)) { - return false; - } - - const char* devName = dev().deviceInfo().machineTarget_; - options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu", - clBinary()->getEncryptCode(), true); - - // Elf Binary setup - std::string outFileName; - - // true means hsail required - clBinary()->init(options, true); - if (options->isDumpFlagSet(amd::option::DUMP_BIF)) { - outFileName = options->getDumpFileName(".bin"); - } - - bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64; - if (!clBinary()->setElfOut(useELF64 ? ELFCLASS64 : ELFCLASS32, - (outFileName.size() > 0) ? outFileName.c_str() : nullptr)) { - LogError("Setup elf out for gpu failed"); - return false; - } - return true; -} - -// ! 
post-compile setup for GPU -bool Program::finiBuild(bool isBuildGood) { - clBinary()->resetElfOut(); - clBinary()->resetElfIn(); - - if (!isBuildGood) { - // Prevent the encrypted binary form leaking out - clBinary()->setBinary(nullptr, 0); - } - - return device::Program::finiBuild(isBuildGood); -} - #if defined(WITH_COMPILER_LIB) HSAILProgram::HSAILProgram(roc::NullDevice& device) : roc::Program(device) { xnackEnabled_ = dev().deviceInfo().xnackEnabled_; diff --git a/rocclr/runtime/device/rocm/rocprogram.hpp b/rocclr/runtime/device/rocm/rocprogram.hpp index 89f2f88d2b..178011ba4a 100644 --- a/rocclr/runtime/device/rocm/rocprogram.hpp +++ b/rocclr/runtime/device/rocm/rocprogram.hpp @@ -12,16 +12,6 @@ #include #include "rocdevice.hpp" -#if defined(WITH_LIGHTNING_COMPILER) -#include "driver/AmdCompiler.h" -#include "llvm/Support/AMDGPUMetadata.h" - -typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD; -typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD; -typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD; - -#endif // defined(WITH_LIGHTNING_COMPILER) - //! \namespace roc HSA Device Implementation namespace roc { @@ -50,12 +40,6 @@ class Program : public device::Program { hsa_executable_t hsaExecutable() const { return hsaExecutable_; } protected: - //! pre-compile setup for GPU - virtual bool initBuild(amd::option::Options* options); - - //! post-compile setup for GPU - virtual bool finiBuild(bool isBuildGood); - /*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen) * * \return The build error code @@ -66,11 +50,6 @@ class Program : public device::Program { virtual const aclTargetInfo& info(const char* str = "") { return info_; } - virtual bool isElf(const char* bin) const { - return amd::isElfMagic(bin); - // return false; - } - protected: //! 
Disable default copy constructor Program(const Program&) = delete; diff --git a/rocclr/runtime/device/rocm/rocsettings.cpp b/rocclr/runtime/device/rocm/rocsettings.cpp index 31e235eeb1..a691d72606 100644 --- a/rocclr/runtime/device/rocm/rocsettings.cpp +++ b/rocclr/runtime/device/rocm/rocsettings.cpp @@ -69,6 +69,8 @@ Settings::Settings() { // Device enqueuing settings numDeviceEvents_ = 1024; numWaitEvents_ = 8; + + useLightning_ = GPU_ENABLE_LC; } bool Settings::create(bool fullProfile, int gfxipVersion) { diff --git a/rocclr/runtime/platform/program.cpp b/rocclr/runtime/platform/program.cpp index 9b68004437..336c95a805 100644 --- a/rocclr/runtime/platform/program.cpp +++ b/rocclr/runtime/platform/program.cpp @@ -48,13 +48,16 @@ const Symbol* Program::findSymbol(const char* kernelName) const { cl_int Program::addDeviceProgram(Device& device, const void* image, size_t length, amd::option::Options* options) { - if (image != NULL && !amd::isElfMagic((const char*)image) -#if !defined(WITH_LIGHTNING_COMPILER) - && !aclValidateBinaryImage( - image, length, language_ == SPIRV ? BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM) -#endif // !defined(WITH_LIGHTNING_COMPILER) - ) { - return CL_INVALID_BINARY; + if (image != NULL && !amd::isElfMagic((const char*)image)) { + if (device.settings().useLightning_) { + return CL_INVALID_BINARY; + } +#if defined(WITH_COMPILER_LIB) + else if (!aclValidateBinaryImage( + image, length, language_ == SPIRV ? 
BINARY_TYPE_SPIRV : BINARY_TYPE_ELF | BINARY_TYPE_LLVM)) { + return CL_INVALID_BINARY; + } +#endif // defined(WITH_COMPILER_LIB) } // Check if the device is already associated with this program @@ -307,9 +310,7 @@ cl_int Program::link(const std::vector& devices, size_t numInputs, } if (isHSAILTarget(*aclutGetTargetInfo(aclBin))) { parsedOptions.oVariables->Frontend = "clang"; -#if defined(WITH_LIGHTNING_COMPILER) - parsedOptions.oVariables->Legacy = true; -#endif // defined(WITH_LIGHTNING_COMPILER) + parsedOptions.oVariables->Legacy = it->settings().useLightning_; } else if (isAMDILTarget(*aclutGetTargetInfo(aclBin))) { parsedOptions.oVariables->Frontend = "edg"; } diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp index be54d11a72..12ac115f83 100644 --- a/rocclr/runtime/utils/flags.hpp +++ b/rocclr/runtime/utils/flags.hpp @@ -203,6 +203,8 @@ release(uint, PAL_RGP_DISP_COUNT, 50, \ "The number of dispatches for RGP capture with SQTT") \ release(bool, GPU_FORCE_WAVE_SIZE_32, false, \ "Forces WaveSize32 compilation in SC") \ +release(bool, GPU_ENABLE_LC, IS_LIGHTNING, \ + "Enables LC path") \ release(uint, GPU_MAX_COMMAND_BUFFERS, 8, \ "The maximum number of command buffers allocated per queue") \ release(cstring, HIP_VISIBLE_DEVICES, "", \