// // Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. // #include "device/device.hpp" #include "thread/atomic.hpp" #include "thread/monitor.hpp" #if defined(WITH_HSA_DEVICE) #include "device/rocm/rocdevice.hpp" extern amd::AppProfile* rocCreateAppProfile(); #endif #if defined(WITH_PAL_DEVICE) // namespace pal { extern bool PalDeviceLoad(); extern void PalDeviceUnload(); //} #endif // WITH_PAL_DEVICE #if defined(WITH_GPU_DEVICE) extern bool DeviceLoad(); extern void DeviceUnload(); #endif // WITH_GPU_DEVICE #include "platform/runtime.hpp" #include "platform/program.hpp" #include "thread/monitor.hpp" #include "amdocl/cl_common.hpp" #include "utils/options.hpp" #include "utils/versions.hpp" // AMD_PLATFORM_INFO #if defined(HAVE_BLOWFISH_H) #include "blowfish/oclcrypt.hpp" #endif #include "utils/bif_section_labels.hpp" #include "utils/libUtils.h" #include "spirv/spirvUtils.h" #include #include #include #include #include #include #include #include #include namespace device { extern const char* BlitSourceCode; } namespace amd { std::vector* Device::devices_ = NULL; AppProfile Device::appProfile_; amd::Monitor MemObjMap::AllocatedLock_("Guards SVM allocation list"); std::map MemObjMap::MemObjMap_; size_t MemObjMap::size() { amd::ScopedLock lock(AllocatedLock_); return MemObjMap_.size(); } void MemObjMap::AddMemObj(const void* k, amd::Memory* v) { amd::ScopedLock lock(AllocatedLock_); MemObjMap_.insert({reinterpret_cast(k), v}); } void MemObjMap::RemoveMemObj(const void* k) { amd::ScopedLock lock(AllocatedLock_); MemObjMap_.erase(reinterpret_cast(k)); } amd::Memory* MemObjMap::FindMemObj(const void* k) { amd::ScopedLock lock(AllocatedLock_); uintptr_t key = reinterpret_cast(k); auto it = MemObjMap_.upper_bound(key); if (it == MemObjMap_.begin()) { return NULL; } --it; amd::Memory* mem = it->second; if (key >= it->first && key < (it->first + mem->getSize())) { // the k is in the range return mem; } else { return NULL; } } Device::BlitProgram::~BlitProgram() { if (program_ != NULL) { program_->release(); } } bool Device::BlitProgram::create(amd::Device* device, const char* extraKernels, const char* extraOptions) { std::vector devices; devices.push_back(device); std::string kernels(device::BlitSourceCode); if (extraKernels != NULL) { kernels += extraKernels; } // Create a program with all blit kernels program_ = new Program(*context_, kernels.c_str(), Program::OpenCL_C); if (program_ == NULL) { return false; } // Build all kernels std::string opt = "-cl-internal-kernel " #if !defined(WITH_LIGHTNING_COMPILER) "-Wf,--force_disable_spir -fno-lib-no-inline " "-fno-sc-keep-calls " #endif // !defined(WITH_LIGHTNING_COMPILER) ; if (extraOptions != NULL) { opt += extraOptions; } if (!GPU_DUMP_BLIT_KERNELS) { opt += " -fno-enable-dump"; } if (CL_SUCCESS != program_->build(devices, opt.c_str(), NULL, NULL, GPU_DUMP_BLIT_KERNELS)) { return false; } return true; } bool Device::init() { assert(!Runtime::initialized() && "initialize only once"); bool ret = false; devices_ = NULL; appProfile_.init(); // IMPORTANT: Note that we are initialiing HSA stack first and then // GPU stack. The order of initialization is signiicant and if changed // amd::Device::registerDevice() must be accordingly modified. #if defined(WITH_HSA_DEVICE) // Return value of roc::Device::init() // If returned false, error initializing HSA stack. // If returned true, either HSA not installed or HSA stack // successfully initialized. if (!roc::Device::init()) { // abort() commentted because this is the only indication // that KFD is not installed. // Ignore the failure and assume KFD is not installed. // abort(); } ret |= roc::NullDevice::init(); #endif // WITH_HSA_DEVICE #if defined(WITH_GPU_DEVICE) if (GPU_ENABLE_PAL != 1) { ret |= DeviceLoad(); } #endif // WITH_GPU_DEVICE #if defined(WITH_PAL_DEVICE) if (GPU_ENABLE_PAL != 0) { ret |= PalDeviceLoad(); } #endif // WITH_PAL_DEVICE return ret; } void Device::tearDown() { if (devices_ != NULL) { for (uint i = 0; i < devices_->size(); ++i) { delete devices_->at(i); } devices_->clear(); delete devices_; } #if defined(WITH_HSA_DEVICE) roc::Device::tearDown(); #endif // WITH_HSA_DEVICE #if defined(WITH_GPU_DEVICE) if (GPU_ENABLE_PAL != 1) { DeviceUnload(); } #endif // WITH_GPU_DEVICE #if defined(WITH_PAL_DEVICE) if (GPU_ENABLE_PAL != 0) { PalDeviceUnload(); } #endif // WITH_PAL_DEVICE } Device::Device() : settings_(NULL), online_(true), blitProgram_(NULL), hwDebugMgr_(NULL), vaCacheAccess_(nullptr), vaCacheMap_(nullptr) { memset(&info_, '\0', sizeof(info_)); } Device::~Device() { CondLog((vaCacheMap_ != nullptr) && (vaCacheMap_->size() != 0), "Application didn't unmap all host memory!"); delete vaCacheMap_; delete vaCacheAccess_; // Destroy device settings if (settings_ != NULL) { delete settings_; } if (info_.extensions_ != NULL) { delete[] info_.extensions_; } } bool Device::create() { vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true); if (NULL == vaCacheAccess_) { return false; } vaCacheMap_ = new std::map(); if (NULL == vaCacheMap_) { return false; } return true; } void Device::registerDevice() { assert(Runtime::singleThreaded() && "this is not thread-safe"); static bool defaultIsAssigned = false; if (devices_ == NULL) { devices_ = new std::vector; } if (info_.available_) { if (!defaultIsAssigned && online_) { defaultIsAssigned = true; info_.type_ |= CL_DEVICE_TYPE_DEFAULT; } } devices_->push_back(this); } void Device::addVACache(device::Memory* memory) const { // Make sure system memory has direct access if (memory->isHostMemDirectAccess()) { // VA cache access must be serialised amd::ScopedLock lk(*vaCacheAccess_); void* start = memory->owner()->getHostMem(); size_t offset; device::Memory* doubleMap = findMemoryFromVA(start, &offset); if (doubleMap == nullptr) { // Insert the new entry vaCacheMap_->insert( std::pair(reinterpret_cast(start), memory)); } else { LogError("Unexpected double map() call from the app!"); } } } void Device::removeVACache(const device::Memory* memory) const { // Make sure system memory has direct access if (memory->isHostMemDirectAccess() && memory->owner()) { // VA cache access must be serialised amd::ScopedLock lk(*vaCacheAccess_); void* start = memory->owner()->getHostMem(); vaCacheMap_->erase(reinterpret_cast(start)); } } device::Memory* Device::findMemoryFromVA(const void* ptr, size_t* offset) const { // VA cache access must be serialised amd::ScopedLock lk(*vaCacheAccess_); uintptr_t key = reinterpret_cast(ptr); auto it = vaCacheMap_->upper_bound(reinterpret_cast(ptr)); if (it == vaCacheMap_->begin()) { return nullptr; } --it; device::Memory* mem = it->second; if (key >= it->first && key < (it->first + mem->size())) { // ptr is in the range *offset = key - it->first; return mem; } return nullptr; } bool Device::IsTypeMatching(cl_device_type type, bool offlineDevices) { if (!(isOnline() || offlineDevices)) { return false; } return (info_.type_ & type) != 0; } std::vector Device::getDevices(cl_device_type type, bool offlineDevices) { std::vector result; if (devices_ == NULL) { return result; } // Create the list of available devices for (const auto& it : *devices_) { // Check if the device type is matched if (it->IsTypeMatching(type, offlineDevices)) { result.push_back(it); } } return result; } size_t Device::numDevices(cl_device_type type, bool offlineDevices) { size_t result = 0; if (devices_ == NULL) { return 0; } for (const auto& it : *devices_) { // Check if the device type is matched if (it->IsTypeMatching(type, offlineDevices)) { ++result; } } return result; } bool Device::getDeviceIDs(cl_device_type deviceType, cl_uint numEntries, cl_device_id* devices, cl_uint* numDevices, bool offlineDevices) { if (numDevices != NULL && devices == NULL) { *numDevices = (cl_uint)amd::Device::numDevices(deviceType, offlineDevices); return (*numDevices > 0) ? true : false; } assert(devices != NULL && "check the code above"); std::vector ret = amd::Device::getDevices(deviceType, offlineDevices); if (ret.size() == 0) { *not_null(numDevices) = 0; return false; } auto it = ret.cbegin(); cl_uint count = std::min(numEntries, (cl_uint)ret.size()); while (count--) { *devices++ = as_cl(*it++); --numEntries; } while (numEntries--) { *devices++ = (cl_device_id)0; } *not_null(numDevices) = (cl_uint)ret.size(); return true; } char* Device::getExtensionString() { std::stringstream extStream; size_t size; char* result = NULL; // Generate the extension string for (uint i = 0; i < ClExtTotal; ++i) { if (settings().checkExtension(i)) { extStream << OclExtensionsString[i]; } } size = extStream.str().size() + 1; // Create a single string with all extensions result = new char[size]; if (result != NULL) { memcpy(result, extStream.str().data(), (size - 1)); result[size - 1] = 0; } return result; } #if defined(WITH_LIGHTNING_COMPILER) CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache, bool resetCache) : codeCache_(targetStr, 0, AMD_PLATFORM_BUILD_NUMBER, postfix), isCodeCacheEnabled_(enableCache) { if (resetCache) { // clean up the cached data of the target device StringCache emptyCache(targetStr, 0, 0, postfix); } } bool CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C, std::vector& inputs, amd::opencl_driver::Buffer* output, std::vector& options, std::string& buildLog) { std::string cacheOpt; cacheOpt = std::accumulate(begin(options), end(options), cacheOpt); bool ret = false; bool cachedCodeExist = false; std::vector bcSet; if (isCodeCacheEnabled_) { using namespace amd::opencl_driver; for (auto& input : inputs) { assert(input->Type() == DT_LLVM_BC); BufferReference* bc = reinterpret_cast(input); StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()}; bcSet.push_back(cachedData); } std::string dstData = ""; if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData, "Link LLVM Bitcodes")) { std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf())); cachedCodeExist = true; } } if (!cachedCodeExist) { if (!C->LinkLLVMBitcode(inputs, output, options)) { return false; } if (isCodeCacheEnabled_) { std::string dstData(output->Buf().data(), output->Buf().size()); if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) { buildLog += "Warning: Failed to caching codes.\n"; LogWarning("Caching codes failed!"); } } } return true; } bool CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C, std::vector& inputs, amd::opencl_driver::Buffer* output, std::vector& options, std::string& buildLog) { std::string cacheOpt; for (uint i = 0; i < options.size(); i++) { // skip the header file option, which is associated with the -cl-std= option if (options[i].compare("-include-pch") == 0) { i++; continue; } cacheOpt += options[i]; } bool ret = false; bool cachedCodeExist = false; std::vector bcSet; if (isCodeCacheEnabled_) { using namespace amd::opencl_driver; bool checkCache = true; for (auto& input : inputs) { if (input->Type() == DT_CL) { BufferReference* bc = reinterpret_cast(input); StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()}; bcSet.push_back(cachedData); } else if (input->Type() == DT_CL_HEADER) { FileReference* bcFile = reinterpret_cast(input); std::string bc; bcFile->ReadToString(bc); StringCache::CachedData cachedData = {bc.c_str(), bc.size()}; bcSet.push_back(cachedData); } else { buildLog += "Error: unsupported bitcode type for checking cache.\n"; checkCache = false; break; } } std::string dstData = ""; if (checkCache && codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData, "Compile to LLVM Bitcodes")) { std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf())); cachedCodeExist = true; } } if (!cachedCodeExist) { if (!C->CompileToLLVMBitcode(inputs, output, options)) { return false; } if (isCodeCacheEnabled_) { std::string dstData(output->Buf().data(), output->Buf().size()); if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) { buildLog += "Warning: Failed to caching codes.\n"; LogWarning("Caching codes failed!"); } } } return true; } bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C, std::vector& inputs, amd::opencl_driver::Buffer* output, std::vector& options, std::string& buildLog) { std::string cacheOpt; cacheOpt = std::accumulate(begin(options), end(options), cacheOpt); bool ret = false; bool cachedCodeExist = false; std::vector bcSet; if (isCodeCacheEnabled_) { for (auto& input : inputs) { assert(input->Type() == amd::opencl_driver::DT_LLVM_BC); amd::opencl_driver::Buffer* bc = (amd::opencl_driver::Buffer*)input; StringCache::CachedData cachedData = {bc->Buf().data(), bc->Size()}; bcSet.push_back(cachedData); } std::string dstData = ""; if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData, "Compile and Link Executable")) { std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf())); cachedCodeExist = true; } } if (!cachedCodeExist) { if (!C->CompileAndLinkExecutable(inputs, output, options)) { return false; } if (isCodeCacheEnabled_) { std::string dstData(output->Buf().data(), output->Buf().size()); if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) { buildLog += "Warning: Failed to caching codes.\n"; LogWarning("Caching codes failed!"); } } } return true; } #endif // defined(WITH_LIGHTNING_COMPILER) } // namespace amd namespace device { Settings::Settings() { assert((ClExtTotal < (8 * sizeof(extensions_))) && "Too many extensions!"); extensions_ = 0; partialDispatch_ = false; supportRA_ = true; customHostAllocator_ = false; waitCommand_ = AMD_OCL_WAIT_COMMAND; supportDepthsRGB_ = false; enableHwDebug_ = false; commandQueues_ = 200; //!< Field value set to maximum number //!< concurrent Virtual GPUs for default } bool Kernel::createSignature( const parameters_t& params, uint32_t numParameters, uint32_t version) { std::stringstream attribs; if (workGroupInfo_.compileSize_[0] != 0) { attribs << "reqd_work_group_size("; for (size_t i = 0; i < 3; ++i) { if (i != 0) { attribs << ","; } attribs << workGroupInfo_.compileSize_[i]; } attribs << ")"; } if (workGroupInfo_.compileSizeHint_[0] != 0) { attribs << " work_group_size_hint("; for (size_t i = 0; i < 3; ++i) { if (i != 0) { attribs << ","; } attribs << workGroupInfo_.compileSizeHint_[i]; } attribs << ")"; } if (!workGroupInfo_.compileVecTypeHint_.empty()) { attribs << " vec_type_hint(" << workGroupInfo_.compileVecTypeHint_ << ")"; } // Destroy old signature if it was allocated before // (offline devices path) delete signature_; signature_ = new amd::KernelSignature(params, attribs.str(), numParameters, version); if (NULL != signature_) { return true; } return false; } Kernel::~Kernel() { delete signature_; } std::string Kernel::openclMangledName(const std::string& name) { const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel); assert(bifSym && "symbol not found"); return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST]; } void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin, const amd::Coord3D region, uint mapFlags, bool entire, amd::Image* baseMip) { // Map/Unmap must be serialized. amd::ScopedLock lock(owner()->lockMemoryOps()); WriteMapInfo info = {}; WriteMapInfo* pInfo = &info; auto it = writeMapInfo_.find(mapAddress); if (it != writeMapInfo_.end()) { LogWarning("Double map of the same or overlapped region!"); pInfo = &it->second; } if (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) { pInfo->origin_ = origin; pInfo->region_ = region; pInfo->entire_ = entire; pInfo->unmapWrite_ = true; } if (mapFlags & CL_MAP_READ) { pInfo->unmapRead_ = true; } pInfo->baseMip_ = baseMip; // Insert into the map if it's the first region if (++pInfo->count_ == 1) { writeMapInfo_.insert({mapAddress, info}); } } Program::Program(amd::Device& device) : device_(device), type_(TYPE_NONE), clBinary_(NULL), llvmBinary_(), elfSectionType_(amd::OclElf::LLVMIR), compileOptions_(), linkOptions_(), lastBuildOptionsArg_(), buildStatus_(CL_BUILD_NONE), buildError_(CL_SUCCESS), globalVariableTotalSize_(0), programOptions(NULL) {} Program::~Program() { clear(); } void Program::clear() { // Destroy all device kernels for (const auto& it : kernels_) { delete it.second; } kernels_.clear(); } bool Program::initBuild(amd::option::Options* options) { programOptions = options; if (options->oVariables->DumpFlags > 0) { static amd::Atomic build_num = 0; options->setBuildNo(build_num++); } buildLog_.clear(); if (!initClBinary()) { return false; } return true; } bool Program::finiBuild(bool isBuildGood) { return true; } cl_int Program::compile(const std::string& sourceCode, const std::vector& headers, const char** headerIncludeNames, const char* origOptions, amd::option::Options* options) { uint64_t start_time = 0; if (options->oVariables->EnableBuildTiming) { buildLog_ = "\nStart timing major build components.....\n\n"; start_time = amd::Os::timeNanos(); } lastBuildOptionsArg_ = origOptions ? origOptions : ""; if (options) { compileOptions_ = options->origOptionStr; } buildStatus_ = CL_BUILD_IN_PROGRESS; if (!initBuild(options)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation init failed."; } } if (options->oVariables->FP32RoundDivideSqrt && !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { buildStatus_ = CL_BUILD_ERROR; buildLog_ += "Error: -cl-fp32-correctly-rounded-divide-sqrt " "specified without device support"; } // Compile the source code if any if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() && !compileImpl(sourceCode, headers, headerIncludeNames, options)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation failed."; } } setType(TYPE_COMPILED); if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !createBinary(options)) { buildLog_ += "Internal Error: creating OpenCL binary failed!\n"; } if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation fini failed."; } } if (buildStatus_ == CL_BUILD_IN_PROGRESS) { buildStatus_ = CL_BUILD_SUCCESS; } else { buildError_ = CL_COMPILE_PROGRAM_FAILURE; } if (options->oVariables->EnableBuildTiming) { std::stringstream tmp_ss; tmp_ss << "\nTotal Compile Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; buildLog_ += tmp_ss.str(); } if (options->oVariables->BuildLog && !buildLog_.empty()) { if (strcmp(options->oVariables->BuildLog, "stderr") == 0) { fprintf(stderr, "%s\n", options->optionsLog().c_str()); fprintf(stderr, "%s\n", buildLog_.c_str()); } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) { printf("%s\n", options->optionsLog().c_str()); printf("%s\n", buildLog_.c_str()); } else { std::fstream f; std::stringstream tmp_ss; std::string logs = options->optionsLog() + buildLog_; tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo(); f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); f.write(logs.data(), logs.size()); f.close(); } LogError(buildLog_.c_str()); } return buildError(); } cl_int Program::link(const std::vector& inputPrograms, const char* origLinkOptions, amd::option::Options* linkOptions) { lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : ""; if (linkOptions) { linkOptions_ = linkOptions->origOptionStr; } buildStatus_ = CL_BUILD_IN_PROGRESS; amd::option::Options options; if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ += "Internal error: Get compile options failed."; } } else { if (!amd::option::parseAllOptions(compileOptions_, options)) { buildStatus_ = CL_BUILD_ERROR; buildLog_ += options.optionsLog(); LogError("Parsing compile options failed."); } } uint64_t start_time = 0; if (options.oVariables->EnableBuildTiming) { buildLog_ = "\nStart timing major build components.....\n\n"; start_time = amd::Os::timeNanos(); } // initBuild() will clear buildLog_, so store it in a temporary variable std::string tmpBuildLog = buildLog_; if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !initBuild(&options)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ += "Internal error: Compilation init failed."; } } buildLog_ += tmpBuildLog; if (options.oVariables->FP32RoundDivideSqrt && !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { buildStatus_ = CL_BUILD_ERROR; buildLog_ += "Error: -cl-fp32-correctly-rounded-divide-sqrt " "specified without device support"; } bool createLibrary = linkOptions ? linkOptions->oVariables->clCreateLibrary : false; if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(inputPrograms, &options, createLibrary)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ += "Internal error: Link failed.\n"; buildLog_ += "Make sure the system setup is correct."; } } if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation fini failed."; } } if (buildStatus_ == CL_BUILD_IN_PROGRESS) { buildStatus_ = CL_BUILD_SUCCESS; } else { buildError_ = CL_LINK_PROGRAM_FAILURE; } if (options.oVariables->EnableBuildTiming) { std::stringstream tmp_ss; tmp_ss << "\nTotal Link Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; buildLog_ += tmp_ss.str(); } if (options.oVariables->BuildLog && !buildLog_.empty()) { if (strcmp(options.oVariables->BuildLog, "stderr") == 0) { fprintf(stderr, "%s\n", options.optionsLog().c_str()); fprintf(stderr, "%s\n", buildLog_.c_str()); } else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) { printf("%s\n", options.optionsLog().c_str()); printf("%s\n", buildLog_.c_str()); } else { std::fstream f; std::stringstream tmp_ss; std::string logs = options.optionsLog() + buildLog_; tmp_ss << options.oVariables->BuildLog << "." << options.getBuildNo(); f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); f.write(logs.data(), logs.size()); f.close(); } } if (!buildLog_.empty()) { LogError(buildLog_.c_str()); } return buildError(); } cl_int Program::build(const std::string& sourceCode, const char* origOptions, amd::option::Options* options) { uint64_t start_time = 0; if (options->oVariables->EnableBuildTiming) { buildLog_ = "\nStart timing major build components.....\n\n"; start_time = amd::Os::timeNanos(); } lastBuildOptionsArg_ = origOptions ? origOptions : ""; if (options) { compileOptions_ = options->origOptionStr; } buildStatus_ = CL_BUILD_IN_PROGRESS; if (!initBuild(options)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation init failed."; } } if (options->oVariables->FP32RoundDivideSqrt && !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) { buildStatus_ = CL_BUILD_ERROR; buildLog_ += "Error: -cl-fp32-correctly-rounded-divide-sqrt " "specified without device support"; } // Compile the source code if any std::vector headers; if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() && !compileImpl(sourceCode, headers, NULL, options)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation failed."; } } if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ += "Internal error: Link failed.\n"; buildLog_ += "Make sure the system setup is correct."; } } if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) { buildStatus_ = CL_BUILD_ERROR; if (buildLog_.empty()) { buildLog_ = "Internal error: Compilation fini failed."; } } if (buildStatus_ == CL_BUILD_IN_PROGRESS) { buildStatus_ = CL_BUILD_SUCCESS; } else { buildError_ = CL_BUILD_PROGRAM_FAILURE; } if (options->oVariables->EnableBuildTiming) { std::stringstream tmp_ss; tmp_ss << "\nTotal Build Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n"; buildLog_ += tmp_ss.str(); } if (options->oVariables->BuildLog && !buildLog_.empty()) { if (strcmp(options->oVariables->BuildLog, "stderr") == 0) { fprintf(stderr, "%s\n", options->optionsLog().c_str()); fprintf(stderr, "%s\n", buildLog_.c_str()); } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) { printf("%s\n", options->optionsLog().c_str()); printf("%s\n", buildLog_.c_str()); } else { std::fstream f; std::stringstream tmp_ss; std::string logs = options->optionsLog() + buildLog_; tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo(); f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary)); f.write(logs.data(), logs.size()); f.close(); } } if (!buildLog_.empty()) { LogError(buildLog_.c_str()); } return buildError(); } bool Program::getCompileOptionsAtLinking(const std::vector& inputPrograms, const amd::option::Options* linkOptions) { amd::option::Options compileOptions; auto it = inputPrograms.cbegin(); const auto itEnd = inputPrograms.cend(); for (size_t i = 0; it != itEnd; ++it, ++i) { Program* program = *it; amd::option::Options compileOptions2; amd::option::Options* thisCompileOptions = i == 0 ? &compileOptions : &compileOptions2; if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions)) { buildLog_ += thisCompileOptions->optionsLog(); LogError("Parsing compile options failed."); return false; } if (i == 0) compileOptions_ = program->compileOptions_; // if we are linking a program executable, and if "program" is a // compiled module or a library created with "-enable-link-options", // we can overwrite "program"'s compile options with linking options if (!linkOptions_.empty() && !linkOptions->oVariables->clCreateLibrary) { bool linkOptsCanOverwrite = false; if (program->type() != TYPE_LIBRARY) { linkOptsCanOverwrite = true; } else { amd::option::Options thisLinkOptions; if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) { buildLog_ += thisLinkOptions.optionsLog(); LogError("Parsing link options failed."); return false; } if (thisLinkOptions.oVariables->clEnableLinkOptions) linkOptsCanOverwrite = true; } if (linkOptsCanOverwrite) { if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) { buildLog_ += thisCompileOptions->optionsLog(); LogError("Setting link options failed."); return false; } } if (i == 0) compileOptions_ += " " + linkOptions_; } // warn if input modules have inconsistent compile options if (i > 0) { if (!compileOptions.equals(*thisCompileOptions, true /*ignore clc options*/)) { buildLog_ += "Warning: Input OpenCL binaries has inconsistent" " compile options. Using compile options from" " the first input binary!\n"; } } } return true; } bool Program::initClBinary(const char* binaryIn, size_t size) { if (!initClBinary()) { return false; } // Save the original binary that isn't owned by ClBinary clBinary()->saveOrigBinary(binaryIn, size); const char* bin = binaryIn; size_t sz = size; // unencrypted int encryptCode = 0; char* decryptedBin = NULL; #if !defined(WITH_LIGHTNING_COMPILER) bool isSPIRV = isSPIRVMagic(binaryIn, size); if (isSPIRV || isBcMagic(binaryIn)) { acl_error err = ACL_SUCCESS; aclBinaryOptions binOpts = {0}; binOpts.struct_size = sizeof(binOpts); binOpts.elfclass = (info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64) ? ELFCLASS64 : ELFCLASS32; binOpts.bitness = ELFDATA2LSB; binOpts.alloc = &::malloc; binOpts.dealloc = &::free; aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err); if (err != ACL_SUCCESS) { LogWarning("aclBinaryInit failed"); aclBinaryFini(aclbin_v30); return false; } err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size, isSPIRV ? aclSPIRV : aclSPIR); if (ACL_SUCCESS != err) { LogWarning("aclInsertSection failed"); aclBinaryFini(aclbin_v30); return false; } if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) { err = aclWriteToMem(aclbin_v30, (void**)const_cast(&bin), &sz); if (err != ACL_SUCCESS) { LogWarning("aclWriteToMem failed"); aclBinaryFini(aclbin_v30); return false; } aclBinaryFini(aclbin_v30); } else { aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21); err = aclWriteToMem(aclbin_v21, (void**)const_cast(&bin), &sz); if (err != ACL_SUCCESS) { LogWarning("aclWriteToMem failed"); aclBinaryFini(aclbin_v30); aclBinaryFini(aclbin_v21); return false; } aclBinaryFini(aclbin_v30); aclBinaryFini(aclbin_v21); } } else #endif // !defined(WITH_LIGHTNING_COMPILER) { size_t decryptedSize; if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) { return false; } if (decryptedBin != NULL) { // It is decrypted binary. bin = decryptedBin; sz = decryptedSize; } if (!isElf(bin)) { // Invalid binary. if (decryptedBin != NULL) { delete[] decryptedBin; } return false; } } clBinary()->setFlags(encryptCode); return clBinary()->setBinary(bin, sz, (decryptedBin != NULL)); } bool Program::setBinary(const char* binaryIn, size_t size) { if (!initClBinary(binaryIn, size)) { return false; } if (!clBinary()->setElfIn()) { LogError("Setting input OCL binary failed"); return false; } uint16_t type; if (!clBinary()->elfIn()->getType(type)) { LogError("Bad OCL Binary: error loading ELF type!"); return false; } switch (type) { case ET_NONE: { setType(TYPE_NONE); break; } case ET_REL: { if (clBinary()->isSPIR() || clBinary()->isSPIRV()) { setType(TYPE_INTERMEDIATE); } else { setType(TYPE_COMPILED); } break; } case ET_DYN: { char* sect = NULL; size_t sz = 0; // FIXME: we should look for the e_machine to detect an HSACO. if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, §, &sz) && sect && sz > 0) { setType(TYPE_EXECUTABLE); } else { setType(TYPE_LIBRARY); } break; } case ET_EXEC: { setType(TYPE_EXECUTABLE); break; } default: LogError("Bad OCL Binary: bad ELF type!"); return false; } clBinary()->loadCompileOptions(compileOptions_); clBinary()->loadLinkOptions(linkOptions_); clBinary()->resetElfIn(); return true; } bool Program::createBIFBinary(aclBinary* bin) { #if defined(WITH_COMPILER_LIB) acl_error err; char* binaryIn = NULL; size_t size; err = aclWriteToMem(bin, reinterpret_cast(&binaryIn), &size); if (err != ACL_SUCCESS) { LogWarning("aclWriteToMem failed"); return false; } clBinary()->saveBIFBinary(binaryIn, size); aclFreeMem(bin, binaryIn); return true; #else // !defined(WITH_COMPILER_LIB) return false; #endif // !defined(WITH_COMPILER_LIB) } ClBinary::ClBinary(const amd::Device& dev, BinaryImageFormat bifVer) : dev_(dev), binary_(NULL), size_(0), flags_(0), origBinary_(NULL), origSize_(0), encryptCode_(0), elfIn_(NULL), elfOut_(NULL), format_(bifVer) {} ClBinary::~ClBinary() { release(); if (elfIn_) { delete elfIn_; } if (elfOut_) { delete elfOut_; } } std::string ClBinary::getBIFSymbol(unsigned int symbolID) const { size_t nSymbols = 0; // Due to PRE & POST defines in bif_section_labels.hpp conflict with // PRE & POST struct members in sp3-si-chip-registers.h // unable to include bif_section_labels.hpp in device.hpp //! @todo: resolve conflict by renaming defines, // then include bif_section_labels.hpp in device.hpp & // use oclBIFSymbolID instead of unsigned int as a parameter const oclBIFSymbolID symID = static_cast(symbolID); switch (format_) { case BIF_VERSION2: { nSymbols = sizeof(BIF20) / sizeof(oclBIFSymbolStruct); const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID); assert(symb && "BIF20 symbol with symbolID not found"); if (symb) { return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]); } break; } case BIF_VERSION3: { nSymbols = sizeof(BIF30) / sizeof(oclBIFSymbolStruct); const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID); assert(symb && "BIF30 symbol with symbolID not found"); if (symb) { return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]); } break; } default: assert(0 && "unexpected BIF type"); return ""; } return ""; } void ClBinary::init(amd::option::Options* optionsObj, bool amdilRequired) { // option has higher priority than environment variable. if ((flags_ & BinarySourceMask) != BinaryRemoveSource) { // set to zero flags_ = (flags_ & (~BinarySourceMask)); flags_ |= (optionsObj->oVariables->BinSOURCE ? BinarySaveSource : BinaryNoSaveSource); } if ((flags_ & BinaryLlvmirMask) != BinaryRemoveLlvmir) { // set to zero flags_ = (flags_ & (~BinaryLlvmirMask)); flags_ |= (optionsObj->oVariables->BinLLVMIR ? BinarySaveLlvmir : BinaryNoSaveLlvmir); } // If amdilRequired is true, force to save AMDIL (for correctness) if ((flags_ & BinaryAmdilMask) != BinaryRemoveAmdil || amdilRequired) { // set to zero flags_ = (flags_ & (~BinaryAmdilMask)); flags_ |= ((optionsObj->oVariables->BinAMDIL || amdilRequired) ? BinarySaveAmdil : BinaryNoSaveAmdil); } if ((flags_ & BinaryIsaMask) != BinaryRemoveIsa) { // set to zero flags_ = (flags_ & (~BinaryIsaMask)); flags_ |= ((optionsObj->oVariables->BinEXE) ? BinarySaveIsa : BinaryNoSaveIsa); } if ((flags_ & BinaryASMask) != BinaryRemoveAS) { // set to zero flags_ = (flags_ & (~BinaryASMask)); flags_ |= ((optionsObj->oVariables->BinAS) ? BinarySaveAS : BinaryNoSaveAS); } } bool ClBinary::isRecompilable(std::string& llvmBinary, amd::OclElf::oclElfPlatform thePlatform) { /* It is recompilable if there is llvmir that was generated for the same platform (CPU or GPU) and with the same bitness. Note: the bitness has been checked in initClBinary(), no need to check it here. */ if (llvmBinary.empty()) { return false; } uint16_t elf_target; amd::OclElf::oclElfPlatform platform; if (elfIn()->getTarget(elf_target, platform)) { if (platform == thePlatform) { return true; } if ((platform == amd::OclElf::COMPLIB_PLATFORM) && (((thePlatform == amd::OclElf::CAL_PLATFORM) && ((elf_target == (uint16_t)EM_AMDIL) || (elf_target == (uint16_t)EM_HSAIL) || (elf_target == (uint16_t)EM_HSAIL_64))) || ((thePlatform == amd::OclElf::CPU_PLATFORM) && ((elf_target == (uint16_t)EM_386) || (elf_target == (uint16_t)EM_X86_64))))) { return true; } } return false; } void ClBinary::release() { if (isBinaryAllocated() && (binary_ != NULL)) { delete[] binary_; binary_ = NULL; flags_ &= ~BinaryAllocated; } } void ClBinary::saveBIFBinary(const char* binaryIn, size_t size) { char* image = new char[size]; memcpy(image, binaryIn, size); setBinary(image, size, true); return; } bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) { #if 0 if (!saveISA() && !saveAMDIL() && !saveLLVMIR() && !saveSOURCE()) { return true; } #endif release(); size_t imageSize; char* image; assert(elfOut_ && "elfOut_ should be initialized in ClBinary::data()"); // Insert Version string that builds this binary into .comment section const device::Info& devInfo = dev_.info(); std::string buildVerInfo("@(#) "); if (devInfo.version_ != NULL) { buildVerInfo.append(devInfo.version_); buildVerInfo.append(". Driver version: "); buildVerInfo.append(devInfo.driverVersion_); } else { // char OpenCLVersion[256]; // size_t sz; // cl_int ret= clGetPlatformInfo(AMD_PLATFORM, CL_PLATFORM_VERSION, 256, OpenCLVersion, &sz); // if (ret == CL_SUCCESS) { // buildVerInfo.append(OpenCLVersion, sz); // } // If CAL is unavailable, just hard-code the OpenCL driver version buildVerInfo.append("OpenCL 1.1" AMD_PLATFORM_INFO); } elfOut_->addSection(amd::OclElf::COMMENT, buildVerInfo.data(), buildVerInfo.size()); switch (type) { case Program::TYPE_NONE: { elfOut_->setType(ET_NONE); break; } case Program::TYPE_COMPILED: { elfOut_->setType(ET_REL); break; } case Program::TYPE_LIBRARY: { elfOut_->setType(ET_DYN); break; } case Program::TYPE_EXECUTABLE: { elfOut_->setType(ET_EXEC); break; } default: assert(0 && "unexpected elf type"); } if (!elfOut_->dumpImage(&image, &imageSize)) { return false; } #if defined(HAVE_BLOWFISH_H) if (doencrypt) { // Increase the size by 64 to accomodate extra headers int outBufSize = (int)(imageSize + 64); char* outBuf = new char[outBufSize]; if (outBuf == NULL) { return false; } memset(outBuf, '\0', outBufSize); int outBytes = 0; bool success = amd::oclEncrypt(0, image, imageSize, outBuf, outBufSize, &outBytes); delete[] image; if (!success) { delete[] outBuf; return false; } image = outBuf; imageSize = outBytes; } #endif setBinary(image, imageSize, true); return true; } Program::binary_t ClBinary::data() const { return {binary_, size_}; } bool ClBinary::setBinary(const char* theBinary, size_t theBinarySize, bool allocated) { release(); size_ = theBinarySize; binary_ = theBinary; if (allocated) { flags_ |= BinaryAllocated; } return true; } void ClBinary::setFlags(int encryptCode) { encryptCode_ = encryptCode; if (encryptCode != 0) { flags_ = (flags_ & (~(BinarySourceMask | BinaryLlvmirMask | BinaryAmdilMask | BinaryIsaMask | BinaryASMask))); flags_ |= (BinaryRemoveSource | BinaryRemoveLlvmir | BinaryRemoveAmdil | BinarySaveIsa | BinaryRemoveAS); } } bool ClBinary::decryptElf(const char* binaryIn, size_t size, char** decryptBin, size_t* decryptSize, int* encryptCode) { *decryptBin = NULL; #if defined(HAVE_BLOWFISH_H) int outBufSize = 0; if (amd::isEncryptedBIF(binaryIn, (int)size, &outBufSize)) { char* outBuf = new (std::nothrow) char[outBufSize]; if (outBuf == NULL) { return false; } // Decrypt int outDataSize = 0; if (!amd::oclDecrypt(binaryIn, (int)size, outBuf, outBufSize, &outDataSize)) { delete[] outBuf; return false; } *decryptBin = reinterpret_cast(outBuf); *decryptSize = outDataSize; *encryptCode = 1; } #endif return true; } bool ClBinary::setElfIn() { if (elfIn_) return true; if (binary_ == NULL) { return false; } elfIn_ = new amd::OclElf(ELFCLASSNONE, binary_, size_, NULL, ELF_C_READ); if ((elfIn_ == NULL) || elfIn_->hasError()) { if (elfIn_) { delete elfIn_; elfIn_ = NULL; } LogError("Creating input ELF object failed"); return false; } return true; } void ClBinary::resetElfIn() { if (elfIn_) { delete elfIn_; elfIn_ = NULL; } } bool ClBinary::setElfOut(unsigned char eclass, const char* outFile) { elfOut_ = new amd::OclElf(eclass, NULL, 0, outFile, ELF_C_WRITE); if ((elfOut_ == NULL) || elfOut_->hasError()) { if (elfOut_) { delete elfOut_; elfOut_ = NULL; } LogError("Creating ouput ELF object failed"); return false; } return setElfTarget(); } void ClBinary::resetElfOut() { if (elfOut_) { delete elfOut_; elfOut_ = NULL; } } bool ClBinary::loadLlvmBinary(std::string& llvmBinary, amd::OclElf::oclElfSections& elfSectionType) const { // Check if current binary already has LLVMIR char* section = NULL; size_t sz = 0; const amd::OclElf::oclElfSections SectionTypes[] = {amd::OclElf::LLVMIR, amd::OclElf::SPIR, amd::OclElf::SPIRV}; for (int i = 0; i < 3; ++i) { if (elfIn_->getSection(SectionTypes[i], §ion, &sz) && section && sz > 0) { llvmBinary.append(section, sz); elfSectionType = SectionTypes[i]; return true; } } return false; } bool ClBinary::loadCompileOptions(std::string& compileOptions) const { char* options = NULL; size_t sz; compileOptions.clear(); if (elfIn_->getSymbol(amd::OclElf::COMMENT, getBIFSymbol(symOpenclCompilerOptions).c_str(), &options, &sz)) { if (sz > 0) { compileOptions.append(options, sz); } return true; } return false; } bool ClBinary::loadLinkOptions(std::string& linkOptions) const { char* options = NULL; size_t sz; linkOptions.clear(); if (elfIn_->getSymbol(amd::OclElf::COMMENT, getBIFSymbol(symOpenclLinkerOptions).c_str(), &options, &sz)) { if (sz > 0) { linkOptions.append(options, sz); } return true; } return false; } void ClBinary::storeCompileOptions(const std::string& compileOptions) { elfOut()->addSymbol(amd::OclElf::COMMENT, getBIFSymbol(symOpenclCompilerOptions).c_str(), compileOptions.c_str(), compileOptions.length()); } void ClBinary::storeLinkOptions(const std::string& linkOptions) { elfOut()->addSymbol(amd::OclElf::COMMENT, getBIFSymbol(symOpenclLinkerOptions).c_str(), linkOptions.c_str(), linkOptions.length()); } bool ClBinary::isSPIR() const { char* section = NULL; size_t sz = 0; if (elfIn_->getSection(amd::OclElf::LLVMIR, §ion, &sz) && section && sz > 0) return false; if (elfIn_->getSection(amd::OclElf::SPIR, §ion, &sz) && section && sz > 0) return true; return false; } bool ClBinary::isSPIRV() const { char* section = NULL; size_t sz = 0; if (elfIn_->getSection(amd::OclElf::SPIRV, §ion, &sz) && section && sz > 0) { return true; } return false; } } // namespace device