2014-07-04 16:17:05 -04:00
|
|
|
//
|
|
|
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
#include "device/device.hpp"
|
|
|
|
|
#include "thread/atomic.hpp"
|
|
|
|
|
#include "thread/monitor.hpp"
|
|
|
|
|
|
|
|
|
|
#if defined(WITH_HSA_DEVICE)
|
2016-07-19 17:42:51 -04:00
|
|
|
#include "device/rocm/rocdevice.hpp"
|
|
|
|
|
extern amd::AppProfile* rocCreateAppProfile();
|
2014-07-04 16:17:05 -04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if defined(WITH_CPU_DEVICE)
|
|
|
|
|
#include "device/cpu/cpudevice.hpp"
|
2017-04-13 13:56:38 -04:00
|
|
|
#endif // WITH_CPU_DEVICE
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2016-01-22 18:18:55 -05:00
|
|
|
#if defined(WITH_PAL_DEVICE)
|
2017-04-13 13:56:38 -04:00
|
|
|
// namespace pal {
|
2016-01-22 18:18:55 -05:00
|
|
|
extern bool PalDeviceLoad();
|
|
|
|
|
extern void PalDeviceUnload();
|
|
|
|
|
//}
|
2017-04-13 13:56:38 -04:00
|
|
|
#endif // WITH_PAL_DEVICE
|
2016-01-22 18:18:55 -05:00
|
|
|
|
2014-07-04 16:17:05 -04:00
|
|
|
#if defined(WITH_GPU_DEVICE)
|
|
|
|
|
extern bool DeviceLoad();
|
|
|
|
|
extern void DeviceUnload();
|
2017-04-13 13:56:38 -04:00
|
|
|
#endif // WITH_GPU_DEVICE
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
#include "platform/runtime.hpp"
|
|
|
|
|
#include "platform/program.hpp"
|
|
|
|
|
#include "thread/monitor.hpp"
|
|
|
|
|
#include "amdocl/cl_common.hpp"
|
|
|
|
|
#include "utils/options.hpp"
|
2017-04-13 13:56:38 -04:00
|
|
|
#include "utils/versions.hpp" // AMD_PLATFORM_INFO
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
#if defined(HAVE_BLOWFISH_H)
|
|
|
|
|
#include "blowfish/oclcrypt.hpp"
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#include "utils/bif_section_labels.hpp"
|
2015-08-19 07:28:56 -04:00
|
|
|
#include "utils/libUtils.h"
|
|
|
|
|
#include "spirv/spirvUtils.h"
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <cstring>
|
|
|
|
|
#include <cstdio>
|
|
|
|
|
#include <sstream>
|
|
|
|
|
#include <fstream>
|
|
|
|
|
#include <set>
|
2015-08-27 08:40:14 -04:00
|
|
|
#include <algorithm>
|
2017-01-12 11:03:31 -05:00
|
|
|
#include <numeric>
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2015-08-19 07:28:56 -04:00
|
|
|
|
2014-07-04 16:17:05 -04:00
|
|
|
// Source text of the built-in blit kernels. Only declared here; the definition
// is not part of the visible portion of this file.
namespace device {
extern const char* BlitSourceCode;
}  // namespace device
|
|
|
|
|
|
|
|
|
|
namespace amd {
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
std::vector<Device*>* Device::devices_ = NULL;
|
2014-09-23 12:44:50 -04:00
|
|
|
AppProfile Device::appProfile_;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
amd::Monitor SvmManager::AllocatedLock_("Guards SVM allocation list");
|
|
|
|
|
std::map<uintptr_t, amd::Memory*> SvmManager::svmBufferMap_;
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
size_t SvmManager::size() {
|
|
|
|
|
amd::ScopedLock lock(AllocatedLock_);
|
|
|
|
|
return svmBufferMap_.size();
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Records the memory object v under the address k.
// std::map::insert is used, so an entry that already exists for the same
// address is left untouched.
void SvmManager::AddSvmBuffer(const void* k, amd::Memory* v) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(k);
  amd::ScopedLock guard(AllocatedLock_);
  svmBufferMap_.insert(std::map<uintptr_t, amd::Memory*>::value_type(address, v));
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Drops the entry registered under the address k, if there is one.
void SvmManager::RemoveSvmBuffer(const void* k) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(k);
  amd::ScopedLock guard(AllocatedLock_);
  svmBufferMap_.erase(address);
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Looks up the SVM allocation whose address range [start, start + getSize())
// contains k. Returns NULL when no registered allocation covers the address.
amd::Memory* SvmManager::FindSvmBuffer(const void* k) {
  amd::ScopedLock guard(AllocatedLock_);
  const uintptr_t address = reinterpret_cast<uintptr_t>(k);

  // upper_bound() gives the first entry that starts above the address, so the
  // only possible owner is the entry immediately before it.
  std::map<uintptr_t, amd::Memory*>::iterator candidate = svmBufferMap_.upper_bound(address);
  if (candidate == svmBufferMap_.begin()) {
    return NULL;
  }
  --candidate;

  const uintptr_t base = candidate->first;
  amd::Memory* buffer = candidate->second;
  if (address < base || address >= (base + buffer->getSize())) {
    return NULL;
  }
  // the k is in the range
  return buffer;
}
|
|
|
|
|
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Releases the reference held on the blit program, when one was created.
Device::BlitProgram::~BlitProgram() {
  if (program_ == NULL) {
    return;
  }
  program_->release();
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Creates and builds the program that holds all blit kernels for one device.
//   device       - the only device the program is built for
//   extraKernels - optional source appended after the built-in blit source
//   extraOptions - optional text appended to the build options
// Returns false when the program object cannot be allocated or the build
// does not report CL_SUCCESS.
bool Device::BlitProgram::create(amd::Device* device, const char* extraKernels,
                                 const char* extraOptions) {
  // Assemble the kernel source: built-in blit kernels first, extras after.
  std::string source(device::BlitSourceCode);
  if (extraKernels != NULL) {
    source += extraKernels;
  }

  // Create a program with all blit kernels
  program_ = new Program(*context_, source.c_str());
  if (program_ == NULL) {
    return false;
  }

  // Assemble the build options
  std::string buildOptions = "-cl-internal-kernel ";
#if !defined(WITH_LIGHTNING_COMPILER)
  buildOptions += "-Wf,--force_disable_spir -fno-lib-no-inline ";
  buildOptions += "-fno-sc-keep-calls ";
#endif  // !defined(WITH_LIGHTNING_COMPILER)
  if (extraOptions != NULL) {
    buildOptions += extraOptions;
  }
  if (!GPU_DUMP_BLIT_KERNELS) {
    buildOptions += " -fno-enable-dump";
  }

  // Build all kernels
  std::vector<amd::Device*> targets(1, device);
  return CL_SUCCESS ==
      program_->build(targets, buildOptions.c_str(), NULL, NULL, GPU_DUMP_BLIT_KERNELS);
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Initialises every device back-end compiled into this build.
// Returns true when at least one of NullDevice/GPU/PAL/CPU initialisation
// reported success; the result of roc::Device::init() is not part of that.
bool Device::init() {
  assert(!Runtime::initialized() && "initialize only once");
  devices_ = NULL;
  appProfile_.init();

  bool anyBackendUp = false;

// IMPORTANT: the HSA stack is initialized first and the GPU stack second.
// The order of initialization is significant; if it is changed,
// amd::Device::registerDevice() must be modified accordingly.
#if defined(WITH_HSA_DEVICE)
  // Return value of roc::Device::init():
  //   false - error initializing the HSA stack;
  //   true  - either HSA is not installed or the HSA stack was
  //           successfully initialized.
  // A failure is deliberately not turned into abort(): it is the only
  // indication that KFD is not installed, so the failure is ignored and KFD
  // is assumed to be missing.
  static_cast<void>(roc::Device::init());
  anyBackendUp = roc::NullDevice::init() || anyBackendUp;
#endif  // WITH_HSA_DEVICE

#if defined(WITH_GPU_DEVICE)
  if (GPU_ENABLE_PAL != 1) {
    if (DeviceLoad()) {
      anyBackendUp = true;
    }
  }
#endif  // WITH_GPU_DEVICE

#if defined(WITH_PAL_DEVICE)
  if (GPU_ENABLE_PAL != 0) {
    if (PalDeviceLoad()) {
      anyBackendUp = true;
    }
  }
#endif  // WITH_PAL_DEVICE

#if defined(WITH_CPU_DEVICE)
  anyBackendUp = cpu::Device::init() || anyBackendUp;
#endif  // WITH_CPU_DEVICE

  return anyBackendUp;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void Device::tearDown() {
|
|
|
|
|
if (devices_ != NULL) {
|
|
|
|
|
for (uint i = 0; i < devices_->size(); ++i) {
|
|
|
|
|
delete devices_->at(i);
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
devices_->clear();
|
|
|
|
|
delete devices_;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
#if defined(WITH_HSA_DEVICE)
|
2017-04-13 13:56:38 -04:00
|
|
|
roc::Device::tearDown();
|
|
|
|
|
#endif // WITH_HSA_DEVICE
|
2016-07-21 18:22:08 -04:00
|
|
|
#if defined(WITH_GPU_DEVICE)
|
2017-04-13 13:56:38 -04:00
|
|
|
if (GPU_ENABLE_PAL != 1) {
|
|
|
|
|
DeviceUnload();
|
|
|
|
|
}
|
|
|
|
|
#endif // WITH_GPU_DEVICE
|
2016-01-22 18:18:55 -05:00
|
|
|
#if defined(WITH_PAL_DEVICE)
|
2017-04-13 13:56:38 -04:00
|
|
|
if (GPU_ENABLE_PAL != 0) {
|
|
|
|
|
PalDeviceUnload();
|
|
|
|
|
}
|
|
|
|
|
#endif // WITH_PAL_DEVICE
|
2014-07-04 16:17:05 -04:00
|
|
|
#if defined(WITH_CPU_DEVICE)
|
2017-04-13 13:56:38 -04:00
|
|
|
cpu::Device::tearDown();
|
|
|
|
|
#endif // WITH_CPU_DEVICE
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Constructs a device, optionally as a child of parent. The info_ structure
// is zero-filled and a reference is taken on the parent when one is given.
Device::Device(Device* parent)
    : settings_(NULL),
      online_(true),
      blitProgram_(NULL),
      hwDebugMgr_(NULL),
      parent_(parent),
      vaCacheAccess_(nullptr),
      vaCacheMap_(nullptr) {
  memset(&info_, 0, sizeof(info_));

  if (parent_ == NULL) {
    return;
  }
  parent_->retain();
}
|
|
|
|
|
|
|
|
|
|
// Frees the VA cache, the settings object and the heap arrays referenced from
// info_, and drops the reference that was taken on the parent device.
Device::~Device() {
  CondLog((vaCacheMap_ != nullptr) && (vaCacheMap_->size() != 0),
          "Application didn't unmap all host memory!");
  delete vaCacheMap_;
  delete vaCacheAccess_;

  // Destroy device settings
  if (settings_ != NULL) {
    delete settings_;
  }

  if (parent_ == NULL) {
    // The extension string is only freed by a device that has no parent.
    if (info_.extensions_ != NULL) {
      delete[] info_.extensions_;
    }
  } else {
    parent_->release();
  }

  if (info_.partitionCreateInfo_.type_.byCounts_) {
    if (info_.partitionCreateInfo_.byCounts_.countsList_ != NULL) {
      delete[] info_.partitionCreateInfo_.byCounts_.countsList_;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// Allocates the VA cache lock and the VA cache map.
// Returns false when either allocation yields NULL.
bool Device::create() {
  vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
  if (vaCacheAccess_ != NULL) {
    vaCacheMap_ = new std::map<uintptr_t, device::Memory*>();
    return vaCacheMap_ != NULL;
  }
  return false;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool Device::isAncestor(const Device* sub) const {
|
|
|
|
|
for (const Device* d = sub->parent_; d != NULL; d = d->parent_) {
|
|
|
|
|
if (d == this) {
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
|
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void Device::registerDevice() {
|
|
|
|
|
assert(Runtime::singleThreaded() && "this is not thread-safe");
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
static bool defaultIsAssigned = false;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (devices_ == NULL) {
|
|
|
|
|
devices_ = new std::vector<Device*>;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (info_.available_) {
|
|
|
|
|
if (!defaultIsAssigned) {
|
|
|
|
|
defaultIsAssigned = true;
|
|
|
|
|
info_.type_ |= CL_DEVICE_TYPE_DEFAULT;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
|
|
|
|
devices_->push_back(this);
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Registers memory in the VA cache under the host pointer of its owner.
// Nothing is done unless the memory reports direct host access. If the host
// pointer already falls inside a cached range, an error is logged and the
// cache is left unchanged.
void Device::addVACache(device::Memory* memory) const {
  // Make sure system memory has direct access
  if (!memory->isHostMemDirectAccess()) {
    return;
  }

  // VA cache access must be serialised
  amd::ScopedLock lk(*vaCacheAccess_);
  void* hostStart = memory->owner()->getHostMem();

  size_t unusedOffset;
  if (findMemoryFromVA(hostStart, &unusedOffset) != nullptr) {
    LogError("Unexpected double map() call from the app!");
    return;
  }

  // Insert the new entry
  const uintptr_t key = reinterpret_cast<uintptr_t>(hostStart);
  vaCacheMap_->insert(std::pair<uintptr_t, device::Memory*>(key, memory));
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Removes the VA cache entry keyed by the host pointer of memory's owner.
// Nothing is done when the memory has no direct host access or no owner.
void Device::removeVACache(const device::Memory* memory) const {
  // Make sure system memory has direct access
  if (!memory->isHostMemDirectAccess() || !memory->owner()) {
    return;
  }

  // VA cache access must be serialised
  amd::ScopedLock lk(*vaCacheAccess_);
  void* hostStart = memory->owner()->getHostMem();
  const uintptr_t key = reinterpret_cast<uintptr_t>(hostStart);
  vaCacheMap_->erase(key);
}
|
2016-04-04 11:25:36 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Finds the cached memory object whose range [start, start + size()) contains
// ptr. On success the distance of ptr from the range start is stored in
// *offset and the object is returned; otherwise nullptr is returned and
// *offset is left untouched.
device::Memory* Device::findMemoryFromVA(const void* ptr, size_t* offset) const {
  // VA cache access must be serialised
  amd::ScopedLock lk(*vaCacheAccess_);

  const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);

  // upper_bound() points past the only entry that can contain the address.
  std::map<uintptr_t, device::Memory*>::iterator entry = vaCacheMap_->upper_bound(address);
  if (entry == vaCacheMap_->begin()) {
    return nullptr;
  }
  --entry;

  const uintptr_t base = entry->first;
  device::Memory* cached = entry->second;
  if (address < base || address >= (base + cached->size())) {
    return nullptr;
  }

  // ptr is in the range
  *offset = address - base;
  return cached;
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
// Returns true when the device's type bits overlap with type. An offline
// device only qualifies when offlineDevices is set.
bool Device::IsTypeMatching(cl_device_type type, bool offlineDevices) {
  const bool eligible = isOnline() || offlineDevices;
  return eligible && ((info_.type_ & type) != 0);
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Returns the registered devices that match type, in registration order.
// The result is empty when no device has been registered yet.
std::vector<Device*> Device::getDevices(cl_device_type type, bool offlineDevices) {
  std::vector<Device*> matching;

  if (devices_ != NULL) {
    // Create the list of available devices
    for (Device* candidate : *devices_) {
      // Check if the device type is matched
      if (candidate->IsTypeMatching(type, offlineDevices)) {
        matching.push_back(candidate);
      }
    }
  }

  return matching;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Counts the registered devices that match type; 0 when none are registered.
size_t Device::numDevices(cl_device_type type, bool offlineDevices) {
  if (devices_ == NULL) {
    return 0;
  }

  size_t matches = 0;
  for (Device* candidate : *devices_) {
    // Check if the device type is matched
    if (candidate->IsTypeMatching(type, offlineDevices)) {
      matches += 1;
    }
  }
  return matches;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Fills devices with the handles of the devices matching deviceType.
//   - Count-only query (devices == NULL, numDevices != NULL): stores the number
//     of matches in *numDevices and returns whether it is non-zero.
//   - Otherwise: writes up to numEntries handles, zero-fills the remaining
//     slots of devices, and reports the total number of matches through
//     numDevices (passed through not_null()).
// Returns false when no device matches.
bool Device::getDeviceIDs(cl_device_type deviceType, cl_uint numEntries, cl_device_id* devices,
                          cl_uint* numDevices, bool offlineDevices) {
  if (numDevices != NULL && devices == NULL) {
    *numDevices = (cl_uint)amd::Device::numDevices(deviceType, offlineDevices);
    return *numDevices > 0;
  }
  assert(devices != NULL && "check the code above");

  std::vector<amd::Device*> matched = amd::Device::getDevices(deviceType, offlineDevices);
  if (matched.empty()) {
    *not_null(numDevices) = 0;
    return false;
  }

  const cl_uint found = (cl_uint)matched.size();
  const cl_uint filled = std::min(numEntries, found);

  cl_uint slot = 0;
  // Copy as many handles as fit ...
  for (; slot < filled; ++slot) {
    devices[slot] = as_cl(matched[slot]);
  }
  // ... and clear whatever is left of the caller's array.
  for (; slot < numEntries; ++slot) {
    devices[slot] = (cl_device_id)0;
  }

  *not_null(numDevices) = found;
  return true;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
char* Device::getExtensionString() {
|
|
|
|
|
std::stringstream extStream;
|
|
|
|
|
size_t size;
|
|
|
|
|
char* result = NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Generate the extension string
|
|
|
|
|
for (uint i = 0; i < ClExtTotal; ++i) {
|
|
|
|
|
if (settings().checkExtension(i)) {
|
|
|
|
|
extStream << OclExtensionsString[i];
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
size = extStream.str().size() + 1;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Create a single string with all extensions
|
|
|
|
|
result = new char[size];
|
|
|
|
|
if (result != NULL) {
|
|
|
|
|
memcpy(result, extStream.str().data(), (size - 1));
|
|
|
|
|
result[size - 1] = 0;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return result;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Forwards a map-target request to the device memory object that backs mem on
// this device. Returns NULL, after logging an error, when mem has no device
// memory for this device.
void* Device::allocMapTarget(amd::Memory& mem, const amd::Coord3D& origin,
                             const amd::Coord3D& region, uint mapFlags, size_t* rowPitch,
                             size_t* slicePitch) {
  // Translate memory references
  device::Memory* backing = mem.getDeviceMemory(*this);
  if (backing != NULL) {
    // Pass request over to memory
    return backing->allocMapTarget(origin, region, mapFlags, rowPitch, slicePitch);
  }

  LogError("allocMapTarget failed. Can't allocate video memory");
  return NULL;
}
|
|
|
|
|
|
2017-01-12 11:03:31 -05:00
|
|
|
|
|
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
2017-04-13 13:56:38 -04:00
|
|
|
// Sets up the code cache for one target.
//   targetStr, postfix - forwarded to the StringCache constructor
//   enableCache        - whether the cache is consulted and filled at all
//   resetCache         - when set, a temporary StringCache with build number 0
//                        is constructed for the same target
CacheCompilation::CacheCompilation(std::string targetStr, std::string postfix, bool enableCache,
                                   bool resetCache)
    : codeCache_(targetStr, 0, AMD_PLATFORM_BUILD_NUMBER, postfix),
      isCodeCacheEnabled_(enableCache) {
  if (!resetCache) {
    return;
  }
  // clean up the cached data of the target device
  // NOTE(review): presumably the clean-up happens inside StringCache when it
  // is constructed with a different build number - confirm.
  StringCache emptyCache(targetStr, 0, 0, postfix);
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool CacheCompilation::linkLLVMBitcode(amd::opencl_driver::Compiler* C,
|
2017-01-12 11:03:31 -05:00
|
|
|
std::vector<amd::opencl_driver::Data*>& inputs,
|
|
|
|
|
amd::opencl_driver::Buffer* output,
|
2017-04-13 13:56:38 -04:00
|
|
|
std::vector<std::string>& options, std::string& buildLog) {
|
|
|
|
|
std::string cacheOpt;
|
|
|
|
|
cacheOpt = std::accumulate(begin(options), end(options), cacheOpt);
|
2017-01-12 11:03:31 -05:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ret = false;
|
|
|
|
|
bool cachedCodeExist = false;
|
|
|
|
|
std::vector<StringCache::CachedData> bcSet;
|
|
|
|
|
if (isCodeCacheEnabled_) {
|
|
|
|
|
using namespace amd::opencl_driver;
|
2017-01-12 11:03:31 -05:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
for (auto& input : inputs) {
|
|
|
|
|
assert(input->Type() == DT_LLVM_BC);
|
2017-01-12 11:03:31 -05:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
BufferReference* bc = reinterpret_cast<BufferReference*>(input);
|
|
|
|
|
StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()};
|
|
|
|
|
bcSet.push_back(cachedData);
|
2017-01-12 11:03:31 -05:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
std::string dstData = "";
|
|
|
|
|
if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
|
|
|
|
|
"Link LLVM Bitcodes")) {
|
|
|
|
|
std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
|
|
|
|
|
cachedCodeExist = true;
|
2017-01-12 11:03:31 -05:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
2017-01-12 11:03:31 -05:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (!cachedCodeExist) {
|
|
|
|
|
if (!C->LinkLLVMBitcode(inputs, output, options)) {
|
|
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (isCodeCacheEnabled_) {
|
|
|
|
|
std::string dstData(output->Buf().data(), output->Buf().size());
|
|
|
|
|
if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
|
|
|
|
|
buildLog += "Warning: Failed to caching codes.\n";
|
|
|
|
|
LogWarning("Caching codes failed!");
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compiles OpenCL source inputs to LLVM bitcode, going through the code cache
// when it is enabled.
//   C        - compiler driver that performs the real compilation
//   inputs   - DT_CL sources and DT_CL_HEADER header files; any other type
//              disables caching for this call
//   output   - receives the bitcode (appended to output->Buf() on a cache hit)
//   options  - compiler options; all of them except "-include-pch" and the
//              element following it take part in the cache key
//   buildLog - receives cache-related diagnostics
// Returns false only when the compiler itself fails.
bool CacheCompilation::compileToLLVMBitcode(amd::opencl_driver::Compiler* C,
                                            std::vector<amd::opencl_driver::Data*>& inputs,
                                            amd::opencl_driver::Buffer* output,
                                            std::vector<std::string>& options,
                                            std::string& buildLog) {
  std::string cacheOpt;
  for (size_t i = 0; i < options.size(); i++) {
    // skip the header file option, which is associated with the -cl-std=<CLstd> option
    if (options[i].compare("-include-pch") == 0) {
      i++;  // together with the loop increment this also skips the next element
      continue;
    }
    cacheOpt += options[i];
  }

  bool cachedCodeExist = false;
  // Cleared when an input cannot be represented in the cache key.
  bool cacheKeyComplete = true;
  std::vector<StringCache::CachedData> bcSet;

  // Owns the text of every header read below. bcSet stores raw pointers into
  // these strings and is used up to the makeCacheEntry() call, so they must
  // outlive the input loop. The reserve() guarantees that push_back() never
  // reallocates and therefore never moves a string that is already referenced.
  std::vector<std::string> headerTexts;
  headerTexts.reserve(inputs.size());

  if (isCodeCacheEnabled_) {
    using namespace amd::opencl_driver;

    for (auto& input : inputs) {
      if (input->Type() == DT_CL) {
        BufferReference* bc = reinterpret_cast<BufferReference*>(input);
        StringCache::CachedData cachedData = {bc->Ptr(), bc->Size()};
        bcSet.push_back(cachedData);
      } else if (input->Type() == DT_CL_HEADER) {
        FileReference* bcFile = reinterpret_cast<FileReference*>(input);
        headerTexts.push_back(std::string());
        std::string& headerText = headerTexts.back();
        bcFile->ReadToString(headerText);
        StringCache::CachedData cachedData = {headerText.c_str(), headerText.size()};
        bcSet.push_back(cachedData);
      } else {
        buildLog += "Error: unsupported bitcode type for checking cache.\n";
        cacheKeyComplete = false;
        break;
      }
    }

    std::string dstData = "";
    if (cacheKeyComplete &&
        codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, dstData,
                                 "Compile to LLVM Bitcodes")) {
      std::copy(dstData.begin(), dstData.end(), std::back_inserter(output->Buf()));
      cachedCodeExist = true;
    }
  }

  if (!cachedCodeExist) {
    if (!C->CompileToLLVMBitcode(inputs, output, options)) {
      return false;
    }

    // Never store a result under a truncated key: a later compilation whose
    // inputs happen to equal the collected prefix would receive this output.
    if (isCodeCacheEnabled_ && cacheKeyComplete) {
      std::string dstData(output->Buf().data(), output->Buf().size());
      if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, dstData)) {
        buildLog += "Warning: Failed to caching codes.\n";
        LogWarning("Caching codes failed!");
      }
    }
  }

  return true;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Compiles and links LLVM bitcode inputs into an executable, going through
// the code cache when it is enabled. On a cache hit the cached bytes are
// appended to output->Buf() and the compiler is not invoked. On a miss the
// compiler runs and its output is stored in the cache; a failure to store
// only adds a warning to buildLog.
// Returns false only when the compiler step fails.
bool CacheCompilation::compileAndLinkExecutable(amd::opencl_driver::Compiler* C,
                                                std::vector<amd::opencl_driver::Data*>& inputs,
                                                amd::opencl_driver::Buffer* output,
                                                std::vector<std::string>& options,
                                                std::string& buildLog) {
  // The concatenated options are part of the cache key.
  std::string cacheOpt;
  for (const std::string& option : options) {
    cacheOpt += option;
  }

  std::vector<StringCache::CachedData> bcSet;
  if (isCodeCacheEnabled_) {
    // Every input buffer contributes to the cache key.
    for (auto& input : inputs) {
      assert(input->Type() == amd::opencl_driver::DT_LLVM_BC);

      amd::opencl_driver::Buffer* bitcode = (amd::opencl_driver::Buffer*)input;
      StringCache::CachedData keyPart = {bitcode->Buf().data(), bitcode->Size()};
      bcSet.push_back(keyPart);
    }

    std::string cached = "";
    if (codeCache_.getCacheEntry(isCodeCacheEnabled_, bcSet.data(), bcSet.size(), cacheOpt, cached,
                                 "Compile and Link Executable")) {
      // Cache hit: hand the stored result back and skip the compiler.
      std::copy(cached.begin(), cached.end(), std::back_inserter(output->Buf()));
      return true;
    }
  }

  if (!C->CompileAndLinkExecutable(inputs, output, options)) {
    return false;
  }

  if (isCodeCacheEnabled_) {
    std::string produced(output->Buf().data(), output->Buf().size());
    if (!codeCache_.makeCacheEntry(bcSet.data(), bcSet.size(), cacheOpt, produced)) {
      buildLog += "Warning: Failed to caching codes.\n";
      LogWarning("Caching codes failed!");
    }
  }

  return true;
}
|
2017-04-13 13:56:38 -04:00
|
|
|
#endif
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
} // namespace amd
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
namespace device {
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Sets every settings field to its default value.
Settings::Settings() {
  // extensions_ is used as a bit set, one bit per extension id.
  assert((ClExtTotal < (8 * sizeof(extensions_))) && "Too many extensions!");
  extensions_ = 0;

  // Features that start out enabled
  supportRA_ = true;

  // Features that start out disabled
  partialDispatch_ = false;
  customHostAllocator_ = false;
  supportDepthsRGB_ = false;
  enableHwDebug_ = false;

  waitCommand_ = AMD_OCL_WAIT_COMMAND;

  //!< Default for the maximum number of concurrent Virtual GPUs
  commandQueues_ = 200;
}
|
|
|
|
|
|
|
|
|
|
bool Kernel::createSignature(const parameters_t& params) {
|
|
|
|
|
std::stringstream attribs;
|
|
|
|
|
if (workGroupInfo_.compileSize_[0] != 0) {
|
|
|
|
|
attribs << "reqd_work_group_size(";
|
|
|
|
|
for (size_t i = 0; i < 3; ++i) {
|
|
|
|
|
if (i != 0) {
|
|
|
|
|
attribs << ",";
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
attribs << workGroupInfo_.compileSize_[i];
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
attribs << ")";
|
|
|
|
|
}
|
|
|
|
|
if (workGroupInfo_.compileSizeHint_[0] != 0) {
|
|
|
|
|
attribs << " work_group_size_hint(";
|
|
|
|
|
for (size_t i = 0; i < 3; ++i) {
|
|
|
|
|
if (i != 0) {
|
|
|
|
|
attribs << ",";
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
attribs << workGroupInfo_.compileSizeHint_[i];
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
attribs << ")";
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (!workGroupInfo_.compileVecTypeHint_.empty()) {
|
|
|
|
|
attribs << " vec_type_hint(" << workGroupInfo_.compileVecTypeHint_ << ")";
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Destroy old signature if it was allocated before
|
|
|
|
|
// (offline devices path)
|
|
|
|
|
delete signature_;
|
|
|
|
|
signature_ = new amd::KernelSignature(params, attribs.str());
|
|
|
|
|
if (NULL != signature_) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Frees the signature created by createSignature(), if any.
Kernel::~Kernel() {
  delete signature_;
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
std::string Kernel::openclMangledName(const std::string& name) {
|
|
|
|
|
const oclBIFSymbolStruct* bifSym = findBIF30SymStruct(symOpenclKernel);
|
|
|
|
|
assert(bifSym && "symbol not found");
|
|
|
|
|
return std::string("&") + bifSym->str[bif::PRE] + name + bifSym->str[bif::POST];
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Records a map operation on this memory object, keyed by mapAddress.
//   - A write map (CL_MAP_WRITE or CL_MAP_WRITE_INVALIDATE_REGION) stores the
//     origin/region/entire values and marks the record for write on unmap.
//   - A read map (CL_MAP_READ) marks the record for read on unmap.
// When mapAddress is already recorded, a warning is logged and the existing
// record is updated and its count increased. Otherwise a fresh record is
// filled in and inserted.
void Memory::saveMapInfo(const void* mapAddress, const amd::Coord3D origin,
                         const amd::Coord3D region, uint mapFlags, bool entire,
                         amd::Image* baseMip) {
  // Map/Unmap must be serialized.
  amd::ScopedLock lock(owner()->lockMemoryOps());

  WriteMapInfo fresh = {};
  auto existing = writeMapInfo_.find(mapAddress);
  const bool alreadyMapped = (existing != writeMapInfo_.end());
  if (alreadyMapped) {
    LogWarning("Double map of the same or overlapped region!");
  }
  // Work on the stored record when there is one, on the fresh copy otherwise.
  WriteMapInfo& record = alreadyMapped ? existing->second : fresh;

  const bool writeAccess = (mapFlags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) != 0;
  if (writeAccess) {
    record.origin_ = origin;
    record.region_ = region;
    record.entire_ = entire;
    record.unmapWrite_ = true;
  }

  const bool readAccess = (mapFlags & CL_MAP_READ) != 0;
  if (readAccess) {
    record.unmapRead_ = true;
  }

  record.baseMip_ = baseMip;

  // Insert into the map if it's the first region
  ++record.count_;
  if (record.count_ == 1) {
    writeMapInfo_.insert(std::pair<const void*, WriteMapInfo>(mapAddress, fresh));
  }
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Creates an empty device program bound to device: no type, no binary, no
// options, build status CL_BUILD_NONE and build error CL_SUCCESS.
Program::Program(amd::Device& device)
    : device_(device),
      type_(TYPE_NONE),
      // Binary state
      clBinary_(NULL),
      llvmBinary_(),
      elfSectionType_(amd::OclElf::LLVMIR),
      // Option strings
      compileOptions_(),
      linkOptions_(),
      lastBuildOptionsArg_(),
      // Build state
      buildStatus_(CL_BUILD_NONE),
      buildError_(CL_SUCCESS),
      globalVariableTotalSize_(0),
      programOptions(NULL) {
}
|
|
|
|
|
|
|
|
|
|
// Destroys all device kernels owned by the program.
Program::~Program() {
  clear();
}
|
|
|
|
|
|
|
|
|
|
// Deletes every kernel object stored in kernels_ and empties the container.
void Program::clear() {
  // Destroy all device kernels
  for (const auto& entry : kernels_) {
    delete entry.second;
  }
  kernels_.clear();
}
|
|
|
|
|
|
|
|
|
|
// Prepares the program for a build: remembers options, assigns a build number
// when dumping is requested, clears the build log and initialises the CL
// binary. Returns false when the binary initialisation fails.
bool Program::initBuild(amd::option::Options* options) {
  programOptions = options;

  const bool dumpingRequested = options->oVariables->DumpFlags > 0;
  if (dumpingRequested) {
    // Counter shared by all programs; each dumping build takes the next value.
    static amd::Atomic<unsigned> build_num = 0;
    options->setBuildNo(build_num++);
  }

  buildLog_.clear();

  const bool binaryReady = initClBinary();
  return binaryReady;
}
|
|
|
|
|
|
|
|
|
|
// Finishes a build pass. This implementation ignores |isBuildGood| and
// always reports success.
bool Program::finiBuild(bool isBuildGood) {
  return true;
}
|
|
|
|
|
|
|
|
|
|
// Compiles |sourceCode| for this device and marks the program TYPE_COMPILED.
//
// sourceCode         - program source; compileImpl() is skipped when empty.
// headers            - embedded header sources forwarded to compileImpl().
// headerIncludeNames - include names forwarded to compileImpl().
// origOptions        - option string exactly as passed by the caller (may be
//                      NULL); stored in lastBuildOptionsArg_.
// options            - parsed build options; must not be NULL.
//
// On failure buildStatus_ becomes CL_BUILD_ERROR and buildError_ is set to
// CL_COMPILE_PROGRAM_FAILURE. Returns buildError().
cl_int Program::compile(const std::string& sourceCode,
                        const std::vector<const std::string*>& headers,
                        const char** headerIncludeNames, const char* origOptions,
                        amd::option::Options* options) {
  // Every step below dereferences options unconditionally, so a null pointer
  // is a caller error rather than a supported configuration.
  assert(options != nullptr && "Program::compile() requires parsed build options");

  uint64_t start_time = 0;
  if (options->oVariables->EnableBuildTiming) {
    start_time = amd::Os::timeNanos();
  }

  lastBuildOptionsArg_ = origOptions ? origOptions : "";
  compileOptions_ = options->origOptionStr;

  buildStatus_ = CL_BUILD_IN_PROGRESS;
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  if (options->oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  // Compile the source code if any
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() &&
      !compileImpl(sourceCode, headers, headerIncludeNames, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation failed.";
    }
  }

  setType(TYPE_COMPILED);

  // NOTE(review): a createBinary() failure is only logged here; buildStatus_
  // is left untouched. Confirm that this is intentional.
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !createBinary(options)) {
    buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_COMPILE_PROGRAM_FAILURE;
  }

  if (options->oVariables->EnableBuildTiming) {
    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Compile Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    // initBuild() clears buildLog_, so a banner written before it would be
    // lost. Prepending it here keeps it at the top of the log without
    // disturbing the buildLog_.empty() fallbacks above.
    buildLog_.insert(0, "\nStart timing major build components.....\n\n");
    buildLog_ += tmp_ss.str();
  }

  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      // Any other value is treated as a file name prefix; the build number
      // is appended as the extension.
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options->optionsLog() + buildLog_;
      tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
|
|
|
|
|
|
|
|
|
// Links |inputPrograms| into this program.
//
// inputPrograms   - compiled programs / libraries to link.
// origLinkOptions - link option string exactly as passed by the caller (may
//                   be NULL); stored in lastBuildOptionsArg_.
// linkOptions     - parsed link options; may be NULL.
//
// The effective compile options are derived from the inputs through
// getCompileOptionsAtLinking() and parsed into a local Options object that
// drives the rest of the link. On failure buildStatus_ becomes
// CL_BUILD_ERROR and buildError_ is set to CL_LINK_PROGRAM_FAILURE.
// Returns buildError().
cl_int Program::link(const std::vector<Program*>& inputPrograms, const char* origLinkOptions,
                     amd::option::Options* linkOptions) {
  lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : "";
  if (linkOptions) {
    linkOptions_ = linkOptions->origOptionStr;
  }

  buildStatus_ = CL_BUILD_IN_PROGRESS;

  amd::option::Options options;
  if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Get compile options failed.";
    }
  } else if (!amd::option::parseAllOptions(compileOptions_, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ += options.optionsLog();
    LogError("Parsing compile options failed.");
  }

  uint64_t start_time = 0;
  if (options.oVariables->EnableBuildTiming) {
    // Prepend instead of assigning, so diagnostics already collected while
    // gathering the compile options are not discarded.
    buildLog_.insert(0, "\nStart timing major build components.....\n\n");
    start_time = amd::Os::timeNanos();
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    // initBuild() will clear buildLog_, so store it in a temporary variable
    // and append it again afterwards. This is done only when initBuild()
    // actually runs; otherwise the log would be appended to itself.
    const std::string tmpBuildLog = buildLog_;
    if (!initBuild(&options)) {
      buildStatus_ = CL_BUILD_ERROR;
      if (buildLog_.empty()) {
        buildLog_ += "Internal error: Compilation init failed.";
      }
    }
    buildLog_ += tmpBuildLog;
  }

  if (options.oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  bool createLibrary = linkOptions ? linkOptions->oVariables->clCreateLibrary : false;
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(inputPrograms, &options, createLibrary)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Link failed.\n";
      buildLog_ += "Make sure the system setup is correct.";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_LINK_PROGRAM_FAILURE;
  }

  if (options.oVariables->EnableBuildTiming) {
    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Link Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    buildLog_ += tmp_ss.str();
  }

  if (options.oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options.oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options.optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options.optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      // Any other value is treated as a file name prefix; the build number
      // is appended as the extension.
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options.optionsLog() + buildLog_;
      tmp_ss << options.oVariables->BuildLog << "." << options.getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
|
|
|
|
|
|
|
|
|
// Compiles (when |sourceCode| is not empty) and links the program in one step.
//
// sourceCode  - program source; compileImpl() is skipped when empty.
// origOptions - option string exactly as passed by the caller (may be NULL);
//               stored in lastBuildOptionsArg_.
// options     - parsed build options; must not be NULL.
//
// On failure buildStatus_ becomes CL_BUILD_ERROR and buildError_ is set to
// CL_BUILD_PROGRAM_FAILURE. Returns buildError().
cl_int Program::build(const std::string& sourceCode, const char* origOptions,
                      amd::option::Options* options) {
  // Every step below dereferences options unconditionally, so a null pointer
  // is a caller error rather than a supported configuration.
  assert(options != nullptr && "Program::build() requires parsed build options");

  uint64_t start_time = 0;
  if (options->oVariables->EnableBuildTiming) {
    start_time = amd::Os::timeNanos();
  }

  lastBuildOptionsArg_ = origOptions ? origOptions : "";
  compileOptions_ = options->origOptionStr;

  buildStatus_ = CL_BUILD_IN_PROGRESS;
  if (!initBuild(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation init failed.";
    }
  }

  if (options->oVariables->FP32RoundDivideSqrt &&
      !(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
    buildStatus_ = CL_BUILD_ERROR;
    buildLog_ +=
        "Error: -cl-fp32-correctly-rounded-divide-sqrt "
        "specified without device support";
  }

  // Compile the source code if any
  std::vector<const std::string*> headers;
  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !sourceCode.empty() &&
      !compileImpl(sourceCode, headers, NULL, options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation failed.";
    }
  }

  if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ += "Internal error: Link failed.\n";
      buildLog_ += "Make sure the system setup is correct.";
    }
  }

  if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
    buildStatus_ = CL_BUILD_ERROR;
    if (buildLog_.empty()) {
      buildLog_ = "Internal error: Compilation fini failed.";
    }
  }

  if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
    buildStatus_ = CL_BUILD_SUCCESS;
  } else {
    buildError_ = CL_BUILD_PROGRAM_FAILURE;
  }

  if (options->oVariables->EnableBuildTiming) {
    std::stringstream tmp_ss;
    tmp_ss << "\nTotal Build Time: " << (amd::Os::timeNanos() - start_time) / 1000ULL << " us\n";
    // initBuild() clears buildLog_, so a banner written before it would be
    // lost. Prepending it here keeps it at the top of the log without
    // disturbing the buildLog_.empty() fallbacks above.
    buildLog_.insert(0, "\nStart timing major build components.....\n\n");
    buildLog_ += tmp_ss.str();
  }

  if (options->oVariables->BuildLog && !buildLog_.empty()) {
    if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
      fprintf(stderr, "%s\n", options->optionsLog().c_str());
      fprintf(stderr, "%s\n", buildLog_.c_str());
    } else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
      printf("%s\n", options->optionsLog().c_str());
      printf("%s\n", buildLog_.c_str());
    } else {
      // Any other value is treated as a file name prefix; the build number
      // is appended as the extension.
      std::fstream f;
      std::stringstream tmp_ss;
      std::string logs = options->optionsLog() + buildLog_;
      tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
      f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
      f.write(logs.data(), logs.size());
      f.close();
    }
  }

  if (!buildLog_.empty()) {
    LogError(buildLog_.c_str());
  }

  return buildError();
}
|
|
|
|
|
|
|
|
|
|
// Derives the compile options used for linking from the input programs.
//
// The compile options of the first input become compileOptions_. When link
// options are present and no library is being created, they may override an
// input's compile options; in that case the link option string is also
// appended to compileOptions_ for the first input. Inputs whose options
// differ from the first one only add a warning to the build log.
//
// inputPrograms - programs being linked.
// linkOptions   - parsed link options; may be NULL, in which case no
//                 override is attempted.
//
// Returns false (with details appended to buildLog_) when any option string
// fails to parse or the link options cannot be applied.
bool Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
                                         const amd::option::Options* linkOptions) {
  amd::option::Options compileOptions;
  bool isFirst = true;
  for (Program* program : inputPrograms) {
    // The first input parses into compileOptions (the reference set); every
    // later input parses into a scratch object that is compared against it.
    amd::option::Options compileOptions2;
    amd::option::Options* thisCompileOptions = isFirst ? &compileOptions : &compileOptions2;
    if (!amd::option::parseAllOptions(program->compileOptions_, *thisCompileOptions)) {
      buildLog_ += thisCompileOptions->optionsLog();
      LogError("Parsing compile options failed.");
      return false;
    }

    if (isFirst) compileOptions_ = program->compileOptions_;

    // if we are linking a program executable, and if "program" is a
    // compiled module or a library created with "-enable-link-options",
    // we can overwrite "program"'s compile options with linking options.
    // link() accepts a null linkOptions, so guard the dereference.
    if ((linkOptions != nullptr) && !linkOptions_.empty() &&
        !linkOptions->oVariables->clCreateLibrary) {
      bool linkOptsCanOverwrite = false;
      if (program->type() != TYPE_LIBRARY) {
        linkOptsCanOverwrite = true;
      } else {
        amd::option::Options thisLinkOptions;
        if (!amd::option::parseLinkOptions(program->linkOptions_, thisLinkOptions)) {
          buildLog_ += thisLinkOptions.optionsLog();
          LogError("Parsing link options failed.");
          return false;
        }
        if (thisLinkOptions.oVariables->clEnableLinkOptions) linkOptsCanOverwrite = true;
      }
      if (linkOptsCanOverwrite) {
        if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) {
          buildLog_ += thisCompileOptions->optionsLog();
          LogError("Setting link options failed.");
          return false;
        }
      }
      if (isFirst) compileOptions_ += " " + linkOptions_;
    }

    // warn if input modules have inconsistent compile options
    if (!isFirst) {
      if (!compileOptions.equals(*thisCompileOptions, true /*ignore clc options*/)) {
        buildLog_ +=
            "Warning: Input OpenCL binaries has inconsistent"
            " compile options. Using compile options from"
            " the first input binary!\n";
      }
    }

    isFirst = false;
  }
  return true;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool Program::initClBinary(char* binaryIn, size_t size) {
|
|
|
|
|
if (!initClBinary()) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Save the original binary that isn't owned by ClBinary
|
|
|
|
|
clBinary()->saveOrigBinary(binaryIn, size);
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
char* bin = binaryIn;
|
|
|
|
|
size_t sz = size;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// unencrypted
|
|
|
|
|
int encryptCode = 0;
|
|
|
|
|
char* decryptedBin = NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2016-08-03 15:30:13 -04:00
|
|
|
#if !defined(WITH_LIGHTNING_COMPILER)
|
2017-04-13 13:56:38 -04:00
|
|
|
bool isSPIRV = isSPIRVMagic(binaryIn, size);
|
|
|
|
|
if (isSPIRV || isBcMagic(binaryIn)) {
|
|
|
|
|
acl_error err = ACL_SUCCESS;
|
|
|
|
|
aclBinaryOptions binOpts = {0};
|
|
|
|
|
binOpts.struct_size = sizeof(binOpts);
|
|
|
|
|
binOpts.elfclass =
|
|
|
|
|
(info().arch_id == aclX64 || info().arch_id == aclAMDIL64 || info().arch_id == aclHSAIL64)
|
|
|
|
|
? ELFCLASS64
|
|
|
|
|
: ELFCLASS32;
|
|
|
|
|
binOpts.bitness = ELFDATA2LSB;
|
|
|
|
|
binOpts.alloc = &::malloc;
|
|
|
|
|
binOpts.dealloc = &::free;
|
|
|
|
|
aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err);
|
|
|
|
|
if (err != ACL_SUCCESS) {
|
|
|
|
|
LogWarning("aclBinaryInit failed");
|
|
|
|
|
aclBinaryFini(aclbin_v30);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size,
|
|
|
|
|
isSPIRV ? aclSPIRV : aclSPIR);
|
|
|
|
|
if (ACL_SUCCESS != err) {
|
|
|
|
|
LogWarning("aclInsertSection failed");
|
|
|
|
|
aclBinaryFini(aclbin_v30);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (info().arch_id == aclHSAIL || info().arch_id == aclHSAIL64) {
|
|
|
|
|
err = aclWriteToMem(aclbin_v30, reinterpret_cast<void**>(&bin), &sz);
|
|
|
|
|
if (err != ACL_SUCCESS) {
|
|
|
|
|
LogWarning("aclWriteToMem failed");
|
|
|
|
|
aclBinaryFini(aclbin_v30);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
aclBinaryFini(aclbin_v30);
|
|
|
|
|
} else {
|
|
|
|
|
aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30, aclBIFVersion21);
|
|
|
|
|
err = aclWriteToMem(aclbin_v21, reinterpret_cast<void**>(&bin), &sz);
|
|
|
|
|
if (err != ACL_SUCCESS) {
|
|
|
|
|
LogWarning("aclWriteToMem failed");
|
|
|
|
|
aclBinaryFini(aclbin_v30);
|
|
|
|
|
aclBinaryFini(aclbin_v21);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
aclBinaryFini(aclbin_v30);
|
|
|
|
|
aclBinaryFini(aclbin_v21);
|
|
|
|
|
}
|
|
|
|
|
} else
|
|
|
|
|
#endif // defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
|
{
|
|
|
|
|
size_t decryptedSize;
|
|
|
|
|
if (!clBinary()->decryptElf(binaryIn, size, &decryptedBin, &decryptedSize, &encryptCode)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (decryptedBin != NULL) {
|
|
|
|
|
// It is decrypted binary.
|
|
|
|
|
bin = decryptedBin;
|
|
|
|
|
sz = decryptedSize;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!isElf(bin)) {
|
|
|
|
|
// Invalid binary.
|
|
|
|
|
if (decryptedBin != NULL) {
|
|
|
|
|
delete[] decryptedBin;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
clBinary()->setFlags(encryptCode);
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return clBinary()->setBinary(bin, sz, (decryptedBin != NULL));
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool Program::setBinary(char* binaryIn, size_t size) {
|
|
|
|
|
if (!initClBinary(binaryIn, size)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2016-08-15 18:51:49 -04:00
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
2017-04-13 13:56:38 -04:00
|
|
|
if (!clBinary()->setElfIn(ELFCLASS64)) {
|
|
|
|
|
#else // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
|
if (!clBinary()->setElfIn(ELFCLASS32)) {
|
|
|
|
|
#endif // !defined(WITH_LIGHTNING_COMPILER)
|
|
|
|
|
LogError("Setting input OCL binary failed");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
uint16_t type;
|
|
|
|
|
if (!clBinary()->elfIn()->getType(type)) {
|
|
|
|
|
LogError("Bad OCL Binary: error loading ELF type!");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
switch (type) {
|
|
|
|
|
case ET_NONE: {
|
|
|
|
|
setType(TYPE_NONE);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case ET_REL: {
|
|
|
|
|
if (clBinary()->isSPIR() || clBinary()->isSPIRV()) {
|
|
|
|
|
setType(TYPE_INTERMEDIATE);
|
|
|
|
|
} else {
|
|
|
|
|
setType(TYPE_COMPILED);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
case ET_DYN: {
|
|
|
|
|
setType(TYPE_LIBRARY);
|
|
|
|
|
break;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
case ET_EXEC: {
|
|
|
|
|
setType(TYPE_EXECUTABLE);
|
|
|
|
|
break;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
default:
|
|
|
|
|
LogError("Bad OCL Binary: bad ELF type!");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
clBinary()->loadCompileOptions(compileOptions_);
|
|
|
|
|
clBinary()->loadLinkOptions(linkOptions_);
|
2016-09-13 16:16:35 -04:00
|
|
|
#if defined(WITH_LIGHTNING_COMPILER)
|
2017-04-13 13:56:38 -04:00
|
|
|
// TODO: Remove this once BIF is no longer used as we should have a machinasm in
|
|
|
|
|
// place to get the binary type correctly from above.
|
|
|
|
|
// It is a workaround for executable build from the library. The code object
|
|
|
|
|
// binary does not have the type information.
|
|
|
|
|
|
|
|
|
|
char* sect = NULL;
|
|
|
|
|
size_t sz = 0;
|
|
|
|
|
if (clBinary()->elfIn()->getSection(amd::OclElf::TEXT, §, &sz) && sect && sz > 0) {
|
|
|
|
|
setType(TYPE_EXECUTABLE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sect = NULL;
|
|
|
|
|
sz = 0;
|
|
|
|
|
if (type != ET_DYN && // binary is not a library
|
|
|
|
|
(clBinary()->elfIn()->getSection(amd::OclElf::LLVMIR, §, &sz) && sect && sz > 0)) {
|
|
|
|
|
setType(TYPE_COMPILED);
|
|
|
|
|
}
|
2016-09-13 16:16:35 -04:00
|
|
|
|
|
|
|
|
#endif
|
2017-04-13 13:56:38 -04:00
|
|
|
clBinary()->resetElfIn();
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Serializes |bin| and stores a copy of the image in the binary container.
//
// Must not be called in Lightning Compiler builds, where it asserts and
// returns false. Otherwise returns false when aclWriteToMem() fails.
bool Program::createBIFBinary(aclBinary* bin) {
#if defined(WITH_LIGHTNING_COMPILER)
  assert(!"createBIFBinary() should not be called when using LC");
  return false;
#else   // defined(WITH_LIGHTNING_COMPILER)
  char* serialized = nullptr;
  size_t serializedSize = 0;
  const acl_error status =
      aclWriteToMem(bin, reinterpret_cast<void**>(&serialized), &serializedSize);
  if (status != ACL_SUCCESS) {
    LogWarning("aclWriteToMem failed");
    return false;
  }

  // saveBIFBinary() keeps its own copy, so the serialized buffer is handed
  // back to the compiler library right afterwards.
  clBinary()->saveBIFBinary(serialized, serializedSize);
  aclFreeMem(bin, serialized);
  return true;
#endif  // defined(WITH_LIGHTNING_COMPILER)
}
|
|
|
|
|
|
|
|
|
|
// Creates an empty binary container for |dev| that uses the BIF format
// |bifVer|. No image is attached, all flags are cleared and no ELF
// reader/writer exists yet.
ClBinary::ClBinary(const amd::Device& dev, BinaryImageFormat bifVer)
    : dev_(dev),
      binary_(nullptr),
      size_(0),
      flags_(0),
      origBinary_(nullptr),
      origSize_(0),
      encryptCode_(0),
      elfIn_(nullptr),
      elfOut_(nullptr),
      format_(bifVer) {}
|
|
|
|
|
|
|
|
|
|
// Releases the binary image (when owned) and destroys the ELF reader and
// writer objects.
ClBinary::~ClBinary() {
  release();

  // Deleting a null pointer is a no-op, so no explicit checks are needed.
  delete elfIn_;
  delete elfOut_;
}
|
|
|
|
|
|
|
|
|
|
std::string ClBinary::getBIFSymbol(unsigned int symbolID) const {
|
|
|
|
|
size_t nSymbols = 0;
|
|
|
|
|
// Due to PRE & POST defines in bif_section_labels.hpp conflict with
|
|
|
|
|
// PRE & POST struct members in sp3-si-chip-registers.h
|
|
|
|
|
// unable to include bif_section_labels.hpp in device.hpp
|
|
|
|
|
//! @todo: resolve conflict by renaming defines,
|
|
|
|
|
// then include bif_section_labels.hpp in device.hpp &
|
|
|
|
|
// use oclBIFSymbolID instead of unsigned int as a parameter
|
|
|
|
|
const oclBIFSymbolID symID = static_cast<oclBIFSymbolID>(symbolID);
|
|
|
|
|
switch (format_) {
|
2014-07-04 16:17:05 -04:00
|
|
|
case BIF_VERSION2: {
|
2017-04-13 13:56:38 -04:00
|
|
|
nSymbols = sizeof(BIF20) / sizeof(oclBIFSymbolStruct);
|
|
|
|
|
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID);
|
|
|
|
|
assert(symb && "BIF20 symbol with symbolID not found");
|
|
|
|
|
if (symb) {
|
|
|
|
|
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
case BIF_VERSION3: {
|
2017-04-13 13:56:38 -04:00
|
|
|
nSymbols = sizeof(BIF30) / sizeof(oclBIFSymbolStruct);
|
|
|
|
|
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID);
|
|
|
|
|
assert(symb && "BIF30 symbol with symbolID not found");
|
|
|
|
|
if (symb) {
|
|
|
|
|
return std::string(symb->str[bif::PRE]) + std::string(symb->str[bif::POST]);
|
|
|
|
|
}
|
|
|
|
|
break;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
default:
|
2017-04-13 13:56:38 -04:00
|
|
|
assert(0 && "unexpected BIF type");
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
return "";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Configures which sections (source, LLVM IR, AMDIL, ISA, assembly) are
// saved in the binary, based on the -fbin style option variables.
//
// A section whose flag field already holds its "remove" value is left
// untouched; otherwise the field is rewritten as "save" or "no save".
//
// optionsObj    - parsed options supplying BinSOURCE, BinLLVMIR, BinAMDIL,
//                 BinEXE and BinAS.
// amdilRequired - forces AMDIL to be saved, even over a "remove" value.
void ClBinary::init(amd::option::Options* optionsObj, bool amdilRequired) {
  // option has higher priority than environment variable.
  if ((flags_ & BinarySourceMask) != BinaryRemoveSource) {
    flags_ &= ~BinarySourceMask;
    flags_ |= (optionsObj->oVariables->BinSOURCE ? BinarySaveSource : BinaryNoSaveSource);
  }

  if ((flags_ & BinaryLlvmirMask) != BinaryRemoveLlvmir) {
    flags_ &= ~BinaryLlvmirMask;
    flags_ |= (optionsObj->oVariables->BinLLVMIR ? BinarySaveLlvmir : BinaryNoSaveLlvmir);
  }

  // If amdilRequired is true, force to save AMDIL (for correctness)
  if (amdilRequired || (flags_ & BinaryAmdilMask) != BinaryRemoveAmdil) {
    const bool saveAmdil = optionsObj->oVariables->BinAMDIL || amdilRequired;
    flags_ &= ~BinaryAmdilMask;
    flags_ |= (saveAmdil ? BinarySaveAmdil : BinaryNoSaveAmdil);
  }

  if ((flags_ & BinaryIsaMask) != BinaryRemoveIsa) {
    flags_ &= ~BinaryIsaMask;
    flags_ |= ((optionsObj->oVariables->BinEXE) ? BinarySaveIsa : BinaryNoSaveIsa);
  }

  if ((flags_ & BinaryASMask) != BinaryRemoveAS) {
    flags_ &= ~BinaryASMask;
    flags_ |= ((optionsObj->oVariables->BinAS) ? BinarySaveAS : BinaryNoSaveAS);
  }
}
|
|
|
|
|
|
|
|
|
|
bool ClBinary::isRecompilable(std::string& llvmBinary, amd::OclElf::oclElfPlatform thePlatform) {
|
|
|
|
|
/* It is recompilable if there is llvmir that was generated for
|
|
|
|
|
the same platform (CPU or GPU) and with the same bitness.
|
|
|
|
|
|
|
|
|
|
Note: the bitness has been checked in initClBinary(), no need
|
|
|
|
|
to check it here.
|
|
|
|
|
*/
|
|
|
|
|
if (llvmBinary.empty()) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
uint16_t elf_target;
|
|
|
|
|
amd::OclElf::oclElfPlatform platform;
|
|
|
|
|
if (elfIn()->getTarget(elf_target, platform)) {
|
|
|
|
|
if (platform == thePlatform) {
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
if ((platform == amd::OclElf::COMPLIB_PLATFORM) &&
|
|
|
|
|
(((thePlatform == amd::OclElf::CAL_PLATFORM) &&
|
|
|
|
|
((elf_target == (uint16_t)EM_AMDIL) || (elf_target == (uint16_t)EM_HSAIL) ||
|
|
|
|
|
(elf_target == (uint16_t)EM_HSAIL_64))) ||
|
|
|
|
|
((thePlatform == amd::OclElf::CPU_PLATFORM) &&
|
|
|
|
|
((elf_target == (uint16_t)EM_386) || (elf_target == (uint16_t)EM_X86_64))))) {
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void ClBinary::release() {
|
|
|
|
|
if (isBinaryAllocated() && (binary_ != NULL)) {
|
|
|
|
|
delete[] binary_;
|
|
|
|
|
binary_ = NULL;
|
|
|
|
|
flags_ &= ~BinaryAllocated;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void ClBinary::saveBIFBinary(char* binaryIn, size_t size) {
|
|
|
|
|
char* image = new char[size];
|
|
|
|
|
memcpy(image, binaryIn, size);
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
setBinary(image, size, true);
|
|
|
|
|
return;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::createElfBinary(bool doencrypt, Program::type_t type) {
|
2014-07-04 16:17:05 -04:00
|
|
|
#if 0
|
|
|
|
|
if (!saveISA() && !saveAMDIL() && !saveLLVMIR() && !saveSOURCE()) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2017-04-13 13:56:38 -04:00
|
|
|
release();
|
|
|
|
|
|
|
|
|
|
size_t imageSize;
|
|
|
|
|
char* image;
|
|
|
|
|
assert(elfOut_ && "elfOut_ should be initialized in ClBinary::data()");
|
|
|
|
|
|
|
|
|
|
// Insert Version string that builds this binary into .comment section
|
|
|
|
|
const device::Info& devInfo = dev_.info();
|
|
|
|
|
std::string buildVerInfo("@(#) ");
|
|
|
|
|
if (devInfo.version_ != NULL) {
|
|
|
|
|
buildVerInfo.append(devInfo.version_);
|
|
|
|
|
buildVerInfo.append(". Driver version: ");
|
|
|
|
|
buildVerInfo.append(devInfo.driverVersion_);
|
|
|
|
|
} else {
|
|
|
|
|
// char OpenCLVersion[256];
|
|
|
|
|
// size_t sz;
|
|
|
|
|
// cl_int ret= clGetPlatformInfo(AMD_PLATFORM, CL_PLATFORM_VERSION, 256, OpenCLVersion, &sz);
|
|
|
|
|
// if (ret == CL_SUCCESS) {
|
|
|
|
|
// buildVerInfo.append(OpenCLVersion, sz);
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// If CAL is unavailable, just hard-code the OpenCL driver version
|
|
|
|
|
buildVerInfo.append("OpenCL 1.1" AMD_PLATFORM_INFO);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
elfOut_->addSection(amd::OclElf::COMMENT, buildVerInfo.data(), buildVerInfo.size());
|
|
|
|
|
switch (type) {
|
|
|
|
|
case Program::TYPE_NONE: {
|
|
|
|
|
elfOut_->setType(ET_NONE);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case Program::TYPE_COMPILED: {
|
|
|
|
|
elfOut_->setType(ET_REL);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case Program::TYPE_LIBRARY: {
|
|
|
|
|
elfOut_->setType(ET_DYN);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case Program::TYPE_EXECUTABLE: {
|
|
|
|
|
elfOut_->setType(ET_EXEC);
|
|
|
|
|
break;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
default:
|
|
|
|
|
assert(0 && "unexpected elf type");
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (!elfOut_->dumpImage(&image, &imageSize)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
#if defined(HAVE_BLOWFISH_H)
|
2017-04-13 13:56:38 -04:00
|
|
|
if (doencrypt) {
|
|
|
|
|
// Increase the size by 64 to accomodate extra headers
|
|
|
|
|
int outBufSize = (int)(imageSize + 64);
|
|
|
|
|
char* outBuf = new char[outBufSize];
|
|
|
|
|
if (outBuf == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
memset(outBuf, '\0', outBufSize);
|
|
|
|
|
|
|
|
|
|
int outBytes = 0;
|
|
|
|
|
bool success = amd::oclEncrypt(0, image, imageSize, outBuf, outBufSize, &outBytes);
|
|
|
|
|
delete[] image;
|
|
|
|
|
if (!success) {
|
|
|
|
|
delete[] outBuf;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
image = outBuf;
|
|
|
|
|
imageSize = outBytes;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
#endif
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
setBinary(image, imageSize, true);
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Returns the current binary image as a (pointer, size) pair.
Program::binary_t ClBinary::data() const {
  return std::make_pair(binary_, size_);
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::setBinary(char* theBinary, size_t theBinarySize, bool allocated) {
|
|
|
|
|
release();
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
size_ = theBinarySize;
|
|
|
|
|
binary_ = theBinary;
|
|
|
|
|
if (allocated) {
|
|
|
|
|
flags_ |= BinaryAllocated;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void ClBinary::setFlags(int encryptCode) {
|
|
|
|
|
encryptCode_ = encryptCode;
|
|
|
|
|
if (encryptCode != 0) {
|
|
|
|
|
flags_ =
|
|
|
|
|
(flags_ &
|
|
|
|
|
(~(BinarySourceMask | BinaryLlvmirMask | BinaryAmdilMask | BinaryIsaMask | BinaryASMask)));
|
|
|
|
|
flags_ |= (BinaryRemoveSource | BinaryRemoveLlvmir | BinaryRemoveAmdil | BinarySaveIsa |
|
|
|
|
|
BinaryRemoveAS);
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Decrypts |binaryIn| when it is an encrypted BIF image.
//
// binaryIn    - input image.
// size        - size of |binaryIn| in bytes.
// decryptBin  - receives a new[]-allocated plain image, or NULL when the
//               input was not encrypted (or Blowfish support is compiled out).
// decryptSize - receives the plain image size; written only on decryption.
// encryptCode - set to 1 on decryption; otherwise left untouched.
//
// Returns false only when the buffer cannot be allocated or decryption fails.
bool ClBinary::decryptElf(char* binaryIn, size_t size, char** decryptBin, size_t* decryptSize,
                          int* encryptCode) {
  *decryptBin = nullptr;

#if defined(HAVE_BLOWFISH_H)
  int plainCapacity = 0;
  if (amd::isEncryptedBIF(binaryIn, (int)size, &plainCapacity)) {
    char* plain = new (std::nothrow) char[plainCapacity];
    if (plain == nullptr) {
      return false;
    }

    // Decrypt
    int plainSize = 0;
    const bool decrypted = amd::oclDecrypt(binaryIn, (int)size, plain, plainCapacity, &plainSize);
    if (!decrypted) {
      delete[] plain;
      return false;
    }

    *decryptBin = plain;
    *decryptSize = plainSize;
    *encryptCode = 1;
  }
#endif
  return true;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::setElfIn(unsigned char eclass) {
|
|
|
|
|
if (elfIn_) return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (binary_ == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
elfIn_ = new amd::OclElf(eclass, binary_, size_, NULL, ELF_C_READ);
|
|
|
|
|
if ((elfIn_ == NULL) || elfIn_->hasError()) {
|
|
|
|
|
if (elfIn_) {
|
|
|
|
|
delete elfIn_;
|
|
|
|
|
elfIn_ = NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
LogError("Creating input ELF object failed");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void ClBinary::resetElfIn() {
|
|
|
|
|
if (elfIn_) {
|
|
|
|
|
delete elfIn_;
|
|
|
|
|
elfIn_ = NULL;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::setElfOut(unsigned char eclass, const char* outFile) {
|
|
|
|
|
elfOut_ = new amd::OclElf(eclass, NULL, 0, outFile, ELF_C_WRITE);
|
|
|
|
|
if ((elfOut_ == NULL) || elfOut_->hasError()) {
|
|
|
|
|
if (elfOut_) {
|
|
|
|
|
delete elfOut_;
|
|
|
|
|
elfOut_ = NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
LogError("Creating ouput ELF object failed");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return setElfTarget();
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void ClBinary::resetElfOut() {
|
|
|
|
|
if (elfOut_) {
|
|
|
|
|
delete elfOut_;
|
|
|
|
|
elfOut_ = NULL;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::loadLlvmBinary(std::string& llvmBinary,
|
|
|
|
|
amd::OclElf::oclElfSections& elfSectionType) const {
|
|
|
|
|
// Check if current binary already has LLVMIR
|
|
|
|
|
char* section = NULL;
|
|
|
|
|
size_t sz = 0;
|
|
|
|
|
const amd::OclElf::oclElfSections SectionTypes[] = {amd::OclElf::LLVMIR, amd::OclElf::SPIR,
|
|
|
|
|
amd::OclElf::SPIRV};
|
2015-12-15 16:09:35 -05:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
for (int i = 0; i < 3; ++i) {
|
|
|
|
|
if (elfIn_->getSection(SectionTypes[i], §ion, &sz) && section && sz > 0) {
|
|
|
|
|
llvmBinary.append(section, sz);
|
|
|
|
|
elfSectionType = SectionTypes[i];
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::loadCompileOptions(std::string& compileOptions) const {
|
|
|
|
|
char* options = NULL;
|
|
|
|
|
size_t sz;
|
|
|
|
|
compileOptions.clear();
|
|
|
|
|
if (elfIn_->getSymbol(amd::OclElf::COMMENT, getBIFSymbol(symOpenclCompilerOptions).c_str(),
|
|
|
|
|
&options, &sz)) {
|
|
|
|
|
if (sz > 0) {
|
|
|
|
|
compileOptions.append(options, sz);
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::loadLinkOptions(std::string& linkOptions) const {
|
|
|
|
|
char* options = NULL;
|
|
|
|
|
size_t sz;
|
|
|
|
|
linkOptions.clear();
|
|
|
|
|
if (elfIn_->getSymbol(amd::OclElf::COMMENT, getBIFSymbol(symOpenclLinkerOptions).c_str(),
|
|
|
|
|
&options, &sz)) {
|
|
|
|
|
if (sz > 0) {
|
|
|
|
|
linkOptions.append(options, sz);
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Writes compileOptions into the COMMENT section of the output ELF under the
// compiler-options BIF symbol name.
void ClBinary::storeCompileOptions(const std::string& compileOptions) {
  const std::string& symbolName = getBIFSymbol(symOpenclCompilerOptions);
  elfOut()->addSymbol(amd::OclElf::COMMENT, symbolName.c_str(), compileOptions.c_str(),
                      compileOptions.size());
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Writes linkOptions into the COMMENT section of the output ELF under the
// linker-options BIF symbol name.
void ClBinary::storeLinkOptions(const std::string& linkOptions) {
  const std::string& symbolName = getBIFSymbol(symOpenclLinkerOptions);
  elfOut()->addSymbol(amd::OclElf::COMMENT, symbolName.c_str(), linkOptions.c_str(),
                      linkOptions.size());
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::isSPIR() const {
|
|
|
|
|
char* section = NULL;
|
|
|
|
|
size_t sz = 0;
|
|
|
|
|
if (elfIn_->getSection(amd::OclElf::LLVMIR, §ion, &sz) && section && sz > 0) return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (elfIn_->getSection(amd::OclElf::SPIR, §ion, &sz) && section && sz > 0) return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return false;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool ClBinary::isSPIRV() const {
|
|
|
|
|
char* section = NULL;
|
|
|
|
|
size_t sz = 0;
|
2015-12-15 16:09:35 -05:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (elfIn_->getSection(amd::OclElf::SPIRV, §ion, &sz) && section && sz > 0) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2015-12-15 16:09:35 -05:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Maps this partition type to its OpenCL constant by indexing a fixed table
// with amd::leastBitSet(value_).
// NOTE(review): presumably value_ has one of its three lowest bits set; any
// other index would read past the table -- confirm against callers.
cl_device_partition_property PartitionType::toCL() const {
  static const cl_device_partition_property kClPartitionTypes[] = {
      CL_DEVICE_PARTITION_EQUALLY,             // index 0
      CL_DEVICE_PARTITION_BY_COUNTS,           // index 1
      CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN,  // index 2
  };
  return kClPartitionTypes[amd::leastBitSet(value_)];
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Writes the OpenCL constant of every enabled partition type into `types`, in
// the order equally, by-counts, by-affinity-domain, and returns how many
// entries were written. `types` must have room for every enabled type.
size_t PartitionType::toCL(cl_device_partition_property* types) const {
  cl_device_partition_property* cursor = types;

  if (equally_) *cursor++ = CL_DEVICE_PARTITION_EQUALLY;
  if (byCounts_) *cursor++ = CL_DEVICE_PARTITION_BY_COUNTS;
  if (byAffinityDomain_) *cursor++ = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;

  return static_cast<size_t>(cursor - types);
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Returns value_ converted directly to cl_device_affinity_domain.
cl_device_affinity_domain AffinityDomain::toCL() const {
  return static_cast<cl_device_affinity_domain>(value_);
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
#ifdef cl_ext_device_fission
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Maps this partition type to its cl_ext_device_fission constant by indexing a
// fixed table with amd::leastBitSet(value_).
// NOTE(review): presumably value_ has one of its three lowest bits set; any
// other index would read past the table -- confirm against callers.
cl_device_partition_property_ext PartitionType::toCLExt() const {
  static const cl_device_partition_property_ext kExtPartitionTypes[] = {
      CL_DEVICE_PARTITION_EQUALLY_EXT,             // index 0
      CL_DEVICE_PARTITION_BY_COUNTS_EXT,           // index 1
      CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT,  // index 2
  };
  return kExtPartitionTypes[amd::leastBitSet(value_)];
}
|
|
|
|
|
|
|
|
|
|
// Writes the cl_ext_device_fission constant of every enabled partition type
// into `types`, in the order equally, by-counts, by-affinity-domain, and
// returns how many entries were written. `types` must have room for every
// enabled type.
size_t PartitionType::toCLExt(cl_device_partition_property_ext* types) const {
  cl_device_partition_property_ext* cursor = types;

  if (equally_) *cursor++ = CL_DEVICE_PARTITION_EQUALLY_EXT;
  if (byCounts_) *cursor++ = CL_DEVICE_PARTITION_BY_COUNTS_EXT;
  if (byAffinityDomain_) *cursor++ = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT;

  return static_cast<size_t>(cursor - types);
}
|
|
|
|
|
|
|
|
|
|
// Maps this affinity domain to its cl_ext_device_fission constant by indexing a
// fixed table with amd::leastBitSet(value_).
// NOTE(review): presumably value_ has one of its six lowest bits set; any
// other index would read past the table -- confirm against callers.
cl_device_partition_property_ext AffinityDomain::toCLExt() const {
  static const cl_device_partition_property_ext kExtAffinityDomains[] = {
      CL_AFFINITY_DOMAIN_NUMA_EXT,              // index 0
      CL_AFFINITY_DOMAIN_L4_CACHE_EXT,          // index 1
      CL_AFFINITY_DOMAIN_L3_CACHE_EXT,          // index 2
      CL_AFFINITY_DOMAIN_L2_CACHE_EXT,          // index 3
      CL_AFFINITY_DOMAIN_L1_CACHE_EXT,          // index 4
      CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT,  // index 5
  };
  return kExtAffinityDomains[amd::leastBitSet(value_)];
}
|
|
|
|
|
|
|
|
|
|
// Writes the cl_ext_device_fission constant of every enabled affinity domain
// into `affinities`, in the order NUMA, L4, L3, L2, L1, next-fissionable, and
// returns how many entries were written. `affinities` must have room for every
// enabled domain.
size_t AffinityDomain::toCLExt(cl_device_partition_property_ext* affinities) const {
  struct DomainEntry {
    bool enabled;
    cl_device_partition_property_ext domain;
  };
  // Snapshot of the per-domain flags alongside their constants, in output order.
  const DomainEntry entries[] = {
      {numa_ != 0, CL_AFFINITY_DOMAIN_NUMA_EXT},
      {cacheL4_ != 0, CL_AFFINITY_DOMAIN_L4_CACHE_EXT},
      {cacheL3_ != 0, CL_AFFINITY_DOMAIN_L3_CACHE_EXT},
      {cacheL2_ != 0, CL_AFFINITY_DOMAIN_L2_CACHE_EXT},
      {cacheL1_ != 0, CL_AFFINITY_DOMAIN_L1_CACHE_EXT},
      {next_ != 0, CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT},
  };

  size_t written = 0;
  for (size_t k = 0; k < sizeof(entries) / sizeof(entries[0]); ++k) {
    if (entries[k].enabled) {
      affinities[written++] = entries[k].domain;
    }
  }
  return written;
}
|
|
|
|
|
|
|
|
|
|
#endif // cl_ext_device_fission
|
|
|
|
|
|
|
|
|
|
} // namespace device
|