Files
rocm-systems/rocclr/runtime/device/device.cpp
T
foreman 16f8ca9aae P4 to Git Change 1079952 by yaxunl@yaxunl_stg_win50 on 2014/09/23 12:31:16
ECR #377625 - Workaround for Blender performance issue. Lower available VGPRs to improve waves per CU.

	Added BuildOptsAppend to OCL app profile.
	Read BuildOptsAppend and append to build options.
	Added specific wave optimization option for Blender.

Affected files ...

... //depot/stg/opencl/drivers/opencl/appprofiles/oclappprofile.xml#7 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/scwrapper/SI/scCompileSI.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#170 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#230 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#63 edit
2014-09-23 12:44:50 -04:00

1698 строки
46 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/device.hpp"
#include "thread/atomic.hpp"
#include "thread/monitor.hpp"
#if defined(WITH_HSA_DEVICE)
#include "device/hsa/hsadevice.hpp"
extern amd::AppProfile* oclhsaCreateAppProfile();
#endif
#if defined(WITH_CPU_DEVICE)
#include "device/cpu/cpudevice.hpp"
#endif // WITH_CPU_DEVICE
#if defined(WITH_GPU_DEVICE)
extern bool DeviceLoad();
extern void DeviceUnload();
#endif // WITH_GPU_DEVICE
#include "platform/runtime.hpp"
#include "platform/program.hpp"
#include "thread/monitor.hpp"
#include "amdocl/cl_common.hpp"
#include "utils/options.hpp"
#include "utils/versions.hpp" // AMD_PLATFORM_INFO
#if defined(HAVE_BLOWFISH_H)
#include "blowfish/oclcrypt.hpp"
#endif
#include "../utils/libUtils.h"
#include "utils/bif_section_labels.hpp"
#include <vector>
#include <string>
#include <cstring>
#include <cstdio>
#include <sstream>
#include <fstream>
#include <set>
namespace device {
extern const char* BlitSourceCode;
}
namespace amd {
std::vector<Device*> *Device::devices_ = NULL;
bool Device::isHsaDeviceAvailable_ = false;
bool Device::isGpuDeviceAvailable_ = false;
AppProfile Device::appProfile_;
#if defined(WITH_HSA_DEVICE)
AppProfile* Device::oclhsaAppProfile_ = NULL;
#endif
amd::Monitor SvmManager::AllocatedLock_("Guards SVM allocation list");
std::map<uintptr_t, amd::Memory*> SvmManager::svmBufferMap_;
size_t
SvmManager::size()
{
amd::ScopedLock lock(AllocatedLock_);
return svmBufferMap_.size();
}
void
SvmManager::AddSvmBuffer(const void* k, amd::Memory* v)
{
amd::ScopedLock lock(AllocatedLock_);
svmBufferMap_.insert(std::pair<uintptr_t, amd::Memory*>(reinterpret_cast<uintptr_t>(k), v));
}
void
SvmManager::RemoveSvmBuffer(const void* k)
{
amd::ScopedLock lock(AllocatedLock_);
svmBufferMap_.erase(reinterpret_cast<uintptr_t>(k));
}
amd::Memory*
SvmManager::FindSvmBuffer(const void* k)
{
amd::ScopedLock lock(AllocatedLock_);
uintptr_t key = reinterpret_cast<uintptr_t>(k);
std::map<uintptr_t, amd::Memory*>::iterator it = svmBufferMap_.upper_bound(key);
if (it == svmBufferMap_.begin()) {
return NULL;
}
--it;
amd::Memory* mem = it->second;
if (key >= it->first && key < (it->first + mem->getSize())) {
//the k is in the range
return mem;
}
else {
return NULL;
}
}
Device::BlitProgram::~BlitProgram()
{
if (program_ != NULL) {
program_->release();
}
}
bool
Device::BlitProgram::create(amd::Device* device,
const char* extraKernels, const char* extraOptions)
{
std::vector<amd::Device*> devices;
devices.push_back(device);
std::string kernels(device::BlitSourceCode);
if (extraKernels != NULL) {
kernels += extraKernels;
}
// Create a program with all blit kernels
program_ = new Program(*context_, kernels.c_str());
if (program_ == NULL) {
return false;
}
// Build all kernels
std::string opt = "-Wf,--force_disable_spir -fno-lib-no-inline "\
"-fno-sc-keep-calls ";
if (extraOptions != NULL) {
opt += extraOptions;
}
if (!GPU_DUMP_BLIT_KERNELS) {
opt += " -fno-enable-dump -cl-internal-kernel";
}
if (CL_SUCCESS != program_->build(devices, opt.c_str(),
NULL, NULL, GPU_DUMP_BLIT_KERNELS)) {
return false;
}
return true;
}
bool
Device::init()
{
assert(!Runtime::initialized() && "initialize only once");
bool ret = false;
devices_ = NULL;
appProfile_.init();
// IMPORTANT: Note that we are initialiing HSA stack first and then
// GPU stack. The order of initialization is signiicant and if changed
// amd::Device::registerDevice() must be accordingly modified.
#if defined(WITH_HSA_DEVICE)
oclhsaAppProfile_ = oclhsaCreateAppProfile();
if (oclhsaAppProfile_ && !oclhsaAppProfile_->IsHsaInitDisabled()) {
// Return value of oclhsa::Device::init()
// If returned false, error initializing HSA stack.
// If returned true, either HSA not installed or HSA stack
// successfully initialized.
if (!oclhsa::Device::init() ) {
// abort() commentted because this is the only indication
// that KFD is not installed.
// Ignore the failure and assume KFD is not installed.
//abort();
}
ret |= oclhsa::NullDevice::init();
}
#endif // WITH_HSA_DEVICE
#if defined(WITH_GPU_DEVICE)
ret |= DeviceLoad();
#endif // WITH_GPU_DEVICE
#if defined(WITH_CPU_DEVICE)
ret |= cpu::Device::init();
#endif // WITH_CPU_DEVICE
return ret;
}
void
Device::tearDown()
{
if (devices_ != NULL) {
for (uint i = 0; i < devices_->size(); ++i) {
delete devices_->at(i);
}
devices_->clear();
delete devices_;
}
#if defined(WITH_HSA_DEVICE)
if (oclhsaAppProfile_ && !oclhsaAppProfile_->IsHsaInitDisabled()) {
oclhsa::Device::tearDown();
delete oclhsaAppProfile_;
oclhsaAppProfile_ = NULL;
}
#endif // WITH_HSA_DEVICE
#if defined(WITH_GPU_DEVICE)
DeviceUnload();
#endif // WITH_GPU_DEVICE
#if defined(WITH_CPU_DEVICE)
cpu::Device::tearDown();
#endif // WITH_CPU_DEVICE
}
Device::Device(Device* parent)
: settings_(NULL), online_(true), blitProgram_(NULL), parent_(parent)
{
memset(&info_, '\0', sizeof(info_));
if (parent_ != NULL) {
parent_->retain();
}
}
Device::~Device()
{
// Destroy device settings
if (settings_ != NULL) {
delete settings_;
}
if (parent_ != NULL) {
parent_->release();
}
else {
if (info_.extensions_ != NULL) {
delete [] info_.extensions_;
}
}
if (info_.partitionCreateInfo_.type_.byCounts_ &&
info_.partitionCreateInfo_.byCounts_.countsList_ != NULL) {
delete [] info_.partitionCreateInfo_.byCounts_.countsList_;
}
}
bool
Device::verifyBinaryImage( const void* image, size_t size) const
{
const char* p = static_cast<const char*>(image);
#if defined(HAVE_BLOWFISH_H)
int outBufSize;
if (amd::isEncryptedBIF(p, (int)size, &outBufSize)) {
// For encrypted image, check it later and simply return true here.
return true;
}
#endif
if (amd::isElfMagic(p)) {
return true;
}
if (isBcMagic(p)) {
return true;
}
return false;
}
bool
Device::isAncestor(const Device* sub) const
{
for (const Device* d = sub->parent_; d != NULL; d = d->parent_) {
if (d == this) {
return true;
}
}
return false;
}
bool Device::IsHsaCapableDevice() {
static bool init = false;
typedef std::vector<std::string> ListType;
typedef std::vector<std::string>::const_iterator ListIterType;
static ListType hsaNames;
if (!init) {
hsaNames.push_back("Spectre");
hsaNames.push_back("Spooky");
init = true;
}
for (ListIterType itr = hsaNames.begin(); itr != hsaNames.end(); itr++) {
if ((*itr) == info_.name_ // If name is in list then HSA capable.
|| (info_.type_ & CL_HSA_ENABLED_AMD)) { // HSA enabled is HSA capable.
return true;
}
}
return false;
}
void
Device::registerDevice()
{
assert(Runtime::singleThreaded() && "this is not thread-safe");
static bool nonhsaDefaultIsAssigned = false;
static bool hsaDefaultIsAssigned = false;
if (devices_ == NULL) {
devices_ = new std::vector<Device*>;
}
if (info_.available_) {
bool dvHsaEnabled = (info_.type_ & CL_HSA_ENABLED_AMD) != 0;
if (IsHsaCapableDevice()) {
if (dvHsaEnabled && !hsaDefaultIsAssigned) {
hsaDefaultIsAssigned = true;
info_.type_ |= CL_DEVICE_TYPE_DEFAULT;
isHsaDeviceAvailable_ = true;
}
}
if (!dvHsaEnabled && !nonhsaDefaultIsAssigned) {
nonhsaDefaultIsAssigned = true;
info_.type_ |= CL_DEVICE_TYPE_DEFAULT;
isGpuDeviceAvailable_ = true;
}
}
devices_->push_back(this);
}
bool IsHsaRequested(cl_device_type requestedType) {
// Depending on HSA_RUNTIME and hint flags CL_HSA_XXXXX_AMD,
// decide to use Gpu or HSA device.
if (requestedType == CL_DEVICE_TYPE_ALL) {
// hint flags are masked by ALL (0xFFFFFFFF) so HSA_RUNTIME decides.
return HSA_RUNTIME;
}
bool hsabit = (requestedType & CL_HSA_ENABLED_AMD) != 0;
bool nonhsabit = (requestedType & CL_HSA_DISABLED_AMD) != 0;
// Condition where both hsabit and nonhsabit set
// is checked in getDevices() and
// numDevices(). So that condition will never occur in this code path
if (hsabit ^ nonhsabit) {
// Eithere ENABLE or DISABLE HSA specified via hint flags.
return (requestedType & CL_HSA_ENABLED_AMD) != 0;
} else {
// Niethre ENABLE nor DISABLE HSA specified via hint flags.
return HSA_RUNTIME;
}
}
bool Device::IsTypeMatching(cl_device_type type, bool offlineDevices) {
if (!(isOnline() || offlineDevices)) {
return false;
}
cl_device_type hintAndDefaultMask = (CL_HSA_ENABLED_AMD
| CL_HSA_DISABLED_AMD | CL_DEVICE_TYPE_DEFAULT);
if ((info_.type_ & type & ~hintAndDefaultMask)
|| (info_.type_ & type & CL_DEVICE_TYPE_DEFAULT)) {
// HSA and ORCA stack will never be running side-by-side.
// Hence device selection is mute.
// Instead of removing the code just returning true always.
// To reinstate, simply remove this "return true;" statement.
return true;
bool dvHsaEnabled = (info_.type_ & CL_HSA_ENABLED_AMD) != 0;
bool isHsaReq = IsHsaRequested(type);
// If not HSA capable device then always a match.
if (!IsHsaCapableDevice()) {
return true;
}
// ASSUMPTION: In the following two if statements,
// assumption is, either HSA or GPU stacks always available
// for HSA capable devcie.
// Requested stack not available - ignore hint.
if (isHsaReq && !isHsaDeviceAvailable_) {
return isGpuDeviceAvailable_;
}
// Requested stack not available - ignore hint.
if (!isHsaReq && !isGpuDeviceAvailable_) {
return isHsaDeviceAvailable_;
}
// If HSA capable device then request type(hsa or not) must
// match device hsa enablement(HSA enabled or not).
if (!(dvHsaEnabled ^ isHsaReq)) {
return true;
}
}
return false;
}
std::vector<Device*>
Device::getDevices(cl_device_type type, bool offlineDevices)
{
std::vector<Device*> result;
if (devices_ == NULL) {
return result;
}
#if defined(WITH_HSA_DEVICE)
type = oclhsaAppProfile_->ApplyHsaDeviceHintFlag(type);
#endif
// Create the list of available devices
for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) {
// Check if the device type is matched
if ((*it)->IsTypeMatching(type, offlineDevices)) {
result.push_back(*it);
}
}
return result;
}
size_t
Device::numDevices(cl_device_type type, bool offlineDevices)
{
size_t result = 0;
if (devices_ == NULL) {
return 0;
}
#if defined(WITH_HSA_DEVICE)
type = oclhsaAppProfile_->ApplyHsaDeviceHintFlag(type);
#endif
for (device_iterator it = devices_->begin(); it != devices_->end(); ++it) {
// Check if the device type is matched
if ((*it)->IsTypeMatching(type, offlineDevices)) {
++result;
}
}
return result;
}
bool
Device::getDeviceIDs(
cl_device_type deviceType,
cl_uint numEntries,
cl_device_id* devices,
cl_uint* numDevices,
bool offlineDevices)
{
if (numDevices != NULL && devices == NULL) {
*numDevices =
(cl_uint)amd::Device::numDevices(deviceType, offlineDevices);
return (*numDevices > 0) ? true : false;
}
assert(devices != NULL && "check the code above");
std::vector<amd::Device*> ret =
amd::Device::getDevices(deviceType, offlineDevices);
if (ret.size() == 0) {
*not_null(numDevices) = 0;
return false;
}
std::vector<amd::Device*>::iterator it = ret.begin();
cl_uint count = std::min(numEntries, (cl_uint)ret.size());
while (count--) {
*devices++ = as_cl(*it++);
--numEntries;
}
while (numEntries--) {
*devices++ = (cl_device_id) 0;
}
*not_null(numDevices) = (cl_uint)ret.size();
return true;
}
char*
Device::getExtensionString()
{
std::stringstream extStream;
size_t size;
char* result = NULL;
// Generate the extension string
for (uint i = 0; i < ClExtTotal; ++i) {
if (settings().checkExtension(i)) {
extStream << OclExtensionsString[i];
}
}
size = extStream.str().size() + 1;
// Create a single string with all extensions
result = new char[size];
if (result != NULL) {
memcpy(result, extStream.str().data(), (size - 1));
result[size - 1] = 0;
}
return result;
}
} // namespace amd
namespace device {
Settings::Settings()
{
assert((ClExtTotal < (8 * sizeof(extensions_))) && "Too many extensions!");
extensions_ = 0;
partialDispatch_ = false;
supportRA_ = true;
largeHostMemAlloc_ = false;
customHostAllocator_ = false;
customSvmAllocator_ = false;
waitCommand_ = AMD_OCL_WAIT_COMMAND;
supportDepthsRGB_ = false;
assumeAliases_ = false;
}
bool
Kernel::createSignature(const parameters_t& params)
{
std::stringstream attribs;
if (workGroupInfo_.compileSize_[0] != 0) {
attribs << "reqd_work_group_size(";
for (size_t i = 0; i < 3; ++i) {
if (i != 0) {
attribs << ",";
}
attribs << workGroupInfo_.compileSize_[i];
}
attribs << ")";
}
if (workGroupInfo_.compileSizeHint_[0] != 0) {
attribs << " work_group_size_hint(";
for (size_t i = 0; i < 3; ++i) {
if (i != 0) {
attribs << ",";
}
attribs << workGroupInfo_.compileSizeHint_[i];
}
attribs << ")";
}
if (!workGroupInfo_.compileVecTypeHint_.empty()) {
attribs << " vec_type_hint("
<< workGroupInfo_.compileVecTypeHint_
<< ")";
}
// Destroy old signature if it was allocated before
// (offline devices path)
delete signature_;
signature_ = new amd::KernelSignature(params, attribs.str());
if (NULL != signature_) {
return true;
}
return false;
}
Kernel::~Kernel()
{
delete signature_;
}
void
Memory::saveWriteMapInfo(
const amd::Coord3D origin,
const amd::Coord3D region,
bool entire)
{
writeMapInfo_.origin_ = origin;
writeMapInfo_.region_ = region;
writeMapInfo_.entire_ = entire;
flags_ |= UnmapWrite;
}
Program::Program(amd::Device& device)
: device_(device)
, type_(TYPE_NONE)
, clBinary_(NULL)
, llvmBinary_()
, llvmBinaryIsSpir_(false)
, compileOptions_()
, linkOptions_()
, lastBuildOptionsArg_()
, buildStatus_(CL_BUILD_NONE)
, buildError_(CL_SUCCESS)
, globalVariableTotalSize_(0)
, programOptions(NULL)
{ }
Program::~Program()
{
clear();
}
void
Program::clear()
{
// Destroy all device kernels
kernels_t::const_iterator it;
for (it = kernels_.begin(); it != kernels_.end(); ++it) {
delete it->second;
}
kernels_.clear();
}
bool
Program::initBuild(amd::option::Options* options)
{
programOptions = options;
if (options->oVariables->DumpFlags > 0) {
static amd::Atomic<unsigned> build_num = 0;
options->setBuildNo(build_num++);
}
buildLog_.clear();
if (!initClBinary()) {
return false;
}
return true;
}
bool
Program::finiBuild(bool isBuildGood)
{
return true;
}
cl_int
Program::compile(const std::string& sourceCode,
const std::vector<const std::string*>& headers,
const char** headerIncludeNames,
const char* origOptions,
amd::option::Options* options)
{
uint64_t start_time = 0;
if (options->oVariables->EnableBuildTiming) {
buildLog_ = "\nStart timing major build components.....\n\n";
start_time = amd::Os::timeNanos();
}
lastBuildOptionsArg_ = origOptions ? origOptions : "";
if (options) {
compileOptions_ = options->origOptionStr;
}
buildStatus_ = CL_BUILD_IN_PROGRESS;
if (!initBuild(options)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation init failed.";
}
}
if (options->oVariables->FP32RoundDivideSqrt &&
!(device().info().singleFPConfig_
& CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
buildStatus_ = CL_BUILD_ERROR;
buildLog_ += "Error: -cl-fp32-correctly-rounded-divide-sqrt "\
"specified without device support";
}
// Compile the source code if any
if ((buildStatus_ == CL_BUILD_IN_PROGRESS) &&
!sourceCode.empty() &&
!compileImpl(sourceCode, headers, headerIncludeNames, options)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation failed.";
}
}
setType(TYPE_COMPILED);
if ((buildStatus_ == CL_BUILD_IN_PROGRESS) &&
!createBinary(options)) {
buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
}
if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation fini failed.";
}
}
if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
buildStatus_ = CL_BUILD_SUCCESS;
}
else {
buildError_ = CL_COMPILE_PROGRAM_FAILURE;
}
if (options->oVariables->EnableBuildTiming) {
std::stringstream tmp_ss;
tmp_ss << "\nTotal Compile Time: "
<< (amd::Os::timeNanos() - start_time)/1000ULL
<< " us\n";
buildLog_ += tmp_ss.str();
}
if (options->oVariables->BuildLog && !buildLog_.empty()) {
if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
fprintf(stderr, "%s\n", options->optionsLog().c_str());
fprintf(stderr, "%s\n", buildLog_.c_str());
}
else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
printf("%s\n", options->optionsLog().c_str());
printf("%s\n", buildLog_.c_str());
}
else {
std::fstream f;
std::stringstream tmp_ss;
std::string logs = options->optionsLog() + buildLog_;
tmp_ss << options->oVariables->BuildLog
<< "." << options->getBuildNo();
f.open(tmp_ss.str().c_str(),
(std::fstream::out | std::fstream::binary));
f.write(logs.data(), logs.size());
f.close();
}
}
return buildError();
}
cl_int Program::link(const std::vector<Program*>& inputPrograms,
const char* origLinkOptions,
amd::option::Options* linkOptions)
{
lastBuildOptionsArg_ = origLinkOptions ? origLinkOptions : "";
if (linkOptions) {
linkOptions_ = linkOptions->origOptionStr;
}
buildStatus_ = CL_BUILD_IN_PROGRESS;
amd::option::Options options;
if (!getCompileOptionsAtLinking(inputPrograms, linkOptions)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ += "Internal error: Get compile options failed.";
}
}
else {
amd::option::parseAllOptions(compileOptions_, options);
}
uint64_t start_time = 0;
if (options.oVariables->EnableBuildTiming) {
buildLog_ = "\nStart timing major build components.....\n\n";
start_time = amd::Os::timeNanos();
}
// initBuild() will clear buildLog_, so store it in a temporary variable
std::string tmpBuildLog = buildLog_;
if ((buildStatus_ == CL_BUILD_IN_PROGRESS)
&& !initBuild(&options)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ += "Internal error: Compilation init failed.";
}
}
buildLog_ += tmpBuildLog;
if (options.oVariables->FP32RoundDivideSqrt &&
!(device().info().singleFPConfig_
& CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
buildStatus_ = CL_BUILD_ERROR;
buildLog_ += "Error: -cl-fp32-correctly-rounded-divide-sqrt "\
"specified without device support";
}
bool createLibrary
= linkOptions ? linkOptions->oVariables->clCreateLibrary : false;
if ((buildStatus_ == CL_BUILD_IN_PROGRESS)
&& !linkImpl(inputPrograms, &options, createLibrary)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ += "Internal error: Link failed.\n";
buildLog_ += "Make sure the system setup is correct.";
}
}
if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation fini failed.";
}
}
if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
buildStatus_ = CL_BUILD_SUCCESS;
}
else {
buildError_ = CL_LINK_PROGRAM_FAILURE;
}
if (options.oVariables->EnableBuildTiming) {
std::stringstream tmp_ss;
tmp_ss << "\nTotal Link Time: "
<< (amd::Os::timeNanos() - start_time)/1000ULL
<< " us\n";
buildLog_ += tmp_ss.str();
}
if (options.oVariables->BuildLog && !buildLog_.empty()) {
if (strcmp(options.oVariables->BuildLog, "stderr") == 0) {
fprintf(stderr, "%s\n", options.optionsLog().c_str());
fprintf(stderr, "%s\n", buildLog_.c_str());
}
else if (strcmp(options.oVariables->BuildLog, "stdout") == 0) {
printf("%s\n", options.optionsLog().c_str());
printf("%s\n", buildLog_.c_str());
}
else {
std::fstream f;
std::stringstream tmp_ss;
std::string logs = options.optionsLog() + buildLog_;
tmp_ss << options.oVariables->BuildLog
<< "." << options.getBuildNo();
f.open(tmp_ss.str().c_str(),
(std::fstream::out | std::fstream::binary));
f.write(logs.data(), logs.size());
f.close();
}
}
return buildError();
}
cl_int
Program::build(const std::string& sourceCode,
const char* origOptions,
amd::option::Options* options)
{
uint64_t start_time = 0;
if (options->oVariables->EnableBuildTiming) {
buildLog_ = "\nStart timing major build components.....\n\n";
start_time = amd::Os::timeNanos();
}
lastBuildOptionsArg_ = origOptions ? origOptions : "";
if (options) {
compileOptions_ = options->origOptionStr;
}
buildStatus_ = CL_BUILD_IN_PROGRESS;
if (!initBuild(options)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation init failed.";
}
}
if (options->oVariables->FP32RoundDivideSqrt &&
!(device().info().singleFPConfig_ & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)) {
buildStatus_ = CL_BUILD_ERROR;
buildLog_ += "Error: -cl-fp32-correctly-rounded-divide-sqrt "\
"specified without device support";
}
// Compile the source code if any
std::vector<const std::string*> headers;
if ((buildStatus_ == CL_BUILD_IN_PROGRESS) &&
!sourceCode.empty() && !compileImpl(sourceCode, headers, NULL, options)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation failed.";
}
}
if ((buildStatus_ == CL_BUILD_IN_PROGRESS) && !linkImpl(options)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ += "Internal error: Link failed.\n";
buildLog_ += "Make sure the system setup is correct.";
}
}
if (!finiBuild(buildStatus_ == CL_BUILD_IN_PROGRESS)) {
buildStatus_ = CL_BUILD_ERROR;
if (buildLog_.empty()) {
buildLog_ = "Internal error: Compilation fini failed.";
}
}
if (buildStatus_ == CL_BUILD_IN_PROGRESS) {
buildStatus_ = CL_BUILD_SUCCESS;
}
else {
buildError_ = CL_BUILD_PROGRAM_FAILURE;
}
if (options->oVariables->EnableBuildTiming) {
std::stringstream tmp_ss;
tmp_ss << "\nTotal Build Time: "
<< (amd::Os::timeNanos() - start_time)/1000ULL
<< " us\n";
buildLog_ += tmp_ss.str();
}
if (options->oVariables->BuildLog && !buildLog_.empty()) {
if (strcmp(options->oVariables->BuildLog, "stderr") == 0) {
fprintf(stderr, "%s\n", options->optionsLog().c_str());
fprintf(stderr, "%s\n", buildLog_.c_str());
}
else if (strcmp(options->oVariables->BuildLog, "stdout") == 0) {
printf("%s\n", options->optionsLog().c_str());
printf("%s\n", buildLog_.c_str());
}
else {
std::fstream f;
std::stringstream tmp_ss;
std::string logs = options->optionsLog() + buildLog_;
tmp_ss << options->oVariables->BuildLog << "." << options->getBuildNo();
f.open(tmp_ss.str().c_str(), (std::fstream::out | std::fstream::binary));
f.write(logs.data(), logs.size());
f.close();
}
}
return buildError();
}
bool
Program::getCompileOptionsAtLinking(const std::vector<Program*>& inputPrograms,
const amd::option::Options* linkOptions)
{
amd::option::Options compileOptions;
std::vector<device::Program*>::const_iterator it
= inputPrograms.begin();
std::vector<device::Program*>::const_iterator itEnd
= inputPrograms.end();
for (size_t i = 0; it != itEnd; ++it, ++i) {
Program* program = *it;
amd::option::Options compileOptions2;
amd::option::Options* thisCompileOptions
= i == 0 ? &compileOptions : &compileOptions2;
if (!amd::option::parseAllOptions(program->compileOptions_,
*thisCompileOptions)) {
buildLog_ += thisCompileOptions->optionsLog();
LogError("Bad compile options from input binary");
return false;
}
if (i == 0)
compileOptions_ = program->compileOptions_;
// if we are linking a program executable, and if "program" is a
// compiled module or a library created with "-enable-link-options",
// we can overwrite "program"'s compile options with linking options
if (!linkOptions_.empty()
&& !linkOptions->oVariables->clCreateLibrary) {
bool linkOptsCanOverwrite = false;
if (program->type() != TYPE_LIBRARY) {
linkOptsCanOverwrite = true;
}
else {
amd::option::Options thisLinkOptions;
if (!amd::option::parseLinkOptions(program->linkOptions_,
thisLinkOptions)) {
buildLog_ += thisLinkOptions.optionsLog();
LogError("Bad link options from input binary");
return false;
}
if (thisLinkOptions.oVariables->clEnableLinkOptions)
linkOptsCanOverwrite = true;
}
if (linkOptsCanOverwrite) {
if (!thisCompileOptions->setOptionVariablesAs(*linkOptions)) {
buildLog_ += thisCompileOptions->optionsLog();
LogError("Bad compile options from input binary");
return false;
}
}
if (i == 0)
compileOptions_ += " " + linkOptions_;
}
// warn if input modules have inconsistent compile options
if (i > 0) {
if (!compileOptions.equals(*thisCompileOptions,
true/*ignore clc options*/)) {
buildLog_ += "Warning: Input OpenCL binaries has inconsistent"
" compile options. Using compile options from"
" the first input binary!\n";
}
}
}
return true;
}
bool
Program::initClBinary(char* binaryIn, size_t size)
{
if (!initClBinary()) {
return false;
}
// Save the original binary that isn't owned by ClBinary
clBinary()->saveOrigBinary(binaryIn, size);
char* bin = binaryIn;
size_t sz = size;
//unencrypted
int encryptCode = 0;
char* decryptedBin = NULL;
if (isBcMagic(binaryIn))
{
acl_error err = ACL_SUCCESS;
aclBinaryOptions binOpts = {0};
binOpts.struct_size = sizeof(binOpts);
binOpts.elfclass
= (info().arch_id == aclX64 || info().arch_id == aclAMDIL64)
? ELFCLASS64 : ELFCLASS32;
binOpts.bitness = ELFDATA2LSB;
binOpts.alloc = &::malloc;
binOpts.dealloc = &::free;
aclBinary* aclbin_v30 = aclBinaryInit(sizeof(aclBinary), &info(), &binOpts, &err);
if (err != ACL_SUCCESS) {
LogWarning("aclBinaryInit failed");
aclBinaryFini(aclbin_v30);
return false;
}
err = aclInsertSection(device().compiler(), aclbin_v30, binaryIn, size, aclSPIR);
if (ACL_SUCCESS != err) {
LogWarning("aclInsertSection failed");
aclBinaryFini(aclbin_v30);
return false;
}
aclBinary* aclbin_v21 = aclCreateFromBinary(aclbin_v30,aclBIFVersion21);
err = aclWriteToMem(aclbin_v21, reinterpret_cast<void**>(&bin), &sz);
if (err != ACL_SUCCESS) {
LogWarning("aclWriteToMem failed");
aclBinaryFini(aclbin_v30);
aclBinaryFini(aclbin_v21);
return false;
}
aclBinaryFini(aclbin_v30);
aclBinaryFini(aclbin_v21);
}
else
{
size_t decryptedSize;
if (!clBinary()->decryptElf(binaryIn,size,
&decryptedBin,&decryptedSize,&encryptCode)) {
return false;
}
if (decryptedBin != NULL) {
// It is decrypted binary.
bin = decryptedBin;
sz = decryptedSize;
}
if (!isElf(bin)) {
// Invalid binary.
if (decryptedBin != NULL) {
delete [] decryptedBin;
}
return false;
}
}
clBinary()->setFlags(encryptCode);
return clBinary()->setBinary(bin, sz, (decryptedBin != NULL));
}
bool
Program::setBinary(char* binaryIn, size_t size)
{
if (!initClBinary(binaryIn, size)) {
return false;
}
if (!clBinary()->setElfIn(ELFCLASS32)) {
LogError("Setting input OCL binary failed");
return false;
}
uint16_t type;
if (!clBinary()->elfIn()->getType(type)) {
LogError("Bad OCL Binary: error loading ELF type!");
return false;
}
switch (type) {
case ET_NONE:
{
setType(TYPE_NONE);
break;
}
case ET_REL:
{
if (clBinary()->isSPIR()) {
setType(TYPE_INTERMEDIATE);
} else {
setType(TYPE_COMPILED);
}
break;
}
case ET_DYN:
{
setType(TYPE_LIBRARY);
break;
}
case ET_EXEC:
{
setType(TYPE_EXECUTABLE);
break;
}
default:
LogError("Bad OCL Binary: bad ELF type!");
return false;
}
clBinary()->loadCompileOptions(compileOptions_);
clBinary()->loadLinkOptions(linkOptions_);
clBinary()->resetElfIn();
return true;
}
bool
Program::createBIFBinary(aclBinary* bin)
{
acl_error err;
char *binaryIn = NULL;
size_t size;
err = aclWriteToMem(bin, reinterpret_cast<void**>(&binaryIn), &size);
if (err != ACL_SUCCESS) {
LogWarning("aclWriteToMem failed");
return false;
}
clBinary()->saveBIFBinary(binaryIn, size);
aclFreeMem(bin, binaryIn);
return true;
}
ClBinary::ClBinary(const amd::Device& dev, BinaryImageFormat bifVer)
: dev_(dev)
, binary_(NULL)
, size_(0)
, flags_(0)
, origBinary_(NULL)
, origSize_(0)
, encryptCode_ (0)
, elfIn_(NULL)
, elfOut_(NULL)
, format_(bifVer)
{
}
ClBinary::~ClBinary()
{
release();
if (elfIn_) {
delete elfIn_;
}
if (elfOut_) {
delete elfOut_;
}
}
std::string
ClBinary::getBIFSymbol(unsigned int symbolID) const
{
size_t nSymbols = 0;
// Due to PRE & POST defines in bif_section_labels.hpp conflict with
// PRE & POST struct members in sp3-si-chip-registers.h
// unable to include bif_section_labels.hpp in device.hpp
//! @todo: resolve conflict by renaming defines,
// then include bif_section_labels.hpp in device.hpp &
// use oclBIFSymbolID instead of unsigned int as a parameter
const oclBIFSymbolID symID = static_cast<oclBIFSymbolID>(symbolID);
switch (format_) {
case BIF_VERSION2: {
nSymbols = sizeof(BIF20)/sizeof(oclBIFSymbolStruct);
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF20, nSymbols, symID);
assert(symb && "BIF20 symbol with symbolID not found");
if (symb) {
return std::string(symb->str[PRE]) + std::string(symb->str[POST]);
}
break;
}
case BIF_VERSION3: {
nSymbols = sizeof(BIF30)/sizeof(oclBIFSymbolStruct);
const oclBIFSymbolStruct* symb = findBIFSymbolStruct(BIF30, nSymbols, symID);
assert(symb && "BIF30 symbol with symbolID not found");
if (symb) {
return std::string(symb->str[PRE]) + std::string(symb->str[POST]);
}
break;
}
default:
assert(0 && "unexpected BIF type");
return "";
}
return "";
}
void
ClBinary::init(amd::option::Options* optionsObj, bool amdilRequired)
{
// option has higher priority than environment variable.
if ((flags_ & BinarySourceMask) != BinaryRemoveSource) {
// set to zero
flags_ = (flags_ & (~BinarySourceMask));
flags_ |= (optionsObj->oVariables->BinSOURCE
? BinarySaveSource : BinaryNoSaveSource);
}
if ((flags_ & BinaryLlvmirMask) != BinaryRemoveLlvmir) {
// set to zero
flags_ = (flags_ & (~BinaryLlvmirMask));
flags_ |= (optionsObj->oVariables->BinLLVMIR
? BinarySaveLlvmir : BinaryNoSaveLlvmir);
}
// If amdilRequired is true, force to save AMDIL (for correctness)
if ((flags_ & BinaryAmdilMask) != BinaryRemoveAmdil ||
amdilRequired) {
// set to zero
flags_ = (flags_ & (~BinaryAmdilMask));
flags_ |= ((optionsObj->oVariables->BinAMDIL || amdilRequired)
? BinarySaveAmdil : BinaryNoSaveAmdil);
}
if ((flags_ & BinaryIsaMask) != BinaryRemoveIsa) {
// set to zero
flags_ = (flags_ & (~BinaryIsaMask));
flags_ |= ((optionsObj->oVariables->BinEXE)
? BinarySaveIsa : BinaryNoSaveIsa);
}
if ((flags_ & BinaryASMask) != BinaryRemoveAS) {
// set to zero
flags_ = (flags_ & (~BinaryASMask));
flags_ |= ((optionsObj->oVariables->BinAS)
? BinarySaveAS : BinaryNoSaveAS);
}
}
bool
ClBinary::isRecompilable(std::string& llvmBinary,
amd::OclElf::oclElfPlatform thePlatform)
{
/* It is recompilable if there is llvmir that was generated for
the same platform (CPU or GPU) and with the same bitness.
Note: the bitness has been checked in initClBinary(), no need
to check it here.
*/
if (llvmBinary.empty() ) {
return false;
}
uint16_t elf_target;
amd::OclElf::oclElfPlatform platform;
if (elfIn()->getTarget(elf_target, platform)){
if (platform == thePlatform){
return true;
}
if ((platform == amd::OclElf::COMPLIB_PLATFORM) &&
(((thePlatform == amd::OclElf::CAL_PLATFORM) &&
((elf_target == (uint16_t)EM_AMDIL) ||
(elf_target == (uint16_t)EM_HSAIL) ||
(elf_target == (uint16_t)EM_HSAIL_64))) ||
((thePlatform == amd::OclElf::CPU_PLATFORM) &&
((elf_target == (uint16_t)EM_386) ||
(elf_target == (uint16_t)EM_X86_64))))){
return true;
}
}
return false;
}
void
ClBinary::release()
{
if (isBinaryAllocated() && (binary_ != NULL)) {
delete [] binary_;
binary_ = NULL;
flags_ &= ~BinaryAllocated;
}
}
void
ClBinary::saveBIFBinary(char* binaryIn, size_t size)
{
char *image = new char[size];
memcpy(image, binaryIn, size);
setBinary(image, size, true);
return;
}
bool
ClBinary::createElfBinary(bool doencrypt, Program::type_t type)
{
#if 0
if (!saveISA() && !saveAMDIL() && !saveLLVMIR() && !saveSOURCE()) {
return true;
}
#endif
release();
size_t imageSize;
char* image;
assert (elfOut_ && "elfOut_ should be initialized in ClBinary::data()");
// Insert Version string that builds this binary into .comment section
const device::Info& devInfo = dev_.info();
std::string buildVerInfo("@(#) ");
if (devInfo.version_ != NULL) {
buildVerInfo.append(devInfo.version_);
buildVerInfo.append(". Driver version: ");
buildVerInfo.append(devInfo.driverVersion_);
}
else {
// char OpenCLVersion[256];
// size_t sz;
// cl_int ret= clGetPlatformInfo(AMD_PLATFORM, CL_PLATFORM_VERSION, 256, OpenCLVersion, &sz);
// if (ret == CL_SUCCESS) {
// buildVerInfo.append(OpenCLVersion, sz);
// }
// If CAL is unavailable, just hard-code the OpenCL driver version
buildVerInfo.append("OpenCL 1.1" AMD_PLATFORM_INFO);
}
elfOut_->addSection(amd::OclElf::COMMENT, buildVerInfo.data(), buildVerInfo.size());
switch (type) {
case Program::TYPE_NONE:
{
elfOut_->setType(ET_NONE);
break;
}
case Program::TYPE_COMPILED:
{
elfOut_->setType(ET_REL);
break;
}
case Program::TYPE_LIBRARY:
{
elfOut_->setType(ET_DYN);
break;
}
case Program::TYPE_EXECUTABLE:
{
elfOut_->setType(ET_EXEC);
break;
}
default:
assert(0 && "unexpected elf type");
}
if (!elfOut_->dumpImage(&image, &imageSize)) {
return false;
}
#if defined(HAVE_BLOWFISH_H)
if (doencrypt) {
// Increase the size by 64 to accomodate extra headers
int outBufSize = (int)(imageSize + 64);
char * outBuf = new char[outBufSize];
if (outBuf == NULL) {
return false;
}
memset(outBuf, '\0', outBufSize);
int outBytes = 0;
bool success = amd::oclEncrypt(0, image, imageSize, outBuf, outBufSize, &outBytes);
delete [] image;
if (!success) {
delete [] outBuf;
return false;
}
image = outBuf;
imageSize = outBytes;
}
#endif
setBinary(image, imageSize, true);
return true;
}
Program::binary_t ClBinary::data() const
{
return std::make_pair(binary_, size_);
}
bool
ClBinary::setBinary(char* theBinary, size_t theBinarySize, bool allocated)
{
release();
size_ = theBinarySize;
binary_ = theBinary;
if (allocated) {
flags_ |= BinaryAllocated;
}
return true;
}
void
ClBinary::setFlags(int encryptCode)
{
encryptCode_ = encryptCode;
if (encryptCode != 0) {
flags_ = (flags_ & (~(BinarySourceMask | BinaryLlvmirMask |
BinaryAmdilMask | BinaryIsaMask |
BinaryASMask)));
flags_ |= (BinaryRemoveSource | BinaryRemoveLlvmir |
BinaryRemoveAmdil | BinarySaveIsa |
BinaryRemoveAS);
}
}
bool
ClBinary::decryptElf(char* binaryIn, size_t size,
char** decryptBin, size_t* decryptSize, int* encryptCode)
{
*decryptBin = NULL;
#if defined(HAVE_BLOWFISH_H)
int outBufSize = 0;
if (amd::isEncryptedBIF(binaryIn, (int)size, &outBufSize)) {
char* outBuf = new (std::nothrow) char[outBufSize];
if (outBuf == NULL) {
return false;
}
// Decrypt
int outDataSize = 0;
if (!amd::oclDecrypt(binaryIn, (int)size, outBuf, outBufSize, &outDataSize)) {
delete [] outBuf;
return false;
}
*decryptBin = reinterpret_cast<char*>(outBuf);
*decryptSize = outDataSize;
*encryptCode = 1;
}
#endif
return true;
}
bool
ClBinary::setElfIn(unsigned char eclass)
{
if (elfIn_) return true;
if (binary_ == NULL) {
return false;
}
elfIn_ = new amd::OclElf(eclass, binary_, size_, NULL, ELF_C_READ);
if ( (elfIn_ == NULL)|| elfIn_->hasError() ) {
if (elfIn_) {
delete elfIn_;
elfIn_ = NULL;
}
LogError("Creating input ELF object failed");
return false;
}
return true;
}
void ClBinary::resetElfIn()
{
if (elfIn_) {
delete elfIn_;
elfIn_ = NULL;
}
}
bool ClBinary::setElfOut(unsigned char eclass, const char* outFile)
{
elfOut_ = new amd::OclElf(eclass, NULL, 0, outFile, ELF_C_WRITE);
if ( (elfOut_ == NULL) || elfOut_->hasError() ) {
if (elfOut_) {
delete elfOut_;
}
LogError("Creating ouput ELF object failed");
return false;
}
return setElfTarget();
}
void ClBinary::resetElfOut()
{
if (elfOut_) {
delete elfOut_;
elfOut_ = NULL;
}
}
bool
ClBinary::loadLlvmBinary(std::string& llvmBinary, bool& llvmBinaryIsSpir) const
{
// Check if current binary already has LLVMIR
char *section = NULL;
size_t sz = 0;
if (elfIn_->getSection(amd::OclElf::LLVMIR, &section, &sz) && section && sz > 0) {
llvmBinary.append(section, sz);
llvmBinaryIsSpir = false;
return true;
} else if (elfIn_->getSection(amd::OclElf::SPIR, &section, &sz) && section && sz > 0) {
llvmBinary.append(section, sz);
llvmBinaryIsSpir = true;
return true;
}
return false;
}
bool ClBinary::loadCompileOptions(std::string& compileOptions) const
{
char *options = NULL;
size_t sz;
compileOptions.clear();
if (elfIn_->getSymbol(amd::OclElf::COMMENT,
getBIFSymbol(symOpenclCompilerOptions).c_str(), &options, &sz)) {
if (sz > 0) {
compileOptions.append(options, sz);
}
return true;
}
return false;
}
bool ClBinary::loadLinkOptions(std::string& linkOptions) const
{
char *options = NULL;
size_t sz;
linkOptions.clear();
if (elfIn_->getSymbol(amd::OclElf::COMMENT,
getBIFSymbol(symOpenclLinkerOptions).c_str(), &options, &sz)) {
if (sz > 0) {
linkOptions.append(options, sz);
}
return true;
}
return false;
}
void ClBinary::storeCompileOptions(const std::string& compileOptions)
{
elfOut()->addSymbol(amd::OclElf::COMMENT,
getBIFSymbol(symOpenclCompilerOptions).c_str(),
compileOptions.c_str(), compileOptions.length());
}
void ClBinary::storeLinkOptions(const std::string& linkOptions)
{
elfOut()->addSymbol(amd::OclElf::COMMENT,
getBIFSymbol(symOpenclLinkerOptions).c_str(),
linkOptions.c_str(), linkOptions.length());
}
bool
ClBinary::isSPIR() const
{
char *section = NULL;
size_t sz = 0;
if (elfIn_->getSection(amd::OclElf::LLVMIR, &section, &sz) && section && sz > 0)
return false;
if (elfIn_->getSection(amd::OclElf::SPIR, &section, &sz) && section && sz > 0)
return true;
return false;
}
cl_device_partition_property
PartitionType::toCL() const
{
static cl_device_partition_property conv[] = {
CL_DEVICE_PARTITION_EQUALLY,
CL_DEVICE_PARTITION_BY_COUNTS,
CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN
};
return conv[amd::leastBitSet(value_)];
}
size_t
PartitionType::toCL(cl_device_partition_property* types) const
{
size_t i = 0;
if (equally_) {
types[i++] = CL_DEVICE_PARTITION_EQUALLY;
}
if (byCounts_) {
types[i++] = CL_DEVICE_PARTITION_BY_COUNTS;
}
if (byAffinityDomain_) {
types[i++] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
}
return i;
}
cl_device_affinity_domain
AffinityDomain::toCL() const
{
return (cl_device_affinity_domain)value_;
}
#ifdef cl_ext_device_fission
cl_device_partition_property_ext
PartitionType::toCLExt() const
{
static cl_device_partition_property_ext conv[] = {
CL_DEVICE_PARTITION_EQUALLY_EXT,
CL_DEVICE_PARTITION_BY_COUNTS_EXT,
CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT
};
return conv[amd::leastBitSet(value_)];
}
size_t
PartitionType::toCLExt(cl_device_partition_property_ext* types) const
{
size_t i = 0;
if (equally_) {
types[i++] = CL_DEVICE_PARTITION_EQUALLY_EXT;
}
if (byCounts_) {
types[i++] = CL_DEVICE_PARTITION_BY_COUNTS_EXT;
}
if (byAffinityDomain_) {
types[i++] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT;
}
return i;
}
cl_device_partition_property_ext
AffinityDomain::toCLExt() const
{
static cl_device_partition_property_ext conv[] = {
CL_AFFINITY_DOMAIN_NUMA_EXT,
CL_AFFINITY_DOMAIN_L4_CACHE_EXT,
CL_AFFINITY_DOMAIN_L3_CACHE_EXT,
CL_AFFINITY_DOMAIN_L2_CACHE_EXT,
CL_AFFINITY_DOMAIN_L1_CACHE_EXT,
CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT
};
return conv[amd::leastBitSet(value_)];
}
size_t
AffinityDomain::toCLExt(cl_device_partition_property_ext* affinities) const
{
size_t i = 0;
if (numa_) {
affinities[i++] = CL_AFFINITY_DOMAIN_NUMA_EXT;
}
if (cacheL4_) {
affinities[i++] = CL_AFFINITY_DOMAIN_L4_CACHE_EXT;
}
if (cacheL3_) {
affinities[i++] = CL_AFFINITY_DOMAIN_L3_CACHE_EXT;
}
if (cacheL2_) {
affinities[i++] = CL_AFFINITY_DOMAIN_L2_CACHE_EXT;
}
if (cacheL1_) {
affinities[i++] = CL_AFFINITY_DOMAIN_L1_CACHE_EXT;
}
if (next_) {
affinities[i++] = CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT;
}
return i;
}
#endif // cl_ext_device_fission
} // namespace device