Update code object handling for GSL, PAL and ROCm

- Correct GSL path to report targets using the TargetID syntax.

- Correct GSL path to check compatibility of code objects when
  loading.

- Add the concept of a device ISA and create a registry used by ROCm,
  PAL and GSL.

- Support XNACK and SRAMECC target features consistently for PAL and ROCm.

- Correct logic for NullDevices and asserts to avoid memory corruption.

- Allow all NullDevices to be created for HIP.

- Numerous other code improvements.

Change-Id: I40abf3d2b22249c1492d1af5919665f8184f4e0e
Этот коммит содержится в:
Tony Tye
2021-01-10 12:17:06 +00:00
родитель 4c16051f6d
Коммит c7e8d91e14
30 изменённых файлов: 1020 добавлений и 969 удалений
+172 -1
Просмотреть файл
@@ -23,6 +23,11 @@
#include "utils/options.hpp"
#include "comgrctx.hpp"
#include <algorithm>
#include <array>
#include <cassert>
#include <cstring>
#if defined(WITH_HSA_DEVICE)
#include "device/rocm/rocdevice.hpp"
extern amd::AppProfile* rocCreateAppProfile();
@@ -65,6 +70,11 @@ extern void DeviceUnload();
#include <algorithm>
#include <numeric>
namespace {
// Target-triple prefix that precedes the target ID in a full ISA name.
// Isa::isaName() prepends it and Isa::findIsa(const char*) requires and strips it.
constexpr char hsaIsaNamePrefix[] = "amdgcn-amd-amdhsa--";
} // namespace
namespace device {
extern const char* BlitSourceCode;
@@ -77,6 +87,166 @@ bool VirtualDevice::ActiveWait() const {
namespace amd {
// Returns the [begin, end) range of the static registry of ISAs known to the
// runtime. The registry is a function-local constexpr table, so the returned
// pointers refer to storage with static lifetime.
std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
  constexpr amd::Isa::Feature NONE = amd::Isa::Feature::Unsupported;
  constexpr amd::Isa::Feature ANY = amd::Isa::Feature::Any;
  constexpr amd::Isa::Feature OFF = amd::Isa::Feature::Disabled;
  constexpr amd::Isa::Feature ON = amd::Isa::Feature::Enabled;
  static constexpr Isa supportedIsas_[] = {
    // NOTE: Add new targets by adding rows for each permutation of the SRAMECC
    // and XNACK target feature values. If the target does not support the
    // feature then only NONE is used. If it supports the feature then include
    // rows for ANY, OFF and ON (but not NONE).
    //
    // Use the Target ID syntax. This comprises the processor name, followed by
    // the target feature settings in alphabetic order separated by ':'. If a
    // target feature is omitted it means either it is not supported, or it has
    // the ANY value. If the target feature is disabled then use a '-' suffix,
    // and if enabled use a '+' suffix. The Target ID string of a row must agree
    // with the SRAMECC and XNACK columns of that same row.
    //
    // If the HSAIL or AMD IL compilers do not support the target, then use
    // nullptr for the ID.
    //
    // -------------------- Compiler -------------------- ------- Runtime ----- ---- IP ---- --- Target --- ---------- Target Properties ----------
    // Supported Version Features Mem
    // SIMD Channel LDS LDS
    // SIMD/ SIMD Instr Bank Size/ Mem
    // Target ID HSAIL ID AMD IL ID ROC PAL GSL Maj/Min/Stp SRAMECC XNACK CU Width Width Width CU Banks
    {"gfx600", "Tahiti", "Tahiti", false, false, true, 6, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx601", "Pitcairn", "Pitcairn", false, false, true, 6, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Capeverde
    {"gfx602", "Oland", "Oland", false, false, true, 6, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Hainan
    {"gfx700", "Kaveri", "Kalindi", true, true, true, 7, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Spectre, Spooky, Kalindi
    {"gfx701", "Hawaii", "Hawaii", true, true, true, 7, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaiipro
    {"gfx702", nullptr, nullptr, true, true, true, 7, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaii (can execute Hawaiipro code)
    {"gfx703", nullptr, nullptr, false, false, false, 7, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Mullins
    {"gfx704", "Bonaire", "Bonaire", false, true, true, 7, 0, 4, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx705", "Mullins", "Mullins", false, true, true, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari
    {"gfx801", nullptr, nullptr, true, false, false, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx801:xnack-", "Carrizo", "Carrizo", true, true, true, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx801:xnack+", nullptr, nullptr, true, false, false, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx802", "Tonga", "Tonga", true, true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Iceland
    {"gfx803", "Fiji", "Fiji", true, true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Ellesmere/Polaris10, Baffin/Polaris11, Polaris12, Polaris22/VegaM
    {"gfx805", nullptr, nullptr, true, false, false, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Tongapro
    {"gfx810", nullptr, nullptr, true, false, false, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx810:xnack-", "Stoney", "Stoney", true, true, true, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx810:xnack+", nullptr, nullptr, true, false, false, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx900", "gfx901", nullptr, true, true, false, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Greenland
    {"gfx900:xnack-", "gfx900", nullptr, true, true, !IS_BRAHMA, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx900:xnack+", "gfx901", nullptr, true, true, false, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx902", "gfx903", nullptr, true, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven
    {"gfx902:xnack-", "gfx902", nullptr, true, true, !IS_BRAHMA, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx902:xnack+", "gfx903", nullptr, true, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx904", "gfx905", nullptr, true, true, false, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega12
    {"gfx904:xnack-", "gfx904", nullptr, true, true, !IS_BRAHMA, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx904:xnack+", "gfx905", nullptr, true, true, false, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906", "gfx907", nullptr, true, true, false, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega20
    {"gfx906:sramecc-", "gfx907", nullptr, true, true, !IS_BRAHMA & false, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:sramecc+", nullptr, nullptr, true, false, false, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:xnack-", "gfx906", nullptr, true, true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:xnack+", "gfx907", nullptr, true, true, false, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:sramecc-:xnack-", "gfx906", nullptr, true, true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:sramecc-:xnack+", "gfx907", nullptr, true, true, false, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:sramecc+:xnack-", nullptr, nullptr, true, false, false, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx906:sramecc+:xnack+", nullptr, nullptr, true, false, false, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:sramecc-", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:sramecc+", nullptr, nullptr, true, false, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    // XNACK is ON for this row, so the Target ID must carry the '+' suffix.
    {"gfx908:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:sramecc-:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:sramecc-:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:sramecc+:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx908:sramecc+:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx909", nullptr, nullptr, false, false, false, 9, 0, 9, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code)
    {"gfx909:xnack-", nullptr, nullptr, false, false, !IS_BRAHMA & false, 9, 0, 9, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx909:xnack+", nullptr, nullptr, false, false, false, 9, 0, 9, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx90c", nullptr, nullptr, false, false, false, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Renoir
    {"gfx90c:xnack-", nullptr, nullptr, false, false, !IS_BRAHMA & false, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx90c:xnack+", nullptr, nullptr, false, false, false, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
    {"gfx1010", nullptr, nullptr, true, false, false, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1010:xnack-", "gfx1010", nullptr, true, true, false, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1010:xnack+", nullptr, nullptr, true, false, false, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1011", nullptr, nullptr, true, false, false, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1011:xnack-", "gfx1011", nullptr, true, true, false, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1011:xnack+", nullptr, nullptr, true, false, false, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1012", nullptr, nullptr, true, false, false, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1012:xnack-", "gfx1012", nullptr, true, true, false, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1012:xnack+", nullptr, nullptr, true, false, false, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1030", nullptr, nullptr, true, false, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1031", nullptr, nullptr, true, false, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1032", nullptr, nullptr, true, false, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
    {"gfx1033", nullptr, nullptr, false, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}
  };
  return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_));
}
// Returns the processor part of the target ID, i.e. everything before the
// first ':' (the whole target ID when it carries no feature settings).
std::string Isa::processorName() const {
  const std::string fullTargetId(targetId_);
  const std::string::size_type featureStart = fullTargetId.find(':');
  if (featureStart == std::string::npos) {
    return fullTargetId;
  }
  return fullTargetId.substr(0, featureStart);
}
// Returns the full ISA name: the HSA target-triple prefix followed by this
// ISA's target ID.
std::string Isa::isaName() const {
  std::string fullName(hsaIsaNamePrefix);
  fullName += targetId();
  return fullName;
}
bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) {
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
codeObjectIsa.versionStepping() != agentIsa.versionStepping())
return false;
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
agentIsa.sramecc() != Feature::Any);
if ((codeObjectIsa.sramecc() == Feature::Enabled ||
codeObjectIsa.sramecc() == Feature::Disabled) &&
codeObjectIsa.sramecc() != agentIsa.sramecc())
return false;
assert(codeObjectIsa.isXnackSupported() == agentIsa.isXnackSupported() &&
agentIsa.xnack() != Feature::Any);
if ((codeObjectIsa.xnack() == Feature::Enabled || codeObjectIsa.xnack() == Feature::Disabled) &&
codeObjectIsa.xnack() != agentIsa.xnack())
return false;
return true;
}
// Looks up a registry entry by full ISA name (target-triple prefix followed by
// a target ID). Returns nullptr when isaName is null, does not start with the
// expected prefix, or names a target ID that is not in the registry.
const Isa* Isa::findIsa(const char *isaName) {
  if (isaName == nullptr) {
    return nullptr;
  }

  // The name must begin with the target-triple prefix.
  const std::size_t prefixLength = std::strlen(hsaIsaNamePrefix);
  if (std::strncmp(isaName, hsaIsaNamePrefix, prefixLength) != 0) {
    return nullptr;
  }

  const char* const wantedTargetId = isaName + prefixLength;
  const auto registry = supportedIsas();
  for (const Isa* candidate = registry.first; candidate != registry.second; ++candidate) {
    if (std::strcmp(wantedTargetId, candidate->targetId_) == 0) {
      return candidate;
    }
  }
  return nullptr;
}
// Looks up the first registry entry with the given version whose SRAMECC and
// XNACK settings are acceptable. A registry entry's feature is acceptable when
// the entry does not support the feature or has exactly the requested setting.
// Returns nullptr when no entry matches.
const Isa* Isa::findIsa(uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
                        Isa::Feature sramecc, Isa::Feature xnack) {
  const auto featureAccepted = [](Isa::Feature entrySetting, Isa::Feature requested) {
    return entrySetting == amd::Isa::Feature::Unsupported || entrySetting == requested;
  };

  const auto registry = supportedIsas();
  for (const Isa* candidate = registry.first; candidate != registry.second; ++candidate) {
    if (candidate->versionMajor_ != versionMajor ||
        candidate->versionMinor_ != versionMinor ||
        candidate->versionStepping_ != versionStepping) {
      continue;
    }
    if (featureAccepted(candidate->sramecc_, sramecc) &&
        featureAccepted(candidate->xnack_, xnack)) {
      return candidate;
    }
  }
  return nullptr;
}
// Returns a pointer to the first entry of the ISA registry.
const Isa* Isa::begin() {
  const auto registry = supportedIsas();
  return registry.first;
}
// Returns a pointer one past the last entry of the ISA registry.
const Isa* Isa::end() {
  const auto registry = supportedIsas();
  return registry.second;
}
std::vector<Device*>* Device::devices_ = nullptr;
AppProfile Device::appProfile_;
@@ -300,8 +470,9 @@ bool Device::ValidateComgr() {
return true;
}
bool Device::create() {
bool Device::create(const Isa &isa) {
assert(!vaCacheAccess_ && !vaCacheMap_);
isa_ = &isa;
vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
if (nullptr == vaCacheAccess_) {
return false;
+219 -2
Просмотреть файл
@@ -37,6 +37,7 @@
#include "acl.h"
#include "hwdebug.hpp"
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
@@ -79,6 +80,7 @@ class SvmUnmapMemoryCommand;
class SvmPrefetchAsyncCommand;
class TransferBufferFileCommand;
class HwDebugManager;
class Isa;
class Device;
struct KernelParameterDescriptor;
struct Coord3D;
@@ -408,7 +410,7 @@ struct Info : public amd::EmbeddedObject {
//! Device name string
char name_[0x40];
//! Target ID string
//! Target triple plus target ID string
char targetId_[0x40];
//! Vendor name string
@@ -1238,6 +1240,214 @@ class MemObjMap : public AllStatic {
static amd::Monitor AllocatedLock_; //!< amd monitor locker
};
/// @brief Instruction Set Architecture properties.
///
/// The constructor is private; callers obtain instances through findIsa() or
/// by iterating begin()/end().
class Isa {
 public:
  /// @brief Isa's target feature setting type.
  enum class Feature : uint8_t {
    Unsupported,
    Any,
    Disabled,
    Enabled,
  };

  //! Return a uint64_t value that uniquely identifies the ISA, or 0 for a
  //! null pointer. This can be used when a scalar value handle to the ISA is
  //! required.
  static uint64_t toHandle(const Isa *isa) {
    static_assert(sizeof(isa) <= sizeof(uint64_t), "Handle size does not match pointer size");
    // Null is mapped to 0 explicitly, so no assumption about the bit pattern
    // of a null pointer is needed.
    return isa ? reinterpret_cast<uint64_t>(isa) : 0;
  }

  //! Return the ISA corresponding to a handle returned by Isa::toHandle,
  //! or nullptr if the handle is 0. This can be used when a scalar value
  //! handle for an ISA is provided.
  static const Isa* fromHandle(uint64_t handle) {
    static_assert(sizeof(const Isa*) <= sizeof(uint64_t),
                  "Handle size does not match pointer size");
    return handle ? reinterpret_cast<const Isa*>(handle) : nullptr;
  }

  /// @returns This Isa's target triple and target ID name.
  std::string isaName() const;

  /// @returns This Isa's processor name.
  std::string processorName() const;

  /// @returns This Isa's target ID name.
  const char *targetId() const {
    return targetId_;
  }

  /// @returns This Isa's name to use with the HSAIL compiler, or nullptr if
  /// the HSAIL compiler does not support this Isa.
  const char *hsailName() const {
    return hsailId_;
  }

  /// @returns This Isa's name to use with the AMD IL compiler, or nullptr if
  /// the AMD IL compiler does not support this Isa.
  const char *amdIlName() const {
    return amdIlId_;
  }

  /// @returns If the ROCm runtime supports the ISA.
  bool runtimeRocSupported() const {
    return runtimeRocSupported_;
  }

  /// @returns If the PAL runtime supports the ISA.
  bool runtimePalSupported() const {
    return runtimePalSupported_;
  }

  /// @returns If the GSL runtime supports the ISA.
  bool runtimeGslSupported() const {
    return runtimeGslSupported_;
  }

  /// @returns SRAM ECC feature status.
  const Feature &sramecc() const {
    return sramecc_;
  }

  /// @returns XNACK feature status.
  const Feature &xnack() const {
    return xnack_;
  }

  /// @returns True if SRAMECC feature is supported, false otherwise.
  bool isSrameccSupported() const {
    return sramecc_ != Feature::Unsupported;
  }

  /// @returns True if XNACK feature is supported, false otherwise.
  bool isXnackSupported() const {
    return xnack_ != Feature::Unsupported;
  }

  /// @returns This Isa's major version.
  uint32_t versionMajor() const {
    return versionMajor_;
  }

  /// @returns This Isa's minor version.
  uint32_t versionMinor() const {
    return versionMinor_;
  }

  /// @returns This Isa's stepping version.
  uint32_t versionStepping() const {
    return versionStepping_;
  }

  /// @returns This Isa's number of SIMDs per CU.
  uint32_t simdPerCU() const {
    return simdPerCU_;
  }

  /// @returns This Isa's number of workitems processed per SIMD.
  uint32_t simdWidth() const {
    return simdWidth_;
  }

  /// @returns This Isa's number of instructions processed per SIMD.
  uint32_t simdInstructionWidth() const {
    return simdInstructionWidth_;
  }

  /// @returns This Isa's memory channel bank width.
  uint32_t memChannelBankWidth() const {
    return memChannelBankWidth_;
  }

  /// @returns This Isa's local memory size per CU.
  uint32_t localMemSizePerCU() const {
    return localMemSizePerCU_;
  }

  /// @returns This Isa's number of banks of local memory.
  uint32_t localMemBanks() const {
    return localMemBanks_;
  }

  /// @returns True if @p codeObjectIsa and @p agentIsa are compatible,
  /// false otherwise.
  static bool isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa);

  /// @returns Isa for requested @p isaName, null pointer if not supported.
  static const Isa* findIsa(const char *isaName);

  /// @returns Isa for requested version and feature settings, null pointer if
  /// not supported.
  static const Isa* findIsa(uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
                            Feature sramecc = Feature::Any, Feature xnack = Feature::Any);

  /// @returns Iterator for first isa.
  static const Isa* begin();

  /// @returns Iterator for one past the end isa.
  static const Isa* end();

 private:
  // Stores the given values verbatim. The string arguments are kept as
  // pointers and are not copied.
  constexpr Isa(const char* targetId, const char* hsailId, const char* amdIlId,
                bool runtimeRocSupported, bool runtimePalSupported, bool runtimeGslSupported,
                uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
                Feature sramecc, Feature xnack, uint32_t simdPerCU, uint32_t simdWidth,
                uint32_t simdInstructionWidth, uint32_t memChannelBankWidth,
                uint32_t localMemSizePerCU, uint32_t localMemBanks)
      : targetId_(targetId),
        hsailId_(hsailId),
        amdIlId_(amdIlId),
        runtimeRocSupported_(runtimeRocSupported),
        runtimePalSupported_(runtimePalSupported),
        runtimeGslSupported_(runtimeGslSupported),
        versionMajor_(versionMajor),
        versionMinor_(versionMinor),
        versionStepping_(versionStepping),
        sramecc_(sramecc),
        xnack_(xnack),
        simdPerCU_(simdPerCU),
        simdWidth_(simdWidth),
        simdInstructionWidth_(simdInstructionWidth),
        memChannelBankWidth_(memChannelBankWidth),
        localMemSizePerCU_(localMemSizePerCU),
        localMemBanks_(localMemBanks) {}

  // @brief Returns the begin and end iterators for the supported ISAs.
  static std::pair<const Isa*, const Isa*> supportedIsas();

  // @brief Isa's target ID name. Used for LLVM Code Object Manager
  // compilations.
  const char* targetId_;

  // @brief Isa's HSAIL name. Used for the Compiler Library for HSAIL
  // compilation using the Shader Compiler Finalizer. nullptr if
  // unsupported.
  const char* hsailId_;

  // @brief Isa's AMD IL name. Used for the Compiler Library for AMD IL
  // compilation using the Shader Compiler. nullptr if unsupported.
  const char* amdIlId_;

  bool runtimeRocSupported_; //!< ROCm runtime is supported.
  bool runtimePalSupported_; //!< PAL runtime is supported.
  bool runtimeGslSupported_; //!< GSL runtime is supported.
  uint32_t versionMajor_; //!< Isa's major version.
  uint32_t versionMinor_; //!< Isa's minor version.
  uint32_t versionStepping_; //!< Isa's stepping version.
  Feature sramecc_; //!< SRAMECC feature.
  Feature xnack_; //!< XNACK feature.
  uint32_t simdPerCU_; //!< Number of SIMDs per CU.
  uint32_t simdWidth_; //!< Number of workitems processed per SIMD.
  uint32_t simdInstructionWidth_; //!< Number of instructions processed per SIMD.
  uint32_t memChannelBankWidth_; //!< Memory channel bank width.
  uint32_t localMemSizePerCU_; //!< Local memory size per CU.
  uint32_t localMemBanks_; //!< Number of banks of local memory.
}; // class Isa
/*! \addtogroup Runtime
* @{
*
@@ -1300,7 +1510,7 @@ class Device : public RuntimeObject {
virtual ~Device();
//! Initializes abstraction layer device object
bool create();
bool create(const Isa &isa);
uint retain() {
// Overwrite the RuntimeObject::retain().
@@ -1476,6 +1686,12 @@ class Device : public RuntimeObject {
//! Returns TRUE if the device is available for computations
bool isOnline() const { return online_; }
//! Returns the ISA associated with this device (asserts that one has been set).
const Isa &isa() const {
assert(isa_ != nullptr);
const Isa &deviceIsa = *isa_;
return deviceIsa;
}
//! Return a non-zero uint64_t value that uniquely identifies the device.
//! This can be used when a scalar value handle to the device is require.
static uint64_t toHandle(const Device *device) {
@@ -1611,6 +1827,7 @@ class Device : public RuntimeObject {
static Memory* p2p_stage_; //!< Staging resources
private:
const Isa *isa_; //!< Device isa
bool IsTypeMatching(cl_device_type type, bool offlineDevices);
#if defined(WITH_HSA_DEVICE)
+1 -1
Просмотреть файл
@@ -1166,7 +1166,7 @@ bool Kernel::SetAvailableSgprVgpr() {
bool hasVgprMeta = false;
amd_comgr_status_t status = amd::Comgr::get_isa_metadata(
prog().device().info().targetId_, &isaMeta);
prog().device().isa().isaName().c_str(), &isaMeta);
if (status == AMD_COMGR_STATUS_SUCCESS) {
hasIsaMeta = true;
+26 -15
Просмотреть файл
@@ -82,7 +82,6 @@ Program::Program(amd::Device& device, amd::Program& owner)
lastBuildOptionsArg_(),
buildStatus_(CL_BUILD_NONE),
buildError_(CL_SUCCESS),
machineTarget_(nullptr),
globalVariableTotalSize_(0),
programOptions_(nullptr)
{
@@ -286,7 +285,7 @@ amd_comgr_status_t Program::createAction(const amd_comgr_language_t oclver,
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
status = amd::Comgr::action_info_set_isa_name(*action, device().info().targetId_);
status = amd::Comgr::action_info_set_isa_name(*action, device().isa().isaName().c_str());
}
if (status == AMD_COMGR_STATUS_SUCCESS) {
@@ -719,8 +718,14 @@ bool Program::compileImplHSAIL(const std::string& sourceCode,
acl_error errorCode;
aclTargetInfo target;
std::string arch = LP64_SWITCH("hsail", "hsail64");
target = aclGetTargetInfo(arch.c_str(), machineTarget_, &errorCode);
const char* arch = LP64_SWITCH("hsail", "hsail64");
const char* hsailName = device().isa().hsailName();
if (!hsailName) {
// HSAIL compiler does not support device's ISA.
LogPrintfError("HSAIL compiler does not support %s", device().isa().targetId());
return false;
}
target = aclGetTargetInfo(arch, hsailName, &errorCode);
// end if asic info is ready
// We dump the source code for each program (param: headers)
@@ -1107,7 +1112,7 @@ bool Program::linkImplLC(amd::option::Options* options) {
linkOptions.push_back("correctly_rounded_sqrt");
}
if (options->oVariables->DenormsAreZero || AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
(device().info().gfxipMajor_ < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) {
(device().isa().versionMajor() < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) {
linkOptions.push_back("daz_opt");
}
if (options->oVariables->FiniteMathOnly || options->oVariables->FastRelaxedMath) {
@@ -1365,9 +1370,7 @@ bool Program::initBuild(amd::option::Options* options) {
return false;
}
const char* devName = machineTarget_;
options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
clBinary()->getEncryptCode(), true);
options->setPerBuildInfo(device().isa().targetId(), clBinary()->getEncryptCode(), true);
// Elf Binary setup
std::string outFileName;
@@ -1703,17 +1706,26 @@ int32_t Program::build(const std::string& sourceCode, const char* origOptions,
// ================================================================================================
std::vector<std::string> Program::ProcessOptions(amd::option::Options* options) {
std::string scratchStr;
std::vector<std::string> optionsVec;
if (!isLC()) {
optionsVec.push_back("-D__AMD__=1");
scratchStr.clear();
optionsVec.push_back(scratchStr.append("-D__").append(machineTarget_).append("__=1"));
std::string processorName = device().isa().processorName();
const char* hsailName = device().isa().hsailName();
const char* amdIlName = device().isa().amdIlName();
scratchStr.clear();
optionsVec.push_back(scratchStr.append("-D__").append(machineTarget_).append("=1"));
optionsVec.push_back(std::string("-D__") + processorName + "__=1");
optionsVec.push_back(std::string("-D__") + processorName + "=1");
if (hsailName && (strcmp(hsailName, processorName.c_str()) != 0)) {
optionsVec.push_back(std::string("-D__") + hsailName + "__=1");
optionsVec.push_back(std::string("-D__") + hsailName + "=1");
}
if (amdIlName && (strcmp(amdIlName, processorName.c_str()) != 0) &&
(!hsailName || strcmp(amdIlName, hsailName) != 0)) {
optionsVec.push_back(std::string("-D__") + amdIlName + "__=1");
optionsVec.push_back(std::string("-D__") + amdIlName + "=1");
}
// Set options for the standard device specific options
// All our devices support these options now
@@ -1785,8 +1797,7 @@ std::vector<std::string> Program::ProcessOptions(amd::option::Options* options)
}
} else {
for (auto e : extensions) {
scratchStr.clear();
optionsVec.push_back(scratchStr.append("-D").append(e).append("=1"));
optionsVec.push_back(std::string("-D") + e + "=1");
}
}
}
+1 -5
Просмотреть файл
@@ -117,7 +117,6 @@ class Program : public amd::HeapObject {
int32_t buildStatus_; //!< build status.
int32_t buildError_; //!< build error
const char* machineTarget_; //!< Machine target for this program
aclTargetInfo info_; //!< The info target for this binary.
size_t globalVariableTotalSize_;
amd::option::Options* programOptions_;
@@ -233,9 +232,6 @@ class Program : public amd::HeapObject {
const uint32_t codeObjectVer() const { return codeObjectVer_; }
#endif
//! Get the machine target for the program
const char* machineTarget() const { return machineTarget_; }
//! Check if program is HIP based
const bool isHIP() const { return (isHIP_ == 1); }
@@ -293,7 +289,7 @@ class Program : public amd::HeapObject {
void releaseClBinary();
//! return target info
virtual const aclTargetInfo& info(const char* str = "") = 0;
virtual const aclTargetInfo& info() = 0;
virtual bool setKernels(
amd::option::Options* options, void* binary, size_t binSize,
+1 -1
Просмотреть файл
@@ -66,7 +66,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
if (platform == amd::Elf::COMPLIB_PLATFORM) {
// BIF 3.0
uint32_t flag;
aclTargetInfo tgtInfo = aclGetTargetInfo("amdil", dev().hwInfo()->targetName_, NULL);
aclTargetInfo tgtInfo = aclGetTargetInfo("amdil", dev().isa().amdIlName(), NULL);
if (!elfIn()->getFlags(flag)) {
LogError("The OCL binary image loading failed: incorrect format");
return false;
-151
Просмотреть файл
@@ -80,157 +80,6 @@ static constexpr uint HsaSamplerObjectAlignment = 16;
//! HSA path specific defines for images
static constexpr uint DeviceQueueMaskSize = 32;
//! Defines all supported ASIC families
enum AsicFamilies { Family7xx, Family8xx, FamilyTotal };
// FIXME: Change to use 2 digit for major/minor/stepping.
//! Numeric GFXIP identifiers. Each value is the decimal concatenation of the
//! major, minor and stepping numbers (e.g. gfx803 = 803). gfx90c has stepping
//! 12, so its value (9012) does not follow the three-digit pattern of the
//! other entries.
enum gfx_handle {
gfx600 = 600,
gfx601 = 601,
gfx602 = 602,
gfx700 = 700,
gfx701 = 701,
gfx702 = 702,
gfx704 = 704,
gfx705 = 705,
gfx800 = 800,
gfx801 = 801,
gfx802 = 802,
gfx803 = 803,
gfx810 = 810,
gfx900 = 900,
gfx902 = 902,
gfx904 = 904,
gfx906 = 906,
gfx909 = 909,
gfx90c = 9012
};
// FIXME: Does this need updating?
//! Static per-ASIC properties: compiler target names, SIMD/memory geometry and
//! the GFXIP version, both combined (gfxipVersion_) and split into
//! major/minor/stepping.
struct AMDDeviceInfo {
uint machine_; //!< Machine target ID
const char* targetName_; //!< Target name
const char* machineTarget_; //!< Machine target
uint simdPerCU_; //!< Number of SIMDs per CU
uint simdWidth_; //!< Number of workitems processed per SIMD
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemSizePerCU_; //!< Local memory size per CU
uint localMemBanks_; //!< Number of banks of local memory
uint gfxipVersion_; //!< The core engine GFXIP version
uint gfxipMajor_; //!< The core engine GFXIP Major version
uint gfxipMinor_; //!< The core engine GFXIP Minor version
uint gfxipStepping_; //!< The core engine GFXIP Stepping version
};
//! Table of per-ASIC properties; each row is labelled with a CAL_TARGET_* name.
//! Rows that list fewer initializers than AMDDeviceInfo has members leave the
//! trailing members zero-initialized (aggregate initialization).
//! NOTE(review): presumably indexed by the CAL target enumeration value, so
//! row order would matter — confirm against callers.
static constexpr AMDDeviceInfo DeviceInfo[] = {
// Machine targetName machineTarget
/* CAL_TARGET_600 */ {ED_ATI_CAL_MACHINE_R600_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_610 */ {ED_ATI_CAL_MACHINE_R610_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_630 */ {ED_ATI_CAL_MACHINE_R630_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_670 */ {ED_ATI_CAL_MACHINE_R670_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_7XX */ {ED_ATI_CAL_MACHINE_R770_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_770 */ {ED_ATI_CAL_MACHINE_R770_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_710 */ {ED_ATI_CAL_MACHINE_R710_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_730 */ {ED_ATI_CAL_MACHINE_R730_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
/* CAL_TARGET_CYPRESS */ {ED_ATI_CAL_MACHINE_CYPRESS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32,
400, 4, 0, 0},
/* CAL_TARGET_JUNIPER */ {ED_ATI_CAL_MACHINE_JUNIPER_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32,
400, 4, 0, 0},
/* CAL_TARGET_REDWOOD */ {ED_ATI_CAL_MACHINE_REDWOOD_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16,
400, 4, 0, 0},
/* CAL_TARGET_CEDAR */ {ED_ATI_CAL_MACHINE_CEDAR_ISA, "", "", 1, 8, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
/* CAL_TARGET_SUMO */ {ED_ATI_CAL_MACHINE_SUMO_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
/* CAL_TARGET_SUPERSUMO*/ {ED_ATI_CAL_MACHINE_SUPERSUMO_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16,
400, 4, 0, 0},
/* CAL_TARGET_WRESTLER*/ {ED_ATI_CAL_MACHINE_WRESTLER_ISA, "", "", 1, 8, 5, 256, 32 * Ki, 16,
400, 4, 0, 0},
/* CAL_TARGET_CAYMAN */ {ED_ATI_CAL_MACHINE_CAYMAN_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32,
500, 5, 0, 0},
/* CAL_TARGET_KAUAI */ {ED_ATI_CAL_MACHINE_KAUAI_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
/* CAL_TARGET_BARTS */ {ED_ATI_CAL_MACHINE_BARTS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
/* CAL_TARGET_TURKS */ {ED_ATI_CAL_MACHINE_TURKS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
/* CAL_TARGET_CAICOS */ {ED_ATI_CAL_MACHINE_CAICOS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32,
400, 4, 0, 0},
/* CAL_TARGET_TAHITI */ {ED_ATI_CAL_MACHINE_TAHITI_ISA, "Tahiti", "tahiti", 4, 16, 1, 256,
64 * Ki, 32, gfx600, 6, 0, 0},
/* CAL_TARGET_PITCAIRN */ {ED_ATI_CAL_MACHINE_PITCAIRN_ISA, "Pitcairn", "pitcairn", 4, 16, 1,
256, 64 * Ki, 32, gfx601, 6, 0, 1},
/* CAL_TARGET_CAPEVERDE */ {ED_ATI_CAL_MACHINE_CAPEVERDE_ISA, "Capeverde", "capeverde", 4, 16,
1, 256, 64 * Ki, 32, gfx601, 6, 0, 1},
/* CAL_TARGET_DEVASTATOR */ {ED_ATI_CAL_MACHINE_DEVASTATOR_ISA, "", "", 1, 16, 4, 256, 32 * Ki,
32, 500, 5, 0, 0},
/* CAL_TARGET_SCRAPPER */ {ED_ATI_CAL_MACHINE_SCRAPPER_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32,
500, 5, 0, 0},
/* CAL_TARGET_OLAND */ {ED_ATI_CAL_MACHINE_OLAND_ISA, "Oland", "oland", 4, 16, 1, 256, 64 * Ki,
32, gfx602, 6, 0, 2},
/* CAL_TARGET_BONAIRE */ {ED_ATI_CAL_MACHINE_BONAIRE_ISA, "Bonaire", "bonaire", 4, 16, 1, 256,
64 * Ki, 32, gfx704, 7, 0, 4},
/* CAL_TARGET_SPECTRE */ {ED_ATI_CAL_MACHINE_SPECTRE_ISA, "Spectre", "spectre", 4, 16, 1, 256,
64 * Ki, 32, gfx700, 7, 0, 0},
/* CAL_TARGET_SPOOKY */ {ED_ATI_CAL_MACHINE_SPOOKY_ISA, "Spooky", "spooky", 4, 16, 1, 256,
64 * Ki, 32, gfx700, 7, 0, 0},
/* CAL_TARGET_KALINDI */ {ED_ATI_CAL_MACHINE_KALINDI_ISA, "Kalindi", "kalindi", 4, 16, 1, 256,
64 * Ki, 32, gfx700, 7, 0, 0},
/* CAL_TARGET_HAINAN */ {ED_ATI_CAL_MACHINE_HAINAN_ISA, "Hainan", "hainan", 4, 16, 1, 256,
64 * Ki, 32, gfx602, 6, 0, 2},
/* CAL_TARGET_HAWAII */ {ED_ATI_CAL_MACHINE_HAWAII_ISA, "Hawaii", "hawaii", 4, 16, 1, 256,
64 * Ki, 32, gfx701, 7, 0, 1},
/* CAL_TARGET_ICELAND */ {ED_ATI_CAL_MACHINE_ICELAND_ISA, "Iceland", "iceland", 4, 16, 1, 256,
64 * Ki, 32, gfx802, 8, 0, 2},
/* CAL_TARGET_TONGA */ {ED_ATI_CAL_MACHINE_TONGA_ISA, "Tonga", "tonga", 4, 16, 1, 256, 64 * Ki,
32, gfx802, 8, 0, 2},
/* CAL_TARGET_MULLINS */ {ED_ATI_CAL_MACHINE_GODAVARI_ISA, "Mullins", "mullins", 4, 16, 1, 256,
64 * Ki, 32, gfx705, 7, 0, 5},
/* CAL_TARGET_FIJI */ {ED_ATI_CAL_MACHINE_FIJI_ISA, "Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32,
gfx803, 8, 0, 3},
/* CAL_TARGET_CARRIZO */ {ED_ATI_CAL_MACHINE_CARRIZO_ISA, "Carrizo", "carrizo", 4, 16, 1, 256,
64 * Ki, 32, gfx801, 8, 0, 1},
/* CAL_TARGET_ELLESMERE */ {ED_ATI_CAL_MACHINE_ELLESMERE_ISA, "Ellesmere", "ellesmere", 4, 16,
1, 256, 64 * Ki, 32, gfx803, 8, 0, 3},
/* CAL_TARGET_BAFFIN */ {ED_ATI_CAL_MACHINE_BAFFIN_ISA, "Baffin", "baffin", 4, 16, 1, 256,
64 * Ki, 32, gfx803, 8, 0, 3},
/* CAL_TARGET_GREENLAND */ {ED_ATI_CAL_MACHINE_GREENLAND_ISA, IF(IS_BRAHMA, "", "gfx900"),
IF(IS_BRAHMA, "", "gfx900"), 4, 16, 1, 256, 64 * Ki, 32, gfx900, 9, 0, 0},
/* CAL_TARGET_STONEY */ {ED_ATI_CAL_MACHINE_STONEY_ISA, "Stoney", "stoney", 4, 16, 1, 256,
64 * Ki, 32, gfx810, 8, 1, 0},
/* CAL_TARGET_LEXA */ {ED_ATI_CAL_MACHINE_LEXA_ISA, "gfx803", "gfx803", 4, 16, 1, 256, 64 * Ki,
32, gfx803, 8, 0, 3},
/* CAL_TARGET_RAVEN */ {ED_ATI_CAL_MACHINE_RAVEN_ISA, IF(IS_BRAHMA, "", "gfx902"),
IF(IS_BRAHMA, "", "gfx902"), 4, 16, 1, 256, 64 * Ki, 32, gfx902, 9, 0, 2},
/* CAL_TARGET_RAVEN2 */ {ED_ATI_CAL_MACHINE_RAVEN2_ISA, IF(IS_BRAHMA, "", "gfx909"),
IF(IS_BRAHMA, "", "gfx902"), 4, 16, 1, 256, 64 * Ki, 32, gfx909, 9, 0, 9},
/* CAL_TARGET_RENOIR */{ ED_ATI_CAL_MACHINE_RENOIR_ISA, IF(IS_BRAHMA, "", "gfx90c"),
IF(IS_BRAHMA, "", "gfx90c"), 4, 16, 1, 256, 64 * Ki, 32, gfx90c, 9, 0, 12},
/* CAL_TARGET_POLARIS22 */ {ED_ATI_CAL_MACHINE_POLARIS22_ISA, IF(IS_BRAHMA, "", "gfx803"),
IF(IS_BRAHMA, "", "gfx803"), 4, 16, 1, 256, 64 * Ki, 32, gfx803, 8, 0, 3},
/* CAL_TARGET_VEGA12 */{ ED_ATI_CAL_MACHINE_VEGA12_ISA, IF(IS_BRAHMA, "", "gfx904"),
IF(IS_BRAHMA, "", "gfx904"), 4, 16, 1, 256, 64 * Ki, 32, gfx904, 9, 0, 4},
/* CAL_TARGET_VEGA20 */{ ED_ATI_CAL_MACHINE_VEGA20_ISA, IF(IS_BRAHMA, "", "gfx906"),
IF(IS_BRAHMA, "", "gfx906"), 4, 16, 1, 256, 64 * Ki, 32, gfx906, 9, 0, 6 },
};
// FIXME: These need updating to the new Target ID format. Or is all this code
// now obsolete, and should it be deleted? How are the XNACK and SRAMECC
// settings supported?
//
// ISA name strings, one per gfx processor. Each string is the
// "amdgcn-amd-amdhsa--" prefix followed by the processor name; some entries
// additionally carry a fixed ":xnack+" or ":xnack-" suffix. No entry carries
// an SRAMECC setting.
static constexpr const char* Gfx600 = "amdgcn-amd-amdhsa--gfx600";
static constexpr const char* Gfx601 = "amdgcn-amd-amdhsa--gfx601";
static constexpr const char* Gfx602 = "amdgcn-amd-amdhsa--gfx602";
static constexpr const char* Gfx700 = "amdgcn-amd-amdhsa--gfx700";
static constexpr const char* Gfx701 = "amdgcn-amd-amdhsa--gfx701";
static constexpr const char* Gfx702 = "amdgcn-amd-amdhsa--gfx702";
static constexpr const char* Gfx704 = "amdgcn-amd-amdhsa--gfx704";
static constexpr const char* Gfx705 = "amdgcn-amd-amdhsa--gfx705";
// The entries below with a suffix pin XNACK on ("xnack+") or off ("xnack-").
static constexpr const char* Gfx801 = "amdgcn-amd-amdhsa--gfx801:xnack+";
static constexpr const char* Gfx802 = "amdgcn-amd-amdhsa--gfx802";
static constexpr const char* Gfx803 = "amdgcn-amd-amdhsa--gfx803";
static constexpr const char* Gfx810 = "amdgcn-amd-amdhsa--gfx810:xnack+";
static constexpr const char* Gfx900 = "amdgcn-amd-amdhsa--gfx900:xnack-";
static constexpr const char* Gfx902 = "amdgcn-amd-amdhsa--gfx902:xnack+";
static constexpr const char* Gfx904 = "amdgcn-amd-amdhsa--gfx904:xnack-";
static constexpr const char* Gfx906 = "amdgcn-amd-amdhsa--gfx906:xnack-";
static constexpr const char* Gfx909 = "amdgcn-amd-amdhsa--gfx909:xnack+";
static constexpr const char* Gfx90c = "amdgcn-amd-amdhsa--gfx90c:xnack+";
// OpenCL versions the runtime distinguishes. The enumerators take the
// implicit values 0, 1, 2, ... in declaration order, so they compare in
// declaration order.
enum OclVersion {
  OpenCL10,
  OpenCL11,
  OpenCL12,
  OpenCL20,
  OpenCL21
};
+195 -118
Просмотреть файл
@@ -58,6 +58,93 @@
#include <iostream>
#include <sstream>
namespace {
//! Define the mapping from CAL asic enumeration values to the
//! compiler gfx major/minor/stepping version.
//!
//! One record describes one CAL target. The records in supportedCalDevices
//! are brace-initialized positionally, so the member order below must stay in
//! step with the column order of that table.
struct CalDevice {
uint32_t gfxipMajor_; //!< The core engine GFXIP Major version
uint32_t gfxipMinor_; //!< The core engine GFXIP Minor version
uint32_t gfxipStepping_; //!< The core engine GFXIP Stepping version
CALMachineType calMachine_; //!< CAL machine type
const char* calName_; //!< CAL device name
CALtarget calTarget_; //!< CAL target
bool preferPal_; //!< Prefer to use PAL if GPU_ENABLE_PAL=2
bool nullUseDouble_; //!< Use double precision for a NullDevice
bool nullUseOpenCL200_; //!< Use OpenCL 2.0 for a NullDevice
};
//! Table of the CAL devices known to this file, one row per CAL target.
//!
//! Lookups over this table are linear and return the FIRST matching row:
//!  - findIsa() matches on the CAL target column.
//!  - findCal() matches on the (major, minor, stepping) columns. Several rows
//!    share the same gfx version, so for those versions row order decides
//!    which CAL target/name is returned.
//!
//! Rows inside "#if !defined(BRAHMA)" are compiled out of BRAHMA builds.
static constexpr CalDevice supportedCalDevices[] = {
// Column layout (matches the member order of CalDevice):
//   GFX version (major, minor, stepping), GSL machine, CAL name, CAL target,
//   prefer PAL, NullDevice uses double, NullDevice uses OpenCL 2.0
{6, 0, 0, ED_ATI_CAL_MACHINE_TAHITI_ISA, "Tahiti", CAL_TARGET_TAHITI, false, true, false},
{6, 0, 1, ED_ATI_CAL_MACHINE_PITCAIRN_ISA, "Pitcairn", CAL_TARGET_PITCAIRN, false, true, false},
{6, 0, 1, ED_ATI_CAL_MACHINE_CAPEVERDE_ISA, "Capeverde", CAL_TARGET_CAPEVERDE, false, true, false},
{6, 0, 2, ED_ATI_CAL_MACHINE_OLAND_ISA, "Oland", CAL_TARGET_OLAND, false, true, false},
{6, 0, 2, ED_ATI_CAL_MACHINE_HAINAN_ISA, "Hainan", CAL_TARGET_HAINAN, false, true, false},
{7, 0, 0, ED_ATI_CAL_MACHINE_KALINDI_ISA, "Kalindi", CAL_TARGET_KALINDI, false, true, true },
{7, 0, 0, ED_ATI_CAL_MACHINE_SPECTRE_ISA, "Spectre", CAL_TARGET_SPECTRE, false, true, true },
{7, 0, 0, ED_ATI_CAL_MACHINE_SPOOKY_ISA, "Spooky", CAL_TARGET_SPOOKY, false, true, true },
{7, 0, 2, ED_ATI_CAL_MACHINE_HAWAII_ISA, "Hawaii", CAL_TARGET_HAWAII, false, true, true }, // Also Hawaiipro (generated code is for Hawaiipro)
{7, 0, 4, ED_ATI_CAL_MACHINE_BONAIRE_ISA, "Bonaire", CAL_TARGET_BONAIRE, false, true, true },
{7, 0, 5, ED_ATI_CAL_MACHINE_GODAVARI_ISA, "Mullins", CAL_TARGET_GODAVARI, false, true, true }, // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support Mullins.
{8, 0, 1, ED_ATI_CAL_MACHINE_CARRIZO_ISA, "Carrizo", CAL_TARGET_CARRIZO, false, true, true }, // Also Bristol Ridge
{8, 0, 2, ED_ATI_CAL_MACHINE_ICELAND_ISA, "Iceland", CAL_TARGET_ICELAND, false, true, true },
{8, 0, 2, ED_ATI_CAL_MACHINE_TONGA_ISA, "Tonga", CAL_TARGET_TONGA, false, true, true }, // Also Tongapro (generated code is for Tonga)
{8, 0, 3, ED_ATI_CAL_MACHINE_FIJI_ISA, "Fiji", CAL_TARGET_FIJI, false, true, true },
{8, 0, 3, ED_ATI_CAL_MACHINE_ELLESMERE_ISA, "Ellesmere", CAL_TARGET_ELLESMERE, false, true, true }, // Polaris10
{8, 0, 3, ED_ATI_CAL_MACHINE_BAFFIN_ISA, "Baffin", CAL_TARGET_BAFFIN, false, true, true }, // Polaris11
{8, 0, 3, ED_ATI_CAL_MACHINE_LEXA_ISA, "gfx803", CAL_TARGET_LEXA, false, true, true }, // Polaris12
#if !defined(BRAHMA)
{8, 0, 3, ED_ATI_CAL_MACHINE_POLARIS22_ISA, "gfx803", CAL_TARGET_POLARIS22, false, true, true },
#endif
{8, 1, 0, ED_ATI_CAL_MACHINE_STONEY_ISA, "Stoney", CAL_TARGET_STONEY, false, true, true },
#if !defined(BRAHMA)
// All gfx9 rows set the prefer-PAL column.
{9, 0, 0, ED_ATI_CAL_MACHINE_GREENLAND_ISA, "gfx900", CAL_TARGET_GREENLAND, true, true, true }, // Vega10
{9, 0, 2, ED_ATI_CAL_MACHINE_RAVEN_ISA, "gfx902", CAL_TARGET_RAVEN, true, true, true },
{9, 0, 4, ED_ATI_CAL_MACHINE_VEGA12_ISA, "gfx904", CAL_TARGET_VEGA12, true, true, true },
{9, 0, 6, ED_ATI_CAL_MACHINE_VEGA20_ISA, "gfx906", CAL_TARGET_VEGA20, true, true, true },
{9, 0, 9, ED_ATI_CAL_MACHINE_RAVEN2_ISA, "gfx909", CAL_TARGET_RAVEN2, true, true, true },
{9, 0, 12, ED_ATI_CAL_MACHINE_RENOIR_ISA, "gfx90c", CAL_TARGET_RENOIR, true, true, true },
#endif
};
// Compile-time reminder: this fires as soon as CAL_TARGET_LAST is no longer
// CAL_TARGET_VEGA20, i.e. when a new CAL target is appended to the enumeration
// and the table above needs a matching row.
static_assert(CAL_TARGET_LAST == CAL_TARGET_VEGA20, "Add new CAL targets to mapping");
//! Look up the ISA and the CAL properties that belong to a CAL target.
//!
//! The first row of supportedCalDevices whose CAL target equals \p calTarget
//! is used. Its gfx version, together with the requested SRAMECC and XNACK
//! settings (true maps to Feature::Enabled, false to Feature::Disabled), is
//! handed to amd::Isa::findIsa().
//!
//! \return (isa, CAL machine, CAL name, preferPal, nullUseDouble,
//!         nullUseOpenCL200). When the CAL target has no row the tuple is
//!         (nullptr, CALMachineType(0), nullptr, false, false, false). The
//!         isa element is whatever amd::Isa::findIsa() returned, so callers
//!         must check it even when a row was found.
static std::tuple<const amd::Isa*, CALMachineType, const char*, bool, bool, bool> findIsa(
    CALtarget calTarget, bool sramecc, bool xnack) {
  const amd::Isa::Feature srameccSetting =
      sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled;
  const amd::Isa::Feature xnackSetting =
      xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled;

  for (const CalDevice& entry : supportedCalDevices) {
    if (entry.calTarget_ != calTarget) {
      continue;
    }
    // First matching row wins.
    const amd::Isa* matchedIsa =
        amd::Isa::findIsa(entry.gfxipMajor_, entry.gfxipMinor_, entry.gfxipStepping_,
                          srameccSetting, xnackSetting);
    return std::make_tuple(matchedIsa, entry.calMachine_, entry.calName_, entry.preferPal_,
                           entry.nullUseDouble_, entry.nullUseOpenCL200_);
  }

  // No row describes this CAL target.
  return std::make_tuple(nullptr, static_cast<CALMachineType>(0), nullptr, false, false, false);
}
static std::tuple<bool, CALMachineType, CALtarget, const char*, bool, bool, bool> findCal(
uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxipStepping) {
auto calDeviceIter = std::find_if(std::begin(supportedCalDevices), std::end(supportedCalDevices),
[&](const CalDevice& calDevice) {
return calDevice.gfxipMajor_ == gfxipMajor &&
calDevice.gfxipMinor_ == gfxipMinor &&
calDevice.gfxipStepping_ == gfxipStepping;
});
if (calDeviceIter == std::end(supportedCalDevices)) {
return std::make_tuple(false, static_cast<CALMachineType>(0), static_cast<CALtarget>(0),
nullptr, false, false, false);
}
return std::make_tuple(true, calDeviceIter->calMachine_, calDeviceIter->calTarget_,
calDeviceIter->calName_, calDeviceIter->preferPal_,
calDeviceIter->nullUseDouble_, calDeviceIter->nullUseOpenCL200_);
}
} // namespace
bool DeviceLoad() {
bool ret = false;
@@ -79,126 +166,102 @@ aclCompiler* NullDevice::hsaCompiler_;
AppProfile Device::appProfile_;
NullDevice::NullDevice()
: amd::Device(), calTarget_(static_cast<CALtarget>(0)), hwInfo_(NULL) {}
: amd::Device(),
calTarget_(static_cast<CALtarget>(0)),
calMachine_(static_cast<CALMachineType>(0)),
calName_(nullptr) {}
bool NullDevice::init() {
std::vector<Device*> devices;
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
// Loop through all supported devices and create each of them
for (uint id = CAL_TARGET_TAHITI; id <= CAL_TARGET_LAST; ++id) {
bool foundActive = false;
bool foundDuplicate = false;
if (gpu::DeviceInfo[id].targetName_[0] == '\0') {
// Create offline devices for all ISAs not already associated with an online
// device. This allows code objects to be compiled for all supported ISAs.
std::vector<Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, false);
for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) {
if (!isa->runtimeGslSupported()) {
continue;
}
// Loop through all active devices and see if we match one
for (uint i = 0; i < devices.size(); ++i) {
if (static_cast<NullDevice*>(devices[i])->calTarget() == static_cast<CALtarget>(id)) {
foundActive = true;
bool isOnline = false;
// Check if the particular device is online
for (size_t i = 0; i < devices.size(); i++) {
if (&(devices[i]->isa()) == isa) {
isOnline = true;
break;
}
}
// Don't report an offline device if it's active
if (foundActive) {
if (isOnline) {
continue;
}
// Loop through all previous devices in the DeviceInfo list and compare them with the
// current entry to see if the current entry was listed previously in the DeviceInfo,
// if so, then it means the current entry already has been added in the offline device list
for (uint j = 0; j < id; ++j) {
if (gpu::DeviceInfo[j].targetName_[0] == '\0') {
continue;
}
if (strcmp(gpu::DeviceInfo[j].targetName_, gpu::DeviceInfo[id].targetName_) == 0) {
foundDuplicate = true;
break;
}
bool found;
CALMachineType calMachine;
CALtarget calTarget;
const char* calName;
bool preferPal;
bool nullUseDouble;
bool nullUseOpenCL200;
std::tie(found, calMachine, calTarget, calName, preferPal, nullUseDouble, nullUseOpenCL200) =
findCal(isa->versionMajor(), isa->versionMinor(), isa->versionStepping());
if (!found) {
// GSL does not support this asic.
continue;
}
// Don't report an offline device twice
if (foundDuplicate) {
continue;
std::unique_ptr<NullDevice> nullDevice(new NullDevice());
if (!nullDevice) {
LogPrintfError("Error allocating new instance of offline CAL Device %s", isa->targetId());
return false;
}
NullDevice* dev = new NullDevice();
if (NULL != dev) {
if (!dev->create(static_cast<CALtarget>(id))) {
delete dev;
} else {
dev->registerDevice();
}
if (!nullDevice->create(calName, *isa, calTarget, preferPal, nullUseDouble, nullUseOpenCL200)) {
// Skip over unsupported devices
LogPrintfError("Skipping creating new instance of offline CAL Device %s", isa->targetId());
continue;
}
nullDevice.release()->registerDevice();
}
return true;
}
bool NullDevice::create(CALtarget target) {
CALdeviceattribs calAttr = {0};
gslMemInfo memInfo = {0};
online_ = false;
calTarget_ = calAttr.target = target;
hwInfo_ = &DeviceInfo[calTarget_];
assert((target >= CAL_TARGET_TAHITI) && (target != CAL_TARGET_SCRAPPER) &&
(target != CAL_TARGET_DEVASTATOR));
if ((GPU_ENABLE_PAL == 2) && usePal()) {
bool NullDevice::create(const char* calName, const amd::Isa& isa, CALtarget target,
bool preferPal, bool doublePrecision, bool openCL200) {
if (!isa.runtimeGslSupported()) {
LogPrintfError("Offline CAL device %s is not supported", isa.targetId());
return false;
}
if ((GPU_ENABLE_PAL == 2) && isa.runtimePalSupported() && preferPal) {
LogPrintfError("Skipping as GPU_ENABLE_PAL=2 indicating to use PAL for offline CAL device %s",
isa.targetId());
return false;
}
online_ = false;
calTarget_ = target;
calName_ = calName;
// sets up vaCacheAccess_ and vaCacheMap_.
if (!amd::Device::create(isa)) {
LogPrintfError("Unable to setup offline device for CAL device %s", isa.targetId());
return false;
}
CALdeviceattribs calAttr = {0};
calAttr.target = calTarget();
// Force double if it could be supported
switch (target) {
case CAL_TARGET_PITCAIRN:
case CAL_TARGET_CAPEVERDE:
case CAL_TARGET_TAHITI:
case CAL_TARGET_OLAND:
case CAL_TARGET_HAINAN:
calAttr.doublePrecision = CAL_TRUE;
break;
case CAL_TARGET_BONAIRE:
case CAL_TARGET_SPECTRE:
case CAL_TARGET_SPOOKY:
case CAL_TARGET_KALINDI:
case CAL_TARGET_HAWAII:
case CAL_TARGET_ICELAND:
case CAL_TARGET_TONGA:
case CAL_TARGET_FIJI:
case CAL_TARGET_GODAVARI:
case CAL_TARGET_CARRIZO:
case CAL_TARGET_ELLESMERE:
case CAL_TARGET_BAFFIN:
case CAL_TARGET_GREENLAND:
case CAL_TARGET_STONEY:
case CAL_TARGET_LEXA:
case CAL_TARGET_RAVEN:
case CAL_TARGET_RAVEN2:
case CAL_TARGET_RENOIR:
case CAL_TARGET_POLARIS22:
case CAL_TARGET_VEGA12:
case CAL_TARGET_VEGA20:
calAttr.doublePrecision = CAL_TRUE;
calAttr.isOpenCL200Device = CAL_TRUE;
break;
default:
break;
if (doublePrecision) {
calAttr.doublePrecision = CAL_TRUE;
}
// Use OpenCL 2.0 if supported
if (openCL200) {
calAttr.isOpenCL200Device = CAL_TRUE;
}
settings_ = new gpu::Settings();
gpu::Settings* gpuSettings = reinterpret_cast<gpu::Settings*>(settings_);
// Create setting for the offline target
if ((gpuSettings == NULL) || !gpuSettings->create(calAttr)) {
LogPrintfError("GPU settings failed for offline device for CAL device %s", isa.targetId());
return false;
}
gslMemInfo memInfo = {0};
// Report 512MB for all offline devices
memInfo.cardMemAvailableBytes = 512 * Mi;
memInfo.cardLargestFreeBlockBytes = 512 * Mi;
@@ -243,7 +306,7 @@ bool NullDevice::create(CALtarget target) {
acl_error error;
hsaCompiler_ = aclCompilerInit(&opts, &error);
if (error != ACL_SUCCESS) {
LogError("Error initializing the compiler");
LogPrintfError("Error initializing the compiler for offline CAL device %s", isa.targetId());
return false;
}
}
@@ -494,14 +557,11 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
info_.platform_ = AMD_PLATFORM;
if ((calTarget() == CAL_TARGET_CARRIZO) && ASICREV_IS_CARRIZO_BRISTOL(calAttr.asicRevision)) {
const static char* bristol = "Bristol Ridge";
::strncpy(info_.name_, bristol, sizeof(info_.name_) - 1);
} else {
::strncpy(info_.name_, hwInfo()->targetName_, sizeof(info_.name_) - 1);
}
::strncpy(info_.name_, calName_, sizeof(info_.name_) - 1);
::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1);
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING);
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (GSL)%s",
isOnline() ? "" : " [Offline]");
info_.profile_ = "FULL_PROFILE";
if (settings().oclVersion_ >= OpenCL20) {
@@ -584,19 +644,19 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
info_.deviceTopology_.pcie.device = (calAttr.pciTopologyInformation & (0x1F << 3)) >> 3;
info_.deviceTopology_.pcie.function = (calAttr.pciTopologyInformation & 0x07);
info_.simdPerCU_ = hwInfo()->simdPerCU_;
info_.simdPerCU_ = isa().simdPerCU();
info_.cuPerShaderArray_ = calAttr.numberOfCUsperShaderArray;
info_.simdWidth_ = hwInfo()->simdWidth_;
info_.simdInstructionWidth_ = hwInfo()->simdInstructionWidth_;
info_.simdWidth_ = isa().simdWidth();
info_.simdInstructionWidth_ = isa().simdInstructionWidth();
info_.wavefrontWidth_ = calAttr.wavefrontSize;
info_.globalMemChannelBanks_ = calAttr.numMemBanks;
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_;
info_.localMemBanks_ = hwInfo()->localMemBanks_;
info_.gfxipMajor_ = hwInfo()->gfxipMajor_;
info_.gfxipMinor_ = hwInfo()->gfxipMinor_;
info_.gfxipStepping_ = hwInfo()->gfxipStepping_;
info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth();
info_.localMemSizePerCU_ = isa().localMemSizePerCU();
info_.localMemBanks_ = isa().localMemBanks();
info_.gfxipMajor_ = isa().versionMajor();
info_.gfxipMinor_ = isa().versionMinor();
info_.gfxipStepping_ = isa().versionStepping();
info_.numAsyncQueues_ = numComputeRings;
@@ -607,7 +667,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
info_.pcieDeviceId_ = calAttr.pcieDeviceID;
info_.pcieRevisionId_ = calAttr.pcieRevisionID;
info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * hwInfo()->simdPerCU_ * 10;
info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * isa().simdPerCU() * 10;
}
}
@@ -849,10 +909,6 @@ Device::~Device() {
extern const char* SchedulerSourceCode;
bool Device::create(CALuint ordinal, CALuint numOfDevices) {
if (!amd::Device::create()) {
return false;
}
appProfile_.init();
bool smallMemSystem = false;
@@ -882,19 +938,40 @@ bool Device::create(CALuint ordinal, CALuint numOfDevices) {
// Update CAL target
calTarget_ = getAttribs().target;
hwInfo_ = &DeviceInfo[calTarget_];
if ((GPU_ENABLE_PAL == 2) && usePal()) {
// XNACK should be set for PageMigration or IOMMUv2 support.
bool isXNACKSupported = false;
// SRAMECC should be set for ecc protected GPRs.
bool isSRAMECCSupported = false;
const amd::Isa* isa;
bool preferPal;
std::tie(isa, calMachine_, calName_, preferPal, std::ignore, std::ignore) =
findIsa(calTarget(), isSRAMECCSupported, isXNACKSupported);
if ((calTarget() == CAL_TARGET_CARRIZO) && ASICREV_IS_CARRIZO_BRISTOL(getAttribs().asicRevision)) {
calName_ = "Bristol Ridge";
}
if (!isa) {
LogPrintfError("Unsupported CAL device #%d", calTarget());
return false;
}
if (!isa->runtimeGslSupported()) {
LogPrintfError("Unsupported CAL device with ISA %s", isa->targetId());
return false;
}
if ((GPU_ENABLE_PAL == 2) && isa->runtimePalSupported() && preferPal) {
LogPrintfError("Skipping as GPU_ENABLE_PAL=2 indicating to use PAL for CAL device %s",
isa->targetId());
return false;
}
#if defined(BRAHMA)
if (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN ||
calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_POLARIS22 ||
calTarget_ == CAL_TARGET_RENOIR) {
if (!amd::Device::create(*isa)) {
LogPrintfError("Unable to setup device for CAL device %s", isa->targetId());
return false;
}
#endif
// Creates device settings
settings_ = new gpu::Settings();
+9 -9
Просмотреть файл
@@ -67,7 +67,12 @@ class NullDevice : public amd::Device {
NullDevice();
//! Creates an offline device with the specified target
bool create(CALtarget target //!< GPU device identifier
bool create(const char* calName, //!< GPU device name
const amd::Isa& isa, //!< GPU device isa
CALtarget target, //!< GPU device identifier
bool preferPal, //!< GPU prefer to use PAL if GPU_ENABLE_PAL=2
bool doublePrecision, //!< Use double precision
bool openCL200 //!< Use OpenCL 2.0
);
//! Instantiate a new virtual device
@@ -115,7 +120,7 @@ class NullDevice : public amd::Device {
CALtarget calTarget() const { return calTarget_; }
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
CALMachineType calMachine() const { return calMachine_; }
//! Empty implementation on Null device
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
@@ -131,12 +136,6 @@ class NullDevice : public amd::Device {
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
protected:
bool usePal() const {
return (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN ||
calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_RENOIR ||
calTarget_ >= CAL_TARGET_VEGA12);
}
//! Answer the question: "Should HSAIL Program be created?",
//! based on the given options.
bool isHsailProgram(amd::option::Options* options = NULL);
@@ -150,7 +149,8 @@ class NullDevice : public amd::Device {
);
CALtarget calTarget_; //!< GPU device identifier
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
CALMachineType calMachine_; //!< GPU machine identifier
const char* calName_; //!< GPU device name
};
//! Forward declarations
+3 -6
Просмотреть файл
@@ -643,11 +643,8 @@ bool NullKernel::create(const std::string& code, const std::string& metadata,
if ((binaryCode == NULL) && (binarySize == 0) && !code.empty()) {
acl_error err;
std::string arch = "amdil";
if (nullDev().settings().use64BitPtr_) {
arch += "64";
}
aclTargetInfo info = aclGetTargetInfo(arch.c_str(), nullDev().hwInfo()->targetName_, &err);
aclTargetInfo info = aclGetTargetInfo(nullDev().settings().use64BitPtr_ ? "amdil64" : "amdil",
nullDev().isa().amdIlName(), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
return false;
@@ -1007,7 +1004,7 @@ bool NullKernel::createMultiBinary(uint* imageSize, void** image, const void* is
constBuffers[constBufferCount++].index = nullProg().glbCb()[i];
}
encoding.machine = nullDev().hwInfo()->machine_;
encoding.machine = nullDev().calMachine();
encoding.type = ED_ATI_CAL_TYPE_COMPUTE;
encoding.inputCount = inputResourceCount;
encoding.outputCount = outputCount;
+18 -112
Просмотреть файл
@@ -41,14 +41,10 @@
namespace gpu {
const aclTargetInfo& NullProgram::info(const char* str) {
const aclTargetInfo& NullProgram::info() {
acl_error err;
std::string arch = "amdil";
if (dev().settings().use64BitPtr_) {
arch += "64";
}
info_ = aclGetTargetInfo(arch.c_str(),
(str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str), &err);
info_ = aclGetTargetInfo(gpuNullDevice().settings().use64BitPtr_ ? "amdil64" : "amdil",
device().isa().amdIlName(), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
@@ -1507,7 +1503,6 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
executable_(NULL),
loaderContext_(this) {
assert(device.isOnline());
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
}
@@ -1520,7 +1515,6 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
loaderContext_(this) {
assert(!device.isOnline());
isNull_ = true;
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
// Cannot load onto a NullDevice.
loader_ = nullptr;
@@ -1769,14 +1763,10 @@ void HSAILProgram::fillResListWithKernels(std::vector<const Memory*>& memList) c
}
}
const aclTargetInfo& HSAILProgram::info(const char* str) {
const aclTargetInfo& HSAILProgram::info() {
acl_error err;
std::string arch = "hsail";
if (dev().settings().use64BitPtr_) {
arch = "hsail64";
}
info_ = aclGetTargetInfo(arch.c_str(),
(str && str[0] == '\0' ? gpuNullDevice().hwInfo()->targetName_ : str), &err);
info_ = aclGetTargetInfo(gpuNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail",
device().isa().hsailName(), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
@@ -1802,107 +1792,23 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) {
}
hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char* name) {
hsa_isa_t isa = {0};
if (!strcmp(Gfx600, name)) {
isa.handle = gfx600;
return isa;
}
if (!strcmp(Gfx601, name)) {
isa.handle = gfx601;
return isa;
}
if (!strcmp(Gfx602, name)) {
isa.handle = gfx602;
return isa;
}
if (!strcmp(Gfx700, name)) {
isa.handle = gfx700;
return isa;
}
if (!strcmp(Gfx701, name)) {
isa.handle = gfx701;
return isa;
}
if (!strcmp(Gfx702, name)) {
isa.handle = gfx702;
return isa;
}
if (!strcmp(Gfx705, name)) {
isa.handle = gfx702;
return isa;
}
if (!strcmp(Gfx801, name)) {
isa.handle = gfx801;
return isa;
}
if (!strcmp(Gfx802, name)) {
isa.handle = gfx802;
return isa;
}
if (!strcmp(Gfx803, name)) {
isa.handle = gfx803;
return isa;
}
if (!strcmp(Gfx810, name)) {
isa.handle = gfx810;
return isa;
}
if (!strcmp(Gfx900, name)) {
isa.handle = gfx900;
return isa;
}
if (!strcmp(Gfx902, name)) {
isa.handle = gfx902;
return isa;
}
if (!strcmp(Gfx904, name)) {
isa.handle = gfx904;
return isa;
}
if (!strcmp(Gfx906, name)) {
isa.handle = gfx906;
return isa;
}
if (!strcmp(Gfx909, name)) {
isa.handle = gfx909;
return isa;
}
if (!strcmp(Gfx90c, name)) {
isa.handle = gfx90c;
return isa;
}
return isa;
const amd::Isa* isa_p = amd::Isa::findIsa(name);
return {amd::Isa::toHandle(isa_p)};
}
bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
uint dev_gfxip = program_->gpuNullDevice().hwInfo()->gfxipVersion_;
uint isa_gfxip = isa.handle;
switch (dev_gfxip) {
case gfx700:
case gfx704:
case gfx801:
case gfx802:
case gfx803:
case gfx810:
case gfx900:
case gfx902:
case gfx904:
case gfx906:
case gfx909:
case gfx90c:
return isa_gfxip == dev_gfxip;
case gfx701:
case gfx702:
// gfx701 only differs from gfx702 by faster fp operations and can be loaded on either device.
return isa_gfxip == gfx701|| isa_gfxip == gfx702;
case gfx600:
case gfx601:
case gfx602:
default:
LogPrintfError("Unsupported gfxip version gfx%d", dev_gfxip);
// The HSA loader uses a handle value of 0 to indicate the ISA is invalid.
const amd::Isa* code_object_isa_p = amd::Isa::fromHandle(isa.handle);
if (!code_object_isa_p || !code_object_isa_p->runtimeGslSupported()) {
// The ISA is either not supported because ORCAHSALoaderContext::IsaFromName
// could not find it, or the GSL runtime does not support it.
return false;
}
if (program_->isNull()) {
// Cannot load code onto offline devices.
return false;
}
return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa());
}
void* ORCAHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent,
+2 -2
Просмотреть файл
@@ -272,7 +272,7 @@ class NullProgram : public device::Program {
std::vector<device::PrintfInfo> printf_; //!< Format strings for GPU printf support
std::vector<uint> glbCb_; //!< Global constant buffers
virtual const aclTargetInfo& info(const char* str = "");
virtual const aclTargetInfo& info();
virtual bool saveBinaryAndSetType(type_t type) { return true; }
@@ -503,7 +503,7 @@ class HSAILProgram : public device::Program {
virtual bool createBinary(amd::option::Options* options);
virtual const aclTargetInfo& info(const char* str = "");
virtual const aclTargetInfo& info();
private:
//! Disable default copy constructor
+1 -1
Просмотреть файл
@@ -122,7 +122,7 @@ bool NullKernel::siCreateHwInfo(const void* shader, AMUabiAddEncoding& encoding)
i++;
newInfos[i].address = AMU_ABI_WAVEFRONT_SIZE;
newInfos[i].value = nullDev().hwInfo()->simdWidth_ * 4; // options.WavefrontSize;
newInfos[i].value = nullDev().isa().simdWidth() * 4; // options.WavefrontSize;
i++;
newInfos[i].address = AMU_ABI_LDS_SIZE_AVAIL;
-70
Просмотреть файл
@@ -132,76 +132,6 @@ static constexpr uint HsaSamplerObjectAlignment = 16;
//! HSA path specific defines for images
static constexpr uint DeviceQueueMaskSize = 32;
//! Per-device hardware description record.
//!
//! Instances are brace-initialized positionally, so the member order is part
//! of the contract. Initializers that list fewer values than there are members
//! (for example an entry that stops after xnackEnabled_) leave the trailing
//! members value-initialized.
struct AMDDeviceInfo {
const char* machineTarget_; //!< Machine target
const char* machineTargetLC_; //!< Machine target for LC
uint simdWidth_; //!< Number of workitems processed per SIMD
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemBanks_; //!< Number of banks of local memory
uint gfxipVersionLC_; //!< The core engine GFXIP version for LC
uint gfxipVersion_; //!< The core engine GFXIP version
bool xnackEnabled_; //!< Enable XNACK feature
Pal::AsicRevision asicRevision_; //!< PAL AsicRevision
bool xnackSupported_; //!< XNACK is supported
bool srameccSumpported_; //!< SRAMECC is supported (NOTE(review): member name is misspelled, "Sumpported"; renaming would need every user updated)
};
static constexpr AMDDeviceInfo UnknownDevice = {"", "", 16, 256, 32, 0, 0, false};
static constexpr AMDDeviceInfo DeviceInfo[] = {
/* Unknown */ UnknownDevice,
/* Tahiti */ {"", "", 16, 256, 32, 600, 600, false},
/* Pitcairn */ {"", "", 16, 256, 32, 600, 600, false},
/* Capeverde */ {"", "", 16, 256, 32, 700, 700, false},
/* Oland */ {"", "", 16, 256, 32, 600, 600, false},
/* Hainan */ {"", "", 16, 256, 32, 600, 600, false},
/* Bonaire */ {"Bonaire", "", 16, 256, 32, 700, 700, false},
/* Hawaii */ {"Hawaii", "", 16, 256, 32, 701, 701, false},
/* Hawaii */ {"", "", 16, 256, 32, 701, 701, false},
/* Hawaii */ {"", "", 16, 256, 32, 701, 701, false},
/* Kalindi */ {"Kalindi", "", 16, 256, 32, 702, 702, false},
/* Godavari */ {"Mullins", "", 16, 256, 32, 702, 702, false},
/* Spectre */ {"Spectre", "", 16, 256, 32, 701, 701, false},
/* Spooky */ {"Spooky", "", 16, 256, 32, 701, 701, false},
/* Carrizo */ {"Carrizo", "", 16, 256, 32, 801, 801, false},
/* Bristol */ {"Bristol Ridge", "", 16, 256, 32, 801, 801, false},
/* Stoney */ {"Stoney", "", 16, 256, 32, 810, 810, false},
/* Iceland */ {"Iceland", "gfx802", 16, 256, 32, 802, 800, false},
/* Tonga */ {"Tonga", "gfx802", 16, 256, 32, 802, 800, false},
/* Fiji */ {"Fiji", "gfx803", 16, 256, 32, 803, 804, false},
/* Ellesmere */ {"Ellesmere", "gfx803", 16, 256, 32, 803, 804, false},
/* Baffin */ {"Baffin", "gfx803", 16, 256, 32, 803, 804, false},
/* Lexa */ {"gfx804", "gfx803", 16, 256, 32, 803, 804, false},
};
static constexpr AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
/* Vega10 */ {"gfx900", "gfx900", 16, 256, 32, 900, 900, false, Pal::AsicRevision::Vega10, true, false},
/* Vega10 XNACK */ {"gfx901", "gfx900", 16, 256, 32, 900, 901, true, Pal::AsicRevision::Vega10, true, false},
/* Vega12 */ {"gfx904", "gfx904", 16, 256, 32, 904, 904, false, Pal::AsicRevision::Vega12, true, false},
/* Vega12 XNACK */ {"gfx905", "gfx904", 16, 256, 32, 904, 905, true, Pal::AsicRevision::Vega12, true, false},
/* Vega20 */ {"gfx906", "gfx906", 16, 256, 32, 906, 906, false, Pal::AsicRevision::Vega20, true, true},
/* Vega20 XNACK */ {"gfx907", "gfx906", 16, 256, 32, 906, 907, true, Pal::AsicRevision::Vega20, true, true},
/* Raven */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Raven, true, false},
/* Raven XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Raven, true, false},
/* Raven2 */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Raven2, true, false},
/* Raven2 XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Raven2, true, false},
/* Renoir */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Renoir, true, false},
/* Renoir XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Renoir, true, false},
/* Navi10 */ {"gfx1010", "gfx1010", 32, 256, 32, 1010, 1010, false, Pal::AsicRevision::Navi10, true, false},
/* Navi10 XNACK */ {"gfx1010", "gfx1010", 32, 256, 32, 1010, 1010, true, Pal::AsicRevision::Navi10, true, false},
/* Navi12 */ {"gfx1011", "gfx1011", 32, 256, 32, 1011, 1011, false, Pal::AsicRevision::Navi12, true, false},
/* Navi12 XNACK */ {"gfx1011", "gfx1011", 32, 256, 32, 1011, 1011, true, Pal::AsicRevision::Navi12, true, false},
/* Navi14 */ {"gfx1012", "gfx1012", 32, 256, 32, 1012, 1012, false, Pal::AsicRevision::Navi14, true, false},
/* Navi14 XNACK */ {"gfx1012", "gfx1012", 32, 256, 32, 1012, 1012, true, Pal::AsicRevision::Navi14, true, false},
/* Navi21 */ {"gfx1030", "gfx1030", 32, 256, 32, 1030, 1030, false, Pal::AsicRevision::Navi21, false, false},
/* Navi22 */ {"gfx1031", "gfx1031", 32, 256, 32, 1031, 1031, false, Pal::AsicRevision::Navi22, false, false},
/* Navi23 */ {"gfx1032", "gfx1032", 32, 256, 32, 1032, 1032, false, Pal::AsicRevision::Navi23, false, false},
};
// Supported OpenCL versions
enum OclVersion {
OpenCL10 = 0x10,
+168 -219
Просмотреть файл
@@ -48,11 +48,100 @@
#endif // _WIN32
#include <algorithm>
#include <array>
#include <cstring>
#include <ctype.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <tuple>
namespace {
//! Define the mapping from PAL asic revision enumeration values to the
//! compiler gfx major/minor/stepping version.
//!
//! One record describes one PAL ASIC revision. Records are brace-initialized
//! positionally, so the member order below must stay in step with the column
//! order used by the initializers.
struct PalDevice {
uint32_t gfxipMajor_; //!< The core engine GFXIP Major version
uint32_t gfxipMinor_; //!< The core engine GFXIP Minor version
uint32_t gfxipStepping_; //!< The core engine GFXIP Stepping version
Pal::GfxIpLevel gfxIpLevel_; //!< PAL gfx IP level
const char* palName_; //!< PAL device name
Pal::AsicRevision asicRevision_; //!< PAL AsicRevision
};
//! Table of the PAL devices known to this runtime, one row per PAL ASIC
//! revision.
//!
//! The table is searched linearly and the first matching row wins, so row
//! order matters: several ASIC revisions share one gfx version (for example
//! Pitcairn and Capeverde are both 6.0.1), and a lookup by gfx version returns
//! whichever of them is listed first. A lookup by ASIC revision is unaffected
//! as long as each revision appears only once.
static constexpr PalDevice supportedPalDevices[] = {
// Columns: gfx version (major, minor, stepping), PAL gfx IP level, PAL name, PAL ASIC revision
{6, 0, 0, Pal::GfxIpLevel::GfxIp6, "Tahiti", Pal::AsicRevision::Tahiti},
{6, 0, 1, Pal::GfxIpLevel::GfxIp6, "Pitcairn", Pal::AsicRevision::Pitcairn},
{6, 0, 1, Pal::GfxIpLevel::GfxIp6, "Capeverde", Pal::AsicRevision::Capeverde},
{6, 0, 2, Pal::GfxIpLevel::GfxIp6, "Oland", Pal::AsicRevision::Oland},
{6, 0, 2, Pal::GfxIpLevel::GfxIp6, "Hainan", Pal::AsicRevision::Hainan},
{7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Kalindi", Pal::AsicRevision::Kalindi},
{7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spectre", Pal::AsicRevision::Spectre},
{7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spooky", Pal::AsicRevision::Spooky},
{7, 0, 1, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::HawaiiPro},
{7, 0, 2, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::Hawaii},
{7, 0, 4, Pal::GfxIpLevel::GfxIp7, "Bonaire", Pal::AsicRevision::Bonaire},
{7, 0, 5, Pal::GfxIpLevel::GfxIp7, "Mullins", Pal::AsicRevision::Godavari}, // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support Mullins.
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo},
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol},
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland},
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Tonga", Pal::AsicRevision::Tonga}, // Also Tongapro (generated code is for Tonga)
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Fiji", Pal::AsicRevision::Fiji},
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Ellesmere", Pal::AsicRevision::Polaris10}, // Ellesmere
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Baffin", Pal::AsicRevision::Polaris11}, // Baffin
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris12}, // Lexa
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris22},
{8, 1, 0, Pal::GfxIpLevel::GfxIp8_1, "Stoney", Pal::AsicRevision::Stoney},
{9, 0, 0, Pal::GfxIpLevel::GfxIp9, "gfx900", Pal::AsicRevision::Vega10},
{9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven},
{9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12},
{9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20},
{9, 0, 9, Pal::GfxIpLevel::GfxIp9, "gfx909", Pal::AsicRevision::Raven2},
{9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir},
{10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10},
{10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12},
{10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14},
{10, 3, 0, Pal::GfxIpLevel::GfxIp10_3, "gfx1030", Pal::AsicRevision::Navi21},
{10, 3, 1, Pal::GfxIpLevel::GfxIp10_3, "gfx1031", Pal::AsicRevision::Navi22},
{10, 3, 2, Pal::GfxIpLevel::GfxIp10_3, "gfx1032", Pal::AsicRevision::Navi23},
// The VanGogh row exists only when PAL is built with VanGogh support; its PAL name is empty.
#if PAL_BUILD_VAN_GOGH
{10, 3, 3, Pal::GfxIpLevel::GfxIp10_3, "", Pal::AsicRevision::VanGogh},
#endif
};
//! Resolve the ISA and PAL device name for a PAL ASIC revision.
//!
//! \param asicRevision PAL ASIC revision to look up in supportedPalDevices
//! \param sramecc      true to request the ISA with SRAMECC enabled, false for disabled
//! \param xnack        true to request the ISA with XNACK enabled, false for disabled
//! \return (isa, palName). Both are nullptr when the ASIC revision is not in
//!         the table. isa alone is whatever amd::Isa::findIsa returns for the
//!         gfx version and feature settings of the matching row.
static std::tuple<const amd::Isa*, const char*> findIsa(Pal::AsicRevision asicRevision,
                                                        bool sramecc, bool xnack) {
  // Scan the table for the first row that describes this ASIC revision.
  const PalDevice* match = nullptr;
  for (const PalDevice& candidate : supportedPalDevices) {
    if (candidate.asicRevision_ == asicRevision) {
      match = &candidate;
      break;
    }
  }
  if (match == nullptr) {
    return std::make_tuple(nullptr, nullptr);
  }

  // Translate the boolean hardware state into explicit feature settings.
  const amd::Isa::Feature srameccSetting =
      sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled;
  const amd::Isa::Feature xnackSetting =
      xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled;

  const amd::Isa* isa = amd::Isa::findIsa(match->gfxipMajor_, match->gfxipMinor_,
                                          match->gfxipStepping_, srameccSetting, xnackSetting);
  return std::make_tuple(isa, match->palName_);
}
static std::tuple<Pal::GfxIpLevel, Pal::AsicRevision, const char*> findPal(uint32_t gfxipMajor,
uint32_t gfxipMinor,
uint32_t gfxipStepping) {
auto palDeviceIter = std::find_if(std::begin(supportedPalDevices), std::end(supportedPalDevices),
[&](const PalDevice& palDevice) {
return palDevice.gfxipMajor_ == gfxipMajor &&
palDevice.gfxipMinor_ == gfxipMinor &&
palDevice.gfxipStepping_ == gfxipStepping;
});
if (palDeviceIter == std::end(supportedPalDevices)) {
return std::make_tuple(Pal::GfxIpLevel::None, Pal::AsicRevision::Unknown, nullptr);
}
return std::make_tuple(palDeviceIter->gfxIpLevel_, palDeviceIter->asicRevision_,
palDeviceIter->palName_);
}
} // namespace
bool PalDeviceLoad() {
bool ret = false;
@@ -76,185 +165,63 @@ Pal::IPlatform* Device::platform_;
NullDevice::Compiler* NullDevice::compiler_;
AppProfile Device::appProfile_;
NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), hwInfo_(nullptr) {}
NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), palName_(nullptr) {}
bool NullDevice::init() {
std::vector<Device*> devices;
std::string driverVersion;
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
// TODO: Currently PAL only supports for GFXIP9+.
// Comment out this section for SWDEV-146950 since Kalindi and Mullins
// does not works for LC offline compilation without knowing which GFXIP
// should be used for them.
#if defined(WITH_COMPILER_LIB)
// Loop through all supported devices and create each of them
for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
bool foundActive = false;
Pal::AsicRevision revision = static_cast<Pal::AsicRevision>(id);
if (pal::DeviceInfo[id].machineTarget_[0] == '\0') {
// Create offline devices for all ISAs not already associated with an online
// device. This allows code objects to be compiled for all supported ISAs.
std::vector<Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, false);
for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) {
if (!isa->runtimePalSupported()) {
continue;
}
bool isOnline = false;
// Check if the particular device is online
for (size_t i = 0; i < devices.size(); i++) {
if (&(devices[i]->isa()) == isa) {
isOnline = true;
break;
}
}
if (isOnline) {
continue;
}
// Loop through all active PAL devices and see if we match one
for (uint i = 0; i < devices.size(); ++i) {
driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
if (driverVersion.find("PAL") != std::string::npos) {
if (static_cast<NullDevice*>(devices[i])->asicRevision() == revision) {
foundActive = true;
break;
}
}
}
// Don't report an offline device if it's active
if (foundActive) {
Pal::GfxIpLevel gfxIpLevel;
Pal::AsicRevision asicRevision;
const char* palName;
std::tie(gfxIpLevel, asicRevision, palName) =
findPal(isa->versionMajor(), isa->versionMinor(), isa->versionStepping());
if (asicRevision == Pal::AsicRevision::Unknown) {
// PAL does not support this asic.
continue;
}
NullDevice* dev = new NullDevice();
if (nullptr != dev) {
if (!dev->create(id, Pal::GfxIpLevel::_None)) {
delete dev;
} else {
dev->registerDevice();
}
std::unique_ptr<NullDevice> nullDevice(new NullDevice());
if (!nullDevice) {
LogPrintfError("Error allocating new instance of offline PAL Device %s", isa->targetId());
return false;
}
if (!nullDevice->create(palName, *isa, gfxIpLevel, asicRevision)) {
// Skip over unsupported devices
LogPrintfError("Skipping creating new instance of offline PAL Device %s", isa->targetId());
continue;
}
nullDevice.release()->registerDevice();
}
#endif // defined(WITH_COMPILER_LIB)
// Loop through all supported devices and create each of them
for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
bool foundActive = false;
bool foundDuplicate = false;
uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
if (pal::Gfx9PlusSubDeviceInfo[id].machineTarget_[0] == '\0') {
continue;
}
// Loop through all active PAL devices and see if we match one
for (uint i = 0; i < devices.size(); ++i) {
driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
if (driverVersion.find("PAL") != std::string::npos) {
gfxipVersion = devices[i]->settings().useLightning_
? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_
: pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
uint gfxIpCurrent = devices[i]->settings().useLightning_
? static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersionLC_
: static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_;
if (gfxIpCurrent == gfxipVersion) {
foundActive = true;
break;
}
}
}
// Don't report an offline device if it's active
if (foundActive) {
continue;
}
// Loop through all previous devices in the Gfx9PlusSubDeviceInfo list
// and compare them with the current entry to see if the current entry
// was listed previously in the Gfx9PlusSubDeviceInfo, if so, then it
// means the current entry already has been added in the offline device list
for (uint j = 0; j < id; ++j) {
if (pal::Gfx9PlusSubDeviceInfo[j].machineTarget_[0] == '\0') {
continue;
}
if ((strcmp(pal::Gfx9PlusSubDeviceInfo[j].machineTarget_,
pal::Gfx9PlusSubDeviceInfo[id].machineTarget_) == 0) &&
(pal::Gfx9PlusSubDeviceInfo[j].xnackEnabled_ ==
pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_)) {
foundDuplicate = true;
break;
}
}
// Don't report an offline device twice
if (foundDuplicate) {
continue;
}
Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
uint ipLevelMajor = round(gfxipVersion / 100);
uint ipLevelMinor = round(gfxipVersion / 10 % 10);
switch (ipLevelMajor) {
case 9:
ipLevel = Pal::GfxIpLevel::GfxIp9;
break;
case 10:
switch (ipLevelMinor) {
case 0:
ShouldNotReachHere();
break;
case 1:
ipLevel = Pal::GfxIpLevel::GfxIp10_1;
break;
case 2:
ShouldNotReachHere();
break;
case 3:
ipLevel = Pal::GfxIpLevel::GfxIp10_3;
break;
case 4:
ShouldNotReachHere();
break;
default:
ShouldNotReachHere();
break;
}
break;
case 11:
switch (ipLevelMinor) {
case 0:
ShouldNotReachHere();
break;
default:
ShouldNotReachHere();
break;
}
break;
default:
ShouldNotReachHere();
break;
}
NullDevice* dev = new NullDevice();
if (nullptr != dev) {
if (!dev->create(id, ipLevel)) {
delete dev;
} else {
dev->registerDevice();
}
}
}
return true;
}
bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) {
// Update HW info for the device
if ((GPU_ENABLE_PAL == 1) && (ipLevel == Pal::GfxIpLevel::_None)) {
hwInfo_ = &DeviceInfo[id];
} else if (ipLevel >= Pal::GfxIpLevel::GfxIp9) {
hwInfo_ = &Gfx9PlusSubDeviceInfo[id];
} else {
return false;
}
Pal::AsicRevision asicRevision = hwInfo_->asicRevision_;
if (amd::IS_HIP && IS_MAINLINE &&
(asicRevision != Pal::AsicRevision::Vega20)) {
bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLevel ipLevel,
Pal::AsicRevision asicRevision) {
if (!isa.runtimePalSupported()) {
LogPrintfError("Offline PAL device %s is not supported", isa.targetId());
return false;
}
online_ = false;
palName_ = palName;
Pal::DeviceProperties properties = {};
// Use fake GFX IP for the device init
@@ -274,12 +241,19 @@ bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) {
Pal::WorkStationCaps wscaps = {};
// Create setting for the offline target
if ((palSettings == nullptr) || !palSettings->create(properties, heaps, wscaps)) {
if ((palSettings == nullptr) ||
!palSettings->create(properties, heaps, wscaps, isa.xnack() == amd::Isa::Feature::Enabled)) {
LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId());
return false;
}
if (!ValidateComgr()) {
LogError("Code object manager initialization failed!");
LogPrintfError("Code object manager initialization failed for offline PAL device %s", isa.targetId());
return false;
}
if (!amd::Device::create(isa)) {
LogPrintfError("Unable to setup device for PAL offline device %s", isa.targetId());
return false;
}
@@ -306,7 +280,7 @@ bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) {
acl_error error;
compiler_ = aclCompilerInit(&opts, &error);
if (error != ACL_SUCCESS) {
LogError("Error initializing the compiler");
LogPrintfError("Error initializing the compiler for offline PAL device %s", isa.targetId());
return false;
}
#endif // defined(WITH_COMPILER_LIB)
@@ -511,34 +485,12 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.platform_ = AMD_PLATFORM;
if (settings().useLightning_) {
::strncpy(info_.name_, hwInfo()->machineTargetLC_, sizeof(info_.name_) - 1);
if (hwInfo()->srameccSumpported_) {
if (palProp.gfxipProperties.shaderCore.flags.eccProtectedGprs) {
::strcat(info_.name_, ":sramecc+");
} else {
::strcat(info_.name_, ":sramecc-");
}
}
if (hwInfo()->xnackSupported_) {
if (hwInfo()->xnackEnabled_) {
::strcat(info_.name_, ":xnack+");
} else {
::strcat(info_.name_, ":xnack-");
}
}
::strncpy(info_.targetId_, "amdgcn-amd-amdhsa--", sizeof(info_.targetId_) - 1);
::strcat(info_.targetId_, info_.name_);
} else {
::strncpy(info_.name_, hwInfo()->machineTarget_, sizeof(info_.name_) - 1);
}
::strncpy(info_.name_, settings().useLightning_ ? isa().targetId() : palName_,
sizeof(info_.name_));
::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1);
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)",
settings().useLightning_ ? ",LC" : ",HSAIL");
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)%s",
settings().useLightning_ ? ",LC" : ",HSAIL", isOnline() ? "" : " [Offline]");
info_.profile_ = "FULL_PROFILE";
if (settings().oclVersion_ >= OpenCL20) {
@@ -625,23 +577,20 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
? (2 * palProp.gfxipProperties.shaderCore.numSimdsPerCu)
: palProp.gfxipProperties.shaderCore.numSimdsPerCu;
info_.cuPerShaderArray_ = palProp.gfxipProperties.shaderCore.numCusPerShaderArray;
info_.simdWidth_ = hwInfo()->simdWidth_;
info_.simdWidth_ = isa().simdWidth();
info_.simdInstructionWidth_ = 1;
info_.wavefrontWidth_ =
settings().enableWave32Mode_ ? 32 : palProp.gfxipProperties.shaderCore.nativeWavefrontSize;
info_.availableSGPRs_ = palProp.gfxipProperties.shaderCore.numAvailableSgprs;
info_.globalMemChannelBanks_ = 4;
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth();
info_.localMemSizePerCU_ = palProp.gfxipProperties.shaderCore.ldsSizePerCu;
info_.localMemBanks_ = hwInfo()->localMemBanks_;
info_.localMemBanks_ = isa().localMemBanks();
uint gfxipVersion =
settings().useLightning_ ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
info_.gfxipMajor_ = gfxipVersion / 100;
info_.gfxipMinor_ = gfxipVersion / 10 % 10;
info_.gfxipStepping_ = gfxipVersion % 10;
info_.gfxipMajor_ = isa().versionMajor();
info_.gfxipMinor_ = isa().versionMinor();
info_.gfxipStepping_ = isa().versionStepping();
info_.timeStampFrequency_ = 1000000;
info_.numAsyncQueues_ = numComputeRings;
@@ -860,9 +809,6 @@ uint32_t gStartDevice = 0;
uint32_t gNumDevices = 0;
bool Device::create(Pal::IDevice* device) {
if (!amd::Device::create()) {
return false;
}
resourceList_ = new std::unordered_set<Resource*>();
if (nullptr == resourceList_) {
return false;
@@ -884,23 +830,25 @@ bool Device::create(Pal::IDevice* device) {
// XNACK flag should be set for PageMigration or IOMMUv2 support.
// Note: Navi2x should have a fix in HW.
bool isXNACKSupported = (ipLevel_ <= Pal::GfxIpLevel::GfxIp10_1) &&
bool isXNACKEnabled =
(static_cast<uint>(properties().gpuMemoryProperties.flags.pageMigrationEnabled ||
properties().gpuMemoryProperties.flags.iommuv2Support));
// Update HW info for the device
if ((GPU_ENABLE_PAL == 1) && (properties().revision <= Pal::AsicRevision::Polaris12)) {
hwInfo_ = &DeviceInfo[static_cast<uint>(properties().revision)];
} else if (ipLevel_ >= Pal::GfxIpLevel::GfxIp9) {
// For compiler sub targets
for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
if ((Gfx9PlusSubDeviceInfo[id].asicRevision_ == asicRevision_) &&
(Gfx9PlusSubDeviceInfo[id].xnackEnabled_ == isXNACKSupported)) {
hwInfo_ = &Gfx9PlusSubDeviceInfo[id];
break;
}
}
} else {
bool isSRAMECCEnabled = properties().gfxipProperties.shaderCore.flags.eccProtectedGprs;
const amd::Isa* isa;
std::tie(isa, palName_) = findIsa(asicRevision_, isSRAMECCEnabled, isXNACKEnabled);
if (!isa) {
LogPrintfError("Unsupported PAL device with ASIC revision #%d", asicRevision_);
return false;
}
if (!isa->runtimePalSupported()) {
LogPrintfError("Unsupported PAL device with ISA %s", isa->targetId());
return false;
}
if (!amd::Device::create(*isa)) {
LogPrintfError("Unable to setup device for PAL device %s", isa->targetId());
return false;
}
@@ -953,8 +901,9 @@ bool Device::create(Pal::IDevice* device) {
iDev()->QueryWorkStationCaps(&wscaps);
pal::Settings* gpuSettings = reinterpret_cast<pal::Settings*>(settings_);
if ((gpuSettings == nullptr) ||
!gpuSettings->create(properties(), heaps_, wscaps, appProfile_.reportAsOCL12Device())) {
if (!gpuSettings ||
!gpuSettings->create(properties(), heaps_, wscaps, isa->xnack() == amd::Isa::Feature::Enabled,
appProfile_.reportAsOCL12Device())) {
return false;
}
+5 -5
Просмотреть файл
@@ -64,8 +64,10 @@ class NullDevice : public amd::Device {
NullDevice();
//! Creates an offline device with the specified target
bool create(uint id, //!< index in the AMDDeviceInfo[]
Pal::GfxIpLevel ipLevel //!< GPU ip level
bool create(const char* palName, //!< Device name
const amd::Isa& isa, //!< Device ISA
Pal::GfxIpLevel ipLevel, //!< GPU ip level
Pal::AsicRevision asicRevision //!< PAL ASIC revision
);
//! Instantiate a new virtual device
@@ -114,8 +116,6 @@ class NullDevice : public amd::Device {
Pal::GfxIpLevel ipLevel() const { return ipLevel_; }
Pal::AsicRevision asicRevision() const { return asicRevision_; }
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
//! Empty implementation on Null device
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
@@ -139,7 +139,7 @@ class NullDevice : public amd::Device {
Pal::AsicRevision asicRevision_; //!< ASIC revision
Pal::GfxIpLevel ipLevel_; //!< Device IP level
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
const char* palName_; //!< Device name
//! Fills OpenCL device info structure
void fillDeviceInfo(const Pal::DeviceProperties& palProp, //!< PAL device properties
+16 -60
Просмотреть файл
@@ -174,7 +174,6 @@ bool Segment::freeze(bool destroySysmem) {
return result;
}
static constexpr const char* Carrizo = "Carrizo";
HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
: Program(device, owner),
rawBinary_(nullptr),
@@ -185,11 +184,6 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
executable_(nullptr),
loaderContext_(this) {
assert(device.isOnline());
if (dev().asicRevision() == Pal::AsicRevision::Bristol) {
machineTarget_ = Carrizo;
} else {
machineTarget_ = dev().hwInfo()->machineTarget_;
}
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
}
@@ -204,11 +198,6 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
loaderContext_(this) {
assert(!device.isOnline());
isNull_ = true;
if (dev().asicRevision() == Pal::AsicRevision::Bristol) {
machineTarget_ = Carrizo;
} else {
machineTarget_ = dev().hwInfo()->machineTarget_;
}
// Cannot load onto a NullDevice.
loader_ = nullptr;
}
@@ -368,15 +357,11 @@ bool HSAILProgram::allocKernelTable() {
void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); }
const aclTargetInfo& HSAILProgram::info(const char* str) {
const aclTargetInfo& HSAILProgram::info() {
#if defined(WITH_COMPILER_LIB)
acl_error err;
std::string arch = "hsail";
if (dev().settings().use64BitPtr_) {
arch = "hsail64";
}
info_ = aclGetTargetInfo(arch.c_str(),
(str && str[0] == '\0' ? palNullDevice().hwInfo()->machineTarget_ : str), &err);
info_ = aclGetTargetInfo(palNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail",
device().isa().hsailName(), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclGetTargetInfo failed");
}
@@ -531,49 +516,23 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p
}
hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
hsa_isa_t isa = {0};
uint32_t gfxip = 0;
std::string gfx_target(name);
if (gfx_target.find("amdgcn-") == 0) {
std::string gfxip_version_str = gfx_target.substr(gfx_target.find("gfx") + 3);
gfxip = std::atoi(gfxip_version_str.c_str());
} else {
// FIXME: Old way. To be remove.
uint32_t shift = 1;
size_t last = gfx_target.length();
std::string ver;
do {
size_t first = gfx_target.find_last_of(':', last);
ver = gfx_target.substr(first + 1, last - first);
last = first - 1;
gfxip += static_cast<uint32_t>(atoi(ver.c_str())) * shift;
shift *= 10;
} while (shift <= 100);
}
isa.handle = gfxip;
return isa;
const amd::Isa* isa_p = amd::Isa::findIsa(name);
return {amd::Isa::toHandle(isa_p)};
}
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
uint32_t gfxipVersion = program_->palNullDevice().settings().useLightning_
? program_->palNullDevice().hwInfo()->gfxipVersionLC_
: program_->palNullDevice().hwInfo()->gfxipVersion_;
uint32_t majorSrc = gfxipVersion / 10;
uint32_t minorSrc = gfxipVersion % 10;
uint32_t majorTrg = isa.handle / 10;
uint32_t minorTrg = isa.handle % 10;
if (majorSrc != majorTrg) {
// The HSA loader uses a handle value of 0 to indicate the ISA is invalid.
const amd::Isa* code_object_isa_p = amd::Isa::fromHandle(isa.handle);
if (!code_object_isa_p || !code_object_isa_p->runtimePalSupported()) {
// The ISA is either not supported because PALHSALoaderContext::IsaFromName
// could not find it, or the PAL runtime does not support it.
return false;
} else if (minorTrg == minorSrc) {
return true;
} else if (minorTrg < minorSrc) {
LogWarning("ISA downgrade for execution!");
return true;
}
return false;
if (program_->isNull()) {
// Cannot load code onto offline devices.
return false;
}
return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa());
}
void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent,
@@ -584,7 +543,7 @@ void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_ag
// Note: In Linux ::posix_memalign() requires at least 16 bytes for the alignment.
align = amd::alignUp(align, 16);
void* ptr = amd::Os::alignedMalloc(size, align);
if ((ptr != nullptr) && zero) {
if (ptr && zero) {
memset(ptr, 0, size);
}
return ptr;
@@ -778,9 +737,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
return true;
}
hsa_agent_t agent;
agent.handle = 1;
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
if (executable_ == nullptr) {
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
+1 -2
Просмотреть файл
@@ -202,7 +202,7 @@ class HSAILProgram : public device::Program {
virtual bool createBinary(amd::option::Options* options);
virtual const aclTargetInfo& info(const char* str = "");
virtual const aclTargetInfo& info();
virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize,
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
@@ -250,7 +250,6 @@ class LightningProgram : public HSAILProgram {
LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
isLC_ = true;
isHIP_ = (owner.language() == amd::Program::HIP);
machineTarget_ = palNullDevice().hwInfo()->machineTargetLC_;
}
LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
+9 -7
Просмотреть файл
@@ -174,7 +174,7 @@ Settings::Settings() {
bool Settings::create(const Pal::DeviceProperties& palProp,
const Pal::GpuMemoryHeapProperties* heaps, const Pal::WorkStationCaps& wscaps,
bool reportAsOCL12Device) {
bool enableXNACK, bool reportAsOCL12Device) {
uint32_t osVer = 0x0;
// Disable thread trace by default for all devices
@@ -202,19 +202,19 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
apuSystem_ = true;
}
enableXNACK_ = enableXNACK;
hsailExplicitXnack_ = enableXNACK;
switch (palProp.revision) {
case Pal::AsicRevision::Navi23:
case Pal::AsicRevision::Navi22:
case Pal::AsicRevision::Navi21:
case Pal::AsicRevision::Navi14:
case Pal::AsicRevision::Navi12:
case Pal::AsicRevision::Navi10:
case Pal::AsicRevision::Navi10_A0:
case Pal::AsicRevision::Navi23:
case Pal::AsicRevision::Navi22:
case Pal::AsicRevision::Navi21:
gfx10Plus_ = true;
useLightning_ = GPU_ENABLE_LC;
hsailExplicitXnack_ =
static_cast<uint>(palProp.gpuMemoryProperties.flags.pageMigrationEnabled ||
palProp.gpuMemoryProperties.flags.iommuv2Support);
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
if (useLightning_) {
enableWave32Mode_ = true;
@@ -264,6 +264,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
case Pal::AsicRevision::Polaris10:
case Pal::AsicRevision::Polaris11:
case Pal::AsicRevision::Polaris12:
case Pal::AsicRevision::Polaris22:
// Disable tiling aperture on VI+
linearPersistentImage_ = true;
// Keep this false even though we have support
@@ -289,6 +290,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
// Fall through ...
case Pal::AsicRevision::Bonaire:
case Pal::AsicRevision::Hawaii:
case Pal::AsicRevision::HawaiiPro:
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
reportFMAF_ = false;
if ((palProp.revision == Pal::AsicRevision::Hawaii) || aiPlus_) {
+1
Просмотреть файл
@@ -127,6 +127,7 @@ class Settings : public device::Settings {
bool create(const Pal::DeviceProperties& palProp, //!< PAL device properties
const Pal::GpuMemoryHeapProperties* heaps, //!< PAL heap settings
const Pal::WorkStationCaps& wscaps, //!< PAL workstation settings
bool enableXNACK, //!< XNACK is enabled on this device
bool reportAsOCL12Device = false //!< Report As OpenCL1.2 Device
);
+5 -5
Просмотреть файл
@@ -934,7 +934,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
amd::Image* srcImage = static_cast<amd::Image*>(srcMemory.owner());
amd::Image::Format newFormat(dstImage->getImageFormat());
bool swapLayer =
(dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10);
(dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10);
// Find unsupported formats
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
@@ -1126,7 +1126,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
amd::Image* srcImage = static_cast<amd::Image*>(srcMemory.owner());
amd::Image::Format newFormat(srcImage->getImageFormat());
bool swapLayer =
(srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10);
(srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10);
// Find unsupported formats
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
@@ -1364,14 +1364,14 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
// Program source origin
int32_t srcOrg[4] = {(int32_t)srcOrigin[0], (int32_t)srcOrigin[1], (int32_t)srcOrigin[2], 0};
if ((srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10)) {
if ((srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10)) {
srcOrg[3] = 1;
}
setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
// Program destination origin
int32_t dstOrg[4] = {(int32_t)dstOrigin[0], (int32_t)dstOrigin[1], (int32_t)dstOrigin[2], 0};
if ((dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10)) {
if ((dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10)) {
dstOrg[3] = 1;
}
setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg);
@@ -2072,7 +2072,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
amd::Image* image = static_cast<amd::Image*>(memory.owner());
amd::Image::Format newFormat(image->getImageFormat());
bool swapLayer =
(image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10);
(image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10);
// Program the kernels workload depending on the fill dimensions
fillType = FillImage;
+1 -1
Просмотреть файл
@@ -430,7 +430,7 @@ PerfCounter::PerfCounter(const Device& device, //!< A ROC device object
info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id)
// these block indices are valid for the SI (Gfx8) & Gfx9 devices
switch (roc_device_.deviceInfo().gfxipMajor_) {
switch (roc_device_.isa().versionMajor()) {
case (8):
gfxVersion_ = ROC_GFX8;
if (blockIndex < viBlockIdOrcaToRocr.size()) {
-43
Просмотреть файл
@@ -33,49 +33,6 @@ static constexpr uint DeviceQueueMaskSize = 32;
//! Set to match the number of pipes, which is 8.
static constexpr uint kMaxAsyncQueues = 8;
typedef uint HsaDeviceId;
struct AMDDeviceInfo {
const char* machineTarget_; //!< Machine target
const char* machineTargetLC_;//!< Machine target for LC
uint simdPerCU_; //!< Number of SIMDs per CU
uint simdWidth_; //!< Number of workitems processed per SIMD
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemSizePerCU_; //!< Local memory size per CU
uint localMemBanks_; //!< Number of banks of local memory
uint gfxipMajor_; //!< The core engine GFXIP Major version
uint gfxipMinor_; //!< The core engine GFXIP Minor version
uint gfxipStepping_; //!< The core engine GFXIP Stepping version
uint pciDeviceId_; //!< PCIe device id
};
constexpr HsaDeviceId HSA_INVALID_DEVICE_ID = -1;
static constexpr AMDDeviceInfo DeviceInfo[] = {
/* KAVERI_SPECTRE */ {"Spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0},
/* KAVERI_SPOOKY */ {"Spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0},
/* HAWAII */ {"Hawaii", "gfx701", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0},
/* CARRIZO */ {"Carrizo", "gfx801", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 1, 0},
/* TONGA */ {"Tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 2, 0},
/* ICELAND */ {"Iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 2, 0},
/* FIJI */ {"Fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0},
/* ELLESMERE */ {"Ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0},
/* BAFFIN */ {"Baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0},
/* VEGA10 */ {"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 0, 0},
/* VEGA10_HBCC */ {"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 1, 0},
/* RAVEN */ {"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 2, 0},
/* VEGA12 */ {"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 4, 0},
/* VEGA20 */ {"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 6, 0},
/* ARCTURUS */ {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 8, 0},
/* NAVI10 */ {"gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 0, 0},
/* NAVI12 */ {"gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 1, 0},
/* NAVI14 */ {"gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 2, 0},
/* SIENNA_CICHILD */ {"gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 0, 0},
/* NAVY_FLOUNDER */ {"gfx1031", "gfx1031", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 1, 0},
/* DIMGREY CAVEFISH*/{"gfx1032", "gfx1032", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 2, 0}
};
} // namespace roc
#endif
+154 -120
Просмотреть файл
@@ -48,6 +48,7 @@
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
#include <numaif.h>
#endif // ROCCLR_SUPPORT_NUMA_POLICY
@@ -61,9 +62,9 @@
#ifndef WITHOUT_HSA_BACKEND
namespace {
inline bool getIsaMeta(const char* targetId, amd_comgr_metadata_node_t& isaMeta) {
inline bool getIsaMeta(std::string isaName, amd_comgr_metadata_node_t& isaMeta) {
amd_comgr_status_t status;
status = amd::Comgr::get_isa_metadata(targetId, &isaMeta);
status = amd::Comgr::get_isa_metadata(isaName.c_str(), &isaMeta);
return (status == AMD_COMGR_STATUS_SUCCESS) ? true : false;
}
@@ -99,34 +100,13 @@ std::vector<AgentInfo> roc::Device::cpu_agents_;
address Device::mg_sync_ = nullptr;
static HsaDeviceId getHsaDeviceId(hsa_agent_t device, uint32_t& pci_id) {
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(device, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &pci_id)) {
return HSA_INVALID_DEVICE_ID;
bool NullDevice::create(const amd::Isa &isa) {
if (!isa.runtimeRocSupported()) {
LogPrintfError("Offline HSA device %s is not supported", isa.targetId());
return false;
}
char agent_name[64] = {0};
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(device, HSA_AGENT_INFO_NAME, agent_name)) {
return HSA_INVALID_DEVICE_ID;
}
if (::strncmp(agent_name, "gfx", 3) != 0) {
return HSA_INVALID_DEVICE_ID;
}
for (uint i = 0; i < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++i) {
if (::strcmp(agent_name, DeviceInfo[i].machineTargetLC_) == 0) {
return i;
}
}
return HSA_INVALID_DEVICE_ID;
}
bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
online_ = false;
deviceInfo_ = deviceInfo;
// Mark the device as GPU type
info_.type_ = CL_DEVICE_TYPE_GPU;
info_.vendorId_ = 0x1002;
@@ -134,24 +114,38 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
roc::Settings* hsaSettings = new roc::Settings();
settings_ = hsaSettings;
if (!hsaSettings ||
!hsaSettings->create(false, deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_)) {
LogError("Error creating settings for nullptr HSA device");
!hsaSettings->create(false, isa.versionMajor(), isa.versionMinor(),
isa.xnack() == amd::Isa::Feature::Enabled)) {
LogPrintfError("Error creating settings for offline HSA device %s", isa.targetId());
return false;
}
if (!ValidateComgr()) {
LogError("Code object manager initialization failed!");
LogPrintfError("Code object manager initialization failed for offline HSA device %s",
isa.targetId());
return false;
}
if (!amd::Device::create(isa)) {
LogPrintfError("Unable to setup offline HSA device %s", isa.targetId());
return false;
}
// Report the device name
::strncpy(info_.name_, "AMD HSA Device", sizeof(info_.name_) - 1);
::strncpy(info_.name_, isa.targetId(), sizeof(info_.name_) - 1);
info_.gfxipMajor_ = isa.versionMajor();
info_.gfxipMinor_ = isa.versionMinor();
info_.gfxipStepping_ = isa.versionStepping();
::strncpy(info_.targetId_, isa.isaName().c_str(), sizeof(info_.targetId_) - 1);
info_.extensions_ = getExtensionString();
info_.maxWorkGroupSize_ = hsaSettings->maxWorkGroupSize_;
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
info_.oclcVersion_ = "OpenCL C " OPENCL_C_VERSION_STR " ";
info_.spirVersions_ = "";
::strncpy(info_.driverVersion_, "1.0 Provisional (hsa)", sizeof(info_.driverVersion_) - 1);
std::stringstream ss;
ss << AMD_BUILD_STRING " (HSA," << (settings().useLightning_ ? "LC" : "HSAIL");
ss << ") [Offline]";
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
return true;
}
@@ -160,6 +154,7 @@ Device::Device(hsa_agent_t bkendDevice)
: mapCacheOps_(nullptr)
, mapCache_(nullptr)
, _bkendDevice(bkendDevice)
, pciDeviceId_(0)
, gpuvm_segment_max_alloc_(0)
, alloc_granularity_(0)
, context_(nullptr)
@@ -311,37 +306,35 @@ bool NullDevice::init() {
return false;
}
// Return without initializing offline device list
return true;
#if defined(WITH_COMPILER_LIB)
// If there is an HSA enabled device online then skip any offline device
std::vector<Device*> devices;
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
// Load the offline devices
// Iterate through the set of available offline devices
for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); id++) {
// Create offline devices for all ISAs not already associated with an online
// device. This allows code objects to be compiled for all supported ISAs.
std::vector<Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, false);
for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) {
if (!isa->runtimeRocSupported()) {
continue;
}
bool isOnline = false;
// Check if the particular device is online
for (unsigned int i = 0; i < devices.size(); i++) {
if (::strcmp(static_cast<NullDevice*>(devices[i])->deviceInfo_.machineTarget_,
DeviceInfo[id].machineTarget_) == 0) {
for (size_t i = 0; i < devices.size(); i++) {
if (&(devices[i]->isa()) == isa) {
isOnline = true;
break;
}
}
if (isOnline) {
continue;
}
NullDevice* nullDevice = new NullDevice();
if (!nullDevice->create(DeviceInfo[id])) {
LogError("Error creating new instance of Device.");
delete nullDevice;
std::unique_ptr<NullDevice> nullDevice(new NullDevice());
if (!nullDevice) {
LogPrintfError("Error allocating new instance of offline HSA device %s", isa->targetId());
return false;
}
nullDevice->registerDevice();
if (!nullDevice->create(*isa)) {
LogPrintfError("Skipping creating new instance of offline HSA sevice %s", isa->targetId());
continue;
}
nullDevice.release()->registerDevice();
}
#endif // defined(WITH_COMPILER_LIB)
return true;
}
@@ -516,22 +509,11 @@ bool Device::init() {
for (auto agent : gpu_agents_) {
std::unique_ptr<Device> roc_device(new Device(agent));
if (!roc_device) {
LogError("Error creating new instance of Device on then heap.");
return false;
}
uint32_t pci_id;
HsaDeviceId deviceId = getHsaDeviceId(agent, pci_id);
if (deviceId == HSA_INVALID_DEVICE_ID) {
LogPrintfError("Invalid HSA device %x", pci_id);
continue;
}
roc_device->deviceInfo_ = DeviceInfo[deviceId];
roc_device->deviceInfo_.pciDeviceId_ = pci_id;
if (!roc_device->create()) {
LogError("Error creating new instance of Device.");
continue;
@@ -585,16 +567,84 @@ void Device::tearDown() {
}
bool Device::create() {
char agent_name[64] = {0};
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_NAME, agent_name)) {
LogError("Unable to get HSA device name");
return false;
}
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID,
&pciDeviceId_)) {
LogPrintfError("Unable to get PCI ID of HSA device %s", agent_name);
return false;
}
struct agent_isas_t {
uint count;
hsa_isa_t first_isa;
} agent_isas = {0, {0}};
if (HSA_STATUS_SUCCESS !=
hsa_agent_iterate_isas(_bkendDevice,
[](hsa_isa_t isa, void* data) {
agent_isas_t* agent_isas = static_cast<agent_isas_t*>(data);
if (agent_isas->count++ == 0) {
agent_isas->first_isa = isa;
}
return HSA_STATUS_SUCCESS;
},
&agent_isas)) {
LogPrintfError("Unable to iterate supported ISAs for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
if (agent_isas.count != 1) {
LogPrintfError("HSA device %s (PCI ID %x) has %u ISAs but can only support a single ISA",
agent_name, pciDeviceId_, agent_isas.count);
return false;
}
uint32_t isa_name_length = 0;
if (HSA_STATUS_SUCCESS !=
hsa_isa_get_info_alt(agent_isas.first_isa, (hsa_isa_info_t)HSA_ISA_INFO_NAME_LENGTH,
&isa_name_length)) {
LogPrintfError("Unable to get ISA name length for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
std::vector<char> isa_name(isa_name_length + 1, '\0');
if (HSA_STATUS_SUCCESS !=
hsa_isa_get_info_alt(agent_isas.first_isa, (hsa_isa_info_t)HSA_ISA_INFO_NAME,
isa_name.data())) {
LogPrintfError("Unable to get ISA name for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
const amd::Isa *isa = amd::Isa::findIsa(isa_name.data());
if (!isa || !isa->runtimeRocSupported()) {
LogPrintfError("Unsupported HSA device %s (PCI ID %x) for ISA %s", agent_name, pciDeviceId_,
isa_name.data());
return false;
}
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) {
LogPrintfError("Unable to get profile for HSA device %s (PCI ID %x)", agent_name, pciDeviceId_);
return false;
}
uint32_t coop_groups = 0;
// Check cooperative groups for HIP only
if (amd::IS_HIP && (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES), &coop_groups))) {
if (amd::IS_HIP &&
(HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES),
&coop_groups))) {
LogPrintfError(
"Unable to determine if cooperative queues are supported for HSA device %s (PCI ID %x)",
agent_name, pciDeviceId_);
return false;
}
@@ -603,17 +653,23 @@ bool Device::create() {
roc::Settings* hsaSettings = new roc::Settings();
settings_ = hsaSettings;
if (!hsaSettings ||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipMajor_,
deviceInfo_.gfxipMinor_, coop_groups)) {
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(),
isa->versionMinor(), isa->xnack() == amd::Isa::Feature::Enabled,
coop_groups)) {
LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
if (!ValidateComgr()) {
LogError("Code object manager initialization failed!");
LogPrintfError("Code object manager initialization failed for HSA device %s (PCI ID %x)",
agent_name, pciDeviceId_);
return false;
}
if (!amd::Device::create()) {
if (!amd::Device::create(*isa)) {
LogPrintfError("Unable to setup device for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
@@ -621,6 +677,8 @@ bool Device::create() {
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID), &hsa_bdf_id)) {
LogPrintfError("Unable to determine BFD ID for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
@@ -632,6 +690,8 @@ bool Device::create() {
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice,
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DOMAIN), &pci_domain_id)) {
LogPrintfError("Unable to determine domain ID for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
info_.pciDomainID = pci_domain_id;
@@ -650,7 +710,8 @@ bool Device::create() {
#endif
if (populateOCLDeviceConstants() == false) {
LogError("populateOCLDeviceConstants failed!");
LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name,
pciDeviceId_);
return false;
}
@@ -995,35 +1056,11 @@ Memory* Device::getGpuMemory(amd::Memory* mem) const {
bool Device::populateOCLDeviceConstants() {
info_.available_ = true;
hsa_isa_t isa = {0};
if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_ISA, &isa) != HSA_STATUS_SUCCESS) {
return false;
}
uint32_t isaNameLength = 0;
if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &isaNameLength) != HSA_STATUS_SUCCESS) {
return false;
}
if ((isaNameLength + 1) > sizeof(info_.targetId_)) {
return false;
}
if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, info_.targetId_) != HSA_STATUS_SUCCESS) {
return false;
}
info_.targetId_[isaNameLength] = '\0';
char *gfxSubString = ::strstr(info_.targetId_, "gfx");
if (nullptr == gfxSubString) {
return false;
}
::strncpy(info_.name_, gfxSubString, sizeof(info_.name_) - 1);
info_.gfxipMajor_ = deviceInfo_.gfxipMajor_;
info_.gfxipMinor_ = deviceInfo_.gfxipMinor_;
info_.gfxipStepping_ = deviceInfo_.gfxipStepping_;
::strncpy(info_.name_, isa().targetId(), sizeof(info_.name_) - 1);
info_.gfxipMajor_ = isa().versionMajor();
info_.gfxipMinor_ = isa().versionMinor();
info_.gfxipStepping_ = isa().versionStepping();
::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1);
char device_name[64] = {0};
if (HSA_STATUS_SUCCESS == hsa_agent_get_info(_bkendDevice,
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME,
@@ -1072,7 +1109,7 @@ bool Device::populateOCLDeviceConstants() {
}
//TODO: add the assert statement for Raven
if ((info_.gfxipMajor_*100 + info_.gfxipMinor_*10 + info_.gfxipStepping_) != 902) {
if (!(isa().versionMajor() == 9 && isa().versionMinor() == 0 && isa().versionStepping() == 2)) {
assert(info_.maxEngineClockFrequency_ > 0);
}
@@ -1258,7 +1295,7 @@ bool Device::populateOCLDeviceConstants() {
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
// Enable OpenCL 2.0 for Vega10+
if (deviceInfo_.gfxipMajor_ >= 9) {
if (isa().versionMajor() >= 9) {
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " ";
} else {
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
@@ -1394,14 +1431,14 @@ bool Device::populateOCLDeviceConstants() {
}
if (amd::IS_HIP) {
// Report atomics capability based on GFX IP, control on Hawaii
if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipMajor_ >= 8) {
if (info_.hostUnifiedMemory_ || isa().versionMajor() >= 8) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
}
else if (!settings().useLightning_) {
// Report atomics capability based on GFX IP, control on Hawaii
// and Vega10.
if (info_.hostUnifiedMemory_ || (deviceInfo_.gfxipMajor_ == 8)) {
if (info_.hostUnifiedMemory_ || (isa().versionMajor() == 8)) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
}
@@ -1409,10 +1446,10 @@ bool Device::populateOCLDeviceConstants() {
if (settings().checkExtension(ClAmdDeviceAttributeQuery)) {
info_.simdPerCU_ = settings().enableWgpMode_
? (2 * deviceInfo_.simdPerCU_)
: deviceInfo_.simdPerCU_;
info_.simdWidth_ = deviceInfo_.simdWidth_;
info_.simdInstructionWidth_ = deviceInfo_.simdInstructionWidth_;
? (2 * isa().simdPerCU())
: isa().simdPerCU();
info_.simdWidth_ = isa().simdWidth();
info_.simdInstructionWidth_ = isa().simdInstructionWidth();
if (HSA_STATUS_SUCCESS !=
hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_WAVEFRONT_SIZE, &info_.wavefrontWidth_)) {
return false;
@@ -1454,16 +1491,16 @@ bool Device::populateOCLDeviceConstants() {
info_.l2CacheSize_ = cache_sizes[1];
info_.timeStampFrequency_ = 1000000;
info_.globalMemChannelBanks_ = 4;
info_.globalMemChannelBankWidth_ = deviceInfo_.memChannelBankWidth_;
info_.localMemSizePerCU_ = deviceInfo_.localMemSizePerCU_;
info_.localMemBanks_ = deviceInfo_.localMemBanks_;
info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth();
info_.localMemSizePerCU_ = isa().localMemSizePerCU();
info_.localMemBanks_ = isa().localMemBanks();
info_.numAsyncQueues_ = kMaxAsyncQueues;
info_.numRTQueues_ = info_.numAsyncQueues_;
info_.numRTCUs_ = info_.maxComputeUnits_;
//TODO: set to true once thread trace support is available
info_.threadTraceEnable_ = false;
info_.pcieDeviceId_ = deviceInfo_.pciDeviceId_;
info_.pcieDeviceId_ = pciDeviceId_;
info_.cooperativeGroups_ = settings().enableCoopGroups_;
info_.cooperativeMultiDeviceGroups_ = settings().enableCoopMultiDeviceGroups_;
}
@@ -1481,7 +1518,7 @@ bool Device::populateOCLDeviceConstants() {
// Get values from Comgr
amd_comgr_metadata_node_t isaMeta;
if (getIsaMeta(info_.targetId_, isaMeta)) {
if (getIsaMeta(std::move(isa().isaName()), isaMeta)) {
std::string vgprValue;
info_.availableVGPRs_ = (getValueFromIsaMeta(isaMeta, "AddressableNumVGPRs", vgprValue))
? (atoi(vgprValue.c_str()) * info_.simdPerCU_)
@@ -1595,14 +1632,11 @@ bool Device::bindExternalDevice(uint flags, void* const gfxDevice[], void* gfxCo
return false;
}
bool match = true;
match &= info_.deviceTopology_.pcie.bus == info.pci_bus;
match &= info_.deviceTopology_.pcie.device == info.pci_device;
match &= info_.deviceTopology_.pcie.function == info.pci_function;
match &= info_.vendorId_ == info.vendor_id;
match &= deviceInfo_.pciDeviceId_ == info.device_id;
return info_.deviceTopology_.pcie.bus == info.pci_bus &&
info_.deviceTopology_.pcie.device == info.pci_device &&
info_.deviceTopology_.pcie.function == info.pci_function &&
info_.vendorId_ == info.vendor_id && pciDeviceId_ == info.device_id;
return match;
#endif
}
+2 -4
Просмотреть файл
@@ -110,7 +110,7 @@ class NullDevice : public amd::Device {
NullDevice(){};
//! create the device
bool create(const AMDDeviceInfo& deviceInfo);
bool create(const amd::Isa &isa);
//! Initialise all the offline devices that can be used for compilation
static bool init();
@@ -126,7 +126,6 @@ class NullDevice : public amd::Device {
//! Construct an HSAIL program object from the ELF assuming it is valid
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr);
const AMDDeviceInfo& deviceInfo() const { return deviceInfo_; }
// List of dummy functions which are disabled for NullDevice
@@ -232,8 +231,6 @@ class NullDevice : public amd::Device {
static bool destroyCompiler();
//! Handle to the compiler
static Compiler* compilerHandle_;
//! Device Id for an HsaDevice
AMDDeviceInfo deviceInfo_;
private:
static constexpr bool offlineDevice_ = true;
@@ -515,6 +512,7 @@ class Device : public NullDevice {
std::vector<Device*> enabled_p2p_devices_; //!< List of user enabled P2P devices for this device
mutable std::mutex lock_allow_access_; //!< To serialize allow_access calls
hsa_agent_t _bkendDevice;
uint32_t pciDeviceId_;
hsa_agent_t* p2p_agents_list_;
hsa_profile_t agent_profile_;
hsa_amd_memory_pool_t group_segment_;
+1 -1
Просмотреть файл
@@ -1049,7 +1049,7 @@ bool Image::createInteropImage() {
}
if (obj->getGLTarget() == GL_TEXTURE_CUBE_MAP) {
desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipMajor_);
desc.setFace(obj->getCubemapFace(), dev().isa().versionMajor());
}
hsa_status_t err =
+2 -4
Просмотреть файл
@@ -234,9 +234,8 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
return true;
}
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) {
machineTarget_ = rocNullDevice().deviceInfo().machineTarget_;
}
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner)
: roc::Program(device, owner) {}
HSAILProgram::~HSAILProgram() {
#if defined(WITH_COMPILER_LIB)
@@ -440,7 +439,6 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
: roc::Program(device, owner) {
isLC_ = true;
isHIP_ = (owner.language() == amd::Program::HIP);
machineTarget_ = rocNullDevice().deviceInfo().machineTargetLC_;
}
bool LightningProgram::createBinary(amd::option::Options* options) {
+1 -1
Просмотреть файл
@@ -74,7 +74,7 @@ class Program : public device::Program {
);
virtual bool createBinary(amd::option::Options* options) = 0;
virtual const aclTargetInfo& info(const char* str = "") { return info_; }
virtual const aclTargetInfo& info() { return info_; }
protected:
//! Disable default copy constructor
+4 -2
Просмотреть файл
@@ -93,7 +93,8 @@ Settings::Settings() {
barrier_sync_ = (!flagIsDefault(ROC_BARRIER_SYNC)) ? ROC_BARRIER_SYNC : true;
}
bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups) {
bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, bool enableXNACK,
bool coop_groups) {
customHostAllocator_ = false;
if (fullProfile) {
@@ -105,7 +106,8 @@ bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coo
pinnedXferSize_ = std::max(pinnedXferSize_, pinnedMinXferSize_);
stagedXferSize_ = std::max(stagedXferSize_, pinnedMinXferSize_ + 4 * Ki);
}
enableXNACK_ = apuSystem_ ? 1 : 0 ; // enable xnack for APU system
enableXNACK_ = enableXNACK;
hsailExplicitXnack_ = enableXNACK;
// Enable extensions
enableExtension(ClKhrByteAddressableStore);
+2 -1
Просмотреть файл
@@ -89,7 +89,8 @@ class Settings : public device::Settings {
Settings();
//! Creates settings
bool create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups = false);
bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, bool enableXNACK,
bool coop_groups = false);
private:
//! Disable copy constructor