Update code object handling for GSL, PAL and ROCm
- Correct GSL path to report targets using the TargetID syntax. - Correct GSL path to check compatibility of code objects when loading. - Add the concept of a device ISA and create a registry used by ROCm, PAL and GSL. - Support XNACK and SRAMECC target features consistently for PAL and ROCm. - Correct logic for NullDevices and asserts to avoid memory corruption. - Allow all NullDevices to be created for HIP. - Numerous other code improvements. Change-Id: I40abf3d2b22249c1492d1af5919665f8184f4e0e
Этот коммит содержится в:
@@ -23,6 +23,11 @@
|
||||
#include "utils/options.hpp"
|
||||
#include "comgrctx.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
#if defined(WITH_HSA_DEVICE)
|
||||
#include "device/rocm/rocdevice.hpp"
|
||||
extern amd::AppProfile* rocCreateAppProfile();
|
||||
@@ -65,6 +70,11 @@ extern void DeviceUnload();
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
namespace {
|
||||
|
||||
// Prefix that precedes the target ID in a full ISA name. Isa::isaName()
// prepends it to the target ID and Isa::findIsa(const char*) requires and
// strips it before looking the target ID up in the supported ISA table.
constexpr char hsaIsaNamePrefix[] = "amdgcn-amd-amdhsa--";
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace device {
|
||||
extern const char* BlitSourceCode;
|
||||
@@ -77,6 +87,166 @@ bool VirtualDevice::ActiveWait() const {
|
||||
|
||||
namespace amd {
|
||||
|
||||
std::pair<const Isa*, const Isa*> Isa::supportedIsas() {
|
||||
constexpr amd::Isa::Feature NONE = amd::Isa::Feature::Unsupported;
|
||||
constexpr amd::Isa::Feature ANY = amd::Isa::Feature::Any;
|
||||
constexpr amd::Isa::Feature OFF = amd::Isa::Feature::Disabled;
|
||||
constexpr amd::Isa::Feature ON = amd::Isa::Feature::Enabled;
|
||||
|
||||
static constexpr Isa supportedIsas_[] = {
|
||||
|
||||
// NOTE: Add new targets by adding rows for each permutation of the SRAMECC
|
||||
// and XNACK target feature values. If the target does not support the
|
||||
// feature then only NONE is used. If it supports the feature than include
|
||||
// rows for ANY, OFF and ON (but not NONE).
|
||||
//
|
||||
// Use the Target ID syntax. This comprises the processor name, followed by
|
||||
// the target feature settings in alphebetic order separated by ':'. If a
|
||||
// target feature is omitted it means either it is not supported, or it has
|
||||
// the ANY value. If the target feature is disabled then use a '-' suffix,
|
||||
// and if enabled use a '+' suffix.
|
||||
//
|
||||
// If the HSAIL or AMD IL compilers do not support the target, then use
|
||||
// nullptr for the ID.
|
||||
//
|
||||
// -------------------- Compiler -------------------- ------- Runtime ----- ---- IP ---- --- Target --- ---------- Target Properties ----------
|
||||
// Supported Version Features Mem
|
||||
// SIMD Channel LDS LDS
|
||||
// SIMD/ SIMD Instr Bank Size/ Mem
|
||||
// Target ID HSAIL ID AMD IL ID ROC PAL GSL Maj/Min/Stp SRAMECC XNACK CU Width Width Width CU Banks
|
||||
{"gfx600", "Tahiti", "Tahiti", false, false, true, 6, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx601", "Pitcairn", "Pitcairn", false, false, true, 6, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Capeverde
|
||||
{"gfx602", "Oland", "Oland", false, false, true, 6, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Hainan
|
||||
{"gfx700", "Kaveri", "Kalindi", true, true, true, 7, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Spectre, Spooky, Kalindi
|
||||
{"gfx701", "Hawaii", "Hawaii", true, true, true, 7, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaiipro
|
||||
{"gfx702", nullptr, nullptr, true, true, true, 7, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaii (can execute Hawiipro code)
|
||||
{"gfx703", nullptr, nullptr, false, false, false, 7, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Mullins
|
||||
{"gfx704", "Bonaire", "Bonaire", false, true, true, 7, 0, 4, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx705", "Mullins", "Mullins", false, true, true, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari
|
||||
{"gfx801", nullptr, nullptr, true, false, false, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx801:xnack-", "Carrizo", "Carrizo", true, true, true, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx801:xnack+", nullptr, nullptr, true, false, false, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx802", "Tonga", "Tonga", true, true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Iceland
|
||||
{"gfx803", "Fiji", "Fiji", true, true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Ellesmere/Polaris10, Baffin/Polaris11, Polaris12, Polaris22/VegaM
|
||||
{"gfx805", nullptr, nullptr, true, false, false, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Tongapro
|
||||
{"gfx810", nullptr, nullptr, true, false, false, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx810:xnack-", "Stoney", "Stoney", true, true, true, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx810:xnack+", nullptr, nullptr, true, false, false, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx900", "gfx901", nullptr, true, true, false, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Greenland
|
||||
{"gfx900:xnack-", "gfx900", nullptr, true, true, !IS_BRAHMA, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx900:xnack+", "gfx901", nullptr, true, true, false, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx902", "gfx903", nullptr, true, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven
|
||||
{"gfx902:xnack-", "gfx902", nullptr, true, true, !IS_BRAHMA, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx902:xnack+", "gfx903", nullptr, true, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx904", "gfx905", nullptr, true, true, false, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega12
|
||||
{"gfx904:xnack-", "gfx904", nullptr, true, true, !IS_BRAHMA, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx904:xnack+", "gfx905", nullptr, true, true, false, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906", "gfx907", nullptr, true, true, false, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega20
|
||||
{"gfx906:sramecc-", "gfx907", nullptr, true, true, !IS_BRAHMA & false, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:sramecc+", nullptr, nullptr, true, false, false, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:xnack-", "gfx906", nullptr, true, true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:xnack+", "gfx907", nullptr, true, true, false, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:sramecc-:xnack-", "gfx906", nullptr, true, true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:sramecc-:xnack+", "gfx907", nullptr, true, true, false, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:sramecc+:xnack-", nullptr, nullptr, true, false, false, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx906:sramecc+:xnack+", nullptr, nullptr, true, false, false, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:sramecc-", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:sramecc+", nullptr, nullptr, true, false, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:sramecc-:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:sramecc-:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:sramecc+:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx908:sramecc+:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx909", nullptr, nullptr, false, false, false, 9, 0, 9, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code)
|
||||
{"gfx909:xnack-", nullptr, nullptr, false, false, !IS_BRAHMA & false, 9, 0, 9, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx909:xnack+", nullptr, nullptr, false, false, false, 9, 0, 9, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx90c", nullptr, nullptr, false, false, false, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Renoir
|
||||
{"gfx90c:xnack-", nullptr, nullptr, false, false, !IS_BRAHMA & false, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx90c:xnack+", nullptr, nullptr, false, false, false, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1010", nullptr, nullptr, true, false, false, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1010:xnack-", "gfx1010", nullptr, true, true, false, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1010:xnack+", nullptr, nullptr, true, false, false, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1011", nullptr, nullptr, true, false, false, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1011:xnack-", "gfx1011", nullptr, true, true, false, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1011:xnack+", nullptr, nullptr, true, false, false, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1012", nullptr, nullptr, true, false, false, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1012:xnack-", "gfx1012", nullptr, true, true, false, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1012:xnack+", nullptr, nullptr, true, false, false, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1030", nullptr, nullptr, true, false, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1031", nullptr, nullptr, true, false, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1032", nullptr, nullptr, true, false, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32},
|
||||
{"gfx1033", nullptr, nullptr, false, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}
|
||||
};
|
||||
return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_));
|
||||
}
|
||||
|
||||
std::string Isa::processorName() const {
|
||||
std::string processor(targetId_);
|
||||
return processor.substr(0, processor.find(':'));
|
||||
}
|
||||
|
||||
std::string Isa::isaName() const {
|
||||
return std::string(hsaIsaNamePrefix) + targetId();
|
||||
}
|
||||
|
||||
bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) {
|
||||
if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
|
||||
codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
|
||||
codeObjectIsa.versionStepping() != agentIsa.versionStepping())
|
||||
return false;
|
||||
|
||||
assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&
|
||||
agentIsa.sramecc() != Feature::Any);
|
||||
if ((codeObjectIsa.sramecc() == Feature::Enabled ||
|
||||
codeObjectIsa.sramecc() == Feature::Disabled) &&
|
||||
codeObjectIsa.sramecc() != agentIsa.sramecc())
|
||||
return false;
|
||||
|
||||
assert(codeObjectIsa.isXnackSupported() == agentIsa.isXnackSupported() &&
|
||||
agentIsa.xnack() != Feature::Any);
|
||||
if ((codeObjectIsa.xnack() == Feature::Enabled || codeObjectIsa.xnack() == Feature::Disabled) &&
|
||||
codeObjectIsa.xnack() != agentIsa.xnack())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up a supported Isa by its full ISA name (target triple prefix plus
// target ID). Returns nullptr when isaName is null, does not start with the
// expected prefix, or names a target ID that is not in the supported table.
const Isa* Isa::findIsa(const char *isaName) {
  if (!isaName) {
    return nullptr;
  }

  // Only the leading characters matter for a starts-with test; strncmp stops
  // at the prefix length (or the first mismatch / terminator) instead of
  // searching the whole string.
  constexpr std::size_t prefixLength = sizeof(hsaIsaNamePrefix) - 1;
  if (std::strncmp(isaName, hsaIsaNamePrefix, prefixLength) != 0) {
    return nullptr;
  }

  const char* targetId = isaName + prefixLength;
  const auto isas = supportedIsas();
  const Isa* match = std::find_if(isas.first, isas.second, [targetId](const Isa& isa) {
    return std::strcmp(targetId, isa.targetId_) == 0;
  });
  return match == isas.second ? nullptr : match;
}
|
||||
|
||||
// Looks up the first supported Isa with the given version whose SRAMECC and
// XNACK settings each either equal the requested setting or are Unsupported
// for that target. Returns nullptr when no entry matches.
const Isa* Isa::findIsa(uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
                        Isa::Feature sramecc, Isa::Feature xnack) {
  const auto range = supportedIsas();
  for (const Isa* candidate = range.first; candidate != range.second; ++candidate) {
    if (candidate->versionMajor_ != versionMajor || candidate->versionMinor_ != versionMinor ||
        candidate->versionStepping_ != versionStepping) {
      continue;
    }
    const bool srameccMatches = candidate->sramecc_ == amd::Isa::Feature::Unsupported ||
                                candidate->sramecc_ == sramecc;
    const bool xnackMatches = candidate->xnack_ == amd::Isa::Feature::Unsupported ||
                              candidate->xnack_ == xnack;
    if (srameccMatches && xnackMatches) {
      return candidate;
    }
  }
  return nullptr;
}
|
||||
|
||||
// Returns a pointer to the first entry of the supported ISA table.
const Isa* Isa::begin() {
  const auto range = supportedIsas();
  return range.first;
}
|
||||
|
||||
// Returns a pointer one past the last entry of the supported ISA table.
const Isa* Isa::end() {
  const auto range = supportedIsas();
  return range.second;
}
|
||||
|
||||
std::vector<Device*>* Device::devices_ = nullptr;
|
||||
AppProfile Device::appProfile_;
|
||||
|
||||
@@ -300,8 +470,9 @@ bool Device::ValidateComgr() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Device::create() {
|
||||
bool Device::create(const Isa &isa) {
|
||||
assert(!vaCacheAccess_ && !vaCacheMap_);
|
||||
isa_ = &isa;
|
||||
vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
|
||||
if (nullptr == vaCacheAccess_) {
|
||||
return false;
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "acl.h"
|
||||
#include "hwdebug.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
@@ -79,6 +80,7 @@ class SvmUnmapMemoryCommand;
|
||||
class SvmPrefetchAsyncCommand;
|
||||
class TransferBufferFileCommand;
|
||||
class HwDebugManager;
|
||||
class Isa;
|
||||
class Device;
|
||||
struct KernelParameterDescriptor;
|
||||
struct Coord3D;
|
||||
@@ -408,7 +410,7 @@ struct Info : public amd::EmbeddedObject {
|
||||
//! Device name string
|
||||
char name_[0x40];
|
||||
|
||||
//! Target ID string
|
||||
//! Target triple plus target ID string
|
||||
char targetId_[0x40];
|
||||
|
||||
//! Vendor name string
|
||||
@@ -1238,6 +1240,214 @@ class MemObjMap : public AllStatic {
|
||||
static amd::Monitor AllocatedLock_; //!< amd monitor locker
|
||||
};
|
||||
|
||||
/// @brief Instruction Set Architecture properties.
|
||||
class Isa {
 public:

  /// @brief Isa's target feature setting type.
  enum class Feature : uint8_t {
    Unsupported,  //!< The target does not support the feature.
    Any,          //!< Supported; the target ID does not specify a setting.
    Disabled,     //!< Supported and disabled ('-' suffix in the target ID).
    Enabled,      //!< Supported and enabled ('+' suffix in the target ID).
  };

  //! Return a uint64_t value that uniquely identifies the Isa, or 0 if
  //! @p isa is nullptr. This can be used when a scalar value handle to the
  //! Isa is required.
  static uint64_t toHandle(const Isa *isa) {
    // The null case is handled explicitly below, so no assumption about the
    // integer representation of a null pointer is needed.
    static_assert(sizeof(isa) <= sizeof(uint64_t), "Isa pointer does not fit in a uint64_t handle");
    return isa ? reinterpret_cast<uint64_t>(isa) : 0;
  }

  //! Return the Isa corresponding to a handle returned by Isa::toHandle,
  //! or nullptr if the handle is 0. This can be used when a scalar value
  //! handle for an Isa is provided.
  static const Isa* fromHandle(uint64_t handle) {
    static_assert(sizeof(const Isa*) <= sizeof(handle),
                  "Isa pointer does not fit in a uint64_t handle");
    return handle ? reinterpret_cast<const Isa*>(handle) : nullptr;
  }

  /// @returns This Isa's target triple and target ID name.
  std::string isaName() const;

  /// @returns This Isa's processor name.
  std::string processorName() const;

  /// @returns This Isa's target ID name.
  const char *targetId() const {
    return targetId_;
  }

  /// @returns This Isa's name to use with the HSAIL compiler, or nullptr if
  /// the HSAIL compiler does not support this Isa.
  const char *hsailName() const {
    return hsailId_;
  }

  /// @returns This Isa's name to use with the AMD IL compiler, or nullptr if
  /// the AMD IL compiler does not support this Isa.
  const char *amdIlName() const {
    return amdIlId_;
  }

  /// @returns If the ROCm runtime supports the ISA.
  bool runtimeRocSupported() const {
    return runtimeRocSupported_;
  }

  /// @returns If the PAL runtime supports the ISA.
  bool runtimePalSupported() const {
    return runtimePalSupported_;
  }

  /// @returns If the GSL runtime supports the ISA.
  bool runtimeGslSupported() const {
    return runtimeGslSupported_;
  }

  /// @returns SRAM ECC feature status.
  const Feature &sramecc() const {
    return sramecc_;
  }

  /// @returns XNACK feature status.
  const Feature &xnack() const {
    return xnack_;
  }

  /// @returns True if SRAMECC feature is supported, false otherwise.
  bool isSrameccSupported() const {
    return sramecc_ != Feature::Unsupported;
  }

  /// @returns True if XNACK feature is supported, false otherwise.
  bool isXnackSupported() const {
    return xnack_ != Feature::Unsupported;
  }

  /// @returns This Isa's major version.
  uint32_t versionMajor() const {
    return versionMajor_;
  }

  /// @returns This Isa's minor version.
  uint32_t versionMinor() const {
    return versionMinor_;
  }

  /// @returns This Isa's stepping version.
  uint32_t versionStepping() const {
    return versionStepping_;
  }

  /// @returns This Isa's number of SIMDs per CU.
  uint32_t simdPerCU() const {
    return simdPerCU_;
  }

  /// @returns This Isa's number of workitems processed per SIMD.
  uint32_t simdWidth() const {
    return simdWidth_;
  }

  /// @returns This Isa's number of instructions processed per SIMD.
  uint32_t simdInstructionWidth() const {
    return simdInstructionWidth_;
  }

  /// @returns This Isa's memory channel bank width.
  uint32_t memChannelBankWidth() const {
    return memChannelBankWidth_;
  }

  /// @returns This Isa's local memory size per CU.
  uint32_t localMemSizePerCU() const {
    return localMemSizePerCU_;
  }

  /// @returns This Isa's number of banks of local memory.
  uint32_t localMemBanks() const {
    return localMemBanks_;
  }

  /// @returns True if @p codeObjectIsa and @p agentIsa are compatible,
  /// false otherwise.
  static bool isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa);

  /// @returns Isa for requested @p isaName, null pointer if not supported.
  static const Isa* findIsa(const char *isaName);

  /// @returns Isa for the requested version and target feature settings,
  /// null pointer if not supported.
  static const Isa* findIsa(uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
                            Feature sramecc = Feature::Any, Feature xnack = Feature::Any);

  /// @returns Iterator for first isa.
  static const Isa* begin();

  /// @returns Iterator for one past the end isa.
  static const Isa* end();

 private:

  // @brief Constructs one row of the supported ISA table. Private so that
  // instances are only created by the class's own members.
  constexpr Isa(const char* targetId, const char* hsailId, const char* amdIlId,
                bool runtimeRocSupported, bool runtimePalSupported, bool runtimeGslSupported,
                uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping,
                Feature sramecc, Feature xnack, uint32_t simdPerCU, uint32_t simdWidth,
                uint32_t simdInstructionWidth, uint32_t memChannelBankWidth,
                uint32_t localMemSizePerCU, uint32_t localMemBanks)
      : targetId_(targetId),
        hsailId_(hsailId),
        amdIlId_(amdIlId),
        runtimeRocSupported_(runtimeRocSupported),
        runtimePalSupported_(runtimePalSupported),
        runtimeGslSupported_(runtimeGslSupported),
        versionMajor_(versionMajor),
        versionMinor_(versionMinor),
        versionStepping_(versionStepping),
        sramecc_(sramecc),
        xnack_(xnack),
        simdPerCU_(simdPerCU),
        simdWidth_(simdWidth),
        simdInstructionWidth_(simdInstructionWidth),
        memChannelBankWidth_(memChannelBankWidth),
        localMemSizePerCU_(localMemSizePerCU),
        localMemBanks_(localMemBanks) {}

  // @brief Returns the begin and end iterators for the supported ISAs.
  static std::pair<const Isa*, const Isa*> supportedIsas();

  // @brief Isa's target ID name. Used for LLVM Code Object Manager
  // compilations.
  const char* targetId_;

  // @brief Isa's HSAIL name. Used for the Compiler Library for HSAIL
  // compilation using the Shader Compiler Finalizer. nullptr if
  // unsupported.
  const char* hsailId_;

  // @brief Isa's AMD IL name. Used for the Compiler Library for AMD IL
  // compilation using the Shader Compiler. nullptr if unsupported.
  const char* amdIlId_;

  bool runtimeRocSupported_;       //!< ROCm runtime is supported.
  bool runtimePalSupported_;       //!< PAL runtime is supported.
  bool runtimeGslSupported_;       //!< GSL runtime is supported.
  uint32_t versionMajor_;          //!< Isa's major version.
  uint32_t versionMinor_;          //!< Isa's minor version.
  uint32_t versionStepping_;       //!< Isa's stepping version.
  Feature sramecc_;                //!< SRAMECC feature.
  Feature xnack_;                  //!< XNACK feature.
  uint32_t simdPerCU_;             //!< Number of SIMDs per CU.
  uint32_t simdWidth_;             //!< Number of workitems processed per SIMD.
  uint32_t simdInstructionWidth_;  //!< Number of instructions processed per SIMD.
  uint32_t memChannelBankWidth_;   //!< Memory channel bank width.
  uint32_t localMemSizePerCU_;     //!< Local memory size per CU.
  uint32_t localMemBanks_;         //!< Number of banks of local memory.

}; // class Isa
|
||||
|
||||
/*! \addtogroup Runtime
|
||||
* @{
|
||||
*
|
||||
@@ -1300,7 +1510,7 @@ class Device : public RuntimeObject {
|
||||
virtual ~Device();
|
||||
|
||||
//! Initializes abstraction layer device object
|
||||
bool create();
|
||||
bool create(const Isa &isa);
|
||||
|
||||
uint retain() {
|
||||
// Overwrite the RuntimeObject::retain().
|
||||
@@ -1476,6 +1686,12 @@ class Device : public RuntimeObject {
|
||||
//! Returns TRUE if the device is available for computations
|
||||
bool isOnline() const { return online_; }
|
||||
|
||||
//! Returns device isa.
|
||||
const Isa &isa() const {
|
||||
assert(isa_);
|
||||
return *isa_;
|
||||
}
|
||||
|
||||
//! Return a non-zero uint64_t value that uniquely identifies the device.
|
||||
//! This can be used when a scalar value handle to the device is require.
|
||||
static uint64_t toHandle(const Device *device) {
|
||||
@@ -1611,6 +1827,7 @@ class Device : public RuntimeObject {
|
||||
static Memory* p2p_stage_; //!< Staging resources
|
||||
|
||||
private:
|
||||
const Isa *isa_; //!< Device isa
|
||||
bool IsTypeMatching(cl_device_type type, bool offlineDevices);
|
||||
|
||||
#if defined(WITH_HSA_DEVICE)
|
||||
|
||||
@@ -1166,7 +1166,7 @@ bool Kernel::SetAvailableSgprVgpr() {
|
||||
bool hasVgprMeta = false;
|
||||
|
||||
amd_comgr_status_t status = amd::Comgr::get_isa_metadata(
|
||||
prog().device().info().targetId_, &isaMeta);
|
||||
prog().device().isa().isaName().c_str(), &isaMeta);
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
hasIsaMeta = true;
|
||||
|
||||
@@ -82,7 +82,6 @@ Program::Program(amd::Device& device, amd::Program& owner)
|
||||
lastBuildOptionsArg_(),
|
||||
buildStatus_(CL_BUILD_NONE),
|
||||
buildError_(CL_SUCCESS),
|
||||
machineTarget_(nullptr),
|
||||
globalVariableTotalSize_(0),
|
||||
programOptions_(nullptr)
|
||||
{
|
||||
@@ -286,7 +285,7 @@ amd_comgr_status_t Program::createAction(const amd_comgr_language_t oclver,
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
status = amd::Comgr::action_info_set_isa_name(*action, device().info().targetId_);
|
||||
status = amd::Comgr::action_info_set_isa_name(*action, device().isa().isaName().c_str());
|
||||
}
|
||||
|
||||
if (status == AMD_COMGR_STATUS_SUCCESS) {
|
||||
@@ -719,8 +718,14 @@ bool Program::compileImplHSAIL(const std::string& sourceCode,
|
||||
acl_error errorCode;
|
||||
aclTargetInfo target;
|
||||
|
||||
std::string arch = LP64_SWITCH("hsail", "hsail64");
|
||||
target = aclGetTargetInfo(arch.c_str(), machineTarget_, &errorCode);
|
||||
const char* arch = LP64_SWITCH("hsail", "hsail64");
|
||||
const char* hsailName = device().isa().hsailName();
|
||||
if (!hsailName) {
|
||||
// HSAIL compiler does not support device's ISA.
|
||||
LogPrintfError("HSAIL compiler does not support %s", device().isa().targetId());
|
||||
return false;
|
||||
}
|
||||
target = aclGetTargetInfo(arch, hsailName, &errorCode);
|
||||
|
||||
// end if asic info is ready
|
||||
// We dump the source code for each program (param: headers)
|
||||
@@ -1107,7 +1112,7 @@ bool Program::linkImplLC(amd::option::Options* options) {
|
||||
linkOptions.push_back("correctly_rounded_sqrt");
|
||||
}
|
||||
if (options->oVariables->DenormsAreZero || AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
|
||||
(device().info().gfxipMajor_ < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) {
|
||||
(device().isa().versionMajor() < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) {
|
||||
linkOptions.push_back("daz_opt");
|
||||
}
|
||||
if (options->oVariables->FiniteMathOnly || options->oVariables->FastRelaxedMath) {
|
||||
@@ -1365,9 +1370,7 @@ bool Program::initBuild(amd::option::Options* options) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* devName = machineTarget_;
|
||||
options->setPerBuildInfo((devName && (devName[0] != '\0')) ? devName : "gpu",
|
||||
clBinary()->getEncryptCode(), true);
|
||||
options->setPerBuildInfo(device().isa().targetId(), clBinary()->getEncryptCode(), true);
|
||||
|
||||
// Elf Binary setup
|
||||
std::string outFileName;
|
||||
@@ -1703,17 +1706,26 @@ int32_t Program::build(const std::string& sourceCode, const char* origOptions,
|
||||
|
||||
// ================================================================================================
|
||||
std::vector<std::string> Program::ProcessOptions(amd::option::Options* options) {
|
||||
std::string scratchStr;
|
||||
std::vector<std::string> optionsVec;
|
||||
|
||||
if (!isLC()) {
|
||||
optionsVec.push_back("-D__AMD__=1");
|
||||
|
||||
scratchStr.clear();
|
||||
optionsVec.push_back(scratchStr.append("-D__").append(machineTarget_).append("__=1"));
|
||||
std::string processorName = device().isa().processorName();
|
||||
const char* hsailName = device().isa().hsailName();
|
||||
const char* amdIlName = device().isa().amdIlName();
|
||||
|
||||
scratchStr.clear();
|
||||
optionsVec.push_back(scratchStr.append("-D__").append(machineTarget_).append("=1"));
|
||||
optionsVec.push_back(std::string("-D__") + processorName + "__=1");
|
||||
optionsVec.push_back(std::string("-D__") + processorName + "=1");
|
||||
if (hsailName && (strcmp(hsailName, processorName.c_str()) != 0)) {
|
||||
optionsVec.push_back(std::string("-D__") + hsailName + "__=1");
|
||||
optionsVec.push_back(std::string("-D__") + hsailName + "=1");
|
||||
}
|
||||
if (amdIlName && (strcmp(amdIlName, processorName.c_str()) != 0) &&
|
||||
(!hsailName || strcmp(amdIlName, hsailName) != 0)) {
|
||||
optionsVec.push_back(std::string("-D__") + amdIlName + "__=1");
|
||||
optionsVec.push_back(std::string("-D__") + amdIlName + "=1");
|
||||
}
|
||||
|
||||
// Set options for the standard device specific options
|
||||
// All our devices support these options now
|
||||
@@ -1785,8 +1797,7 @@ std::vector<std::string> Program::ProcessOptions(amd::option::Options* options)
|
||||
}
|
||||
} else {
|
||||
for (auto e : extensions) {
|
||||
scratchStr.clear();
|
||||
optionsVec.push_back(scratchStr.append("-D").append(e).append("=1"));
|
||||
optionsVec.push_back(std::string("-D") + e + "=1");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,7 +117,6 @@ class Program : public amd::HeapObject {
|
||||
int32_t buildStatus_; //!< build status.
|
||||
int32_t buildError_; //!< build error
|
||||
|
||||
const char* machineTarget_; //!< Machine target for this program
|
||||
aclTargetInfo info_; //!< The info target for this binary.
|
||||
size_t globalVariableTotalSize_;
|
||||
amd::option::Options* programOptions_;
|
||||
@@ -233,9 +232,6 @@ class Program : public amd::HeapObject {
|
||||
const uint32_t codeObjectVer() const { return codeObjectVer_; }
|
||||
#endif
|
||||
|
||||
//! Get the machine target for the program
|
||||
const char* machineTarget() const { return machineTarget_; }
|
||||
|
||||
//! Check if program is HIP based
|
||||
const bool isHIP() const { return (isHIP_ == 1); }
|
||||
|
||||
@@ -293,7 +289,7 @@ class Program : public amd::HeapObject {
|
||||
void releaseClBinary();
|
||||
|
||||
//! return target info
|
||||
virtual const aclTargetInfo& info(const char* str = "") = 0;
|
||||
virtual const aclTargetInfo& info() = 0;
|
||||
|
||||
virtual bool setKernels(
|
||||
amd::option::Options* options, void* binary, size_t binSize,
|
||||
|
||||
@@ -66,7 +66,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) {
|
||||
if (platform == amd::Elf::COMPLIB_PLATFORM) {
|
||||
// BIF 3.0
|
||||
uint32_t flag;
|
||||
aclTargetInfo tgtInfo = aclGetTargetInfo("amdil", dev().hwInfo()->targetName_, NULL);
|
||||
aclTargetInfo tgtInfo = aclGetTargetInfo("amdil", dev().isa().amdIlName(), NULL);
|
||||
if (!elfIn()->getFlags(flag)) {
|
||||
LogError("The OCL binary image loading failed: incorrect format");
|
||||
return false;
|
||||
|
||||
@@ -80,157 +80,6 @@ static constexpr uint HsaSamplerObjectAlignment = 16;
|
||||
//! HSA path specific defines for images
|
||||
static constexpr uint DeviceQueueMaskSize = 32;
|
||||
|
||||
//! Defines all supported ASIC families
|
||||
enum AsicFamilies { Family7xx, Family8xx, FamilyTotal };  // NOTE(review): FamilyTotal presumably serves as the family count - confirm against users
|
||||
|
||||
// FIXME: Change to use 2 digit for major/minor/stepping.
|
||||
// Numeric GFXIP identifiers. Each value is the decimal concatenation of the
// major, minor and stepping versions with one digit each (e.g. gfx810 = 810).
// gfx90c has stepping 12, which does not fit in one digit, so its value 9012
// breaks the three-digit scheme used by the other entries.
enum gfx_handle {
|
||||
gfx600 = 600,
|
||||
gfx601 = 601,
|
||||
gfx602 = 602,
|
||||
gfx700 = 700,
|
||||
gfx701 = 701,
|
||||
gfx702 = 702,
|
||||
gfx704 = 704,
|
||||
gfx705 = 705,
|
||||
gfx800 = 800,
|
||||
gfx801 = 801,
|
||||
gfx802 = 802,
|
||||
gfx803 = 803,
|
||||
gfx810 = 810,
|
||||
gfx900 = 900,
|
||||
gfx902 = 902,
|
||||
gfx904 = 904,
|
||||
gfx906 = 906,
|
||||
gfx909 = 909,
|
||||
gfx90c = 9012
|
||||
};
|
||||
|
||||
// FIXME: Does this need updating?
|
||||
// Static per-ASIC properties. The DeviceInfo table holds one entry of this
// type per CAL target.
struct AMDDeviceInfo {
|
||||
uint machine_; //!< Machine target ID
|
||||
const char* targetName_; //!< Target name
|
||||
const char* machineTarget_; //!< Machine target
|
||||
uint simdPerCU_; //!< Number of SIMDs per CU
|
||||
uint simdWidth_; //!< Number of workitems processed per SIMD
|
||||
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemSizePerCU_; //!< Local memory size per CU
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
uint gfxipVersion_; //!< The core engine GFXIP version
|
||||
uint gfxipMajor_; //!< The core engine GFXIP Major version
|
||||
uint gfxipMinor_; //!< The core engine GFXIP Minor version
|
||||
uint gfxipStepping_; //!< The core engine GFXIP Stepping version
|
||||
};
|
||||
|
||||
//! Hardware description table. Callers index it by CAL target value, so the
//! row order must follow the CAL target enumeration; the comment in front of
//! each row names the target it describes. Rows with an empty target name
//! carry no compiler target.
//!
//! Column order (see AMDDeviceInfo):
//!   machine, targetName, machineTarget, simdPerCU, simdWidth,
//!   simdInstructionWidth, memChannelBankWidth, localMemSizePerCU,
//!   localMemBanks, gfxipVersion, gfxipMajor, gfxipMinor, gfxipStepping
static constexpr AMDDeviceInfo DeviceInfo[] = {
    /* CAL_TARGET_600 */
    {ED_ATI_CAL_MACHINE_R600_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_610 */
    {ED_ATI_CAL_MACHINE_R610_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_630 */
    {ED_ATI_CAL_MACHINE_R630_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_670 */
    {ED_ATI_CAL_MACHINE_R670_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_7XX */
    {ED_ATI_CAL_MACHINE_R770_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_770 */
    {ED_ATI_CAL_MACHINE_R770_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_710 */
    {ED_ATI_CAL_MACHINE_R710_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_730 */
    {ED_ATI_CAL_MACHINE_R730_ISA, "", "", 0, 0, 0, 0, 0, 0, 0},
    /* CAL_TARGET_CYPRESS */
    {ED_ATI_CAL_MACHINE_CYPRESS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
    /* CAL_TARGET_JUNIPER */
    {ED_ATI_CAL_MACHINE_JUNIPER_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
    /* CAL_TARGET_REDWOOD */
    {ED_ATI_CAL_MACHINE_REDWOOD_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
    /* CAL_TARGET_CEDAR */
    {ED_ATI_CAL_MACHINE_CEDAR_ISA, "", "", 1, 8, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
    /* CAL_TARGET_SUMO */
    {ED_ATI_CAL_MACHINE_SUMO_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
    /* CAL_TARGET_SUPERSUMO*/
    {ED_ATI_CAL_MACHINE_SUPERSUMO_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
    /* CAL_TARGET_WRESTLER*/
    {ED_ATI_CAL_MACHINE_WRESTLER_ISA, "", "", 1, 8, 5, 256, 32 * Ki, 16, 400, 4, 0, 0},
    /* CAL_TARGET_CAYMAN */
    {ED_ATI_CAL_MACHINE_CAYMAN_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32, 500, 5, 0, 0},
    /* CAL_TARGET_KAUAI */
    {ED_ATI_CAL_MACHINE_KAUAI_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
    /* CAL_TARGET_BARTS */
    {ED_ATI_CAL_MACHINE_BARTS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
    /* CAL_TARGET_TURKS */
    {ED_ATI_CAL_MACHINE_TURKS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
    /* CAL_TARGET_CAICOS */
    {ED_ATI_CAL_MACHINE_CAICOS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0},
    /* CAL_TARGET_TAHITI */
    {ED_ATI_CAL_MACHINE_TAHITI_ISA, "Tahiti", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, gfx600, 6, 0,
     0},
    /* CAL_TARGET_PITCAIRN */
    {ED_ATI_CAL_MACHINE_PITCAIRN_ISA, "Pitcairn", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, gfx601,
     6, 0, 1},
    /* CAL_TARGET_CAPEVERDE */
    {ED_ATI_CAL_MACHINE_CAPEVERDE_ISA, "Capeverde", "capeverde", 4, 16, 1, 256, 64 * Ki, 32,
     gfx601, 6, 0, 1},
    /* CAL_TARGET_DEVASTATOR */
    {ED_ATI_CAL_MACHINE_DEVASTATOR_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32, 500, 5, 0, 0},
    /* CAL_TARGET_SCRAPPER */
    {ED_ATI_CAL_MACHINE_SCRAPPER_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32, 500, 5, 0, 0},
    /* CAL_TARGET_OLAND */
    {ED_ATI_CAL_MACHINE_OLAND_ISA, "Oland", "oland", 4, 16, 1, 256, 64 * Ki, 32, gfx602, 6, 0, 2},
    /* CAL_TARGET_BONAIRE */
    {ED_ATI_CAL_MACHINE_BONAIRE_ISA, "Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, gfx704, 7,
     0, 4},
    /* CAL_TARGET_SPECTRE */
    {ED_ATI_CAL_MACHINE_SPECTRE_ISA, "Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, gfx700, 7,
     0, 0},
    /* CAL_TARGET_SPOOKY */
    {ED_ATI_CAL_MACHINE_SPOOKY_ISA, "Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, gfx700, 7, 0,
     0},
    /* CAL_TARGET_KALINDI */
    {ED_ATI_CAL_MACHINE_KALINDI_ISA, "Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, gfx700, 7,
     0, 0},
    /* CAL_TARGET_HAINAN */
    {ED_ATI_CAL_MACHINE_HAINAN_ISA, "Hainan", "hainan", 4, 16, 1, 256, 64 * Ki, 32, gfx602, 6, 0,
     2},
    /* CAL_TARGET_HAWAII */
    {ED_ATI_CAL_MACHINE_HAWAII_ISA, "Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, gfx701, 7, 0,
     1},
    /* CAL_TARGET_ICELAND */
    {ED_ATI_CAL_MACHINE_ICELAND_ISA, "Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, gfx802, 8,
     0, 2},
    /* CAL_TARGET_TONGA */
    {ED_ATI_CAL_MACHINE_TONGA_ISA, "Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, gfx802, 8, 0, 2},
    /* CAL_TARGET_MULLINS */
    {ED_ATI_CAL_MACHINE_GODAVARI_ISA, "Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, gfx705, 7,
     0, 5},
    /* CAL_TARGET_FIJI */
    {ED_ATI_CAL_MACHINE_FIJI_ISA, "Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, gfx803, 8, 0, 3},
    /* CAL_TARGET_CARRIZO */
    {ED_ATI_CAL_MACHINE_CARRIZO_ISA, "Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, gfx801, 8,
     0, 1},
    /* CAL_TARGET_ELLESMERE */
    {ED_ATI_CAL_MACHINE_ELLESMERE_ISA, "Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32,
     gfx803, 8, 0, 3},
    /* CAL_TARGET_BAFFIN */
    {ED_ATI_CAL_MACHINE_BAFFIN_ISA, "Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, gfx803, 8, 0,
     3},
    /* CAL_TARGET_GREENLAND */
    {ED_ATI_CAL_MACHINE_GREENLAND_ISA, IF(IS_BRAHMA, "", "gfx900"), IF(IS_BRAHMA, "", "gfx900"), 4,
     16, 1, 256, 64 * Ki, 32, gfx900, 9, 0, 0},
    /* CAL_TARGET_STONEY */
    {ED_ATI_CAL_MACHINE_STONEY_ISA, "Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, gfx810, 8, 1,
     0},
    /* CAL_TARGET_LEXA */
    {ED_ATI_CAL_MACHINE_LEXA_ISA, "gfx803", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, gfx803, 8, 0, 3},
    /* CAL_TARGET_RAVEN */
    {ED_ATI_CAL_MACHINE_RAVEN_ISA, IF(IS_BRAHMA, "", "gfx902"), IF(IS_BRAHMA, "", "gfx902"), 4, 16,
     1, 256, 64 * Ki, 32, gfx902, 9, 0, 2},
    /* CAL_TARGET_RAVEN2 */
    {ED_ATI_CAL_MACHINE_RAVEN2_ISA, IF(IS_BRAHMA, "", "gfx909"), IF(IS_BRAHMA, "", "gfx902"), 4,
     16, 1, 256, 64 * Ki, 32, gfx909, 9, 0, 9},
    /* CAL_TARGET_RENOIR */
    {ED_ATI_CAL_MACHINE_RENOIR_ISA, IF(IS_BRAHMA, "", "gfx90c"), IF(IS_BRAHMA, "", "gfx90c"), 4,
     16, 1, 256, 64 * Ki, 32, gfx90c, 9, 0, 12},
    /* CAL_TARGET_POLARIS22 */
    {ED_ATI_CAL_MACHINE_POLARIS22_ISA, IF(IS_BRAHMA, "", "gfx803"), IF(IS_BRAHMA, "", "gfx803"), 4,
     16, 1, 256, 64 * Ki, 32, gfx803, 8, 0, 3},
    /* CAL_TARGET_VEGA12 */
    {ED_ATI_CAL_MACHINE_VEGA12_ISA, IF(IS_BRAHMA, "", "gfx904"), IF(IS_BRAHMA, "", "gfx904"), 4,
     16, 1, 256, 64 * Ki, 32, gfx904, 9, 0, 4},
    /* CAL_TARGET_VEGA20 */
    {ED_ATI_CAL_MACHINE_VEGA20_ISA, IF(IS_BRAHMA, "", "gfx906"), IF(IS_BRAHMA, "", "gfx906"), 4,
     16, 1, 256, 64 * Ki, 32, gfx906, 9, 0, 6},
};
|
||||
|
||||
// FIXME: These need updating to new Target ID format. Or is all this code now
|
||||
// obsolete and should be deleted? How are XNACK and SRAMECC settings supported?
|
||||
// Full HSA ISA names, one per gfx target. Targets listed with an explicit
// ":xnack+" / ":xnack-" suffix carry that XNACK setting as part of the name.

// gfx6
static constexpr const char* Gfx600{"amdgcn-amd-amdhsa--gfx600"};
static constexpr const char* Gfx601{"amdgcn-amd-amdhsa--gfx601"};
static constexpr const char* Gfx602{"amdgcn-amd-amdhsa--gfx602"};

// gfx7
static constexpr const char* Gfx700{"amdgcn-amd-amdhsa--gfx700"};
static constexpr const char* Gfx701{"amdgcn-amd-amdhsa--gfx701"};
static constexpr const char* Gfx702{"amdgcn-amd-amdhsa--gfx702"};
static constexpr const char* Gfx704{"amdgcn-amd-amdhsa--gfx704"};
static constexpr const char* Gfx705{"amdgcn-amd-amdhsa--gfx705"};

// gfx8
static constexpr const char* Gfx801{"amdgcn-amd-amdhsa--gfx801:xnack+"};
static constexpr const char* Gfx802{"amdgcn-amd-amdhsa--gfx802"};
static constexpr const char* Gfx803{"amdgcn-amd-amdhsa--gfx803"};
static constexpr const char* Gfx810{"amdgcn-amd-amdhsa--gfx810:xnack+"};

// gfx9
static constexpr const char* Gfx900{"amdgcn-amd-amdhsa--gfx900:xnack-"};
static constexpr const char* Gfx902{"amdgcn-amd-amdhsa--gfx902:xnack+"};
static constexpr const char* Gfx904{"amdgcn-amd-amdhsa--gfx904:xnack-"};
static constexpr const char* Gfx906{"amdgcn-amd-amdhsa--gfx906:xnack-"};
static constexpr const char* Gfx909{"amdgcn-amd-amdhsa--gfx909:xnack+"};
static constexpr const char* Gfx90c{"amdgcn-amd-amdhsa--gfx90c:xnack+"};
|
||||
|
||||
// Supported OpenCL versions, declared oldest first so that enumerators can be
// compared with relational operators.
enum OclVersion {
  OpenCL10,
  OpenCL11,
  OpenCL12,
  OpenCL20,
  OpenCL21
};
|
||||
|
||||
|
||||
@@ -58,6 +58,93 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace {
|
||||
|
||||
//! Define the mapping from CAL asic enumeration values to the
|
||||
//! compiler gfx major/minor/stepping version.
|
||||
struct CalDevice {
|
||||
uint32_t gfxipMajor_; //!< The core engine GFXIP Major version
|
||||
uint32_t gfxipMinor_; //!< The core engine GFXIP Minor version
|
||||
uint32_t gfxipStepping_; //!< The core engine GFXIP Stepping version
|
||||
CALMachineType calMachine_; //!< CAL machine type
|
||||
const char* calName_; //!< CAL device name
|
||||
CALtarget calTarget_; //!< CAL target
|
||||
bool preferPal_; //!< Prefer to use PAL if GPU_ENABLE_PAL=2
|
||||
bool nullUseDouble_; //!< Use double precision for a NullDevice
|
||||
bool nullUseOpenCL200_; //!< Use OpenCL 2.0 for a NullDevice
|
||||
};
|
||||
|
||||
//! CAL devices supported by the GSL path. Lookups scan this table from the top
//! and take the first matching row, so row order matters where several rows
//! share the same gfx version.
//!
//! Columns: gfx major, gfx minor, gfx stepping, GSL machine, CAL name,
//!          CAL target, prefer PAL, NullDevice double, NullDevice OCL200
static constexpr CalDevice supportedCalDevices[] = {
    {6, 0, 0, ED_ATI_CAL_MACHINE_TAHITI_ISA, "Tahiti", CAL_TARGET_TAHITI, false, true, false},
    {6, 0, 1, ED_ATI_CAL_MACHINE_PITCAIRN_ISA, "Pitcairn", CAL_TARGET_PITCAIRN, false, true,
     false},
    {6, 0, 1, ED_ATI_CAL_MACHINE_CAPEVERDE_ISA, "Capeverde", CAL_TARGET_CAPEVERDE, false, true,
     false},
    {6, 0, 2, ED_ATI_CAL_MACHINE_OLAND_ISA, "Oland", CAL_TARGET_OLAND, false, true, false},
    {6, 0, 2, ED_ATI_CAL_MACHINE_HAINAN_ISA, "Hainan", CAL_TARGET_HAINAN, false, true, false},
    {7, 0, 0, ED_ATI_CAL_MACHINE_KALINDI_ISA, "Kalindi", CAL_TARGET_KALINDI, false, true, true},
    {7, 0, 0, ED_ATI_CAL_MACHINE_SPECTRE_ISA, "Spectre", CAL_TARGET_SPECTRE, false, true, true},
    {7, 0, 0, ED_ATI_CAL_MACHINE_SPOOKY_ISA, "Spooky", CAL_TARGET_SPOOKY, false, true, true},
    // Also Hawaiipro (generated code is for Hawaiipro)
    {7, 0, 2, ED_ATI_CAL_MACHINE_HAWAII_ISA, "Hawaii", CAL_TARGET_HAWAII, false, true, true},
    {7, 0, 4, ED_ATI_CAL_MACHINE_BONAIRE_ISA, "Bonaire", CAL_TARGET_BONAIRE, false, true, true},
    // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support
    // Mullins.
    {7, 0, 5, ED_ATI_CAL_MACHINE_GODAVARI_ISA, "Mullins", CAL_TARGET_GODAVARI, false, true, true},
    // Also Bristol Ridge
    {8, 0, 1, ED_ATI_CAL_MACHINE_CARRIZO_ISA, "Carrizo", CAL_TARGET_CARRIZO, false, true, true},
    {8, 0, 2, ED_ATI_CAL_MACHINE_ICELAND_ISA, "Iceland", CAL_TARGET_ICELAND, false, true, true},
    // Also Tongapro (generated code is for Tonga)
    {8, 0, 2, ED_ATI_CAL_MACHINE_TONGA_ISA, "Tonga", CAL_TARGET_TONGA, false, true, true},
    {8, 0, 3, ED_ATI_CAL_MACHINE_FIJI_ISA, "Fiji", CAL_TARGET_FIJI, false, true, true},
    // Polaris10
    {8, 0, 3, ED_ATI_CAL_MACHINE_ELLESMERE_ISA, "Ellesmere", CAL_TARGET_ELLESMERE, false, true,
     true},
    // Polaris11
    {8, 0, 3, ED_ATI_CAL_MACHINE_BAFFIN_ISA, "Baffin", CAL_TARGET_BAFFIN, false, true, true},
    // Polaris12
    {8, 0, 3, ED_ATI_CAL_MACHINE_LEXA_ISA, "gfx803", CAL_TARGET_LEXA, false, true, true},
#if !defined(BRAHMA)
    {8, 0, 3, ED_ATI_CAL_MACHINE_POLARIS22_ISA, "gfx803", CAL_TARGET_POLARIS22, false, true,
     true},
#endif
    {8, 1, 0, ED_ATI_CAL_MACHINE_STONEY_ISA, "Stoney", CAL_TARGET_STONEY, false, true, true},
#if !defined(BRAHMA)
    // Vega10
    {9, 0, 0, ED_ATI_CAL_MACHINE_GREENLAND_ISA, "gfx900", CAL_TARGET_GREENLAND, true, true, true},
    {9, 0, 2, ED_ATI_CAL_MACHINE_RAVEN_ISA, "gfx902", CAL_TARGET_RAVEN, true, true, true},
    {9, 0, 4, ED_ATI_CAL_MACHINE_VEGA12_ISA, "gfx904", CAL_TARGET_VEGA12, true, true, true},
    {9, 0, 6, ED_ATI_CAL_MACHINE_VEGA20_ISA, "gfx906", CAL_TARGET_VEGA20, true, true, true},
    {9, 0, 9, ED_ATI_CAL_MACHINE_RAVEN2_ISA, "gfx909", CAL_TARGET_RAVEN2, true, true, true},
    {9, 0, 12, ED_ATI_CAL_MACHINE_RENOIR_ISA, "gfx90c", CAL_TARGET_RENOIR, true, true, true},
#endif
};
|
||||
static_assert(CAL_TARGET_LAST == CAL_TARGET_VEGA20, "Add new CAL targets to mapping");
|
||||
|
||||
//! Resolve a CAL target to its ISA and per-device CAL settings.
//!
//! Scans supportedCalDevices for the first row whose CAL target equals
//! \p calTarget and asks amd::Isa::findIsa for the ISA with that row's gfx
//! version, mapping \p sramecc / \p xnack to Feature::Enabled when true and
//! Feature::Disabled when false.
//!
//! \return (isa, CAL machine, CAL name, preferPal, nullUseDouble,
//!         nullUseOpenCL200). The isa element is whatever amd::Isa::findIsa
//!         returned. If no row matches, returns
//!         (nullptr, CALMachineType(0), nullptr, false, false, false).
static std::tuple<const amd::Isa*, CALMachineType, const char*, bool, bool, bool> findIsa(
    CALtarget calTarget, bool sramecc, bool xnack) {
  for (const CalDevice& entry : supportedCalDevices) {
    if (entry.calTarget_ != calTarget) {
      continue;
    }

    const amd::Isa::Feature srameccFeature =
        sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled;
    const amd::Isa::Feature xnackFeature =
        xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled;

    const amd::Isa* isa = amd::Isa::findIsa(entry.gfxipMajor_, entry.gfxipMinor_,
                                            entry.gfxipStepping_, srameccFeature, xnackFeature);

    return std::make_tuple(isa, entry.calMachine_, entry.calName_, entry.preferPal_,
                           entry.nullUseDouble_, entry.nullUseOpenCL200_);
  }

  // No table row describes this CAL target.
  return std::make_tuple(nullptr, static_cast<CALMachineType>(0), nullptr, false, false, false);
}
|
||||
|
||||
static std::tuple<bool, CALMachineType, CALtarget, const char*, bool, bool, bool> findCal(
|
||||
uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxipStepping) {
|
||||
auto calDeviceIter = std::find_if(std::begin(supportedCalDevices), std::end(supportedCalDevices),
|
||||
[&](const CalDevice& calDevice) {
|
||||
return calDevice.gfxipMajor_ == gfxipMajor &&
|
||||
calDevice.gfxipMinor_ == gfxipMinor &&
|
||||
calDevice.gfxipStepping_ == gfxipStepping;
|
||||
});
|
||||
if (calDeviceIter == std::end(supportedCalDevices)) {
|
||||
return std::make_tuple(false, static_cast<CALMachineType>(0), static_cast<CALtarget>(0),
|
||||
nullptr, false, false, false);
|
||||
}
|
||||
return std::make_tuple(true, calDeviceIter->calMachine_, calDeviceIter->calTarget_,
|
||||
calDeviceIter->calName_, calDeviceIter->preferPal_,
|
||||
calDeviceIter->nullUseDouble_, calDeviceIter->nullUseOpenCL200_);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool DeviceLoad() {
|
||||
bool ret = false;
|
||||
@@ -79,126 +166,102 @@ aclCompiler* NullDevice::hsaCompiler_;
|
||||
AppProfile Device::appProfile_;
|
||||
|
||||
NullDevice::NullDevice()
|
||||
: amd::Device(), calTarget_(static_cast<CALtarget>(0)), hwInfo_(NULL) {}
|
||||
: amd::Device(),
|
||||
calTarget_(static_cast<CALtarget>(0)),
|
||||
calMachine_(static_cast<CALMachineType>(0)),
|
||||
calName_(nullptr) {}
|
||||
|
||||
bool NullDevice::init() {
|
||||
std::vector<Device*> devices;
|
||||
|
||||
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
|
||||
// Loop through all supported devices and create each of them
|
||||
for (uint id = CAL_TARGET_TAHITI; id <= CAL_TARGET_LAST; ++id) {
|
||||
bool foundActive = false;
|
||||
bool foundDuplicate = false;
|
||||
|
||||
if (gpu::DeviceInfo[id].targetName_[0] == '\0') {
|
||||
// Create offline devices for all ISAs not already associated with an online
|
||||
// device. This allows code objects to be compiled for all supported ISAs.
|
||||
std::vector<Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) {
|
||||
if (!isa->runtimeGslSupported()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Loop through all active devices and see if we match one
|
||||
for (uint i = 0; i < devices.size(); ++i) {
|
||||
if (static_cast<NullDevice*>(devices[i])->calTarget() == static_cast<CALtarget>(id)) {
|
||||
foundActive = true;
|
||||
bool isOnline = false;
|
||||
// Check if the particular device is online
|
||||
for (size_t i = 0; i < devices.size(); i++) {
|
||||
if (&(devices[i]->isa()) == isa) {
|
||||
isOnline = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't report an offline device if it's active
|
||||
if (foundActive) {
|
||||
if (isOnline) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Loop through all previous devices in the DeviceInfo list and compare them with the
|
||||
// current entry to see if the current entry was listed previously in the DeviceInfo,
|
||||
// if so, then it means the current entry already has been added in the offline device list
|
||||
for (uint j = 0; j < id; ++j) {
|
||||
if (gpu::DeviceInfo[j].targetName_[0] == '\0') {
|
||||
continue;
|
||||
}
|
||||
if (strcmp(gpu::DeviceInfo[j].targetName_, gpu::DeviceInfo[id].targetName_) == 0) {
|
||||
foundDuplicate = true;
|
||||
break;
|
||||
}
|
||||
bool found;
|
||||
CALMachineType calMachine;
|
||||
CALtarget calTarget;
|
||||
const char* calName;
|
||||
bool preferPal;
|
||||
bool nullUseDouble;
|
||||
bool nullUseOpenCL200;
|
||||
std::tie(found, calMachine, calTarget, calName, preferPal, nullUseDouble, nullUseOpenCL200) =
|
||||
findCal(isa->versionMajor(), isa->versionMinor(), isa->versionStepping());
|
||||
if (!found) {
|
||||
// GSL does not support this asic.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Don't report an offline device twice
|
||||
if (foundDuplicate) {
|
||||
continue;
|
||||
std::unique_ptr<NullDevice> nullDevice(new NullDevice());
|
||||
if (!nullDevice) {
|
||||
LogPrintfError("Error allocating new instance of offline CAL Device %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
NullDevice* dev = new NullDevice();
|
||||
if (NULL != dev) {
|
||||
if (!dev->create(static_cast<CALtarget>(id))) {
|
||||
delete dev;
|
||||
} else {
|
||||
dev->registerDevice();
|
||||
}
|
||||
if (!nullDevice->create(calName, *isa, calTarget, preferPal, nullUseDouble, nullUseOpenCL200)) {
|
||||
// Skip over unsupported devices
|
||||
LogPrintfError("Skipping creating new instance of offline CAL Device %s", isa->targetId());
|
||||
continue;
|
||||
}
|
||||
nullDevice.release()->registerDevice();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NullDevice::create(CALtarget target) {
|
||||
CALdeviceattribs calAttr = {0};
|
||||
gslMemInfo memInfo = {0};
|
||||
|
||||
online_ = false;
|
||||
|
||||
calTarget_ = calAttr.target = target;
|
||||
hwInfo_ = &DeviceInfo[calTarget_];
|
||||
|
||||
assert((target >= CAL_TARGET_TAHITI) && (target != CAL_TARGET_SCRAPPER) &&
|
||||
(target != CAL_TARGET_DEVASTATOR));
|
||||
|
||||
if ((GPU_ENABLE_PAL == 2) && usePal()) {
|
||||
bool NullDevice::create(const char* calName, const amd::Isa& isa, CALtarget target,
|
||||
bool preferPal, bool doublePrecision, bool openCL200) {
|
||||
if (!isa.runtimeGslSupported()) {
|
||||
LogPrintfError("Offline CAL device %s is not supported", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
if ((GPU_ENABLE_PAL == 2) && isa.runtimePalSupported() && preferPal) {
|
||||
LogPrintfError("Skipping as GPU_ENABLE_PAL=2 indicating to use PAL for offline CAL device %s",
|
||||
isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
online_ = false;
|
||||
calTarget_ = target;
|
||||
calName_ = calName;
|
||||
|
||||
// sets up vaCacheAccess_ and vaCacheMap_.
|
||||
if (!amd::Device::create(isa)) {
|
||||
LogPrintfError("Unable to setup offline device for CAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
CALdeviceattribs calAttr = {0};
|
||||
calAttr.target = calTarget();
|
||||
// Force double if it could be supported
|
||||
switch (target) {
|
||||
case CAL_TARGET_PITCAIRN:
|
||||
case CAL_TARGET_CAPEVERDE:
|
||||
case CAL_TARGET_TAHITI:
|
||||
case CAL_TARGET_OLAND:
|
||||
case CAL_TARGET_HAINAN:
|
||||
calAttr.doublePrecision = CAL_TRUE;
|
||||
break;
|
||||
case CAL_TARGET_BONAIRE:
|
||||
case CAL_TARGET_SPECTRE:
|
||||
case CAL_TARGET_SPOOKY:
|
||||
case CAL_TARGET_KALINDI:
|
||||
case CAL_TARGET_HAWAII:
|
||||
case CAL_TARGET_ICELAND:
|
||||
case CAL_TARGET_TONGA:
|
||||
case CAL_TARGET_FIJI:
|
||||
case CAL_TARGET_GODAVARI:
|
||||
case CAL_TARGET_CARRIZO:
|
||||
case CAL_TARGET_ELLESMERE:
|
||||
case CAL_TARGET_BAFFIN:
|
||||
case CAL_TARGET_GREENLAND:
|
||||
case CAL_TARGET_STONEY:
|
||||
case CAL_TARGET_LEXA:
|
||||
case CAL_TARGET_RAVEN:
|
||||
case CAL_TARGET_RAVEN2:
|
||||
case CAL_TARGET_RENOIR:
|
||||
case CAL_TARGET_POLARIS22:
|
||||
case CAL_TARGET_VEGA12:
|
||||
case CAL_TARGET_VEGA20:
|
||||
calAttr.doublePrecision = CAL_TRUE;
|
||||
calAttr.isOpenCL200Device = CAL_TRUE;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
if (doublePrecision) {
|
||||
calAttr.doublePrecision = CAL_TRUE;
|
||||
}
|
||||
// Use OpenCL 2.0 if supported
|
||||
if (openCL200) {
|
||||
calAttr.isOpenCL200Device = CAL_TRUE;
|
||||
}
|
||||
|
||||
settings_ = new gpu::Settings();
|
||||
gpu::Settings* gpuSettings = reinterpret_cast<gpu::Settings*>(settings_);
|
||||
// Create setting for the offline target
|
||||
if ((gpuSettings == NULL) || !gpuSettings->create(calAttr)) {
|
||||
LogPrintfError("GPU settings failed for offline device for CAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
gslMemInfo memInfo = {0};
|
||||
// Report 512MB for all offline devices
|
||||
memInfo.cardMemAvailableBytes = 512 * Mi;
|
||||
memInfo.cardLargestFreeBlockBytes = 512 * Mi;
|
||||
@@ -243,7 +306,7 @@ bool NullDevice::create(CALtarget target) {
|
||||
acl_error error;
|
||||
hsaCompiler_ = aclCompilerInit(&opts, &error);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Error initializing the compiler");
|
||||
LogPrintfError("Error initializing the compiler for offline CAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -494,14 +557,11 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
|
||||
|
||||
info_.platform_ = AMD_PLATFORM;
|
||||
|
||||
if ((calTarget() == CAL_TARGET_CARRIZO) && ASICREV_IS_CARRIZO_BRISTOL(calAttr.asicRevision)) {
|
||||
const static char* bristol = "Bristol Ridge";
|
||||
::strncpy(info_.name_, bristol, sizeof(info_.name_) - 1);
|
||||
} else {
|
||||
::strncpy(info_.name_, hwInfo()->targetName_, sizeof(info_.name_) - 1);
|
||||
}
|
||||
::strncpy(info_.name_, calName_, sizeof(info_.name_) - 1);
|
||||
::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1);
|
||||
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
|
||||
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING);
|
||||
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (GSL)%s",
|
||||
isOnline() ? "" : " [Offline]");
|
||||
|
||||
info_.profile_ = "FULL_PROFILE";
|
||||
if (settings().oclVersion_ >= OpenCL20) {
|
||||
@@ -584,19 +644,19 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
|
||||
info_.deviceTopology_.pcie.device = (calAttr.pciTopologyInformation & (0x1F << 3)) >> 3;
|
||||
info_.deviceTopology_.pcie.function = (calAttr.pciTopologyInformation & 0x07);
|
||||
|
||||
info_.simdPerCU_ = hwInfo()->simdPerCU_;
|
||||
info_.simdPerCU_ = isa().simdPerCU();
|
||||
info_.cuPerShaderArray_ = calAttr.numberOfCUsperShaderArray;
|
||||
info_.simdWidth_ = hwInfo()->simdWidth_;
|
||||
info_.simdInstructionWidth_ = hwInfo()->simdInstructionWidth_;
|
||||
info_.simdWidth_ = isa().simdWidth();
|
||||
info_.simdInstructionWidth_ = isa().simdInstructionWidth();
|
||||
info_.wavefrontWidth_ = calAttr.wavefrontSize;
|
||||
|
||||
info_.globalMemChannelBanks_ = calAttr.numMemBanks;
|
||||
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
|
||||
info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_;
|
||||
info_.localMemBanks_ = hwInfo()->localMemBanks_;
|
||||
info_.gfxipMajor_ = hwInfo()->gfxipMajor_;
|
||||
info_.gfxipMinor_ = hwInfo()->gfxipMinor_;
|
||||
info_.gfxipStepping_ = hwInfo()->gfxipStepping_;
|
||||
info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth();
|
||||
info_.localMemSizePerCU_ = isa().localMemSizePerCU();
|
||||
info_.localMemBanks_ = isa().localMemBanks();
|
||||
info_.gfxipMajor_ = isa().versionMajor();
|
||||
info_.gfxipMinor_ = isa().versionMinor();
|
||||
info_.gfxipStepping_ = isa().versionStepping();
|
||||
|
||||
info_.numAsyncQueues_ = numComputeRings;
|
||||
|
||||
@@ -607,7 +667,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf
|
||||
|
||||
info_.pcieDeviceId_ = calAttr.pcieDeviceID;
|
||||
info_.pcieRevisionId_ = calAttr.pcieRevisionID;
|
||||
info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * hwInfo()->simdPerCU_ * 10;
|
||||
info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * isa().simdPerCU() * 10;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -849,10 +909,6 @@ Device::~Device() {
|
||||
extern const char* SchedulerSourceCode;
|
||||
|
||||
bool Device::create(CALuint ordinal, CALuint numOfDevices) {
|
||||
if (!amd::Device::create()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
appProfile_.init();
|
||||
|
||||
bool smallMemSystem = false;
|
||||
@@ -882,19 +938,40 @@ bool Device::create(CALuint ordinal, CALuint numOfDevices) {
|
||||
|
||||
// Update CAL target
|
||||
calTarget_ = getAttribs().target;
|
||||
hwInfo_ = &DeviceInfo[calTarget_];
|
||||
|
||||
if ((GPU_ENABLE_PAL == 2) && usePal()) {
|
||||
// XNACK should be set for PageMigration or IOMMUv2 support.
|
||||
bool isXNACKSupported = false;
|
||||
|
||||
// SRAMECC should be set for ecc protected GPRs.
|
||||
bool isSRAMECCSupported = false;
|
||||
|
||||
const amd::Isa* isa;
|
||||
bool preferPal;
|
||||
std::tie(isa, calMachine_, calName_, preferPal, std::ignore, std::ignore) =
|
||||
findIsa(calTarget(), isSRAMECCSupported, isXNACKSupported);
|
||||
|
||||
if ((calTarget() == CAL_TARGET_CARRIZO) && ASICREV_IS_CARRIZO_BRISTOL(getAttribs().asicRevision)) {
|
||||
calName_ = "Bristol Ridge";
|
||||
}
|
||||
|
||||
if (!isa) {
|
||||
LogPrintfError("Unsupported CAL device #%d", calTarget());
|
||||
return false;
|
||||
}
|
||||
if (!isa->runtimeGslSupported()) {
|
||||
LogPrintfError("Unsupported CAL device with ISA %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
if ((GPU_ENABLE_PAL == 2) && isa->runtimePalSupported() && preferPal) {
|
||||
LogPrintfError("Skipping as GPU_ENABLE_PAL=2 indicating to use PAL for CAL device %s",
|
||||
isa->targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(BRAHMA)
|
||||
if (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN ||
|
||||
calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_POLARIS22 ||
|
||||
calTarget_ == CAL_TARGET_RENOIR) {
|
||||
if (!amd::Device::create(*isa)) {
|
||||
LogPrintfError("Unable to setup device for CAL device %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Creates device settings
|
||||
settings_ = new gpu::Settings();
|
||||
|
||||
@@ -67,7 +67,12 @@ class NullDevice : public amd::Device {
|
||||
NullDevice();
|
||||
|
||||
//! Creates an offline device with the specified target
|
||||
bool create(CALtarget target //!< GPU device identifier
|
||||
bool create(const char* calName, //!< GPU device name
|
||||
const amd::Isa& isa, //!< GPU device isa
|
||||
CALtarget target, //!< GPU device identifier
|
||||
bool preferPal, //!< GPU prefer to use PAL if GPU_ENABLE_PAL=2
|
||||
bool doublePrecision, //!< Use double precision
|
||||
bool openCL200 //!< Use OpenCL 2.0
|
||||
);
|
||||
|
||||
//! Instantiate a new virtual device
|
||||
@@ -115,7 +120,7 @@ class NullDevice : public amd::Device {
|
||||
|
||||
CALtarget calTarget() const { return calTarget_; }
|
||||
|
||||
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
|
||||
CALMachineType calMachine() const { return calMachine_; }
|
||||
|
||||
//! Empty implementation on Null device
|
||||
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
|
||||
@@ -131,12 +136,6 @@ class NullDevice : public amd::Device {
|
||||
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
|
||||
|
||||
protected:
|
||||
bool usePal() const {
|
||||
return (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN ||
|
||||
calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_RENOIR ||
|
||||
calTarget_ >= CAL_TARGET_VEGA12);
|
||||
}
|
||||
|
||||
//! Answer the question: "Should HSAIL Program be created?",
|
||||
//! based on the given options.
|
||||
bool isHsailProgram(amd::option::Options* options = NULL);
|
||||
@@ -150,7 +149,8 @@ class NullDevice : public amd::Device {
|
||||
);
|
||||
|
||||
CALtarget calTarget_; //!< GPU device identifier
|
||||
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
|
||||
CALMachineType calMachine_; //!< GPU machine identifier
|
||||
const char* calName_; //!< GPU device name
|
||||
};
|
||||
|
||||
//! Forward declarations
|
||||
|
||||
@@ -643,11 +643,8 @@ bool NullKernel::create(const std::string& code, const std::string& metadata,
|
||||
|
||||
if ((binaryCode == NULL) && (binarySize == 0) && !code.empty()) {
|
||||
acl_error err;
|
||||
std::string arch = "amdil";
|
||||
if (nullDev().settings().use64BitPtr_) {
|
||||
arch += "64";
|
||||
}
|
||||
aclTargetInfo info = aclGetTargetInfo(arch.c_str(), nullDev().hwInfo()->targetName_, &err);
|
||||
aclTargetInfo info = aclGetTargetInfo(nullDev().settings().use64BitPtr_ ? "amdil64" : "amdil",
|
||||
nullDev().isa().amdIlName(), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
return false;
|
||||
@@ -1007,7 +1004,7 @@ bool NullKernel::createMultiBinary(uint* imageSize, void** image, const void* is
|
||||
constBuffers[constBufferCount++].index = nullProg().glbCb()[i];
|
||||
}
|
||||
|
||||
encoding.machine = nullDev().hwInfo()->machine_;
|
||||
encoding.machine = nullDev().calMachine();
|
||||
encoding.type = ED_ATI_CAL_TYPE_COMPUTE;
|
||||
encoding.inputCount = inputResourceCount;
|
||||
encoding.outputCount = outputCount;
|
||||
|
||||
@@ -41,14 +41,10 @@
|
||||
|
||||
namespace gpu {
|
||||
|
||||
const aclTargetInfo& NullProgram::info(const char* str) {
|
||||
const aclTargetInfo& NullProgram::info() {
|
||||
acl_error err;
|
||||
std::string arch = "amdil";
|
||||
if (dev().settings().use64BitPtr_) {
|
||||
arch += "64";
|
||||
}
|
||||
info_ = aclGetTargetInfo(arch.c_str(),
|
||||
(str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str), &err);
|
||||
info_ = aclGetTargetInfo(gpuNullDevice().settings().use64BitPtr_ ? "amdil64" : "amdil",
|
||||
device().isa().amdIlName(), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
}
|
||||
@@ -1507,7 +1503,6 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
|
||||
executable_(NULL),
|
||||
loaderContext_(this) {
|
||||
assert(device.isOnline());
|
||||
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
@@ -1520,7 +1515,6 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
|
||||
loaderContext_(this) {
|
||||
assert(!device.isOnline());
|
||||
isNull_ = true;
|
||||
machineTarget_ = gpuNullDevice().hwInfo()->targetName_;
|
||||
|
||||
// Cannot load onto a NullDevice.
|
||||
loader_ = nullptr;
|
||||
@@ -1769,14 +1763,10 @@ void HSAILProgram::fillResListWithKernels(std::vector<const Memory*>& memList) c
|
||||
}
|
||||
}
|
||||
|
||||
const aclTargetInfo& HSAILProgram::info(const char* str) {
|
||||
const aclTargetInfo& HSAILProgram::info() {
|
||||
acl_error err;
|
||||
std::string arch = "hsail";
|
||||
if (dev().settings().use64BitPtr_) {
|
||||
arch = "hsail64";
|
||||
}
|
||||
info_ = aclGetTargetInfo(arch.c_str(),
|
||||
(str && str[0] == '\0' ? gpuNullDevice().hwInfo()->targetName_ : str), &err);
|
||||
info_ = aclGetTargetInfo(gpuNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail",
|
||||
device().isa().hsailName(), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
}
|
||||
@@ -1802,107 +1792,23 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) {
|
||||
}
|
||||
|
||||
hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char* name) {
|
||||
hsa_isa_t isa = {0};
|
||||
if (!strcmp(Gfx600, name)) {
|
||||
isa.handle = gfx600;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx601, name)) {
|
||||
isa.handle = gfx601;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx602, name)) {
|
||||
isa.handle = gfx602;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx700, name)) {
|
||||
isa.handle = gfx700;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx701, name)) {
|
||||
isa.handle = gfx701;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx702, name)) {
|
||||
isa.handle = gfx702;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx705, name)) {
|
||||
isa.handle = gfx702;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx801, name)) {
|
||||
isa.handle = gfx801;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx802, name)) {
|
||||
isa.handle = gfx802;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx803, name)) {
|
||||
isa.handle = gfx803;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx810, name)) {
|
||||
isa.handle = gfx810;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx900, name)) {
|
||||
isa.handle = gfx900;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx902, name)) {
|
||||
isa.handle = gfx902;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx904, name)) {
|
||||
isa.handle = gfx904;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx906, name)) {
|
||||
isa.handle = gfx906;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx909, name)) {
|
||||
isa.handle = gfx909;
|
||||
return isa;
|
||||
}
|
||||
if (!strcmp(Gfx90c, name)) {
|
||||
isa.handle = gfx90c;
|
||||
return isa;
|
||||
}
|
||||
|
||||
return isa;
|
||||
const amd::Isa* isa_p = amd::Isa::findIsa(name);
|
||||
return {amd::Isa::toHandle(isa_p)};
|
||||
}
|
||||
|
||||
bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
|
||||
uint dev_gfxip = program_->gpuNullDevice().hwInfo()->gfxipVersion_;
|
||||
uint isa_gfxip = isa.handle;
|
||||
switch (dev_gfxip) {
|
||||
case gfx700:
|
||||
case gfx704:
|
||||
case gfx801:
|
||||
case gfx802:
|
||||
case gfx803:
|
||||
case gfx810:
|
||||
case gfx900:
|
||||
case gfx902:
|
||||
case gfx904:
|
||||
case gfx906:
|
||||
case gfx909:
|
||||
case gfx90c:
|
||||
return isa_gfxip == dev_gfxip;
|
||||
case gfx701:
|
||||
case gfx702:
|
||||
// gfx701 only differs from gfx702 by faster fp operations and can be loaded on either device.
|
||||
return isa_gfxip == gfx701|| isa_gfxip == gfx702;
|
||||
case gfx600:
|
||||
case gfx601:
|
||||
case gfx602:
|
||||
default:
|
||||
LogPrintfError("Unsupported gfxip version gfx%d", dev_gfxip);
|
||||
// The HSA loader uses a handle value of 0 to indicate the ISA is invalid.
|
||||
const amd::Isa* code_object_isa_p = amd::Isa::fromHandle(isa.handle);
|
||||
if (!code_object_isa_p || !code_object_isa_p->runtimeGslSupported()) {
|
||||
// The ISA is either not supported because ORCAHSALoaderContext::IsaFromName
|
||||
// could not find it, or the GSL runtime does not support it.
|
||||
return false;
|
||||
}
|
||||
if (program_->isNull()) {
|
||||
// Cannot load code onto offline devices.
|
||||
return false;
|
||||
}
|
||||
return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa());
|
||||
}
|
||||
|
||||
void* ORCAHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent,
|
||||
|
||||
@@ -272,7 +272,7 @@ class NullProgram : public device::Program {
|
||||
std::vector<device::PrintfInfo> printf_; //!< Format strings for GPU printf support
|
||||
std::vector<uint> glbCb_; //!< Global constant buffers
|
||||
|
||||
virtual const aclTargetInfo& info(const char* str = "");
|
||||
virtual const aclTargetInfo& info();
|
||||
|
||||
virtual bool saveBinaryAndSetType(type_t type) { return true; }
|
||||
|
||||
@@ -503,7 +503,7 @@ class HSAILProgram : public device::Program {
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options);
|
||||
|
||||
virtual const aclTargetInfo& info(const char* str = "");
|
||||
virtual const aclTargetInfo& info();
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
|
||||
@@ -122,7 +122,7 @@ bool NullKernel::siCreateHwInfo(const void* shader, AMUabiAddEncoding& encoding)
|
||||
i++;
|
||||
|
||||
newInfos[i].address = AMU_ABI_WAVEFRONT_SIZE;
|
||||
newInfos[i].value = nullDev().hwInfo()->simdWidth_ * 4; // options.WavefrontSize;
|
||||
newInfos[i].value = nullDev().isa().simdWidth() * 4; // options.WavefrontSize;
|
||||
i++;
|
||||
|
||||
newInfos[i].address = AMU_ABI_LDS_SIZE_AVAIL;
|
||||
|
||||
@@ -132,76 +132,6 @@ static constexpr uint HsaSamplerObjectAlignment = 16;
|
||||
//! HSA path specific defines for images
|
||||
static constexpr uint DeviceQueueMaskSize = 32;
|
||||
|
||||
//! Static per-ASIC hardware description used by the PAL device tables
//! (UnknownDevice, DeviceInfo, Gfx9PlusSubDeviceInfo).
//! The GFXIP version fields are decoded by fillDeviceInfo() as
//! major = v / 100, minor = v / 10 % 10, stepping = v % 10.
struct AMDDeviceInfo {
|
||||
const char* machineTarget_; //!< Machine target; an empty string makes NullDevice::init() skip the row
|
||||
const char* machineTargetLC_; //!< Machine target for LC (reported as the device name when useLightning_ is set)
|
||||
uint simdWidth_; //!< Number of workitems processed per SIMD
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
uint gfxipVersionLC_; //!< The core engine GFXIP version for LC
|
||||
uint gfxipVersion_; //!< The core engine GFXIP version
|
||||
bool xnackEnabled_; //!< Enable XNACK feature
|
||||
Pal::AsicRevision asicRevision_; //!< PAL AsicRevision
|
||||
bool xnackSupported_; //!< XNACK is supported
|
||||
// NOTE: the member name below is misspelled ("Sumpported"); it is left as is
|
||||
// because fillDeviceInfo() reads it under this exact name.
bool srameccSumpported_; //!< SRAMECC is supported
|
||||
};
|
||||
|
||||
static constexpr AMDDeviceInfo UnknownDevice = {"", "", 16, 256, 32, 0, 0, false};
|
||||
|
||||
//! Hardware info table for the older ASICs (GFXIP 6xx-8xx rows).
//! The row index is the numeric value of Pal::AsicRevision: NullDevice::init()
//! casts the index to Pal::AsicRevision, and Device::create() indexes the table
//! with properties().revision for revisions up to Polaris12.
//! Rows with an empty machineTarget_ are skipped when offline devices are created.
//! Each row lists only the first eight AMDDeviceInfo members; the remaining
//! members (asicRevision_, xnackSupported_, srameccSumpported_) are therefore
//! value-initialized.
static constexpr AMDDeviceInfo DeviceInfo[] = {
|
||||
/* Unknown */ UnknownDevice,
|
||||
/* Tahiti */ {"", "", 16, 256, 32, 600, 600, false},
|
||||
/* Pitcairn */ {"", "", 16, 256, 32, 600, 600, false},
|
||||
// NOTE(review): Capeverde is the only row in this group listed as 700 rather
|
||||
// than 600 -- confirm this is intentional.
/* Capeverde */ {"", "", 16, 256, 32, 700, 700, false},
|
||||
/* Oland */ {"", "", 16, 256, 32, 600, 600, false},
|
||||
/* Hainan */ {"", "", 16, 256, 32, 600, 600, false},
|
||||
|
||||
/* Bonaire */ {"Bonaire", "", 16, 256, 32, 700, 700, false},
|
||||
/* Hawaii */ {"Hawaii", "", 16, 256, 32, 701, 701, false},
|
||||
/* Hawaii */ {"", "", 16, 256, 32, 701, 701, false},
|
||||
/* Hawaii */ {"", "", 16, 256, 32, 701, 701, false},
|
||||
|
||||
/* Kalindi */ {"Kalindi", "", 16, 256, 32, 702, 702, false},
|
||||
/* Godavari */ {"Mullins", "", 16, 256, 32, 702, 702, false},
|
||||
/* Spectre */ {"Spectre", "", 16, 256, 32, 701, 701, false},
|
||||
/* Spooky */ {"Spooky", "", 16, 256, 32, 701, 701, false},
|
||||
|
||||
/* Carrizo */ {"Carrizo", "", 16, 256, 32, 801, 801, false},
|
||||
/* Bristol */ {"Bristol Ridge", "", 16, 256, 32, 801, 801, false},
|
||||
/* Stoney */ {"Stoney", "", 16, 256, 32, 810, 810, false},
|
||||
|
||||
// The rows below are the only ones in this table with a non-empty LC machine
|
||||
// target, and their LC and non-LC GFXIP versions differ.
/* Iceland */ {"Iceland", "gfx802", 16, 256, 32, 802, 800, false},
|
||||
/* Tonga */ {"Tonga", "gfx802", 16, 256, 32, 802, 800, false},
|
||||
/* Fiji */ {"Fiji", "gfx803", 16, 256, 32, 803, 804, false},
|
||||
/* Ellesmere */ {"Ellesmere", "gfx803", 16, 256, 32, 803, 804, false},
|
||||
/* Baffin */ {"Baffin", "gfx803", 16, 256, 32, 803, 804, false},
|
||||
/* Lexa */ {"gfx804", "gfx803", 16, 256, 32, 803, 804, false},
|
||||
};
|
||||
|
||||
//! Hardware info table for GFX9 and newer ASICs (compiler sub targets).
//! Unlike DeviceInfo, this table is searched rather than indexed by revision:
//! Device::create() picks the first row whose asicRevision_ and xnackEnabled_
//! both match the running device. NullDevice::init() walks the rows by index
//! and skips any row whose (machineTarget_, xnackEnabled_) pair already
//! appeared in an earlier row, so each pair yields at most one offline device.
static constexpr AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
|
||||
/* Vega10 */ {"gfx900", "gfx900", 16, 256, 32, 900, 900, false, Pal::AsicRevision::Vega10, true, false},
|
||||
/* Vega10 XNACK */ {"gfx901", "gfx900", 16, 256, 32, 900, 901, true, Pal::AsicRevision::Vega10, true, false},
|
||||
/* Vega12 */ {"gfx904", "gfx904", 16, 256, 32, 904, 904, false, Pal::AsicRevision::Vega12, true, false},
|
||||
/* Vega12 XNACK */ {"gfx905", "gfx904", 16, 256, 32, 904, 905, true, Pal::AsicRevision::Vega12, true, false},
|
||||
/* Vega20 */ {"gfx906", "gfx906", 16, 256, 32, 906, 906, false, Pal::AsicRevision::Vega20, true, true},
|
||||
/* Vega20 XNACK */ {"gfx907", "gfx906", 16, 256, 32, 906, 907, true, Pal::AsicRevision::Vega20, true, true},
|
||||
// Raven, Raven2 and Renoir share the same machine targets and GFXIP versions;
|
||||
// only asicRevision_ distinguishes their rows.
/* Raven */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Raven, true, false},
|
||||
/* Raven XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Raven, true, false},
|
||||
/* Raven2 */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Raven2, true, false},
|
||||
/* Raven2 XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Raven2, true, false},
|
||||
/* Renoir */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Renoir, true, false},
|
||||
/* Renoir XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Renoir, true, false},
|
||||
// Navi1x rows use a SIMD width of 32 and keep the same machine target and
|
||||
// GFXIP version for the XNACK and non-XNACK variants.
/* Navi10 */ {"gfx1010", "gfx1010", 32, 256, 32, 1010, 1010, false, Pal::AsicRevision::Navi10, true, false},
|
||||
/* Navi10 XNACK */ {"gfx1010", "gfx1010", 32, 256, 32, 1010, 1010, true, Pal::AsicRevision::Navi10, true, false},
|
||||
/* Navi12 */ {"gfx1011", "gfx1011", 32, 256, 32, 1011, 1011, false, Pal::AsicRevision::Navi12, true, false},
|
||||
/* Navi12 XNACK */ {"gfx1011", "gfx1011", 32, 256, 32, 1011, 1011, true, Pal::AsicRevision::Navi12, true, false},
|
||||
/* Navi14 */ {"gfx1012", "gfx1012", 32, 256, 32, 1012, 1012, false, Pal::AsicRevision::Navi14, true, false},
|
||||
/* Navi14 XNACK */ {"gfx1012", "gfx1012", 32, 256, 32, 1012, 1012, true, Pal::AsicRevision::Navi14, true, false},
|
||||
// Navi2x rows have xnackSupported_ == false and no XNACK variant.
/* Navi21 */ {"gfx1030", "gfx1030", 32, 256, 32, 1030, 1030, false, Pal::AsicRevision::Navi21, false, false},
|
||||
/* Navi22 */ {"gfx1031", "gfx1031", 32, 256, 32, 1031, 1031, false, Pal::AsicRevision::Navi22, false, false},
|
||||
/* Navi23 */ {"gfx1032", "gfx1032", 32, 256, 32, 1032, 1032, false, Pal::AsicRevision::Navi23, false, false},
|
||||
};
|
||||
|
||||
// Supported OpenCL versions
|
||||
enum OclVersion {
|
||||
OpenCL10 = 0x10,
|
||||
|
||||
@@ -48,11 +48,100 @@
|
||||
#endif // _WIN32
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <ctype.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
|
||||
namespace {
|
||||
|
||||
//! Define the mapping from PAL asic revision enumeration values to the
|
||||
//! compiler gfx major/minor/stepping version, the PAL gfx IP level and the
//! PAL device name. One PalDevice is one row of supportedPalDevices:
//! findIsa() matches rows on asicRevision_, findPal() matches rows on the
//! gfxipMajor_/gfxipMinor_/gfxipStepping_ triple.
|
||||
struct PalDevice {
|
||||
uint32_t gfxipMajor_; //!< The core engine GFXIP Major version
|
||||
uint32_t gfxipMinor_; //!< The core engine GFXIP Minor version
|
||||
uint32_t gfxipStepping_; //!< The core engine GFXIP Stepping version
|
||||
Pal::GfxIpLevel gfxIpLevel_; //!< PAL gfx IP level
|
||||
const char* palName_; //!< PAL device name (returned to callers by findIsa() and findPal())
|
||||
Pal::AsicRevision asicRevision_; //!< PAL AsicRevision (lookup key used by findIsa())
|
||||
};
|
||||
|
||||
//! Table of the PAL devices known to the runtime.
//! Row order matters: findIsa() and findPal() both return the FIRST matching
//! row. Several ASIC revisions share a gfx version (e.g. 8.0.3), so findPal()
//! resolves such a version to the earliest row listed for it.
static constexpr PalDevice supportedPalDevices[] = {
|
||||
// GFX Version PAL GFX IP Level PAL Name PAL ASIC Revision
|
||||
{6, 0, 0, Pal::GfxIpLevel::GfxIp6, "Tahiti", Pal::AsicRevision::Tahiti},
|
||||
{6, 0, 1, Pal::GfxIpLevel::GfxIp6, "Pitcairn", Pal::AsicRevision::Pitcairn},
|
||||
{6, 0, 1, Pal::GfxIpLevel::GfxIp6, "Capeverde", Pal::AsicRevision::Capeverde},
|
||||
{6, 0, 2, Pal::GfxIpLevel::GfxIp6, "Oland", Pal::AsicRevision::Oland},
|
||||
{6, 0, 2, Pal::GfxIpLevel::GfxIp6, "Hainan", Pal::AsicRevision::Hainan},
|
||||
{7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Kalindi", Pal::AsicRevision::Kalindi},
|
||||
{7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spectre", Pal::AsicRevision::Spectre},
|
||||
{7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spooky", Pal::AsicRevision::Spooky},
|
||||
{7, 0, 1, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::HawaiiPro},
|
||||
{7, 0, 2, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::Hawaii},
|
||||
{7, 0, 4, Pal::GfxIpLevel::GfxIp7, "Bonaire", Pal::AsicRevision::Bonaire},
|
||||
{7, 0, 5, Pal::GfxIpLevel::GfxIp7, "Mullins", Pal::AsicRevision::Godavari}, // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support Mullins.
|
||||
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo},
|
||||
{8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol},
|
||||
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland},
|
||||
{8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Tonga", Pal::AsicRevision::Tonga}, // Also Tongapro (generated code is for Tonga)
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Fiji", Pal::AsicRevision::Fiji},
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Ellesmere", Pal::AsicRevision::Polaris10}, // Ellesmere
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Baffin", Pal::AsicRevision::Polaris11}, // Baffin
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris12}, // Lexa
|
||||
{8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris22},
|
||||
{8, 1, 0, Pal::GfxIpLevel::GfxIp8_1, "Stoney", Pal::AsicRevision::Stoney},
|
||||
{9, 0, 0, Pal::GfxIpLevel::GfxIp9, "gfx900", Pal::AsicRevision::Vega10},
|
||||
{9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven},
|
||||
{9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12},
|
||||
{9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20},
|
||||
{9, 0, 9, Pal::GfxIpLevel::GfxIp9, "gfx909", Pal::AsicRevision::Raven2},
|
||||
{9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir}, // stepping 12 is the "c" in gfx90c
|
||||
{10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10},
|
||||
{10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12},
|
||||
{10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14},
|
||||
{10, 3, 0, Pal::GfxIpLevel::GfxIp10_3, "gfx1030", Pal::AsicRevision::Navi21},
|
||||
{10, 3, 1, Pal::GfxIpLevel::GfxIp10_3, "gfx1031", Pal::AsicRevision::Navi22},
|
||||
{10, 3, 2, Pal::GfxIpLevel::GfxIp10_3, "gfx1032", Pal::AsicRevision::Navi23},
|
||||
// Only compiled in when the PAL build enables Van Gogh.
#if PAL_BUILD_VAN_GOGH
|
||||
// NOTE(review): this row has an empty PAL name, unlike every other row -- confirm intended.
{10, 3, 3, Pal::GfxIpLevel::GfxIp10_3, "", Pal::AsicRevision::VanGogh},
|
||||
#endif
|
||||
};
|
||||
|
||||
//! Look up the ISA registry entry and the PAL device name for a PAL ASIC revision.
//!
//! The first row of supportedPalDevices whose asicRevision_ equals
//! \a asicRevision supplies the gfx major/minor/stepping version; that version,
//! together with the requested SRAMECC and XNACK settings, is passed on to
//! amd::Isa::findIsa().
//!
//! \param asicRevision PAL ASIC revision to search for
//! \param sramecc      true maps to Feature::Enabled, false to Feature::Disabled
//! \param xnack        true maps to Feature::Enabled, false to Feature::Disabled
//! \return (nullptr, nullptr) when the revision is not in the table; otherwise
//!         the pointer returned by amd::Isa::findIsa() paired with the row's
//!         PAL name. The ISA pointer is passed through unchecked, so callers
//!         still need to test it.
static std::tuple<const amd::Isa*, const char*> findIsa(Pal::AsicRevision asicRevision,
                                                        bool sramecc, bool xnack) {
  using Feature = amd::Isa::Feature;
  using Result = std::tuple<const amd::Isa*, const char*>;

  const Feature srameccFeature = sramecc ? Feature::Enabled : Feature::Disabled;
  const Feature xnackFeature = xnack ? Feature::Enabled : Feature::Disabled;

  for (const PalDevice& entry : supportedPalDevices) {
    if (entry.asicRevision_ != asicRevision) {
      continue;
    }
    // Only the first row for this ASIC revision is considered.
    const amd::Isa* registryIsa =
        amd::Isa::findIsa(entry.gfxipMajor_, entry.gfxipMinor_, entry.gfxipStepping_,
                          srameccFeature, xnackFeature);
    return Result(registryIsa, entry.palName_);
  }

  // PAL reported an ASIC revision this table does not know about.
  return Result(nullptr, nullptr);
}
|
||||
|
||||
static std::tuple<Pal::GfxIpLevel, Pal::AsicRevision, const char*> findPal(uint32_t gfxipMajor,
|
||||
uint32_t gfxipMinor,
|
||||
uint32_t gfxipStepping) {
|
||||
auto palDeviceIter = std::find_if(std::begin(supportedPalDevices), std::end(supportedPalDevices),
|
||||
[&](const PalDevice& palDevice) {
|
||||
return palDevice.gfxipMajor_ == gfxipMajor &&
|
||||
palDevice.gfxipMinor_ == gfxipMinor &&
|
||||
palDevice.gfxipStepping_ == gfxipStepping;
|
||||
});
|
||||
if (palDeviceIter == std::end(supportedPalDevices)) {
|
||||
return std::make_tuple(Pal::GfxIpLevel::None, Pal::AsicRevision::Unknown, nullptr);
|
||||
}
|
||||
return std::make_tuple(palDeviceIter->gfxIpLevel_, palDeviceIter->asicRevision_,
|
||||
palDeviceIter->palName_);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool PalDeviceLoad() {
|
||||
bool ret = false;
|
||||
@@ -76,185 +165,63 @@ Pal::IPlatform* Device::platform_;
|
||||
NullDevice::Compiler* NullDevice::compiler_;
|
||||
AppProfile Device::appProfile_;
|
||||
|
||||
NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), hwInfo_(nullptr) {}
|
||||
NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), palName_(nullptr) {}
|
||||
|
||||
bool NullDevice::init() {
|
||||
std::vector<Device*> devices;
|
||||
std::string driverVersion;
|
||||
|
||||
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
|
||||
// TODO: Currently PAL only supports GFXIP9+.
|
||||
// Comment out this section for SWDEV-146950 since Kalindi and Mullins
|
||||
// do not work for LC offline compilation without knowing which GFXIP
|
||||
// should be used for them.
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
|
||||
// Loop through all supported devices and create each of them
|
||||
for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
|
||||
bool foundActive = false;
|
||||
Pal::AsicRevision revision = static_cast<Pal::AsicRevision>(id);
|
||||
|
||||
if (pal::DeviceInfo[id].machineTarget_[0] == '\0') {
|
||||
// Create offline devices for all ISAs not already associated with an online
|
||||
// device. This allows code objects to be compiled for all supported ISAs.
|
||||
std::vector<Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) {
|
||||
if (!isa->runtimePalSupported()) {
|
||||
continue;
|
||||
}
|
||||
bool isOnline = false;
|
||||
// Check if the particular device is online
|
||||
for (size_t i = 0; i < devices.size(); i++) {
|
||||
if (&(devices[i]->isa()) == isa) {
|
||||
isOnline = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isOnline) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Loop through all active PAL devices and see if we match one
|
||||
for (uint i = 0; i < devices.size(); ++i) {
|
||||
driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
|
||||
if (driverVersion.find("PAL") != std::string::npos) {
|
||||
if (static_cast<NullDevice*>(devices[i])->asicRevision() == revision) {
|
||||
foundActive = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't report an offline device if it's active
|
||||
if (foundActive) {
|
||||
Pal::GfxIpLevel gfxIpLevel;
|
||||
Pal::AsicRevision asicRevision;
|
||||
const char* palName;
|
||||
std::tie(gfxIpLevel, asicRevision, palName) =
|
||||
findPal(isa->versionMajor(), isa->versionMinor(), isa->versionStepping());
|
||||
if (asicRevision == Pal::AsicRevision::Unknown) {
|
||||
// PAL does not support this asic.
|
||||
continue;
|
||||
}
|
||||
|
||||
NullDevice* dev = new NullDevice();
|
||||
if (nullptr != dev) {
|
||||
if (!dev->create(id, Pal::GfxIpLevel::_None)) {
|
||||
delete dev;
|
||||
} else {
|
||||
dev->registerDevice();
|
||||
}
|
||||
std::unique_ptr<NullDevice> nullDevice(new NullDevice());
|
||||
if (!nullDevice) {
|
||||
LogPrintfError("Error allocating new instance of offline PAL Device %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
if (!nullDevice->create(palName, *isa, gfxIpLevel, asicRevision)) {
|
||||
// Skip over unsupported devices
|
||||
LogPrintfError("Skipping creating new instance of offline PAL Device %s", isa->targetId());
|
||||
continue;
|
||||
}
|
||||
nullDevice.release()->registerDevice();
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
|
||||
// Loop through all supported devices and create each of them
|
||||
for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
|
||||
bool foundActive = false;
|
||||
bool foundDuplicate = false;
|
||||
uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
|
||||
|
||||
if (pal::Gfx9PlusSubDeviceInfo[id].machineTarget_[0] == '\0') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Loop through all active PAL devices and see if we match one
|
||||
for (uint i = 0; i < devices.size(); ++i) {
|
||||
driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
|
||||
if (driverVersion.find("PAL") != std::string::npos) {
|
||||
gfxipVersion = devices[i]->settings().useLightning_
|
||||
? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_
|
||||
: pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
|
||||
uint gfxIpCurrent = devices[i]->settings().useLightning_
|
||||
? static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersionLC_
|
||||
: static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_;
|
||||
if (gfxIpCurrent == gfxipVersion) {
|
||||
foundActive = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't report an offline device if it's active
|
||||
if (foundActive) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Loop through all previous devices in the Gfx9PlusSubDeviceInfo list
|
||||
// and compare them with the current entry to see if the current entry
|
||||
// was listed previously in the Gfx9PlusSubDeviceInfo, if so, then it
|
||||
// means the current entry already has been added in the offline device list
|
||||
for (uint j = 0; j < id; ++j) {
|
||||
if (pal::Gfx9PlusSubDeviceInfo[j].machineTarget_[0] == '\0') {
|
||||
continue;
|
||||
}
|
||||
if ((strcmp(pal::Gfx9PlusSubDeviceInfo[j].machineTarget_,
|
||||
pal::Gfx9PlusSubDeviceInfo[id].machineTarget_) == 0) &&
|
||||
(pal::Gfx9PlusSubDeviceInfo[j].xnackEnabled_ ==
|
||||
pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_)) {
|
||||
foundDuplicate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't report an offline device twice
|
||||
if (foundDuplicate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
|
||||
uint ipLevelMajor = round(gfxipVersion / 100);
|
||||
uint ipLevelMinor = round(gfxipVersion / 10 % 10);
|
||||
switch (ipLevelMajor) {
|
||||
case 9:
|
||||
ipLevel = Pal::GfxIpLevel::GfxIp9;
|
||||
break;
|
||||
case 10:
|
||||
switch (ipLevelMinor) {
|
||||
case 0:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
case 1:
|
||||
ipLevel = Pal::GfxIpLevel::GfxIp10_1;
|
||||
break;
|
||||
case 2:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
case 3:
|
||||
ipLevel = Pal::GfxIpLevel::GfxIp10_3;
|
||||
break;
|
||||
case 4:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 11:
|
||||
switch (ipLevelMinor) {
|
||||
case 0:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
}
|
||||
|
||||
NullDevice* dev = new NullDevice();
|
||||
if (nullptr != dev) {
|
||||
if (!dev->create(id, ipLevel)) {
|
||||
delete dev;
|
||||
} else {
|
||||
dev->registerDevice();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) {
|
||||
// Update HW info for the device
|
||||
if ((GPU_ENABLE_PAL == 1) && (ipLevel == Pal::GfxIpLevel::_None)) {
|
||||
hwInfo_ = &DeviceInfo[id];
|
||||
} else if (ipLevel >= Pal::GfxIpLevel::GfxIp9) {
|
||||
hwInfo_ = &Gfx9PlusSubDeviceInfo[id];
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
Pal::AsicRevision asicRevision = hwInfo_->asicRevision_;
|
||||
|
||||
if (amd::IS_HIP && IS_MAINLINE &&
|
||||
(asicRevision != Pal::AsicRevision::Vega20)) {
|
||||
bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLevel ipLevel,
|
||||
Pal::AsicRevision asicRevision) {
|
||||
if (!isa.runtimePalSupported()) {
|
||||
LogPrintfError("Offline PAL device %s is not supported", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
online_ = false;
|
||||
palName_ = palName;
|
||||
Pal::DeviceProperties properties = {};
|
||||
|
||||
// Use fake GFX IP for the device init
|
||||
@@ -274,12 +241,19 @@ bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) {
|
||||
Pal::WorkStationCaps wscaps = {};
|
||||
|
||||
// Create setting for the offline target
|
||||
if ((palSettings == nullptr) || !palSettings->create(properties, heaps, wscaps)) {
|
||||
if ((palSettings == nullptr) ||
|
||||
!palSettings->create(properties, heaps, wscaps, isa.xnack() == amd::Isa::Feature::Enabled)) {
|
||||
LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ValidateComgr()) {
|
||||
LogError("Code object manager initialization failed!");
|
||||
LogPrintfError("Code object manager initialization failed for offline PAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd::Device::create(isa)) {
|
||||
LogPrintfError("Unable to setup device for PAL offline device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -306,7 +280,7 @@ bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) {
|
||||
acl_error error;
|
||||
compiler_ = aclCompilerInit(&opts, &error);
|
||||
if (error != ACL_SUCCESS) {
|
||||
LogError("Error initializing the compiler");
|
||||
LogPrintfError("Error initializing the compiler for offline PAL device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
@@ -511,34 +485,12 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
|
||||
info_.platform_ = AMD_PLATFORM;
|
||||
|
||||
if (settings().useLightning_) {
|
||||
::strncpy(info_.name_, hwInfo()->machineTargetLC_, sizeof(info_.name_) - 1);
|
||||
|
||||
if (hwInfo()->srameccSumpported_) {
|
||||
if (palProp.gfxipProperties.shaderCore.flags.eccProtectedGprs) {
|
||||
::strcat(info_.name_, ":sramecc+");
|
||||
} else {
|
||||
::strcat(info_.name_, ":sramecc-");
|
||||
}
|
||||
}
|
||||
|
||||
if (hwInfo()->xnackSupported_) {
|
||||
if (hwInfo()->xnackEnabled_) {
|
||||
::strcat(info_.name_, ":xnack+");
|
||||
} else {
|
||||
::strcat(info_.name_, ":xnack-");
|
||||
}
|
||||
}
|
||||
|
||||
::strncpy(info_.targetId_, "amdgcn-amd-amdhsa--", sizeof(info_.targetId_) - 1);
|
||||
::strcat(info_.targetId_, info_.name_);
|
||||
} else {
|
||||
::strncpy(info_.name_, hwInfo()->machineTarget_, sizeof(info_.name_) - 1);
|
||||
}
|
||||
|
||||
::strncpy(info_.name_, settings().useLightning_ ? isa().targetId() : palName_,
|
||||
sizeof(info_.name_));
|
||||
::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1);
|
||||
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
|
||||
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)",
|
||||
settings().useLightning_ ? ",LC" : ",HSAIL");
|
||||
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)%s",
|
||||
settings().useLightning_ ? ",LC" : ",HSAIL", isOnline() ? "" : " [Offline]");
|
||||
|
||||
info_.profile_ = "FULL_PROFILE";
|
||||
if (settings().oclVersion_ >= OpenCL20) {
|
||||
@@ -625,23 +577,20 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
? (2 * palProp.gfxipProperties.shaderCore.numSimdsPerCu)
|
||||
: palProp.gfxipProperties.shaderCore.numSimdsPerCu;
|
||||
info_.cuPerShaderArray_ = palProp.gfxipProperties.shaderCore.numCusPerShaderArray;
|
||||
info_.simdWidth_ = hwInfo()->simdWidth_;
|
||||
info_.simdWidth_ = isa().simdWidth();
|
||||
info_.simdInstructionWidth_ = 1;
|
||||
info_.wavefrontWidth_ =
|
||||
settings().enableWave32Mode_ ? 32 : palProp.gfxipProperties.shaderCore.nativeWavefrontSize;
|
||||
info_.availableSGPRs_ = palProp.gfxipProperties.shaderCore.numAvailableSgprs;
|
||||
|
||||
info_.globalMemChannelBanks_ = 4;
|
||||
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
|
||||
info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth();
|
||||
info_.localMemSizePerCU_ = palProp.gfxipProperties.shaderCore.ldsSizePerCu;
|
||||
info_.localMemBanks_ = hwInfo()->localMemBanks_;
|
||||
info_.localMemBanks_ = isa().localMemBanks();
|
||||
|
||||
uint gfxipVersion =
|
||||
settings().useLightning_ ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
|
||||
|
||||
info_.gfxipMajor_ = gfxipVersion / 100;
|
||||
info_.gfxipMinor_ = gfxipVersion / 10 % 10;
|
||||
info_.gfxipStepping_ = gfxipVersion % 10;
|
||||
info_.gfxipMajor_ = isa().versionMajor();
|
||||
info_.gfxipMinor_ = isa().versionMinor();
|
||||
info_.gfxipStepping_ = isa().versionStepping();
|
||||
|
||||
info_.timeStampFrequency_ = 1000000;
|
||||
info_.numAsyncQueues_ = numComputeRings;
|
||||
@@ -860,9 +809,6 @@ uint32_t gStartDevice = 0;
|
||||
uint32_t gNumDevices = 0;
|
||||
|
||||
bool Device::create(Pal::IDevice* device) {
|
||||
if (!amd::Device::create()) {
|
||||
return false;
|
||||
}
|
||||
resourceList_ = new std::unordered_set<Resource*>();
|
||||
if (nullptr == resourceList_) {
|
||||
return false;
|
||||
@@ -884,23 +830,25 @@ bool Device::create(Pal::IDevice* device) {
|
||||
|
||||
// XNACK flag should be set for PageMigration or IOMMUv2 support.
|
||||
// Note: Navi2x should have a fix in HW.
|
||||
bool isXNACKSupported = (ipLevel_ <= Pal::GfxIpLevel::GfxIp10_1) &&
|
||||
bool isXNACKEnabled =
|
||||
(static_cast<uint>(properties().gpuMemoryProperties.flags.pageMigrationEnabled ||
|
||||
properties().gpuMemoryProperties.flags.iommuv2Support));
|
||||
|
||||
// Update HW info for the device
|
||||
if ((GPU_ENABLE_PAL == 1) && (properties().revision <= Pal::AsicRevision::Polaris12)) {
|
||||
hwInfo_ = &DeviceInfo[static_cast<uint>(properties().revision)];
|
||||
} else if (ipLevel_ >= Pal::GfxIpLevel::GfxIp9) {
|
||||
// For compiler sub targets
|
||||
for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
|
||||
if ((Gfx9PlusSubDeviceInfo[id].asicRevision_ == asicRevision_) &&
|
||||
(Gfx9PlusSubDeviceInfo[id].xnackEnabled_ == isXNACKSupported)) {
|
||||
hwInfo_ = &Gfx9PlusSubDeviceInfo[id];
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bool isSRAMECCEnabled = properties().gfxipProperties.shaderCore.flags.eccProtectedGprs;
|
||||
|
||||
const amd::Isa* isa;
|
||||
std::tie(isa, palName_) = findIsa(asicRevision_, isSRAMECCEnabled, isXNACKEnabled);
|
||||
if (!isa) {
|
||||
LogPrintfError("Unsupported PAL device with ASIC revision #%d", asicRevision_);
|
||||
return false;
|
||||
}
|
||||
if (!isa->runtimePalSupported()) {
|
||||
LogPrintfError("Unsupported PAL device with ISA %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd::Device::create(*isa)) {
|
||||
LogPrintfError("Unable to setup device for PAL device %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -953,8 +901,9 @@ bool Device::create(Pal::IDevice* device) {
|
||||
iDev()->QueryWorkStationCaps(&wscaps);
|
||||
|
||||
pal::Settings* gpuSettings = reinterpret_cast<pal::Settings*>(settings_);
|
||||
if ((gpuSettings == nullptr) ||
|
||||
!gpuSettings->create(properties(), heaps_, wscaps, appProfile_.reportAsOCL12Device())) {
|
||||
if (!gpuSettings ||
|
||||
!gpuSettings->create(properties(), heaps_, wscaps, isa->xnack() == amd::Isa::Feature::Enabled,
|
||||
appProfile_.reportAsOCL12Device())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -64,8 +64,10 @@ class NullDevice : public amd::Device {
|
||||
NullDevice();
|
||||
|
||||
//! Creates an offline device with the specified target
|
||||
bool create(uint id, //!< index in the AMDDeviceInfo[]
|
||||
Pal::GfxIpLevel ipLevel //!< GPU ip level
|
||||
bool create(const char* palName, //!< Device name
|
||||
const amd::Isa& isa, //!< Device ISA
|
||||
Pal::GfxIpLevel ipLevel, //!< GPU ip level
|
||||
Pal::AsicRevision asicRevision //!< PAL ASIC revision
|
||||
);
|
||||
|
||||
//! Instantiate a new virtual device
|
||||
@@ -114,8 +116,6 @@ class NullDevice : public amd::Device {
|
||||
Pal::GfxIpLevel ipLevel() const { return ipLevel_; }
|
||||
Pal::AsicRevision asicRevision() const { return asicRevision_; }
|
||||
|
||||
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
|
||||
|
||||
//! Empty implementation on Null device
|
||||
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
|
||||
|
||||
@@ -139,7 +139,7 @@ class NullDevice : public amd::Device {
|
||||
|
||||
Pal::AsicRevision asicRevision_; //!< ASIC revision
|
||||
Pal::GfxIpLevel ipLevel_; //!< Device IP level
|
||||
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
|
||||
const char* palName_; //!< Device name
|
||||
|
||||
//! Fills OpenCL device info structure
|
||||
void fillDeviceInfo(const Pal::DeviceProperties& palProp, //!< PAL device properties
|
||||
|
||||
@@ -174,7 +174,6 @@ bool Segment::freeze(bool destroySysmem) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static constexpr const char* Carrizo = "Carrizo";
|
||||
HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
|
||||
: Program(device, owner),
|
||||
rawBinary_(nullptr),
|
||||
@@ -185,11 +184,6 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner)
|
||||
executable_(nullptr),
|
||||
loaderContext_(this) {
|
||||
assert(device.isOnline());
|
||||
if (dev().asicRevision() == Pal::AsicRevision::Bristol) {
|
||||
machineTarget_ = Carrizo;
|
||||
} else {
|
||||
machineTarget_ = dev().hwInfo()->machineTarget_;
|
||||
}
|
||||
loader_ = amd::hsa::loader::Loader::Create(&loaderContext_);
|
||||
}
|
||||
|
||||
@@ -204,11 +198,6 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner)
|
||||
loaderContext_(this) {
|
||||
assert(!device.isOnline());
|
||||
isNull_ = true;
|
||||
if (dev().asicRevision() == Pal::AsicRevision::Bristol) {
|
||||
machineTarget_ = Carrizo;
|
||||
} else {
|
||||
machineTarget_ = dev().hwInfo()->machineTarget_;
|
||||
}
|
||||
// Cannot load onto a NullDevice.
|
||||
loader_ = nullptr;
|
||||
}
|
||||
@@ -368,15 +357,11 @@ bool HSAILProgram::allocKernelTable() {
|
||||
|
||||
void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); }
|
||||
|
||||
const aclTargetInfo& HSAILProgram::info(const char* str) {
|
||||
const aclTargetInfo& HSAILProgram::info() {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
acl_error err;
|
||||
std::string arch = "hsail";
|
||||
if (dev().settings().use64BitPtr_) {
|
||||
arch = "hsail64";
|
||||
}
|
||||
info_ = aclGetTargetInfo(arch.c_str(),
|
||||
(str && str[0] == '\0' ? palNullDevice().hwInfo()->machineTarget_ : str), &err);
|
||||
info_ = aclGetTargetInfo(palNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail",
|
||||
device().isa().hsailName(), &err);
|
||||
if (err != ACL_SUCCESS) {
|
||||
LogWarning("aclGetTargetInfo failed");
|
||||
}
|
||||
@@ -531,49 +516,23 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p
|
||||
}
|
||||
|
||||
// Maps an ISA name string to the hsa_isa_t handle returned to the HSA loader.
// NOTE(review): this listing shows two bodies back to back. The first derives a
// numeric gfxip value from the name (the digits after "gfx" for names starting
// with "amdgcn-", otherwise three ':'-separated trailing fields weighted
// 1, 10 and 100) and stores it in isa.handle. The second looks the name up with
// amd::Isa::findIsa and wraps amd::Isa::toHandle(isa_p). Presumably the first is
// the pre-change code of the surrounding diff hunk — confirm before relying on it.
hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
|
||||
hsa_isa_t isa = {0};
|
||||
uint32_t gfxip = 0;
|
||||
std::string gfx_target(name);
|
||||
if (gfx_target.find("amdgcn-") == 0) {
|
||||
std::string gfxip_version_str = gfx_target.substr(gfx_target.find("gfx") + 3);
|
||||
gfxip = std::atoi(gfxip_version_str.c_str());
|
||||
} else {
|
||||
// FIXME: Old way. To be removed.
|
||||
uint32_t shift = 1;
|
||||
size_t last = gfx_target.length();
|
||||
std::string ver;
|
||||
do {
|
||||
size_t first = gfx_target.find_last_of(':', last);
|
||||
ver = gfx_target.substr(first + 1, last - first);
|
||||
last = first - 1;
|
||||
gfxip += static_cast<uint32_t>(atoi(ver.c_str())) * shift;
|
||||
shift *= 10;
|
||||
} while (shift <= 100);
|
||||
}
|
||||
isa.handle = gfxip;
|
||||
return isa;
|
||||
const amd::Isa* isa_p = amd::Isa::findIsa(name);
|
||||
return {amd::Isa::toHandle(isa_p)};
|
||||
}
|
||||
|
||||
// Reports whether a code object ISA (given as an hsa_isa_t handle) may be loaded
// for this loader context's program.
// NOTE(review): two decision paths are interleaved in this listing. One splits
// the device's hwInfo() gfxip version and isa.handle into major (/10) and minor
// (%10) parts, rejects differing majors, and accepts an equal or lower target
// minor (logging "ISA downgrade for execution!" for the lower case). The other
// resolves isa.handle with amd::Isa::fromHandle, rejects a null ISA or one for
// which runtimePalSupported() is false, rejects when program_->isNull(), and
// otherwise returns amd::Isa::isCompatible(code object ISA, device ISA).
// Presumably the gfxip arithmetic is the pre-change code of the diff — confirm.
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
|
||||
uint32_t gfxipVersion = program_->palNullDevice().settings().useLightning_
|
||||
? program_->palNullDevice().hwInfo()->gfxipVersionLC_
|
||||
: program_->palNullDevice().hwInfo()->gfxipVersion_;
|
||||
uint32_t majorSrc = gfxipVersion / 10;
|
||||
uint32_t minorSrc = gfxipVersion % 10;
|
||||
|
||||
uint32_t majorTrg = isa.handle / 10;
|
||||
uint32_t minorTrg = isa.handle % 10;
|
||||
|
||||
if (majorSrc != majorTrg) {
|
||||
// The HSA loader uses a handle value of 0 to indicate the ISA is invalid.
|
||||
const amd::Isa* code_object_isa_p = amd::Isa::fromHandle(isa.handle);
|
||||
if (!code_object_isa_p || !code_object_isa_p->runtimePalSupported()) {
|
||||
// The ISA is either not supported because PALHSALoaderContext::IsaFromName
|
||||
// could not find it, or the PAL runtime does not support it.
|
||||
return false;
|
||||
} else if (minorTrg == minorSrc) {
|
||||
return true;
|
||||
} else if (minorTrg < minorSrc) {
|
||||
LogWarning("ISA downgrade for execution!");
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
if (program_->isNull()) {
|
||||
// Cannot load code onto offline devices.
|
||||
return false;
|
||||
}
|
||||
return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa());
|
||||
}
|
||||
|
||||
void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent,
|
||||
@@ -584,7 +543,7 @@ void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_ag
|
||||
// Note: In Linux ::posix_memalign() requires at least 16 bytes for the alignment.
|
||||
align = amd::alignUp(align, 16);
|
||||
void* ptr = amd::Os::alignedMalloc(size, align);
|
||||
if ((ptr != nullptr) && zero) {
|
||||
if (ptr && zero) {
|
||||
memset(ptr, 0, size);
|
||||
}
|
||||
return ptr;
|
||||
@@ -778,9 +737,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
|
||||
return true;
|
||||
}
|
||||
|
||||
hsa_agent_t agent;
|
||||
agent.handle = 1;
|
||||
|
||||
executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr);
|
||||
if (executable_ == nullptr) {
|
||||
buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n";
|
||||
|
||||
@@ -202,7 +202,7 @@ class HSAILProgram : public device::Program {
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options);
|
||||
|
||||
virtual const aclTargetInfo& info(const char* str = "");
|
||||
virtual const aclTargetInfo& info();
|
||||
|
||||
virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize,
|
||||
amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0,
|
||||
@@ -250,7 +250,6 @@ class LightningProgram : public HSAILProgram {
|
||||
//! Constructs a Lightning program for a NullDevice by delegating to the
//! HSAILProgram(NullDevice&, amd::Program&) constructor, then marks the program
//! as LC and records whether the owning program's language is HIP.
LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
// NOTE(review): the enclosing hunk header (-250,7 +250,6) drops one line; this
// machineTarget_ assignment is presumably the removed line — confirm.
machineTarget_ = palNullDevice().hwInfo()->machineTargetLC_;
|
||||
}
|
||||
|
||||
LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) {
|
||||
|
||||
@@ -174,7 +174,7 @@ Settings::Settings() {
|
||||
|
||||
bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
const Pal::GpuMemoryHeapProperties* heaps, const Pal::WorkStationCaps& wscaps,
|
||||
bool reportAsOCL12Device) {
|
||||
bool enableXNACK, bool reportAsOCL12Device) {
|
||||
uint32_t osVer = 0x0;
|
||||
|
||||
// Disable thread trace by default for all devices
|
||||
@@ -202,19 +202,19 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
apuSystem_ = true;
|
||||
}
|
||||
|
||||
enableXNACK_ = enableXNACK;
|
||||
hsailExplicitXnack_ = enableXNACK;
|
||||
|
||||
switch (palProp.revision) {
|
||||
case Pal::AsicRevision::Navi23:
|
||||
case Pal::AsicRevision::Navi22:
|
||||
case Pal::AsicRevision::Navi21:
|
||||
case Pal::AsicRevision::Navi14:
|
||||
case Pal::AsicRevision::Navi12:
|
||||
case Pal::AsicRevision::Navi10:
|
||||
case Pal::AsicRevision::Navi10_A0:
|
||||
case Pal::AsicRevision::Navi23:
|
||||
case Pal::AsicRevision::Navi22:
|
||||
case Pal::AsicRevision::Navi21:
|
||||
gfx10Plus_ = true;
|
||||
useLightning_ = GPU_ENABLE_LC;
|
||||
hsailExplicitXnack_ =
|
||||
static_cast<uint>(palProp.gpuMemoryProperties.flags.pageMigrationEnabled ||
|
||||
palProp.gpuMemoryProperties.flags.iommuv2Support);
|
||||
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
|
||||
if (useLightning_) {
|
||||
enableWave32Mode_ = true;
|
||||
@@ -264,6 +264,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
case Pal::AsicRevision::Polaris10:
|
||||
case Pal::AsicRevision::Polaris11:
|
||||
case Pal::AsicRevision::Polaris12:
|
||||
case Pal::AsicRevision::Polaris22:
|
||||
// Disable tiling aperture on VI+
|
||||
linearPersistentImage_ = true;
|
||||
// Keep this false even though we have support
|
||||
@@ -289,6 +290,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
|
||||
// Fall through ...
|
||||
case Pal::AsicRevision::Bonaire:
|
||||
case Pal::AsicRevision::Hawaii:
|
||||
case Pal::AsicRevision::HawaiiPro:
|
||||
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
|
||||
reportFMAF_ = false;
|
||||
if ((palProp.revision == Pal::AsicRevision::Hawaii) || aiPlus_) {
|
||||
|
||||
@@ -127,6 +127,7 @@ class Settings : public device::Settings {
|
||||
bool create(const Pal::DeviceProperties& palProp, //!< PAL device properties
|
||||
const Pal::GpuMemoryHeapProperties* heaps, //!< PAL heap settings
|
||||
const Pal::WorkStationCaps& wscaps, //!< PAL workstation settings
|
||||
bool enableXNACK, //!< XNACK is enabled on this device
|
||||
bool reportAsOCL12Device = false //!< Report As OpenCL1.2 Device
|
||||
);
|
||||
|
||||
|
||||
@@ -934,7 +934,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
|
||||
amd::Image* srcImage = static_cast<amd::Image*>(srcMemory.owner());
|
||||
amd::Image::Format newFormat(dstImage->getImageFormat());
|
||||
bool swapLayer =
|
||||
(dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10);
|
||||
(dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10);
|
||||
|
||||
// Find unsupported formats
|
||||
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
|
||||
@@ -1126,7 +1126,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
|
||||
amd::Image* srcImage = static_cast<amd::Image*>(srcMemory.owner());
|
||||
amd::Image::Format newFormat(srcImage->getImageFormat());
|
||||
bool swapLayer =
|
||||
(srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10);
|
||||
(srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10);
|
||||
|
||||
// Find unsupported formats
|
||||
for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
|
||||
@@ -1364,14 +1364,14 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
|
||||
|
||||
// Program source origin
|
||||
int32_t srcOrg[4] = {(int32_t)srcOrigin[0], (int32_t)srcOrigin[1], (int32_t)srcOrigin[2], 0};
|
||||
if ((srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10)) {
|
||||
if ((srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10)) {
|
||||
srcOrg[3] = 1;
|
||||
}
|
||||
setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
|
||||
|
||||
// Program destination origin
|
||||
int32_t dstOrg[4] = {(int32_t)dstOrigin[0], (int32_t)dstOrigin[1], (int32_t)dstOrigin[2], 0};
|
||||
if ((dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10)) {
|
||||
if ((dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10)) {
|
||||
dstOrg[3] = 1;
|
||||
}
|
||||
setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg);
|
||||
@@ -2072,7 +2072,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
|
||||
amd::Image* image = static_cast<amd::Image*>(memory.owner());
|
||||
amd::Image::Format newFormat(image->getImageFormat());
|
||||
bool swapLayer =
|
||||
(image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10);
|
||||
(image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10);
|
||||
|
||||
// Program the kernels workload depending on the fill dimensions
|
||||
fillType = FillImage;
|
||||
|
||||
@@ -430,7 +430,7 @@ PerfCounter::PerfCounter(const Device& device, //!< A ROC device object
|
||||
info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id)
|
||||
|
||||
// these block indices are valid for the VI (Gfx8) & Gfx9 devices
|
||||
switch (roc_device_.deviceInfo().gfxipMajor_) {
|
||||
switch (roc_device_.isa().versionMajor()) {
|
||||
case (8):
|
||||
gfxVersion_ = ROC_GFX8;
|
||||
if (blockIndex < viBlockIdOrcaToRocr.size()) {
|
||||
|
||||
@@ -33,49 +33,6 @@ static constexpr uint DeviceQueueMaskSize = 32;
|
||||
//! Set to match the number of pipes, which is 8.
|
||||
static constexpr uint kMaxAsyncQueues = 8;
|
||||
|
||||
typedef uint HsaDeviceId;
|
||||
|
||||
//! Static per-target description record: target names, SIMD and local-memory
//! layout figures, GFXIP version triple and PCIe device id. One record per entry
//! of the DeviceInfo[] table.
//! NOTE(review): the enclosing hunk header (-33,49 +33,6) suggests this struct is
//! part of the lines removed by the diff — confirm before relying on it.
struct AMDDeviceInfo {
|
||||
const char* machineTarget_; //!< Machine target
|
||||
const char* machineTargetLC_;//!< Machine target for LC
|
||||
uint simdPerCU_; //!< Number of SIMDs per CU
|
||||
uint simdWidth_; //!< Number of workitems processed per SIMD
|
||||
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemSizePerCU_; //!< Local memory size per CU
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
uint gfxipMajor_; //!< The core engine GFXIP Major version
|
||||
uint gfxipMinor_; //!< The core engine GFXIP Minor version
|
||||
uint gfxipStepping_; //!< The core engine GFXIP Stepping version
|
||||
uint pciDeviceId_; //!< PCIe device id
|
||||
};
|
||||
|
||||
constexpr HsaDeviceId HSA_INVALID_DEVICE_ID = -1;
|
||||
|
||||
//! Table of known targets. Each row initializes an AMDDeviceInfo in field order:
//! machineTarget_, machineTargetLC_, simdPerCU_, simdWidth_,
//! simdInstructionWidth_, memChannelBankWidth_, localMemSizePerCU_,
//! localMemBanks_, gfxipMajor_, gfxipMinor_, gfxipStepping_, pciDeviceId_.
//! Every row leaves pciDeviceId_ as 0; rows with an empty machineTargetLC_ have
//! no LC target name.
static constexpr AMDDeviceInfo DeviceInfo[] = {
|
||||
/* KAVERI_SPECTRE */ {"Spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0},
|
||||
/* KAVERI_SPOOKY */ {"Spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0},
|
||||
/* HAWAII */ {"Hawaii", "gfx701", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0},
|
||||
/* CARRIZO */ {"Carrizo", "gfx801", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 1, 0},
|
||||
/* TONGA */ {"Tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 2, 0},
|
||||
/* ICELAND */ {"Iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 2, 0},
|
||||
/* FIJI */ {"Fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0},
|
||||
/* ELLESMERE */ {"Ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0},
|
||||
/* BAFFIN */ {"Baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0},
|
||||
/* VEGA10 */ {"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 0, 0},
|
||||
/* VEGA10_HBCC */ {"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 1, 0},
|
||||
/* RAVEN */ {"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 2, 0},
|
||||
/* VEGA12 */ {"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 4, 0},
|
||||
/* VEGA20 */ {"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 6, 0},
|
||||
/* ARCTURUS */ {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 8, 0},
|
||||
/* NAVI10 */ {"gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 0, 0},
|
||||
/* NAVI12 */ {"gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 1, 0},
|
||||
/* NAVI14 */ {"gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 2, 0},
|
||||
/* SIENNA_CICHILD */ {"gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 0, 0},
|
||||
/* NAVY_FLOUNDER */ {"gfx1031", "gfx1031", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 1, 0},
|
||||
/* DIMGREY CAVEFISH*/{"gfx1032", "gfx1032", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 2, 0}
|
||||
};
|
||||
|
||||
} // namespace roc
|
||||
|
||||
#endif
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
|
||||
#include <numaif.h>
|
||||
#endif // ROCCLR_SUPPORT_NUMA_POLICY
|
||||
@@ -61,9 +62,9 @@
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
namespace {
|
||||
|
||||
// Requests the Comgr ISA metadata node for an ISA by passing the address of
// isaMeta to amd::Comgr::get_isa_metadata.
// Returns true only when that call reports AMD_COMGR_STATUS_SUCCESS.
// NOTE(review): two signatures and two lookup calls appear below, one pair using
// `const char* targetId` and one pair using `std::string isaName`. This looks
// like the before/after lines of a diff hunk shown together — confirm which pair
// is current.
inline bool getIsaMeta(const char* targetId, amd_comgr_metadata_node_t& isaMeta) {
|
||||
inline bool getIsaMeta(std::string isaName, amd_comgr_metadata_node_t& isaMeta) {
|
||||
amd_comgr_status_t status;
|
||||
status = amd::Comgr::get_isa_metadata(targetId, &isaMeta);
|
||||
status = amd::Comgr::get_isa_metadata(isaName.c_str(), &isaMeta);
|
||||
return (status == AMD_COMGR_STATUS_SUCCESS) ? true : false;
|
||||
}
|
||||
|
||||
@@ -99,34 +100,13 @@ std::vector<AgentInfo> roc::Device::cpu_agents_;
|
||||
|
||||
address Device::mg_sync_ = nullptr;
|
||||
|
||||
static HsaDeviceId getHsaDeviceId(hsa_agent_t device, uint32_t& pci_id) {
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(device, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &pci_id)) {
|
||||
return HSA_INVALID_DEVICE_ID;
|
||||
bool NullDevice::create(const amd::Isa &isa) {
|
||||
if (!isa.runtimeRocSupported()) {
|
||||
LogPrintfError("Offline HSA device %s is not supported", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
char agent_name[64] = {0};
|
||||
|
||||
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(device, HSA_AGENT_INFO_NAME, agent_name)) {
|
||||
return HSA_INVALID_DEVICE_ID;
|
||||
}
|
||||
|
||||
if (::strncmp(agent_name, "gfx", 3) != 0) {
|
||||
return HSA_INVALID_DEVICE_ID;
|
||||
}
|
||||
|
||||
for (uint i = 0; i < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++i) {
|
||||
if (::strcmp(agent_name, DeviceInfo[i].machineTargetLC_) == 0) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return HSA_INVALID_DEVICE_ID;
|
||||
}
|
||||
|
||||
bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
|
||||
online_ = false;
|
||||
deviceInfo_ = deviceInfo;
|
||||
// Mark the device as GPU type
|
||||
info_.type_ = CL_DEVICE_TYPE_GPU;
|
||||
info_.vendorId_ = 0x1002;
|
||||
@@ -134,24 +114,38 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
|
||||
roc::Settings* hsaSettings = new roc::Settings();
|
||||
settings_ = hsaSettings;
|
||||
if (!hsaSettings ||
|
||||
!hsaSettings->create(false, deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_)) {
|
||||
LogError("Error creating settings for nullptr HSA device");
|
||||
!hsaSettings->create(false, isa.versionMajor(), isa.versionMinor(),
|
||||
isa.xnack() == amd::Isa::Feature::Enabled)) {
|
||||
LogPrintfError("Error creating settings for offline HSA device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ValidateComgr()) {
|
||||
LogError("Code object manager initialization failed!");
|
||||
LogPrintfError("Code object manager initialization failed for offline HSA device %s",
|
||||
isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd::Device::create(isa)) {
|
||||
LogPrintfError("Unable to setup offline HSA device %s", isa.targetId());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Report the device name
|
||||
::strncpy(info_.name_, "AMD HSA Device", sizeof(info_.name_) - 1);
|
||||
::strncpy(info_.name_, isa.targetId(), sizeof(info_.name_) - 1);
|
||||
info_.gfxipMajor_ = isa.versionMajor();
|
||||
info_.gfxipMinor_ = isa.versionMinor();
|
||||
info_.gfxipStepping_ = isa.versionStepping();
|
||||
::strncpy(info_.targetId_, isa.isaName().c_str(), sizeof(info_.targetId_) - 1);
|
||||
info_.extensions_ = getExtensionString();
|
||||
info_.maxWorkGroupSize_ = hsaSettings->maxWorkGroupSize_;
|
||||
::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1);
|
||||
info_.oclcVersion_ = "OpenCL C " OPENCL_C_VERSION_STR " ";
|
||||
info_.spirVersions_ = "";
|
||||
::strncpy(info_.driverVersion_, "1.0 Provisional (hsa)", sizeof(info_.driverVersion_) - 1);
|
||||
std::stringstream ss;
|
||||
ss << AMD_BUILD_STRING " (HSA," << (settings().useLightning_ ? "LC" : "HSAIL");
|
||||
ss << ") [Offline]";
|
||||
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
|
||||
info_.version_ = "OpenCL " OPENCL_VERSION_STR " ";
|
||||
return true;
|
||||
}
|
||||
@@ -160,6 +154,7 @@ Device::Device(hsa_agent_t bkendDevice)
|
||||
: mapCacheOps_(nullptr)
|
||||
, mapCache_(nullptr)
|
||||
, _bkendDevice(bkendDevice)
|
||||
, pciDeviceId_(0)
|
||||
, gpuvm_segment_max_alloc_(0)
|
||||
, alloc_granularity_(0)
|
||||
, context_(nullptr)
|
||||
@@ -311,37 +306,35 @@ bool NullDevice::init() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return without initializing offline device list
|
||||
return true;
|
||||
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
// If there is an HSA enabled device online then skip any offline device
|
||||
std::vector<Device*> devices;
|
||||
devices = getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
|
||||
// Load the offline devices
|
||||
// Iterate through the set of available offline devices
|
||||
for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); id++) {
|
||||
// Create offline devices for all ISAs not already associated with an online
|
||||
// device. This allows code objects to be compiled for all supported ISAs.
|
||||
std::vector<Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) {
|
||||
if (!isa->runtimeRocSupported()) {
|
||||
continue;
|
||||
}
|
||||
bool isOnline = false;
|
||||
// Check if the particular device is online
|
||||
for (unsigned int i = 0; i < devices.size(); i++) {
|
||||
if (::strcmp(static_cast<NullDevice*>(devices[i])->deviceInfo_.machineTarget_,
|
||||
DeviceInfo[id].machineTarget_) == 0) {
|
||||
for (size_t i = 0; i < devices.size(); i++) {
|
||||
if (&(devices[i]->isa()) == isa) {
|
||||
isOnline = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isOnline) {
|
||||
continue;
|
||||
}
|
||||
NullDevice* nullDevice = new NullDevice();
|
||||
if (!nullDevice->create(DeviceInfo[id])) {
|
||||
LogError("Error creating new instance of Device.");
|
||||
delete nullDevice;
|
||||
std::unique_ptr<NullDevice> nullDevice(new NullDevice());
|
||||
if (!nullDevice) {
|
||||
LogPrintfError("Error allocating new instance of offline HSA device %s", isa->targetId());
|
||||
return false;
|
||||
}
|
||||
nullDevice->registerDevice();
|
||||
if (!nullDevice->create(*isa)) {
|
||||
LogPrintfError("Skipping creating new instance of offline HSA sevice %s", isa->targetId());
|
||||
continue;
|
||||
}
|
||||
nullDevice.release()->registerDevice();
|
||||
}
|
||||
#endif // defined(WITH_COMPILER_LIB)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -516,22 +509,11 @@ bool Device::init() {
|
||||
|
||||
for (auto agent : gpu_agents_) {
|
||||
std::unique_ptr<Device> roc_device(new Device(agent));
|
||||
|
||||
if (!roc_device) {
|
||||
LogError("Error creating new instance of Device on then heap.");
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t pci_id;
|
||||
HsaDeviceId deviceId = getHsaDeviceId(agent, pci_id);
|
||||
if (deviceId == HSA_INVALID_DEVICE_ID) {
|
||||
LogPrintfError("Invalid HSA device %x", pci_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
roc_device->deviceInfo_ = DeviceInfo[deviceId];
|
||||
roc_device->deviceInfo_.pciDeviceId_ = pci_id;
|
||||
|
||||
if (!roc_device->create()) {
|
||||
LogError("Error creating new instance of Device.");
|
||||
continue;
|
||||
@@ -585,16 +567,84 @@ void Device::tearDown() {
|
||||
}
|
||||
|
||||
bool Device::create() {
|
||||
char agent_name[64] = {0};
|
||||
if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_NAME, agent_name)) {
|
||||
LogError("Unable to get HSA device name");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID,
|
||||
&pciDeviceId_)) {
|
||||
LogPrintfError("Unable to get PCI ID of HSA device %s", agent_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
struct agent_isas_t {
|
||||
uint count;
|
||||
hsa_isa_t first_isa;
|
||||
} agent_isas = {0, {0}};
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_iterate_isas(_bkendDevice,
|
||||
[](hsa_isa_t isa, void* data) {
|
||||
agent_isas_t* agent_isas = static_cast<agent_isas_t*>(data);
|
||||
if (agent_isas->count++ == 0) {
|
||||
agent_isas->first_isa = isa;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&agent_isas)) {
|
||||
LogPrintfError("Unable to iterate supported ISAs for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
if (agent_isas.count != 1) {
|
||||
LogPrintfError("HSA device %s (PCI ID %x) has %u ISAs but can only support a single ISA",
|
||||
agent_name, pciDeviceId_, agent_isas.count);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t isa_name_length = 0;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_isa_get_info_alt(agent_isas.first_isa, (hsa_isa_info_t)HSA_ISA_INFO_NAME_LENGTH,
|
||||
&isa_name_length)) {
|
||||
LogPrintfError("Unable to get ISA name length for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<char> isa_name(isa_name_length + 1, '\0');
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_isa_get_info_alt(agent_isas.first_isa, (hsa_isa_info_t)HSA_ISA_INFO_NAME,
|
||||
isa_name.data())) {
|
||||
LogPrintfError("Unable to get ISA name for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
const amd::Isa *isa = amd::Isa::findIsa(isa_name.data());
|
||||
if (!isa || !isa->runtimeRocSupported()) {
|
||||
LogPrintfError("Unsupported HSA device %s (PCI ID %x) for ISA %s", agent_name, pciDeviceId_,
|
||||
isa_name.data());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) {
|
||||
LogPrintfError("Unable to get profile for HSA device %s (PCI ID %x)", agent_name, pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t coop_groups = 0;
|
||||
// Check cooperative groups for HIP only
|
||||
if (amd::IS_HIP && (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES), &coop_groups))) {
|
||||
if (amd::IS_HIP &&
|
||||
(HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES),
|
||||
&coop_groups))) {
|
||||
LogPrintfError(
|
||||
"Unable to determine if cooperative queues are supported for HSA device %s (PCI ID %x)",
|
||||
agent_name, pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -603,17 +653,23 @@ bool Device::create() {
|
||||
roc::Settings* hsaSettings = new roc::Settings();
|
||||
settings_ = hsaSettings;
|
||||
if (!hsaSettings ||
|
||||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipMajor_,
|
||||
deviceInfo_.gfxipMinor_, coop_groups)) {
|
||||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(),
|
||||
isa->versionMinor(), isa->xnack() == amd::Isa::Feature::Enabled,
|
||||
coop_groups)) {
|
||||
LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ValidateComgr()) {
|
||||
LogError("Code object manager initialization failed!");
|
||||
LogPrintfError("Code object manager initialization failed for HSA device %s (PCI ID %x)",
|
||||
agent_name, pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd::Device::create()) {
|
||||
if (!amd::Device::create(*isa)) {
|
||||
LogPrintfError("Unable to setup device for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -621,6 +677,8 @@ bool Device::create() {
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_BDFID), &hsa_bdf_id)) {
|
||||
LogPrintfError("Unable to determine BFD ID for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -632,6 +690,8 @@ bool Device::create() {
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DOMAIN), &pci_domain_id)) {
|
||||
LogPrintfError("Unable to determine domain ID for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
info_.pciDomainID = pci_domain_id;
|
||||
@@ -650,7 +710,8 @@ bool Device::create() {
|
||||
#endif
|
||||
|
||||
if (populateOCLDeviceConstants() == false) {
|
||||
LogError("populateOCLDeviceConstants failed!");
|
||||
LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name,
|
||||
pciDeviceId_);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -995,35 +1056,11 @@ Memory* Device::getGpuMemory(amd::Memory* mem) const {
|
||||
bool Device::populateOCLDeviceConstants() {
|
||||
info_.available_ = true;
|
||||
|
||||
hsa_isa_t isa = {0};
|
||||
if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_ISA, &isa) != HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t isaNameLength = 0;
|
||||
if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &isaNameLength) != HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((isaNameLength + 1) > sizeof(info_.targetId_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, info_.targetId_) != HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
info_.targetId_[isaNameLength] = '\0';
|
||||
|
||||
char *gfxSubString = ::strstr(info_.targetId_, "gfx");
|
||||
if (nullptr == gfxSubString) {
|
||||
return false;
|
||||
}
|
||||
::strncpy(info_.name_, gfxSubString, sizeof(info_.name_) - 1);
|
||||
|
||||
info_.gfxipMajor_ = deviceInfo_.gfxipMajor_;
|
||||
info_.gfxipMinor_ = deviceInfo_.gfxipMinor_;
|
||||
info_.gfxipStepping_ = deviceInfo_.gfxipStepping_;
|
||||
|
||||
::strncpy(info_.name_, isa().targetId(), sizeof(info_.name_) - 1);
|
||||
info_.gfxipMajor_ = isa().versionMajor();
|
||||
info_.gfxipMinor_ = isa().versionMinor();
|
||||
info_.gfxipStepping_ = isa().versionStepping();
|
||||
::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1);
|
||||
char device_name[64] = {0};
|
||||
if (HSA_STATUS_SUCCESS == hsa_agent_get_info(_bkendDevice,
|
||||
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME,
|
||||
@@ -1072,7 +1109,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
|
||||
//TODO: add the assert statement for Raven
|
||||
if ((info_.gfxipMajor_*100 + info_.gfxipMinor_*10 + info_.gfxipStepping_) != 902) {
|
||||
if (!(isa().versionMajor() == 9 && isa().versionMinor() == 0 && isa().versionStepping() == 2)) {
|
||||
assert(info_.maxEngineClockFrequency_ > 0);
|
||||
}
|
||||
|
||||
@@ -1258,7 +1295,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1);
|
||||
|
||||
// Enable OpenCL 2.0 for Vega10+
|
||||
if (deviceInfo_.gfxipMajor_ >= 9) {
|
||||
if (isa().versionMajor() >= 9) {
|
||||
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " ";
|
||||
} else {
|
||||
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
|
||||
@@ -1394,14 +1431,14 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
if (amd::IS_HIP) {
|
||||
// Report atomics capability based on GFX IP, control on Hawaii
|
||||
if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipMajor_ >= 8) {
|
||||
if (info_.hostUnifiedMemory_ || isa().versionMajor() >= 8) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
}
|
||||
else if (!settings().useLightning_) {
|
||||
// Report atomics capability based on GFX IP, control on Hawaii
|
||||
// and Vega10.
|
||||
if (info_.hostUnifiedMemory_ || (deviceInfo_.gfxipMajor_ == 8)) {
|
||||
if (info_.hostUnifiedMemory_ || (isa().versionMajor() == 8)) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
}
|
||||
@@ -1409,10 +1446,10 @@ bool Device::populateOCLDeviceConstants() {
|
||||
|
||||
if (settings().checkExtension(ClAmdDeviceAttributeQuery)) {
|
||||
info_.simdPerCU_ = settings().enableWgpMode_
|
||||
? (2 * deviceInfo_.simdPerCU_)
|
||||
: deviceInfo_.simdPerCU_;
|
||||
info_.simdWidth_ = deviceInfo_.simdWidth_;
|
||||
info_.simdInstructionWidth_ = deviceInfo_.simdInstructionWidth_;
|
||||
? (2 * isa().simdPerCU())
|
||||
: isa().simdPerCU();
|
||||
info_.simdWidth_ = isa().simdWidth();
|
||||
info_.simdInstructionWidth_ = isa().simdInstructionWidth();
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_WAVEFRONT_SIZE, &info_.wavefrontWidth_)) {
|
||||
return false;
|
||||
@@ -1454,16 +1491,16 @@ bool Device::populateOCLDeviceConstants() {
|
||||
info_.l2CacheSize_ = cache_sizes[1];
|
||||
info_.timeStampFrequency_ = 1000000;
|
||||
info_.globalMemChannelBanks_ = 4;
|
||||
info_.globalMemChannelBankWidth_ = deviceInfo_.memChannelBankWidth_;
|
||||
info_.localMemSizePerCU_ = deviceInfo_.localMemSizePerCU_;
|
||||
info_.localMemBanks_ = deviceInfo_.localMemBanks_;
|
||||
info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth();
|
||||
info_.localMemSizePerCU_ = isa().localMemSizePerCU();
|
||||
info_.localMemBanks_ = isa().localMemBanks();
|
||||
info_.numAsyncQueues_ = kMaxAsyncQueues;
|
||||
info_.numRTQueues_ = info_.numAsyncQueues_;
|
||||
info_.numRTCUs_ = info_.maxComputeUnits_;
|
||||
|
||||
//TODO: set to true once thread trace support is available
|
||||
info_.threadTraceEnable_ = false;
|
||||
info_.pcieDeviceId_ = deviceInfo_.pciDeviceId_;
|
||||
info_.pcieDeviceId_ = pciDeviceId_;
|
||||
info_.cooperativeGroups_ = settings().enableCoopGroups_;
|
||||
info_.cooperativeMultiDeviceGroups_ = settings().enableCoopMultiDeviceGroups_;
|
||||
}
|
||||
@@ -1481,7 +1518,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
|
||||
// Get values from Comgr
|
||||
amd_comgr_metadata_node_t isaMeta;
|
||||
if (getIsaMeta(info_.targetId_, isaMeta)) {
|
||||
if (getIsaMeta(std::move(isa().isaName()), isaMeta)) {
|
||||
std::string vgprValue;
|
||||
info_.availableVGPRs_ = (getValueFromIsaMeta(isaMeta, "AddressableNumVGPRs", vgprValue))
|
||||
? (atoi(vgprValue.c_str()) * info_.simdPerCU_)
|
||||
@@ -1595,14 +1632,11 @@ bool Device::bindExternalDevice(uint flags, void* const gfxDevice[], void* gfxCo
|
||||
return false;
|
||||
}
|
||||
|
||||
bool match = true;
|
||||
match &= info_.deviceTopology_.pcie.bus == info.pci_bus;
|
||||
match &= info_.deviceTopology_.pcie.device == info.pci_device;
|
||||
match &= info_.deviceTopology_.pcie.function == info.pci_function;
|
||||
match &= info_.vendorId_ == info.vendor_id;
|
||||
match &= deviceInfo_.pciDeviceId_ == info.device_id;
|
||||
return info_.deviceTopology_.pcie.bus == info.pci_bus &&
|
||||
info_.deviceTopology_.pcie.device == info.pci_device &&
|
||||
info_.deviceTopology_.pcie.function == info.pci_function &&
|
||||
info_.vendorId_ == info.vendor_id && pciDeviceId_ == info.device_id;
|
||||
|
||||
return match;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ class NullDevice : public amd::Device {
|
||||
NullDevice(){};
|
||||
|
||||
//! create the device
|
||||
bool create(const AMDDeviceInfo& deviceInfo);
|
||||
bool create(const amd::Isa &isa);
|
||||
|
||||
//! Initialise all the offline devices that can be used for compilation
|
||||
static bool init();
|
||||
@@ -126,7 +126,6 @@ class NullDevice : public amd::Device {
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr);
|
||||
const AMDDeviceInfo& deviceInfo() const { return deviceInfo_; }
|
||||
|
||||
// List of dummy functions which are disabled for NullDevice
|
||||
|
||||
@@ -232,8 +231,6 @@ class NullDevice : public amd::Device {
|
||||
static bool destroyCompiler();
|
||||
//! Handle to the compiler
|
||||
static Compiler* compilerHandle_;
|
||||
//! Device Id for an HsaDevice
|
||||
AMDDeviceInfo deviceInfo_;
|
||||
|
||||
private:
|
||||
static constexpr bool offlineDevice_ = true;
|
||||
@@ -515,6 +512,7 @@ class Device : public NullDevice {
|
||||
std::vector<Device*> enabled_p2p_devices_; //!< List of user enabled P2P devices for this device
|
||||
mutable std::mutex lock_allow_access_; //!< To serialize allow_access calls
|
||||
hsa_agent_t _bkendDevice;
|
||||
uint32_t pciDeviceId_;
|
||||
hsa_agent_t* p2p_agents_list_;
|
||||
hsa_profile_t agent_profile_;
|
||||
hsa_amd_memory_pool_t group_segment_;
|
||||
|
||||
@@ -1049,7 +1049,7 @@ bool Image::createInteropImage() {
|
||||
}
|
||||
|
||||
if (obj->getGLTarget() == GL_TEXTURE_CUBE_MAP) {
|
||||
desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipMajor_);
|
||||
desc.setFace(obj->getCubemapFace(), dev().isa().versionMajor());
|
||||
}
|
||||
|
||||
hsa_status_t err =
|
||||
|
||||
@@ -234,9 +234,8 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
|
||||
return true;
|
||||
}
|
||||
|
||||
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) {
|
||||
machineTarget_ = rocNullDevice().deviceInfo().machineTarget_;
|
||||
}
|
||||
HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner)
|
||||
: roc::Program(device, owner) {}
|
||||
|
||||
HSAILProgram::~HSAILProgram() {
|
||||
#if defined(WITH_COMPILER_LIB)
|
||||
@@ -440,7 +439,6 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner)
|
||||
: roc::Program(device, owner) {
|
||||
isLC_ = true;
|
||||
isHIP_ = (owner.language() == amd::Program::HIP);
|
||||
machineTarget_ = rocNullDevice().deviceInfo().machineTargetLC_;
|
||||
}
|
||||
|
||||
bool LightningProgram::createBinary(amd::option::Options* options) {
|
||||
|
||||
@@ -74,7 +74,7 @@ class Program : public device::Program {
|
||||
);
|
||||
virtual bool createBinary(amd::option::Options* options) = 0;
|
||||
|
||||
virtual const aclTargetInfo& info(const char* str = "") { return info_; }
|
||||
virtual const aclTargetInfo& info() { return info_; }
|
||||
|
||||
protected:
|
||||
//! Disable default copy constructor
|
||||
|
||||
@@ -93,7 +93,8 @@ Settings::Settings() {
|
||||
barrier_sync_ = (!flagIsDefault(ROC_BARRIER_SYNC)) ? ROC_BARRIER_SYNC : true;
|
||||
}
|
||||
|
||||
bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups) {
|
||||
bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, bool enableXNACK,
|
||||
bool coop_groups) {
|
||||
customHostAllocator_ = false;
|
||||
|
||||
if (fullProfile) {
|
||||
@@ -105,7 +106,8 @@ bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coo
|
||||
pinnedXferSize_ = std::max(pinnedXferSize_, pinnedMinXferSize_);
|
||||
stagedXferSize_ = std::max(stagedXferSize_, pinnedMinXferSize_ + 4 * Ki);
|
||||
}
|
||||
enableXNACK_ = apuSystem_ ? 1 : 0 ; // enable xnack for APU system
|
||||
enableXNACK_ = enableXNACK;
|
||||
hsailExplicitXnack_ = enableXNACK;
|
||||
|
||||
// Enable extensions
|
||||
enableExtension(ClKhrByteAddressableStore);
|
||||
|
||||
@@ -89,7 +89,8 @@ class Settings : public device::Settings {
|
||||
Settings();
|
||||
|
||||
//! Creates settings
|
||||
bool create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups = false);
|
||||
bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, bool enableXNACK,
|
||||
bool coop_groups = false);
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
|
||||
Ссылка в новой задаче
Block a user