rocr: Generic ISA targets support

Change-Id: I6a0341ec9c1ec1e710143676b80a8a3c1a78f725


[ROCm/ROCR-Runtime commit: 0c18ff22e1]
Этот коммит содержится в:
Chris Freehill
2024-10-17 16:36:40 -05:00
родитель dd037425ed
Коммит 8fe7c40390
17 изменённых файлов: 369 добавлений и 203 удалений
+18 -2
Просмотреть файл
@@ -236,6 +236,18 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
hsa_status_t (*callback)(hsa_region_t region, void* data),
void* data) const = 0;
// @brief Invoke the user provided callback for each isa supported by
// this agent.
//
// @param [in] callback User provided callback function.
// @param [in] data User provided pointer as input for @p callback.
//
// @retval ::HSA_STATUS_SUCCESS if the callback function for each traversed
// isa returns ::HSA_STATUS_SUCCESS.
virtual hsa_status_t IterateSupportedIsas(
hsa_status_t (*callback)(hsa_isa_t isa, void* data),
void* data) const = 0;
// @brief Invoke the callback for each cache useable by this agent.
virtual hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
void* data) const = 0;
@@ -278,8 +290,11 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
// @brief Returns an array of regions owned by the agent.
virtual const std::vector<const core::MemoryRegion*>& regions() const = 0;
// @details Returns the agent's instruction set architecture.
virtual const Isa* isa() const = 0;
// @brief Returns the ISAs supported by the agent.
// @details The returned vector is a list of pointers to the supported ISA,
// ordered from most specific (and performant) to most generic. For CPU
// and AIE agents, this list will be empty.
virtual const std::vector<const core::Isa *>& supported_isas() const = 0;
virtual uint64_t HiveId() const { return 0; }
@@ -343,6 +358,7 @@ protected:
}
hsa_agent_t public_handle_;
std::vector<const core::Isa *> supported_isas_;
private:
// @brief Node id.
+8 -1
Просмотреть файл
@@ -72,6 +72,10 @@ public:
void *data),
void *value) const override;
hsa_status_t IterateSupportedIsas(
hsa_status_t (*callback)(hsa_isa_t isa, void* data),
void* data) const override;
hsa_status_t GetInfo(hsa_agent_info_t attribute, void *value) const override;
hsa_status_t QueueCreate(size_t size, hsa_queue_type32_t queue_type,
@@ -80,7 +84,10 @@ public:
uint32_t group_segment_size,
core::Queue **queue) override;
const core::Isa *isa() const override { return nullptr; }
// @brief Override from core::Agent.
const std::vector<const core::Isa*>& supported_isas() const override {
return supported_isas_;
}
const std::vector<const core::MemoryRegion *> &regions() const override {
return regions_;
+8 -3
Просмотреть файл
@@ -90,6 +90,10 @@ class CpuAgent : public core::Agent {
void* data),
void* data) const override;
hsa_status_t IterateSupportedIsas(
hsa_status_t (*callback)(hsa_isa_t isa, void* data),
void* data) const override;
// @brief Override from core::Agent.
hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
void* value) const override;
@@ -127,9 +131,10 @@ class CpuAgent : public core::Agent {
return regions_;
}
// @brief OVerride from core::Agent.
const core::Isa* isa() const override { return NULL; }
// @brief Override from core::Agent.
const std::vector<const core::Isa*>& supported_isas() const override {
return supported_isas_;
}
private:
// @brief Query the driver to get the region list owned by this agent.
void InitRegionList();
+8 -3
Просмотреть файл
@@ -275,6 +275,10 @@ class GpuAgent : public GpuAgentInt {
void* data),
void* data) const override;
hsa_status_t IterateSupportedIsas(
hsa_status_t (*callback)(hsa_isa_t isa, void* data),
void* data) const override;
// @brief Override from core::Agent.
hsa_status_t IterateCache(hsa_status_t (*callback)(hsa_cache_t cache, void* data),
void* value) const override;
@@ -381,8 +385,8 @@ class GpuAgent : public GpuAgentInt {
return regions_;
}
// @brief Override from core::Agent.
const core::Isa* isa() const override { return isa_; }
const std::vector<const core::Isa *>& supported_isas() const override {
return supported_isas_;}
// @brief Override from AMD::GpuAgentInt.
__forceinline bool is_kv_device() const override { return is_kv_device_; }
@@ -432,7 +436,8 @@ class GpuAgent : public GpuAgentInt {
__forceinline bool AsyncScratchReclaimEnabled() const override {
// TODO: Need to update min CP FW ucode version once it is released
return (core::Runtime::runtime_singleton_->flag().enable_scratch_async_reclaim() &&
isa()->GetMajorVersion() == 9 && isa()->GetMinorVersion() == 4 &&
supported_isas()[0]->GetMajorVersion() == 9 &&
supported_isas()[0]->GetMinorVersion() == 4 &&
properties_.EngineId.ui32.uCode > 999);
};
+1 -5
Просмотреть файл
@@ -163,11 +163,7 @@ public:
virtual hsa_isa_t IsaFromName(const char *name) = 0;
// This function will be deleted in a future patch. Use the overload
// that takes a generic version instead.
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) = 0;
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa, unsigned genericVersion) { return IsaSupportedByAgent(agent, isa); }
virtual bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa, unsigned genericVersion) = 0;
virtual void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) = 0;
+1 -1
Просмотреть файл
@@ -56,7 +56,7 @@ class LoaderContext final : public rocr::amd::hsa::loader::Context {
hsa_isa_t IsaFromName(const char *name) override;
bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa) override;
bool IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t code_object_isa, unsigned codeGenericVersion) override;
void* SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, size_t size, size_t align, bool zero) override;
+12 -5
Просмотреть файл
@@ -117,12 +117,16 @@ class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> {
/// @returns True if @p code_object_isa and @p agent_isa are compatible,
/// false otherwise.
static bool IsCompatible(const Isa &code_object_isa, const Isa &agent_isa);
static bool IsCompatible(const Isa &code_object_isa,
const Isa &agent_isa, unsigned int codeGenericVersion);
/// @returns This Isa's version.
const Version &GetVersion() const {
return version_;
}
/// @returns This Isa's generic target.
const std::string & GetIsaGeneric() const {return generic_;}
/// @returns SRAM ECC feature status.
IsaFeature GetSramecc() const {
@@ -188,13 +192,15 @@ class Isa final: public amd::hsa::common::Signed<0xB13594F2BD8F212D> {
private:
/// @brief Default constructor.
Isa()
: targetid_(nullptr),
version_(Version(-1, -1, -1)),
: version_(Version(-1, -1, -1)),
sramecc_(IsaFeature::Unsupported),
xnack_(IsaFeature::Unsupported) {}
// @brief Isa's target ID name.
const char* targetid_;
std::string targetid_;
// @brief Isa's generic version, if it exists. "" otherwise.
std::string generic_;
/// @brief Isa's version.
Version version_;
@@ -223,7 +229,8 @@ class IsaRegistry final {
static const Isa *GetIsa(const Isa::Version &version,
IsaFeature sramecc = IsaFeature::Any,
IsaFeature xnack = IsaFeature::Any);
static const std::unordered_map<std::string, unsigned int> &
GetSupportedGenericVersions();
private:
/// @brief IsaRegistry's map type.
typedef std::unordered_map<std::string, std::reference_wrapper<const Isa>> IsaMap;
+11
Просмотреть файл
@@ -91,6 +91,17 @@ hsa_status_t AieAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
return HSA_STATUS_ERROR_INVALID_CACHE;
}
// @brief Invokes @p callback once for every entry of supported_isas(), in
// list order, passing the entry's hsa_isa_t handle and @p data.
//
// @retval The first callback status that differs from ::HSA_STATUS_SUCCESS
// (iteration stops there), otherwise ::HSA_STATUS_SUCCESS.
hsa_status_t AieAgent::IterateSupportedIsas(
    hsa_status_t (*callback)(hsa_isa_t isa, void* data),
    void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);
  const auto& isa_list = supported_isas();
  for (auto it = isa_list.begin(); it != isa_list.end(); ++it) {
    const hsa_status_t status = invoke(core::Isa::Handle(*it), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }
  return HSA_STATUS_SUCCESS;
}
hsa_status_t AieAgent::GetInfo(hsa_agent_info_t attribute, void *value) const {
const size_t attribute_ = static_cast<size_t>(attribute);
+18 -15
Просмотреть файл
@@ -100,7 +100,7 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, Scr
// Values written to the HW doorbell are modulo the doubled size.
// This allows the HW to accept (doorbell == last_doorbell + queue_size).
// This workaround is required for GFXIP 7 and GFXIP 8 ASICs.
const core::Isa* isa = agent_->isa();
const core::Isa* isa = agent_->supported_isas()[0];
queue_full_workaround_ =
(isa->GetMajorVersion() == 7 || isa->GetMajorVersion() == 8)
? 1
@@ -208,7 +208,7 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, Scr
assert(amd_queue_.private_segment_aperture_base_hi != 0 && "No private region found.");
}
if (agent_->isa()->GetMajorVersion() >= 11)
if (agent_->supported_isas()[0]->GetMajorVersion() >= 11)
queue_scratch_.mem_alignment_size = 256;
else
queue_scratch_.mem_alignment_size = 1024;
@@ -1016,8 +1016,9 @@ void AqlQueue::HandleInsufficientScratch(hsa_signal_value_t& error_code,
// For gfx10+ devices we must attempt to assign the smaller of 256 lanes or 16 groups to each
// engine.
if (agent_->isa()->GetMajorVersion() >= 10 && maxGroupsPerEngine < 16 &&
lanes_per_group * maxGroupsPerEngine < 256) {
if (agent_->supported_isas()[0]->GetMajorVersion() >= 10 &&
maxGroupsPerEngine < 16 &&
lanes_per_group * maxGroupsPerEngine < 256) {
uint64_t groups_per_interleave = (256 + lanes_per_group - 1) / lanes_per_group;
maxGroupsPerEngine = Min(groups_per_interleave, 16ul);
}
@@ -1118,7 +1119,7 @@ void AqlQueue::HandleInsufficientScratch(hsa_signal_value_t& error_code,
if (scratch.large) {
amd_queue_.queue_properties |= AMD_QUEUE_PROPERTIES_USE_SCRATCH_ONCE;
// Set system release fence to flush scratch stores with older firmware versions.
if ((agent_->isa()->GetMajorVersion() == 8) && (agent_->GetMicrocodeVersion() < 729)) {
if ((agent_->supported_isas()[0]->GetMajorVersion() == 8) && (agent_->GetMicrocodeVersion() < 729)) {
pkt->dispatch.header &=
~(((1 << HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE) - 1)
<< HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
@@ -1339,7 +1340,7 @@ bool AqlQueue::ExceptionHandler(hsa_signal_value_t error_code, void* arg) {
// Fallback if KFD does not support GPU core dump. In this case, the core dump is
// generated by hsa-runtime.
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_core_dump &&
queue->agent_->isa()->GetMajorVersion() != 11) {
queue->agent_->supported_isas()[0]->GetMajorVersion() != 11) {
if (pcs::PcsRuntime::instance()->SessionsActive())
fprintf(stderr, "GPU core dump skipped because PC Sampling active\n");
@@ -1412,7 +1413,7 @@ hsa_status_t AqlQueue::SetCUMasking(uint32_t num_cu_mask_count, const uint32_t*
if ((!cu_mask_.empty()) || (num_cu_mask_count != 0) || (!global_mask.empty())) {
// Devices with WGPs must conform to even-indexed contiguous pairwise CU enablement.
if (agent_->isa()->GetMajorVersion() >= 10) {
if (agent_->supported_isas()[0]->GetMajorVersion() >= 10) {
for (int i = 0; i < mask.size() * 32; i += 2) {
uint32_t cu_pair = (mask[i / 32] >> (i % 32)) & 0x3;
if (cu_pair && cu_pair != 0x3) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
@@ -1481,7 +1482,8 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b, hsa_fence_scope
constexpr uint32_t ib_jump_size_dw = 4;
uint32_t ib_jump_cmd[ib_jump_size_dw] = {
PM4_HDR(PM4_HDR_IT_OPCODE_INDIRECT_BUFFER, ib_jump_size_dw, agent_->isa()->GetMajorVersion()),
PM4_HDR(PM4_HDR_IT_OPCODE_INDIRECT_BUFFER, ib_jump_size_dw,
agent_->supported_isas()[0]->GetMajorVersion()),
PM4_INDIRECT_BUFFER_DW1_IB_BASE_LO(uint32_t(uintptr_t(pm4_ib_buf_) >> 2)),
PM4_INDIRECT_BUFFER_DW2_IB_BASE_HI(uint32_t(uintptr_t(pm4_ib_buf_) >> 32)),
(PM4_INDIRECT_BUFFER_DW3_IB_SIZE(uint32_t(cmd_size_b / sizeof(uint32_t))) |
@@ -1493,7 +1495,7 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b, hsa_fence_scope
hsa_signal_t local_signal = {0};
hsa_status_t err;
if (agent_->isa()->GetMajorVersion() <= 8) {
if (agent_->supported_isas()[0]->GetMajorVersion() <= 8) {
// Construct a set of PM4 to fit inside the AQL packet slot.
uint32_t slot_dw_idx = 0;
@@ -1504,7 +1506,8 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b, hsa_fence_scope
uint32_t* nop_pad = &slot_data[slot_dw_idx];
slot_dw_idx += nop_pad_size_dw;
nop_pad[0] = PM4_HDR(PM4_HDR_IT_OPCODE_NOP, nop_pad_size_dw, agent_->isa()->GetMajorVersion());
nop_pad[0] = PM4_HDR(PM4_HDR_IT_OPCODE_NOP, nop_pad_size_dw,
agent_->supported_isas()[0]->GetMajorVersion());
for (uint32_t i = 1; i < nop_pad_size_dw; ++i) {
nop_pad[i] = 0;
@@ -1523,15 +1526,15 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b, hsa_fence_scope
assert(slot_dw_idx + rel_mem_size_dw <= slot_size_dw && "PM4 exceeded queue slot size");
uint32_t* rel_mem = &slot_data[slot_dw_idx];
rel_mem[0] =
PM4_HDR(PM4_HDR_IT_OPCODE_RELEASE_MEM, rel_mem_size_dw, agent_->isa()->GetMajorVersion());
rel_mem[0] = PM4_HDR(PM4_HDR_IT_OPCODE_RELEASE_MEM, rel_mem_size_dw,
agent_->supported_isas()[0]->GetMajorVersion());
rel_mem[1] = PM4_RELEASE_MEM_DW1_EVENT_INDEX(PM4_RELEASE_MEM_EVENT_INDEX_AQL);
rel_mem[2] = 0;
rel_mem[3] = 0;
rel_mem[4] = 0;
rel_mem[5] = 0;
rel_mem[6] = 0;
} else if (agent_->isa()->GetMajorVersion() >= 9) {
} else if (agent_->supported_isas()[0]->GetMajorVersion() >= 9) {
// Construct an AQL packet to jump to the PM4 IB.
struct amd_aql_pm4_ib {
uint16_t header;
@@ -1582,7 +1585,7 @@ void AqlQueue::ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b, hsa_fence_scope
doorbell->StoreRelease(write_idx);
// Wait for the packet to be consumed.
if (agent_->isa()->GetMajorVersion() <= 8) {
if (agent_->supported_isas()[0]->GetMajorVersion() <= 8) {
while (queue->LoadReadIndexRelaxed() <= write_idx)
os::YieldThread();
@@ -1863,7 +1866,7 @@ void AqlQueue::FillComputeTmpRingSize_Gfx12() {
// @brief Define the Scratch Buffer Descriptor and related parameters
// that enable kernel access scratch memory
void AqlQueue::InitScratchSRD() {
switch (agent_->isa()->GetMajorVersion()) {
switch (agent_->supported_isas()[0]->GetMajorVersion()) {
case 12:
FillBufRsrcWord0();
FillBufRsrcWord1_Gfx11();
+14 -11
Просмотреть файл
@@ -151,15 +151,16 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
}
// Some GFX9 devices require a minimum of 64 DWORDS per ring buffer submission.
if (agent_->isa()->GetVersion() >= core::Isa::Version(9, 0, 0) &&
(agent_->isa()->GetVersion() <= core::Isa::Version(9, 0, 4) ||
agent_->isa()->GetVersion() == core::Isa::Version(9, 0, 12))) {
if (agent_->supported_isas()[0]->GetVersion() >= core::Isa::Version(9, 0, 0) &&
(agent_->supported_isas()[0]->GetVersion() <= core::Isa::Version(9, 0, 4) ||
agent_->supported_isas()[0]->GetVersion() == core::Isa::Version(9, 0, 12))) {
min_submission_size_ = 256;
}
const core::Runtime::LinkInfo& link = core::Runtime::runtime_singleton_->GetLinkInfo(
agent_->node_id(), core::Runtime::runtime_singleton_->cpu_agents()[0]->node_id());
if (agent_->isa()->GetVersion() == core::Isa::Version(7, 0, 1)) {
const core::Runtime::LinkInfo& link =
core::Runtime::runtime_singleton_->GetLinkInfo( agent_->node_id(),
core::Runtime::runtime_singleton_->cpu_agents()[0]->node_id());
if (agent_->supported_isas()[0]->GetVersion() == core::Isa::Version(7, 0, 1)) {
platform_atomic_support_ = false;
} else {
platform_atomic_support_ = link.info.atomic_support_64bit;
@@ -169,8 +170,8 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
// gfx90a can support xGMI host to device connections so bypass HDP flush
// in this case.
// gfx101x seems to have issues with HDP flushes
if (agent_->isa()->GetMajorVersion() >= 9 &&
!(agent_->isa()->GetMajorVersion() == 10 && agent_->isa()->GetMinorVersion() == 1)) {
if (agent_->supported_isas()[0]->GetMajorVersion() >= 9 &&
!(agent_->supported_isas()[0]->GetMajorVersion() == 10 && agent_->supported_isas()[0]->GetMinorVersion() == 1)) {
hdp_flush_support_ = link.info.link_type != HSA_AMD_LINK_INFO_TYPE_XGMI;
}
@@ -556,7 +557,8 @@ BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::SubmitCopyRe
throw AMD::hsa_exception(HSA_STATUS_ERROR_INVALID_ARGUMENT, "Copy rect slice needed.");
// GFX12 or later use a different packet format that is incompatible (fields changed in size and location).
const bool isGFX12Plus = (agent_->isa()->GetMajorVersion() >= 12);
const bool isGFX12Plus =
(agent_->supported_isas()[0]->GetMajorVersion() >= 12);
// Common and GFX12 packet must match in size to use same code for vector/append.
static_assert(sizeof(SDMA_PKT_COPY_LINEAR_RECT) == sizeof(SDMA_PKT_COPY_LINEAR_RECT_GFX12), "");
@@ -777,7 +779,7 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildFe
packet_addr->HEADER_UNION.op = SDMA_OP_FENCE;
if (agent_->isa()->GetMajorVersion() >= 10) {
if (agent_->supported_isas()[0]->GetMajorVersion() >= 10) {
packet_addr->HEADER_UNION.mtype = 3;
}
@@ -847,7 +849,8 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildCo
};
// GFX12 or later use a different packet format that is incompatible (fields changed in size and location).
const bool isGFX12Plus = (agent_->isa()->GetMajorVersion() >= 12);
const bool isGFX12Plus =
(agent_->supported_isas()[0]->GetMajorVersion() >= 12);
// Limits in terms of element count
const uint32_t max_pitch = 1 << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::pitch_bits : SDMA_PKT_COPY_LINEAR_RECT::pitch_bits);
+11
Просмотреть файл
@@ -181,6 +181,17 @@ hsa_status_t CpuAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
return HSA_STATUS_SUCCESS;
}
// @brief Invokes @p callback once for every entry of supported_isas(), in
// list order, passing the entry's hsa_isa_t handle and @p data.
//
// @retval The first callback status that differs from ::HSA_STATUS_SUCCESS
// (iteration stops there), otherwise ::HSA_STATUS_SUCCESS.
hsa_status_t CpuAgent::IterateSupportedIsas(
    hsa_status_t (*callback)(hsa_isa_t isa, void* data),
    void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);
  const auto& isa_list = supported_isas();
  for (auto it = isa_list.begin(); it != isa_list.end(); ++it) {
    const hsa_status_t status = invoke(core::Isa::Handle(*it), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }
  return HSA_STATUS_SUCCESS;
}
hsa_status_t CpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
// agent, and vendor name size limit
+16
Просмотреть файл
@@ -180,6 +180,11 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props, bool xna
assert(isa_ != nullptr && "ISA registry inconsistency.");
supported_isas_.push_back(isa_);
if (!isa_->GetIsaGeneric().empty()) {
supported_isas_.push_back(core::IsaRegistry::GetIsa(isa_->GetIsaGeneric()));
}
// Check if the device is Kaveri, only on GPU device.
if (isa_->GetMajorVersion() == 7 && isa_->GetMinorVersion() == 0 &&
isa_->GetStepping() == 0) {
@@ -657,6 +662,17 @@ hsa_status_t GpuAgent::IterateCache(hsa_status_t (*callback)(hsa_cache_t cache,
return HSA_STATUS_SUCCESS;
}
// @brief Invokes @p callback once for every entry of supported_isas(), in
// list order, passing the entry's hsa_isa_t handle and @p data.
//
// @retval The first callback status that differs from ::HSA_STATUS_SUCCESS
// (iteration stops there), otherwise ::HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::IterateSupportedIsas(
    hsa_status_t (*callback)(hsa_isa_t isa, void* data),
    void* data) const {
  AMD::callback_t<decltype(callback)> invoke(callback);
  const auto& isa_list = supported_isas();
  for (auto it = isa_list.begin(); it != isa_list.end(); ++it) {
    const hsa_status_t status = invoke(core::Isa::Handle(*it), data);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
  }
  return HSA_STATUS_SUCCESS;
}
hsa_status_t GpuAgent::VisitRegion(bool include_peer,
hsa_status_t (*callback)(hsa_region_t region,
void* data),
+16 -8
Просмотреть файл
@@ -385,30 +385,38 @@ hsa_isa_t LoaderContext::IsaFromName(const char *name) {
}
bool LoaderContext::IsaSupportedByAgent(hsa_agent_t agent,
hsa_isa_t code_object_isa) {
std::pair<hsa_isa_t, bool> comparison_data(code_object_isa, false);
hsa_isa_t code_object_isa,
unsigned codeGenericVersion) {
struct callBackData {
std::pair<hsa_isa_t, bool> comparison_data;
const unsigned int codeGenericV;
} cbData = {{code_object_isa, false}, codeGenericVersion};
auto IsIsaEquivalent = [](hsa_isa_t agent_isa_h, void *data) {
assert(data);
std::pair<hsa_isa_t, bool> *data_pair =
reinterpret_cast<decltype(&comparison_data)>(data);
struct callBackData *inOutCB = reinterpret_cast<decltype(&cbData)>(data);
std::pair<hsa_isa_t, bool> *data_pair = &inOutCB->comparison_data;
const unsigned int codeGenericV = inOutCB->codeGenericV;
assert(data_pair);
assert(data_pair->second != true);
assert(!data_pair->second);
const core::Isa *agent_isa = core::Isa::Object(agent_isa_h);
assert(agent_isa);
const core::Isa *code_object_isa = core::Isa::Object(data_pair->first);
assert(code_object_isa);
data_pair->second = core::Isa::IsCompatible(*code_object_isa, *agent_isa);
data_pair->second = core::Isa::IsCompatible(*code_object_isa, *agent_isa, codeGenericV);
return data_pair->second ? HSA_STATUS_INFO_BREAK : HSA_STATUS_SUCCESS;
};
hsa_status_t status = HSA::hsa_agent_iterate_isas(agent, IsIsaEquivalent, &comparison_data);
hsa_status_t status = HSA::hsa_agent_iterate_isas(agent, IsIsaEquivalent, &cbData);
if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) {
return false;
}
return comparison_data.second;
return cbData.comparison_data.second;
}
void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
+6 -5
Просмотреть файл
@@ -137,10 +137,11 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac
// Check for sramecc incompatibility due to sramecc not being reported correctly in kfd before
// 1.4.
if (gpu->isa()->IsSrameccSupported() && (kfd_version.KernelInterfaceMajorVersion <= 1 &&
kfd_version.KernelInterfaceMinorVersion < 4)) {
if (gpu->supported_isas()[0]->IsSrameccSupported() &&
(kfd_version.KernelInterfaceMajorVersion <= 1 &&
kfd_version.KernelInterfaceMinorVersion < 4)) {
// gfx906 has both sramecc modes in use. Suppress the device.
if ((gpu->isa()->GetProcessorName() == "gfx906") &&
if ((gpu->supported_isas()[0]->GetProcessorName() == "gfx906") &&
core::Runtime::runtime_singleton_->flag().check_sramecc_validity()) {
char name[64];
gpu->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, name);
@@ -153,7 +154,7 @@ GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnac
}
// gfx908 always has sramecc set to on in vbios. Set mode bit to on and recreate the device.
if (gpu->isa()->GetProcessorName() == "gfx908") {
if (gpu->supported_isas()[0]->GetProcessorName() == "gfx908") {
node_prop.Capability.ui32.SRAM_EDCSupport = 1;
delete gpu;
gpu = new GpuAgent(node_id, node_prop, xnack_mode,
@@ -260,7 +261,7 @@ void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) {
*/
static void SurfaceGpuList(std::vector<int32_t>& gpu_list, bool xnack_mode, bool enabled) {
// Process user visible Gpu devices
int32_t invalidIdx = -1;
const int32_t invalidIdx = -1;
int32_t list_sz = gpu_list.size();
HsaNodeProperties node_prop = {0};
for (int32_t idx = 0; idx < list_sz; idx++) {
+2 -7
Просмотреть файл
@@ -1573,12 +1573,7 @@ hsa_status_t hsa_agent_iterate_isas(
const core::Agent *agent_object = core::Agent::Convert(agent);
IS_VALID(agent_object);
const Isa *isa_object = agent_object->isa();
if (!isa_object) {
return HSA_STATUS_SUCCESS;
}
return callback(Isa::Handle(isa_object), data);
return agent_object->IterateSupportedIsas(callback, data);
CATCH;
}
@@ -1708,7 +1703,7 @@ hsa_status_t hsa_isa_compatible(
const Isa *agent_isa_object = Isa::Object(agent_isa);
IS_VALID(agent_isa_object);
*result = Isa::IsCompatible(*code_object_isa_object, *agent_isa_object);
*result = Isa::IsCompatible(*code_object_isa_object, *agent_isa_object, 0);
return HSA_STATUS_SUCCESS;
CATCH;
}
+215 -134
Просмотреть файл
@@ -41,6 +41,7 @@
////////////////////////////////////////////////////////////////////////////////
#include "core/inc/isa.h"
#include "core/util/utils.h"
#include <algorithm>
#include <cstring>
@@ -71,11 +72,33 @@ bool Wavefront::GetInfo(
/* static */
bool Isa::IsCompatible(const Isa &code_object_isa,
const Isa &agent_isa) {
if (code_object_isa.GetVersion() != agent_isa.GetVersion())
return false;
const Isa &agent_isa, unsigned int codeGenericVersion) {
assert(code_object_isa.IsSrameccSupported() == agent_isa.IsSrameccSupported() && agent_isa.GetSramecc() != IsaFeature::Any);
bool code_obj_isa_is_generic = false;
auto generic_it = IsaRegistry::GetSupportedGenericVersions().find(
code_object_isa.GetIsaName());
if (generic_it != IsaRegistry::GetSupportedGenericVersions().end()) {
code_obj_isa_is_generic = true;
}
if (code_obj_isa_is_generic) {
// Verify the generic code object corresponds to the generic for
// this isa agent.
if (agent_isa.GetIsaGeneric() != code_object_isa.GetIsaName()) {
return false;
}
// Verify the generic code object version is greater than or equal to
// the generic version for this isa agent.
if (codeGenericVersion < generic_it->second) {
return false;
}
} else if (code_object_isa.GetVersion() != agent_isa.GetVersion()) {
return false;
}
assert(code_object_isa.IsSrameccSupported() == agent_isa.IsSrameccSupported()
&& agent_isa.GetSramecc() != IsaFeature::Any);
if ((code_object_isa.GetSramecc() == IsaFeature::Enabled ||
code_object_isa.GetSramecc() == IsaFeature::Disabled) &&
code_object_isa.GetSramecc() != agent_isa.GetSramecc())
@@ -91,13 +114,16 @@ bool Isa::IsCompatible(const Isa &code_object_isa,
}
std::string Isa::GetProcessorName() const {
std::string processor(targetid_);
return processor.substr(0, processor.find(':'));
return targetid_.substr(0, targetid_.find(':'));
}
// @brief Returns @p isa_name with the "amdgcn-amd-amdhsa--" prefix prepended.
static __forceinline std::string prepend_isa_prefix(const std::string &isa_name) {
  std::string full_name("amdgcn-amd-amdhsa--");
  full_name += isa_name;
  return full_name;
}
std::string Isa::GetIsaName() const {
constexpr char hsa_isa_name_prefix[] = "amdgcn-amd-amdhsa--";
return std::string(hsa_isa_name_prefix) + targetid_;
return prepend_isa_prefix(targetid_);
}
bool Isa::GetInfo(const hsa_isa_info_t &attribute, void *value) const {
@@ -219,146 +245,201 @@ const Isa *IsaRegistry::GetIsa(const Isa::Version &version, IsaFeature sramecc,
}
// @brief Returns the table of recognised generic ISA names.
//
// Keys are full ISA names (prepend_isa_prefix() applied to the generic target
// ID); values are the minimum generic code object version accepted for that
// name (Isa::IsCompatible rejects a code object whose generic version is lower
// than the mapped value). The table is built once, on first call.
//
// The gfx9-generic rows cover every sramecc/xnack combination; the previous
// initializer repeated the "sramecc-:xnack-" and "sramecc+:xnack+" rows, which
// left the two mixed combinations out of the map.
const std::unordered_map<std::string, unsigned int> &
IsaRegistry::GetSupportedGenericVersions() {
  static const
  std::unordered_map<std::string, unsigned int> min_gen_versions = {
    {prepend_isa_prefix("gfx9-generic"), 1},
    {prepend_isa_prefix("gfx9-generic:xnack-"), 1},
    {prepend_isa_prefix("gfx9-generic:xnack+"), 1},
    {prepend_isa_prefix("gfx9-generic:sramecc-"), 1},
    {prepend_isa_prefix("gfx9-generic:sramecc+"), 1},
    {prepend_isa_prefix("gfx9-generic:sramecc-:xnack-"), 1},
    {prepend_isa_prefix("gfx9-generic:sramecc-:xnack+"), 1},
    {prepend_isa_prefix("gfx9-generic:sramecc+:xnack-"), 1},
    {prepend_isa_prefix("gfx9-generic:sramecc+:xnack+"), 1},
    {prepend_isa_prefix("gfx10-1-generic"), 1},
    {prepend_isa_prefix("gfx10-1-generic:xnack-"), 1},
    {prepend_isa_prefix("gfx10-1-generic:xnack+"), 1},
    {prepend_isa_prefix("gfx10-3-generic"), 1},
    {prepend_isa_prefix("gfx11-generic"), 1},
    {prepend_isa_prefix("gfx12-generic"), 1}
  };
  return min_gen_versions;
}
const IsaRegistry::IsaMap& IsaRegistry::GetSupportedIsas() {
// agent, and vendor name length limit excluding terminating nul character.
constexpr size_t hsa_name_size = 63;
// FIXME: Use static_assert when C++17 used.
#define ISAREG_ENTRY_GEN(name, maj, min, stp, sramecc, xnack, wavefrontsize) \
assert(std::char_traits<char>::length(name) <= hsa_name_size); \
static Isa amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize; \
amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize.targetid_ = name; \
amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize.version_ = Isa::Version(maj, min, stp); \
amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize.sramecc_ = sramecc; \
amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize.xnack_ = xnack; \
amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize.wavefront_.num_threads_ = wavefrontsize; \
supported_isas.insert(std::make_pair( \
amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize.GetIsaName(), \
std::ref(amd_amdgpu_##maj##min##stp##_SRAMECC_##sramecc##_XNACK_##xnack##_WAVEFRONTSIZE_##wavefrontsize))); \
// agent, and vendor name length limit excluding terminating nul character.
constexpr size_t hsa_name_size = 63;
static IsaMap supported_isas;
if (supported_isas.size() > 0) {
return supported_isas;
}
// Extracts the minor version from a generic ISA name of the form
// "<text>.<minor>[-<suffix>]": the characters after the first '.' and before
// the following '-' (or up to the end of the string when there is no '-') are
// parsed as a decimal integer with std::stoi. Names without a '.' yield 0xFF.
//
// The lambda uses nothing from the enclosing scope, so it takes no captures.
//
// NOTE(review): the generic names listed in GetSupportedGenericVersions()
// (e.g. "gfx10-1-generic") separate their fields with '-' and contain no '.',
// so for such names this always yields 0xFF - confirm that this is intended.
// NOTE(review): std::stoi throws if the text after '.' does not start with a
// number; callers are assumed to pass well-formed names.
auto parse_out_minor_ver = [](const std::string& genericname) -> int32_t {
  const size_t dot_pos = genericname.find('.');
  if (dot_pos == std::string::npos) {
    return 0xFF;
  }
  const std::string after_dot = genericname.substr(dot_pos + 1);
  // substr(0, npos) keeps the whole string when no '-' follows the minor.
  return std::stoi(after_dot.substr(0, after_dot.find('-')));
};
// FIXME: Use static_assert when C++17 used.
//
// Registers one ISA in supported_isas, keyed by its full ISA name
// (Isa::GetIsaName()). When gen_name is non-empty, the ISA's generic_ field is
// set to the prefixed generic name and, if no entry with that key exists yet,
// a generic Isa entry is registered as well. The generic entry takes maj from
// this entry, its minor from parse_out_minor_ver(gen_name), stepping 0xFF, and
// copies sramecc, xnack and wavefrontsize from this entry.
//
// supported_isas stores std::reference_wrapper<const Isa>, so every Isa that
// is inserted must outlive the map. Both isa_val and isa_generic_val are
// therefore function-local statics (one per macro expansion); a non-static
// generic Isa would leave a dangling reference in the map.
//
// The existence check uses the prefixed generic name (isa_val.generic_), which
// is the same string isa_generic_val.GetIsaName() produces and the key the
// generic entry is inserted under.
//
// Comments are kept outside the macro body on purpose: a "//" comment on a
// continued line would swallow the remainder of the macro.
#define ISAREG_ENTRY_GEN(name, maj, min, stp, sramecc, xnack, wavefrontsize, gen_name) \
  { \
    assert(std::char_traits<char>::length(name) <= hsa_name_size); \
    static Isa isa_val; \
    isa_val.targetid_ = name; \
    isa_val.version_ = Isa::Version(maj, min, stp); \
    isa_val.sramecc_ = sramecc; \
    isa_val.xnack_ = xnack; \
    isa_val.wavefront_.num_threads_ = wavefrontsize; \
    std::string genericname(gen_name); \
    if (genericname.size() != 0) { \
      isa_val.generic_ = prepend_isa_prefix(genericname); \
      if (supported_isas.find(isa_val.generic_) == supported_isas.end()) { \
        static Isa isa_generic_val; \
        isa_generic_val.targetid_ = genericname; \
        isa_generic_val.version_ = Isa::Version(maj, parse_out_minor_ver(genericname), 0xFF); \
        isa_generic_val.sramecc_ = sramecc; \
        isa_generic_val.xnack_ = xnack; \
        isa_generic_val.wavefront_.num_threads_ = wavefrontsize; \
        supported_isas.insert(std::make_pair( \
            isa_generic_val.GetIsaName(), \
            std::ref(isa_generic_val))); \
      } \
    } \
    supported_isas.insert(std::make_pair( \
        isa_val.GetIsaName(), \
        std::ref(isa_val))); \
  }
// Short aliases for the IsaFeature states passed in the SRAMECC and XNACK
// columns of the ISAREG_ENTRY_GEN table below.
const IsaFeature unsupported = IsaFeature::Unsupported;
const IsaFeature any = IsaFeature::Any;
const IsaFeature disabled = IsaFeature::Disabled;
const IsaFeature enabled = IsaFeature::Enabled;
// Columns: Target ID, Version (major, minor, stepping), SRAMECC, XNACK,
// Wavefront size, Generic target ("" if none)
ISAREG_ENTRY_GEN("gfx700", 7, 0, 0, unsupported, unsupported, 64)
ISAREG_ENTRY_GEN("gfx701", 7, 0, 1, unsupported, unsupported, 64)
ISAREG_ENTRY_GEN("gfx702", 7, 0, 2, unsupported, unsupported, 64)
ISAREG_ENTRY_GEN("gfx801", 8, 0, 1, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx801:xnack-", 8, 0, 1, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx801:xnack+", 8, 0, 1, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx802", 8, 0, 2, unsupported, unsupported, 64)
ISAREG_ENTRY_GEN("gfx803", 8, 0, 3, unsupported, unsupported, 64)
ISAREG_ENTRY_GEN("gfx805", 8, 0, 5, unsupported, unsupported, 64)
ISAREG_ENTRY_GEN("gfx810", 8, 1, 0, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx810:xnack-", 8, 1, 0, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx810:xnack+", 8, 1, 0, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx900", 9, 0, 0, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx900:xnack-", 9, 0, 0, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx900:xnack+", 9, 0, 0, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx902", 9, 0, 2, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx902:xnack-", 9, 0, 2, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx902:xnack+", 9, 0, 2, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx904", 9, 0, 4, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx904:xnack-", 9, 0, 4, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx904:xnack+", 9, 0, 4, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx906", 9, 0, 6, any, any, 64)
ISAREG_ENTRY_GEN("gfx906:xnack-", 9, 0, 6, any, disabled, 64)
ISAREG_ENTRY_GEN("gfx906:xnack+", 9, 0, 6, any, enabled, 64)
ISAREG_ENTRY_GEN("gfx906:sramecc-", 9, 0, 6, disabled, any, 64)
ISAREG_ENTRY_GEN("gfx906:sramecc+", 9, 0, 6, enabled, any, 64)
ISAREG_ENTRY_GEN("gfx906:sramecc-:xnack-", 9, 0, 6, disabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx906:sramecc-:xnack+", 9, 0, 6, disabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx906:sramecc+:xnack-", 9, 0, 6, enabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx906:sramecc+:xnack+", 9, 0, 6, enabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx908", 9, 0, 8, any, any, 64)
ISAREG_ENTRY_GEN("gfx908:xnack-", 9, 0, 8, any, disabled, 64)
ISAREG_ENTRY_GEN("gfx908:xnack+", 9, 0, 8, any, enabled, 64)
ISAREG_ENTRY_GEN("gfx908:sramecc-", 9, 0, 8, disabled, any, 64)
ISAREG_ENTRY_GEN("gfx908:sramecc+", 9, 0, 8, enabled, any, 64)
ISAREG_ENTRY_GEN("gfx908:sramecc-:xnack-", 9, 0, 8, disabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx908:sramecc-:xnack+", 9, 0, 8, disabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx908:sramecc+:xnack-", 9, 0, 8, enabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx908:sramecc+:xnack+", 9, 0, 8, enabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx909", 9, 0, 9, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx909:xnack-", 9, 0, 9, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx909:xnack+", 9, 0, 9, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx90a", 9, 0, 10, any, any, 64)
ISAREG_ENTRY_GEN("gfx90a:xnack-", 9, 0, 10, any, disabled, 64)
ISAREG_ENTRY_GEN("gfx90a:xnack+", 9, 0, 10, any, enabled, 64)
ISAREG_ENTRY_GEN("gfx90a:sramecc-", 9, 0, 10, disabled, any, 64)
ISAREG_ENTRY_GEN("gfx90a:sramecc+", 9, 0, 10, enabled, any, 64)
ISAREG_ENTRY_GEN("gfx90a:sramecc-:xnack-", 9, 0, 10, disabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx90a:sramecc-:xnack+", 9, 0, 10, disabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx90a:sramecc+:xnack-", 9, 0, 10, enabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx90a:sramecc+:xnack+", 9, 0, 10, enabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx90c", 9, 0, 12, unsupported, any, 64)
ISAREG_ENTRY_GEN("gfx90c:xnack-", 9, 0, 12, unsupported, disabled, 64)
ISAREG_ENTRY_GEN("gfx90c:xnack+", 9, 0, 12, unsupported, enabled, 64)
ISAREG_ENTRY_GEN("gfx940", 9, 4, 0, any, any, 64)
ISAREG_ENTRY_GEN("gfx940:xnack-", 9, 4, 0, any, disabled, 64)
ISAREG_ENTRY_GEN("gfx940:xnack+", 9, 4, 0, any, enabled, 64)
ISAREG_ENTRY_GEN("gfx940:sramecc-", 9, 4, 0, disabled, any, 64)
ISAREG_ENTRY_GEN("gfx940:sramecc+", 9, 4, 0, enabled, any, 64)
ISAREG_ENTRY_GEN("gfx940:sramecc-:xnack-", 9, 4, 0, disabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx940:sramecc-:xnack+", 9, 4, 0, disabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx940:sramecc+:xnack-", 9, 4, 0, enabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx940:sramecc+:xnack+", 9, 4, 0, enabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx941", 9, 4, 1, any, any, 64)
ISAREG_ENTRY_GEN("gfx941:xnack-", 9, 4, 1, any, disabled, 64)
ISAREG_ENTRY_GEN("gfx941:xnack+", 9, 4, 1, any, enabled, 64)
ISAREG_ENTRY_GEN("gfx941:sramecc-", 9, 4, 1, disabled, any, 64)
ISAREG_ENTRY_GEN("gfx941:sramecc+", 9, 4, 1, enabled, any, 64)
ISAREG_ENTRY_GEN("gfx941:sramecc-:xnack-", 9, 4, 1, disabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx941:sramecc-:xnack+", 9, 4, 1, disabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx941:sramecc+:xnack-", 9, 4, 1, enabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx941:sramecc+:xnack+", 9, 4, 1, enabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx942", 9, 4, 2, any, any, 64)
ISAREG_ENTRY_GEN("gfx942:xnack-", 9, 4, 2, any, disabled, 64)
ISAREG_ENTRY_GEN("gfx942:xnack+", 9, 4, 2, any, enabled, 64)
ISAREG_ENTRY_GEN("gfx942:sramecc-", 9, 4, 2, disabled, any, 64)
ISAREG_ENTRY_GEN("gfx942:sramecc+", 9, 4, 2, enabled, any, 64)
ISAREG_ENTRY_GEN("gfx942:sramecc-:xnack-", 9, 4, 2, disabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx942:sramecc-:xnack+", 9, 4, 2, disabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx942:sramecc+:xnack-", 9, 4, 2, enabled, disabled, 64)
ISAREG_ENTRY_GEN("gfx942:sramecc+:xnack+", 9, 4, 2, enabled, enabled, 64)
ISAREG_ENTRY_GEN("gfx1010", 10, 1, 0, unsupported, any, 32)
ISAREG_ENTRY_GEN("gfx1010:xnack-", 10, 1, 0, unsupported, disabled, 32)
ISAREG_ENTRY_GEN("gfx1010:xnack+", 10, 1, 0, unsupported, enabled, 32)
ISAREG_ENTRY_GEN("gfx1011", 10, 1, 1, unsupported, any, 32)
ISAREG_ENTRY_GEN("gfx1011:xnack-", 10, 1, 1, unsupported, disabled, 32)
ISAREG_ENTRY_GEN("gfx1011:xnack+", 10, 1, 1, unsupported, enabled, 32)
ISAREG_ENTRY_GEN("gfx1012", 10, 1, 2, unsupported, any, 32)
ISAREG_ENTRY_GEN("gfx1012:xnack-", 10, 1, 2, unsupported, disabled, 32)
ISAREG_ENTRY_GEN("gfx1012:xnack+", 10, 1, 2, unsupported, enabled, 32)
ISAREG_ENTRY_GEN("gfx1013", 10, 1, 3, unsupported, any, 32)
ISAREG_ENTRY_GEN("gfx1013:xnack-", 10, 1, 3, unsupported, disabled, 32)
ISAREG_ENTRY_GEN("gfx1013:xnack+", 10, 1, 3, unsupported, enabled, 32)
ISAREG_ENTRY_GEN("gfx1030", 10, 3, 0, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1031", 10, 3, 1, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1032", 10, 3, 2, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1033", 10, 3, 3, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1034", 10, 3, 4, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1035", 10, 3, 5, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1036", 10, 3, 6, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1100", 11, 0, 0, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1101", 11, 0, 1, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1102", 11, 0, 2, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1103", 11, 0, 3, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1150", 11, 5, 0, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1151", 11, 5, 1, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1152", 11, 5, 2, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1200", 12, 0, 0, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx1201", 12, 0, 1, unsupported, unsupported, 32)
ISAREG_ENTRY_GEN("gfx700", 7, 0, 0, unsupported, unsupported, 64, "")
ISAREG_ENTRY_GEN("gfx701", 7, 0, 1, unsupported, unsupported, 64, "")
ISAREG_ENTRY_GEN("gfx702", 7, 0, 2, unsupported, unsupported, 64, "")
ISAREG_ENTRY_GEN("gfx801", 8, 0, 1, unsupported, any, 64, "")
ISAREG_ENTRY_GEN("gfx801:xnack-", 8, 0, 1, unsupported, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx801:xnack+", 8, 0, 1, unsupported, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx802", 8, 0, 2, unsupported, unsupported, 64, "")
ISAREG_ENTRY_GEN("gfx803", 8, 0, 3, unsupported, unsupported, 64, "")
ISAREG_ENTRY_GEN("gfx805", 8, 0, 5, unsupported, unsupported, 64, "")
ISAREG_ENTRY_GEN("gfx810", 8, 1, 0, unsupported, any, 64, "")
ISAREG_ENTRY_GEN("gfx810:xnack-", 8, 1, 0, unsupported, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx810:xnack+", 8, 1, 0, unsupported, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx900", 9, 0, 0, unsupported, any, 64, "gfx9-generic")
ISAREG_ENTRY_GEN("gfx900:xnack-", 9, 0, 0, unsupported, disabled, 64, "gfx9-generic:xnack-")
ISAREG_ENTRY_GEN("gfx900:xnack+", 9, 0, 0, unsupported, enabled, 64, "gfx9-generic:xnack+")
ISAREG_ENTRY_GEN("gfx902", 9, 0, 2, unsupported, any, 64, "gfx9-generic")
ISAREG_ENTRY_GEN("gfx902:xnack-", 9, 0, 2, unsupported, disabled, 64, "gfx9-generic:xnack-")
ISAREG_ENTRY_GEN("gfx902:xnack+", 9, 0, 2, unsupported, enabled, 64, "gfx9-generic:xnack+")
ISAREG_ENTRY_GEN("gfx904", 9, 0, 4, unsupported, any, 64, "gfx9-generic")
ISAREG_ENTRY_GEN("gfx904:xnack-", 9, 0, 4, unsupported, disabled, 64, "gfx9-generic:xnack-")
ISAREG_ENTRY_GEN("gfx904:xnack+", 9, 0, 4, unsupported, enabled, 64, "gfx9-generic:xnack+")
ISAREG_ENTRY_GEN("gfx906", 9, 0, 6, any, any, 64, "gfx9-generic")
ISAREG_ENTRY_GEN("gfx906:xnack-", 9, 0, 6, any, disabled, 64, "gfx9-generic:xnack-")
ISAREG_ENTRY_GEN("gfx906:xnack+", 9, 0, 6, any, enabled, 64, "gfx9-generic:xnack+")
ISAREG_ENTRY_GEN("gfx906:sramecc-", 9, 0, 6, disabled, any, 64, "gfx9-generic:sramecc-")
ISAREG_ENTRY_GEN("gfx906:sramecc+", 9, 0, 6, enabled, any, 64, "gfx9-generic:sramecc+")
ISAREG_ENTRY_GEN("gfx906:sramecc-:xnack-", 9, 0, 6, disabled, disabled, 64, "gfx9-generic:sramecc-:xnack-")
ISAREG_ENTRY_GEN("gfx906:sramecc-:xnack+", 9, 0, 6, disabled, enabled, 64, "gfx9-generic:sramecc-:xnack+")
ISAREG_ENTRY_GEN("gfx906:sramecc+:xnack-", 9, 0, 6, enabled, disabled, 64, "gfx9-generic:sramecc+:xnack-")
ISAREG_ENTRY_GEN("gfx906:sramecc+:xnack+", 9, 0, 6, enabled, enabled, 64, "gfx9-generic:sramecc+:xnack+")
ISAREG_ENTRY_GEN("gfx908", 9, 0, 8, any, any, 64, "")
ISAREG_ENTRY_GEN("gfx908:xnack-", 9, 0, 8, any, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx908:xnack+", 9, 0, 8, any, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx908:sramecc-", 9, 0, 8, disabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx908:sramecc+", 9, 0, 8, enabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx908:sramecc-:xnack-", 9, 0, 8, disabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx908:sramecc-:xnack+", 9, 0, 8, disabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx908:sramecc+:xnack-", 9, 0, 8, enabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx908:sramecc+:xnack+", 9, 0, 8, enabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx909", 9, 0, 9, unsupported, any, 64, "gfx9-generic")
ISAREG_ENTRY_GEN("gfx909:xnack-", 9, 0, 9, unsupported, disabled, 64, "gfx9-generic:xnack-")
ISAREG_ENTRY_GEN("gfx909:xnack+", 9, 0, 9, unsupported, enabled, 64, "gfx9-generic:xnack+")
ISAREG_ENTRY_GEN("gfx90a", 9, 0, 10, any, any, 64, "")
ISAREG_ENTRY_GEN("gfx90a:xnack-", 9, 0, 10, any, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx90a:xnack+", 9, 0, 10, any, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx90a:sramecc-", 9, 0, 10, disabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx90a:sramecc+", 9, 0, 10, enabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx90a:sramecc-:xnack-", 9, 0, 10, disabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx90a:sramecc-:xnack+", 9, 0, 10, disabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx90a:sramecc+:xnack-", 9, 0, 10, enabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx90a:sramecc+:xnack+", 9, 0, 10, enabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx90c", 9, 0, 12, unsupported, any, 64, "gfx9-generic")
ISAREG_ENTRY_GEN("gfx90c:xnack-", 9, 0, 12, unsupported, disabled, 64, "gfx9-generic:xnack-")
ISAREG_ENTRY_GEN("gfx90c:xnack+", 9, 0, 12, unsupported, enabled, 64, "gfx9-generic:xnack+")
ISAREG_ENTRY_GEN("gfx940", 9, 4, 0, any, any, 64, "")
ISAREG_ENTRY_GEN("gfx940:xnack-", 9, 4, 0, any, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx940:xnack+", 9, 4, 0, any, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx940:sramecc-", 9, 4, 0, disabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx940:sramecc+", 9, 4, 0, enabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx940:sramecc-:xnack-", 9, 4, 0, disabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx940:sramecc-:xnack+", 9, 4, 0, disabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx940:sramecc+:xnack-", 9, 4, 0, enabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx940:sramecc+:xnack+", 9, 4, 0, enabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx941", 9, 4, 1, any, any, 64, "")
ISAREG_ENTRY_GEN("gfx941:xnack-", 9, 4, 1, any, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx941:xnack+", 9, 4, 1, any, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx941:sramecc-", 9, 4, 1, disabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx941:sramecc+", 9, 4, 1, enabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx941:sramecc-:xnack-", 9, 4, 1, disabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx941:sramecc-:xnack+", 9, 4, 1, disabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx941:sramecc+:xnack-", 9, 4, 1, enabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx941:sramecc+:xnack+", 9, 4, 1, enabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx942", 9, 4, 2, any, any, 64, "")
ISAREG_ENTRY_GEN("gfx942:xnack-", 9, 4, 2, any, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx942:xnack+", 9, 4, 2, any, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx942:sramecc-", 9, 4, 2, disabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx942:sramecc+", 9, 4, 2, enabled, any, 64, "")
ISAREG_ENTRY_GEN("gfx942:sramecc-:xnack-", 9, 4, 2, disabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx942:sramecc-:xnack+", 9, 4, 2, disabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx942:sramecc+:xnack-", 9, 4, 2, enabled, disabled, 64, "")
ISAREG_ENTRY_GEN("gfx942:sramecc+:xnack+", 9, 4, 2, enabled, enabled, 64, "")
ISAREG_ENTRY_GEN("gfx1010", 10, 1, 0, unsupported, any, 32, "gfx10-1-generic")
ISAREG_ENTRY_GEN("gfx1010:xnack-", 10, 1, 0, unsupported, disabled, 32, "gfx10-1-generic:xnack-")
ISAREG_ENTRY_GEN("gfx1010:xnack+", 10, 1, 0, unsupported, enabled, 32, "gfx10-1-generic:xnack+")
ISAREG_ENTRY_GEN("gfx1011", 10, 1, 1, unsupported, any, 32, "gfx10-1-generic")
ISAREG_ENTRY_GEN("gfx1011:xnack-", 10, 1, 1, unsupported, disabled, 32, "gfx10-1-generic:xnack-")
ISAREG_ENTRY_GEN("gfx1011:xnack+", 10, 1, 1, unsupported, enabled, 32, "gfx10-1-generic:xnack+")
ISAREG_ENTRY_GEN("gfx1012", 10, 1, 2, unsupported, any, 32, "gfx10-1-generic")
ISAREG_ENTRY_GEN("gfx1012:xnack-", 10, 1, 2, unsupported, disabled, 32, "gfx10-1-generic:xnack-")
ISAREG_ENTRY_GEN("gfx1012:xnack+", 10, 1, 2, unsupported, enabled, 32, "gfx10-1-generic:xnack+")
ISAREG_ENTRY_GEN("gfx1013", 10, 1, 3, unsupported, any, 32, "gfx10-1-generic")
ISAREG_ENTRY_GEN("gfx1013:xnack-", 10, 1, 3, unsupported, disabled, 32, "gfx10-1-generic:xnack-")
ISAREG_ENTRY_GEN("gfx1013:xnack+", 10, 1, 3, unsupported, enabled, 32, "gfx10-1-generic:xnack+")
ISAREG_ENTRY_GEN("gfx1030", 10, 3, 0, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1031", 10, 3, 1, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1032", 10, 3, 2, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1033", 10, 3, 3, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1034", 10, 3, 4, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1035", 10, 3, 5, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1036", 10, 3, 6, unsupported, unsupported, 32, "gfx10-3-generic")
ISAREG_ENTRY_GEN("gfx1100", 11, 0, 0, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1101", 11, 0, 1, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1102", 11, 0, 2, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1103", 11, 0, 3, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1150", 11, 5, 0, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1151", 11, 5, 1, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1152", 11, 5, 2, unsupported, unsupported, 32, "gfx11-generic")
ISAREG_ENTRY_GEN("gfx1200", 12, 0, 0, unsupported, unsupported, 32, "gfx12-generic")
ISAREG_ENTRY_GEN("gfx1201", 12, 0, 1, unsupported, unsupported, 32, "gfx12-generic")
#undef ISAREG_ENTRY_GEN
return supported_isas;
}
+4 -3
Просмотреть файл
@@ -787,7 +787,7 @@ hsa_status_t Runtime::GetSystemInfo(hsa_system_info_t attribute, void* value) {
case HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: {
bool ret = true;
for(auto agent : gpu_agents_)
ret &= (agent->isa()->GetXnack() == IsaFeature::Enabled);
ret &= (agent->supported_isas()[0]->GetXnack() == IsaFeature::Enabled);
*(bool*)value = ret;
break;
}
@@ -1942,8 +1942,9 @@ bool Runtime::VMFaultHandler(hsa_signal_value_t val, void* arg) {
}
// Fallback if KFD does not support GPU core dump. In this case, the core dump is
// generated by hsa-runtime.
if (faulty_agent && faulty_agent->isa()->GetMajorVersion() != 11 &&
!runtime_singleton_->KfdVersion().supports_core_dump) {
if (faulty_agent &&
faulty_agent->supported_isas()[0]->GetMajorVersion() != 11 &&
!runtime_singleton_->KfdVersion().supports_core_dump) {
if (pcs::PcsRuntime::instance()->SessionsActive())
fprintf(stderr, "GPU core dump skipped because PC Sampling active\n");