From 902cf1a2390c73d3548c886924ca51d3fb5f52d2 Mon Sep 17 00:00:00 2001 From: Tony Tye Date: Sun, 10 Jan 2021 12:17:06 +0000 Subject: [PATCH] Update code object handling for GSL, PAL and ROCm - Correct GSL path to report targets using the TargetID syntax. - Correct GSL path to check compatibility of code objects when loading. - Add concept of a device isa and create a registry used by ROCm, PAL and GSL. - Support XNACK and SRAMECC target features consistently for PAL and ROCm. - Correct logic for NullDevices and asserts to avoid memory corruption. - Allow all NullDevices to be created for HIP. - Numerous other code improvements. Change-Id: I40abf3d2b22249c1492d1af5919665f8184f4e0e [ROCm/clr commit: c7e8d91e1487953cf7fbc64b0e3250155a7b0450] --- projects/clr/rocclr/device/device.cpp | 173 +++++++- projects/clr/rocclr/device/device.hpp | 221 +++++++++- projects/clr/rocclr/device/devkernel.cpp | 2 +- projects/clr/rocclr/device/devprogram.cpp | 41 +- projects/clr/rocclr/device/devprogram.hpp | 6 +- projects/clr/rocclr/device/gpu/gpubinary.cpp | 2 +- projects/clr/rocclr/device/gpu/gpudefs.hpp | 151 ------- projects/clr/rocclr/device/gpu/gpudevice.cpp | 313 ++++++++------ projects/clr/rocclr/device/gpu/gpudevice.hpp | 18 +- projects/clr/rocclr/device/gpu/gpukernel.cpp | 9 +- projects/clr/rocclr/device/gpu/gpuprogram.cpp | 130 +----- projects/clr/rocclr/device/gpu/gpuprogram.hpp | 4 +- projects/clr/rocclr/device/gpu/gpuscsi.cpp | 2 +- projects/clr/rocclr/device/pal/paldefs.hpp | 70 ---- projects/clr/rocclr/device/pal/paldevice.cpp | 387 ++++++++---------- projects/clr/rocclr/device/pal/paldevice.hpp | 10 +- projects/clr/rocclr/device/pal/palprogram.cpp | 76 +--- projects/clr/rocclr/device/pal/palprogram.hpp | 3 +- .../clr/rocclr/device/pal/palsettings.cpp | 16 +- .../clr/rocclr/device/pal/palsettings.hpp | 1 + projects/clr/rocclr/device/rocm/rocblit.cpp | 10 +- .../clr/rocclr/device/rocm/roccounters.cpp | 2 +- projects/clr/rocclr/device/rocm/rocdefs.hpp | 43 -- 
projects/clr/rocclr/device/rocm/rocdevice.cpp | 274 +++++++------ projects/clr/rocclr/device/rocm/rocdevice.hpp | 6 +- projects/clr/rocclr/device/rocm/rocmemory.cpp | 2 +- .../clr/rocclr/device/rocm/rocprogram.cpp | 6 +- .../clr/rocclr/device/rocm/rocprogram.hpp | 2 +- .../clr/rocclr/device/rocm/rocsettings.cpp | 6 +- .../clr/rocclr/device/rocm/rocsettings.hpp | 3 +- 30 files changed, 1020 insertions(+), 969 deletions(-) diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 593a7f3e35..f868961bac 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -23,6 +23,11 @@ #include "utils/options.hpp" #include "comgrctx.hpp" +#include +#include +#include +#include + #if defined(WITH_HSA_DEVICE) #include "device/rocm/rocdevice.hpp" extern amd::AppProfile* rocCreateAppProfile(); @@ -65,6 +70,11 @@ extern void DeviceUnload(); #include #include +namespace { + +constexpr char hsaIsaNamePrefix[] = "amdgcn-amd-amdhsa--"; + +} // namespace namespace device { extern const char* BlitSourceCode; @@ -77,6 +87,166 @@ bool VirtualDevice::ActiveWait() const { namespace amd { +std::pair Isa::supportedIsas() { + constexpr amd::Isa::Feature NONE = amd::Isa::Feature::Unsupported; + constexpr amd::Isa::Feature ANY = amd::Isa::Feature::Any; + constexpr amd::Isa::Feature OFF = amd::Isa::Feature::Disabled; + constexpr amd::Isa::Feature ON = amd::Isa::Feature::Enabled; + + static constexpr Isa supportedIsas_[] = { + + // NOTE: Add new targets by adding rows for each permutation of the SRAMECC + // and XNACK target feature values. If the target does not support the + // feature then only NONE is used. If it supports the feature then include + // rows for ANY, OFF and ON (but not NONE). + // + // Use the Target ID syntax. This comprises the processor name, followed by + // the target feature settings in alphabetic order separated by ':'. 
If a + // target feature is omitted it means either it is not supported, or it has + // the ANY value. If the target feature is disabled then use a '-' suffix, + // and if enabled use a '+' suffix. + // + // If the HSAIL or AMD IL compilers do not support the target, then use + // nullptr for the ID. + // + // -------------------- Compiler -------------------- ------- Runtime ----- ---- IP ---- --- Target --- ---------- Target Properties ---------- + // Supported Version Features Mem + // SIMD Channel LDS LDS + // SIMD/ SIMD Instr Bank Size/ Mem + // Target ID HSAIL ID AMD IL ID ROC PAL GSL Maj/Min/Stp SRAMECC XNACK CU Width Width Width CU Banks + {"gfx600", "Tahiti", "Tahiti", false, false, true, 6, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx601", "Pitcairn", "Pitcairn", false, false, true, 6, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Capeverde + {"gfx602", "Oland", "Oland", false, false, true, 6, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Hainan + {"gfx700", "Kaveri", "Kalindi", true, true, true, 7, 0, 0, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Spectre, Spooky, Kalindi + {"gfx701", "Hawaii", "Hawaii", true, true, true, 7, 0, 1, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaiipro + {"gfx702", nullptr, nullptr, true, true, true, 7, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Hawaii (can execute Hawiipro code) + {"gfx703", nullptr, nullptr, false, false, false, 7, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Mullins + {"gfx704", "Bonaire", "Bonaire", false, true, true, 7, 0, 4, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx705", "Mullins", "Mullins", false, true, true, 7, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Actually Godavari + {"gfx801", nullptr, nullptr, true, false, false, 8, 0, 1, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx801:xnack-", "Carrizo", "Carrizo", true, true, true, 8, 0, 1, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx801:xnack+", nullptr, nullptr, true, 
false, false, 8, 0, 1, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx802", "Tonga", "Tonga", true, true, true, 8, 0, 2, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Iceland + {"gfx803", "Fiji", "Fiji", true, true, true, 8, 0, 3, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Also Ellesmere/Polaris10, Baffin/Polaris11, Polaris12, Polaris22/VegaM + {"gfx805", nullptr, nullptr, true, false, false, 8, 0, 5, NONE, NONE, 4, 16, 1, 256, 64 * Ki, 32}, // Tongapro + {"gfx810", nullptr, nullptr, true, false, false, 8, 1, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx810:xnack-", "Stoney", "Stoney", true, true, true, 8, 1, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx810:xnack+", nullptr, nullptr, true, false, false, 8, 1, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx900", "gfx901", nullptr, true, true, false, 9, 0, 0, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Greenland + {"gfx900:xnack-", "gfx900", nullptr, true, true, !IS_BRAHMA, 9, 0, 0, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx900:xnack+", "gfx901", nullptr, true, true, false, 9, 0, 0, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx902", "gfx903", nullptr, true, true, false, 9, 0, 2, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven + {"gfx902:xnack-", "gfx902", nullptr, true, true, !IS_BRAHMA, 9, 0, 2, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx902:xnack+", "gfx903", nullptr, true, true, false, 9, 0, 2, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx904", "gfx905", nullptr, true, true, false, 9, 0, 4, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega12 + {"gfx904:xnack-", "gfx904", nullptr, true, true, !IS_BRAHMA, 9, 0, 4, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx904:xnack+", "gfx905", nullptr, true, true, false, 9, 0, 4, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906", "gfx907", nullptr, true, true, false, 9, 0, 6, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Vega20 + {"gfx906:sramecc-", "gfx907", nullptr, true, true, !IS_BRAHMA & false, 9, 0, 6, OFF, ANY, 4, 16, 1, 256, 64 
* Ki, 32}, + {"gfx906:sramecc+", nullptr, nullptr, true, false, false, 9, 0, 6, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906:xnack-", "gfx906", nullptr, true, true, true, 9, 0, 6, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906:xnack+", "gfx907", nullptr, true, true, false, 9, 0, 6, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906:sramecc-:xnack-", "gfx906", nullptr, true, true, true, 9, 0, 6, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906:sramecc-:xnack+", "gfx907", nullptr, true, true, false, 9, 0, 6, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906:sramecc+:xnack-", nullptr, nullptr, true, false, false, 9, 0, 6, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx906:sramecc+:xnack+", nullptr, nullptr, true, false, false, 9, 0, 6, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:sramecc-", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:sramecc+", nullptr, nullptr, true, false, false, 9, 0, 8, ON, ANY, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, ANY, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:sramecc-:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:sramecc-:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, OFF, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:sramecc+:xnack-", nullptr, nullptr, true, false, false, 9, 0, 8, ON, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx908:sramecc+:xnack+", nullptr, nullptr, true, false, false, 9, 0, 8, ON, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx909", nullptr, nullptr, false, false, false, 9, 0, 9, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Raven2 (can execute Raven code) + {"gfx909:xnack-", nullptr, nullptr, false, false, !IS_BRAHMA & false, 9, 0, 9, NONE, OFF, 4, 
16, 1, 256, 64 * Ki, 32}, + {"gfx909:xnack+", nullptr, nullptr, false, false, false, 9, 0, 9, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx90c", nullptr, nullptr, false, false, false, 9, 0, 12, NONE, ANY, 4, 16, 1, 256, 64 * Ki, 32}, // Also Renoir + {"gfx90c:xnack-", nullptr, nullptr, false, false, !IS_BRAHMA & false, 9, 0, 12, NONE, OFF, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx90c:xnack+", nullptr, nullptr, false, false, false, 9, 0, 12, NONE, ON, 4, 16, 1, 256, 64 * Ki, 32}, + {"gfx1010", nullptr, nullptr, true, false, false, 10, 1, 0, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1010:xnack-", "gfx1010", nullptr, true, true, false, 10, 1, 0, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1010:xnack+", nullptr, nullptr, true, false, false, 10, 1, 0, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1011", nullptr, nullptr, true, false, false, 10, 1, 1, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1011:xnack-", "gfx1011", nullptr, true, true, false, 10, 1, 1, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1011:xnack+", nullptr, nullptr, true, false, false, 10, 1, 1, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1012", nullptr, nullptr, true, false, false, 10, 1, 2, NONE, ANY, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1012:xnack-", "gfx1012", nullptr, true, true, false, 10, 1, 2, NONE, OFF, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1012:xnack+", nullptr, nullptr, true, false, false, 10, 1, 2, NONE, ON, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1030", nullptr, nullptr, true, false, false, 10, 3, 0, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1031", nullptr, nullptr, true, false, false, 10, 3, 1, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1032", nullptr, nullptr, true, false, false, 10, 3, 2, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32}, + {"gfx1033", nullptr, nullptr, false, false, false, 10, 3, 3, NONE, NONE, 2, 32, 1, 256, 64 * Ki, 32} + }; + return std::make_pair(std::begin(supportedIsas_), std::end(supportedIsas_)); +} + +std::string Isa::processorName() const { + std::string 
processor(targetId_); + return processor.substr(0, processor.find(':')); +} + +std::string Isa::isaName() const { + return std::string(hsaIsaNamePrefix) + targetId(); +} + +bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) { + if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || + codeObjectIsa.versionMinor() != agentIsa.versionMinor() || + codeObjectIsa.versionStepping() != agentIsa.versionStepping()) + return false; + + assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() && + agentIsa.sramecc() != Feature::Any); + if ((codeObjectIsa.sramecc() == Feature::Enabled || + codeObjectIsa.sramecc() == Feature::Disabled) && + codeObjectIsa.sramecc() != agentIsa.sramecc()) + return false; + + assert(codeObjectIsa.isXnackSupported() == agentIsa.isXnackSupported() && + agentIsa.xnack() != Feature::Any); + if ((codeObjectIsa.xnack() == Feature::Enabled || codeObjectIsa.xnack() == Feature::Disabled) && + codeObjectIsa.xnack() != agentIsa.xnack()) + return false; + + return true; +} + +const Isa* Isa::findIsa(const char *isaName) { + if (!isaName) + return nullptr; + const char* prefix = std::strstr(isaName, hsaIsaNamePrefix); + if (prefix != isaName) + return nullptr; + const char *targetId = isaName + std::strlen(hsaIsaNamePrefix); + auto supportedIsas_ = supportedIsas(); + auto isaIter = std::find_if(supportedIsas_.first, supportedIsas_.second, [&](const Isa& isa) { + return std::strcmp(targetId, isa.targetId_) == 0; + }); + return isaIter == supportedIsas_.second ? 
nullptr : isaIter; +} + +const Isa* Isa::findIsa(uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping, + Isa::Feature sramecc, Isa::Feature xnack) { + auto supportedIsas_ = supportedIsas(); + auto isaIter = std::find_if(supportedIsas_.first, supportedIsas_.second, [&](const Isa& isa) { + return versionMajor == isa.versionMajor_ && versionMinor == isa.versionMinor_ && + versionStepping == isa.versionStepping_ && + (isa.sramecc_ == amd::Isa::Feature::Unsupported || isa.sramecc_ == sramecc) && + (isa.xnack_ == amd::Isa::Feature::Unsupported || isa.xnack_ == xnack); + }); + return isaIter == supportedIsas_.second ? nullptr : isaIter; +} + +const Isa* Isa::begin() { + return supportedIsas().first; +} + +const Isa* Isa::end() { + return supportedIsas().second; +} + std::vector* Device::devices_ = nullptr; AppProfile Device::appProfile_; @@ -300,8 +470,9 @@ bool Device::ValidateComgr() { return true; } -bool Device::create() { +bool Device::create(const Isa &isa) { assert(!vaCacheAccess_ && !vaCacheMap_); + isa_ = &isa; vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true); if (nullptr == vaCacheAccess_) { return false; diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 6b1d281616..3850b6cf91 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -37,6 +37,7 @@ #include "acl.h" #include "hwdebug.hpp" +#include #include #include #include @@ -79,6 +80,7 @@ class SvmUnmapMemoryCommand; class SvmPrefetchAsyncCommand; class TransferBufferFileCommand; class HwDebugManager; +class Isa; class Device; struct KernelParameterDescriptor; struct Coord3D; @@ -408,7 +410,7 @@ struct Info : public amd::EmbeddedObject { //! Device name string char name_[0x40]; - //! Target ID string + //! Target triple plus target ID string char targetId_[0x40]; //! 
Vendor name string @@ -1238,6 +1240,214 @@ class MemObjMap : public AllStatic { static amd::Monitor AllocatedLock_; //!< amd monitor locker }; +/// @brief Instruction Set Architecture properties. +class Isa { + public: + + /// @brief Isa's target feature setting type. + enum class Feature : uint8_t { + Unsupported, + Any, + Disabled, + Enabled, + }; + + //! Return a non-zero uint64_t value that uniquely identifies the ISA. + //! This can be used when a scalar value handle to the ISA is required. + static uint64_t toHandle(const Isa *isa) { + static_assert(reinterpret_cast(static_cast(nullptr)) == 0, + "nullptr value is not 0"); + static_assert(sizeof(isa) <= sizeof(uint64_t), "Handle size does not match pointer size"); + return isa ? reinterpret_cast(isa) : 0; + } + + //! Return the ISA corresponding to a handle returned by Isa::toHandle, + //! or nullptr if the handle is 0. This can be used when a scalar value + //! handle for an ISA is provided. + static const Isa* fromHandle(uint64_t handle) { + static_assert(reinterpret_cast(static_cast(nullptr)) == 0, + "nullptr value is not 0"); + static_assert(sizeof(handle) <= sizeof(uint64_t), "Handle size does not match pointer size"); + return handle ? reinterpret_cast(handle) : nullptr; + } + + /// @returns This Isa's target triple and target ID name. + std::string isaName() const; + + /// @returns This Isa's processor name. + std::string processorName() const; + + /// @returns This Isa's target ID name. + const char *targetId() const { + return targetId_; + } + + /// @returns This Isa's name to use with the HSAIL compiler, or nullptr if unsupported. + const char *hsailName() const { + return hsailId_; + } + + /// @returns This Isa's name to use with the AMD IL compiler, or nullptr if unsupported. + const char *amdIlName() const { + return amdIlId_; + } + + /// @returns If the ROCm runtime supports the ISA. + bool runtimeRocSupported() const { + return runtimeRocSupported_; + } + + /// @returns If the PAL runtime supports the ISA. 
+ bool runtimePalSupported() const { + return runtimePalSupported_; + } + + /// @returns If the GSL runtime supports the ISA. + bool runtimeGslSupported() const { + return runtimeGslSupported_; + } + + /// @returns SRAM ECC feature status. + const Feature &sramecc() const { + return sramecc_; + } + + /// @returns XNACK feature status. + const Feature &xnack() const { + return xnack_; + } + + /// @returns True if SRAMECC feature is supported, false otherwise. + bool isSrameccSupported() const { + return sramecc_ != Feature::Unsupported; + } + + /// @returns True if XNACK feature is supported, false otherwise. + bool isXnackSupported() const { + return xnack_ != Feature::Unsupported; + } + + /// @returns This Isa's major version. + uint32_t versionMajor() const { + return versionMajor_; + } + + /// @returns This Isa's minor version. + uint32_t versionMinor() const { + return versionMinor_; + } + + /// @returns This Isa's stepping version. + uint32_t versionStepping() const { + return versionStepping_; + } + + /// @returns This Isa's number of SIMDs per CU. + uint32_t simdPerCU() const { + return simdPerCU_; + } + + /// @returns This Isa's number of work-items processed per SIMD. + uint32_t simdWidth() const { + return simdWidth_; + } + + /// @returns This Isa's number of instructions processed per SIMD. + uint32_t simdInstructionWidth() const { + return simdInstructionWidth_; + } + + /// @returns This Isa's memory channel bank width. + uint32_t memChannelBankWidth() const { + return memChannelBankWidth_; + } + + /// @returns This Isa's local memory size per CU. + uint32_t localMemSizePerCU() const { + return localMemSizePerCU_; + } + + /// @returns This Isa's number of banks of local memory. + uint32_t localMemBanks() const { + return localMemBanks_; + } + + /// @returns True if @p codeObjectIsa and @p agentIsa are compatible, + /// false otherwise. + static bool isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa); + + /// @returns Isa for requested @p isaName, null pointer if not supported. 
+ static const Isa* findIsa(const char *isaName); + + /// @returns Isa for requested @p version, null pointer if not supported. + static const Isa* findIsa(uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping, + Feature sramecc = Feature::Any, Feature xnack = Feature::Any); + + /// @returns Iterator for first isa. + static const Isa* begin(); + + /// @returns Iterator for one past the end isa. + static const Isa* end(); + + private: + + constexpr Isa(const char* targetId, const char* hsailId, const char* amdIlId, + bool runtimeRocSupported, bool runtimePalSupported, bool runtimeGslSupported, + uint32_t versionMajor, uint32_t versionMinor, uint32_t versionStepping, + Feature sramecc, Feature xnack, uint32_t simdPerCU, uint32_t simdWidth, + uint32_t simdInstructionWidth, uint32_t memChannelBankWidth, + uint32_t localMemSizePerCU, uint32_t localMemBanks) + : targetId_(targetId), + hsailId_(hsailId), + amdIlId_(amdIlId), + runtimeRocSupported_(runtimeRocSupported), + runtimePalSupported_(runtimePalSupported), + runtimeGslSupported_(runtimeGslSupported), + versionMajor_(versionMajor), + versionMinor_(versionMinor), + versionStepping_(versionStepping), + sramecc_(sramecc), + xnack_(xnack), + simdPerCU_(simdPerCU), + simdWidth_(simdWidth), + simdInstructionWidth_(simdInstructionWidth), + memChannelBankWidth_(memChannelBankWidth), + localMemSizePerCU_(localMemSizePerCU), + localMemBanks_(localMemBanks) {} + + // @brief Returns the begin and end iterators for the supported ISAs. + static std::pair supportedIsas(); + + // @brief Isa's target ID name. Used for LLVM Code Object Manager + // compilations. + const char* targetId_; + + // @brief Isa's HSAIL name. Used for the Compiler Library for HSAIL + // compilation using the Shader Compiler Finalizer. Null pointer if + // unsupported. + const char* hsailId_; + + // @brief Isa's AMD IL name. Used for the Compiler Library for AMD IL + // compilation using the Shader Compiler. Null pointer if unsupported. 
+ const char* amdIlId_; + + bool runtimeRocSupported_; //!< ROCm runtime is supported. + bool runtimePalSupported_; //!< PAL runtime is supported. + bool runtimeGslSupported_; //!< GSL runtime is supported. + uint32_t versionMajor_; //!< Isa's major version. + uint32_t versionMinor_; //!< Isa's minor version. + uint32_t versionStepping_; //!< Isa's stepping version. + Feature sramecc_; //!< SRAMECC feature. + Feature xnack_; //!< XNACK feature. + uint32_t simdPerCU_; //!< Number of SIMDs per CU. + uint32_t simdWidth_; //!< Number of workitems processed per SIMD. + uint32_t simdInstructionWidth_; //!< Number of instructions processed per SIMD. + uint32_t memChannelBankWidth_; //!< Memory channel bank width. + uint32_t localMemSizePerCU_; //!< Local memory size per CU. + uint32_t localMemBanks_; //!< Number of banks of local memory. + +}; // class Isa + /*! \addtogroup Runtime * @{ * @@ -1300,7 +1510,7 @@ class Device : public RuntimeObject { virtual ~Device(); //! Initializes abstraction layer device object - bool create(); + bool create(const Isa &isa); uint retain() { // Overwrite the RuntimeObject::retain(). @@ -1476,6 +1686,12 @@ class Device : public RuntimeObject { //! Returns TRUE if the device is available for computations bool isOnline() const { return online_; } + //! Returns device isa. + const Isa &isa() const { + assert(isa_); + return *isa_; + } + //! Return a non-zero uint64_t value that uniquely identifies the device. //! This can be used when a scalar value handle to the device is require. 
static uint64_t toHandle(const Device *device) { @@ -1611,6 +1827,7 @@ class Device : public RuntimeObject { static Memory* p2p_stage_; //!< Staging resources private: + const Isa *isa_; //!< Device isa bool IsTypeMatching(cl_device_type type, bool offlineDevices); #if defined(WITH_HSA_DEVICE) diff --git a/projects/clr/rocclr/device/devkernel.cpp b/projects/clr/rocclr/device/devkernel.cpp index 7cb64a6179..47f94c6246 100644 --- a/projects/clr/rocclr/device/devkernel.cpp +++ b/projects/clr/rocclr/device/devkernel.cpp @@ -1166,7 +1166,7 @@ bool Kernel::SetAvailableSgprVgpr() { bool hasVgprMeta = false; amd_comgr_status_t status = amd::Comgr::get_isa_metadata( - prog().device().info().targetId_, &isaMeta); + prog().device().isa().isaName().c_str(), &isaMeta); if (status == AMD_COMGR_STATUS_SUCCESS) { hasIsaMeta = true; diff --git a/projects/clr/rocclr/device/devprogram.cpp b/projects/clr/rocclr/device/devprogram.cpp index 97db40b883..4ad849744a 100644 --- a/projects/clr/rocclr/device/devprogram.cpp +++ b/projects/clr/rocclr/device/devprogram.cpp @@ -82,7 +82,6 @@ Program::Program(amd::Device& device, amd::Program& owner) lastBuildOptionsArg_(), buildStatus_(CL_BUILD_NONE), buildError_(CL_SUCCESS), - machineTarget_(nullptr), globalVariableTotalSize_(0), programOptions_(nullptr) { @@ -286,7 +285,7 @@ amd_comgr_status_t Program::createAction(const amd_comgr_language_t oclver, } if (status == AMD_COMGR_STATUS_SUCCESS) { - status = amd::Comgr::action_info_set_isa_name(*action, device().info().targetId_); + status = amd::Comgr::action_info_set_isa_name(*action, device().isa().isaName().c_str()); } if (status == AMD_COMGR_STATUS_SUCCESS) { @@ -719,8 +718,14 @@ bool Program::compileImplHSAIL(const std::string& sourceCode, acl_error errorCode; aclTargetInfo target; - std::string arch = LP64_SWITCH("hsail", "hsail64"); - target = aclGetTargetInfo(arch.c_str(), machineTarget_, &errorCode); + const char* arch = LP64_SWITCH("hsail", "hsail64"); + const char* hsailName = 
device().isa().hsailName(); + if (!hsailName) { + // HSAIL compiler does not support device's ISA. + LogPrintfError("HSAIL compiler does not support %s", device().isa().targetId()); + return false; + } + target = aclGetTargetInfo(arch, hsailName, &errorCode); // end if asic info is ready // We dump the source code for each program (param: headers) @@ -1107,7 +1112,7 @@ bool Program::linkImplLC(amd::option::Options* options) { linkOptions.push_back("correctly_rounded_sqrt"); } if (options->oVariables->DenormsAreZero || AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 || - (device().info().gfxipMajor_ < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) { + (device().isa().versionMajor() < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) { linkOptions.push_back("daz_opt"); } if (options->oVariables->FiniteMathOnly || options->oVariables->FastRelaxedMath) { @@ -1365,9 +1370,7 @@ bool Program::initBuild(amd::option::Options* options) { return false; } - const char* devName = machineTarget_; - options->setPerBuildInfo((devName && (devName[0] != '\0')) ? 
devName : "gpu", - clBinary()->getEncryptCode(), true); + options->setPerBuildInfo(device().isa().targetId(), clBinary()->getEncryptCode(), true); // Elf Binary setup std::string outFileName; @@ -1703,17 +1706,26 @@ int32_t Program::build(const std::string& sourceCode, const char* origOptions, // ================================================================================================ std::vector Program::ProcessOptions(amd::option::Options* options) { - std::string scratchStr; std::vector optionsVec; if (!isLC()) { optionsVec.push_back("-D__AMD__=1"); - scratchStr.clear(); - optionsVec.push_back(scratchStr.append("-D__").append(machineTarget_).append("__=1")); + std::string processorName = device().isa().processorName(); + const char* hsailName = device().isa().hsailName(); + const char* amdIlName = device().isa().amdIlName(); - scratchStr.clear(); - optionsVec.push_back(scratchStr.append("-D__").append(machineTarget_).append("=1")); + optionsVec.push_back(std::string("-D__") + processorName + "__=1"); + optionsVec.push_back(std::string("-D__") + processorName + "=1"); + if (hsailName && (strcmp(hsailName, processorName.c_str()) != 0)) { + optionsVec.push_back(std::string("-D__") + hsailName + "__=1"); + optionsVec.push_back(std::string("-D__") + hsailName + "=1"); + } + if (amdIlName && (strcmp(amdIlName, processorName.c_str()) != 0) && + (!hsailName || strcmp(amdIlName, hsailName) != 0)) { + optionsVec.push_back(std::string("-D__") + amdIlName + "__=1"); + optionsVec.push_back(std::string("-D__") + amdIlName + "=1"); + } // Set options for the standard device specific options // All our devices support these options now @@ -1785,8 +1797,7 @@ std::vector Program::ProcessOptions(amd::option::Options* options) } } else { for (auto e : extensions) { - scratchStr.clear(); - optionsVec.push_back(scratchStr.append("-D").append(e).append("=1")); + optionsVec.push_back(std::string("-D") + e + "=1"); } } } diff --git a/projects/clr/rocclr/device/devprogram.hpp 
b/projects/clr/rocclr/device/devprogram.hpp index c7569772c6..5f7532770c 100644 --- a/projects/clr/rocclr/device/devprogram.hpp +++ b/projects/clr/rocclr/device/devprogram.hpp @@ -117,7 +117,6 @@ class Program : public amd::HeapObject { int32_t buildStatus_; //!< build status. int32_t buildError_; //!< build error - const char* machineTarget_; //!< Machine target for this program aclTargetInfo info_; //!< The info target for this binary. size_t globalVariableTotalSize_; amd::option::Options* programOptions_; @@ -233,9 +232,6 @@ class Program : public amd::HeapObject { const uint32_t codeObjectVer() const { return codeObjectVer_; } #endif - //! Get the machine target for the program - const char* machineTarget() const { return machineTarget_; } - //! Check if program is HIP based const bool isHIP() const { return (isHIP_ == 1); } @@ -293,7 +289,7 @@ class Program : public amd::HeapObject { void releaseClBinary(); //! return target info - virtual const aclTargetInfo& info(const char* str = "") = 0; + virtual const aclTargetInfo& info() = 0; virtual bool setKernels( amd::option::Options* options, void* binary, size_t binSize, diff --git a/projects/clr/rocclr/device/gpu/gpubinary.cpp b/projects/clr/rocclr/device/gpu/gpubinary.cpp index 4f303f80e4..563453d594 100644 --- a/projects/clr/rocclr/device/gpu/gpubinary.cpp +++ b/projects/clr/rocclr/device/gpu/gpubinary.cpp @@ -66,7 +66,7 @@ bool ClBinary::loadKernels(NullProgram& program, bool* hasRecompiled) { if (platform == amd::Elf::COMPLIB_PLATFORM) { // BIF 3.0 uint32_t flag; - aclTargetInfo tgtInfo = aclGetTargetInfo("amdil", dev().hwInfo()->targetName_, NULL); + aclTargetInfo tgtInfo = aclGetTargetInfo("amdil", dev().isa().amdIlName(), NULL); if (!elfIn()->getFlags(flag)) { LogError("The OCL binary image loading failed: incorrect format"); return false; diff --git a/projects/clr/rocclr/device/gpu/gpudefs.hpp b/projects/clr/rocclr/device/gpu/gpudefs.hpp index 74a27a58f1..d5e4f7dbcd 100644 --- 
a/projects/clr/rocclr/device/gpu/gpudefs.hpp +++ b/projects/clr/rocclr/device/gpu/gpudefs.hpp @@ -80,157 +80,6 @@ static constexpr uint HsaSamplerObjectAlignment = 16; //! HSA path specific defines for images static constexpr uint DeviceQueueMaskSize = 32; -//! Defines all supported ASIC families -enum AsicFamilies { Family7xx, Family8xx, FamilyTotal }; - -// FIXME: Change to use 2 digit for major/minor/stepping. -enum gfx_handle { - gfx600 = 600, - gfx601 = 601, - gfx602 = 602, - gfx700 = 700, - gfx701 = 701, - gfx702 = 702, - gfx704 = 704, - gfx705 = 705, - gfx800 = 800, - gfx801 = 801, - gfx802 = 802, - gfx803 = 803, - gfx810 = 810, - gfx900 = 900, - gfx902 = 902, - gfx904 = 904, - gfx906 = 906, - gfx909 = 909, - gfx90c = 9012 -}; - -// FIXME: Does this need updating? -struct AMDDeviceInfo { - uint machine_; //!< Machine target ID - const char* targetName_; //!< Target name - const char* machineTarget_; //!< Machine target - uint simdPerCU_; //!< Number of SIMDs per CU - uint simdWidth_; //!< Number of workitems processed per SIMD - uint simdInstructionWidth_; //!< Number of instructions processed per SIMD - uint memChannelBankWidth_; //!< Memory channel bank width - uint localMemSizePerCU_; //!< Local memory size per CU - uint localMemBanks_; //!< Number of banks of local memory - uint gfxipVersion_; //!< The core engine GFXIP version - uint gfxipMajor_; //!< The core engine GFXIP Major version - uint gfxipMinor_; //!< The core engine GFXIP Minor version - uint gfxipStepping_; //!< The core engine GFXIP Stepping version -}; - -static constexpr AMDDeviceInfo DeviceInfo[] = { - // Machine targetName machineTarget - /* CAL_TARGET_600 */ {ED_ATI_CAL_MACHINE_R600_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_610 */ {ED_ATI_CAL_MACHINE_R610_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_630 */ {ED_ATI_CAL_MACHINE_R630_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_670 */ {ED_ATI_CAL_MACHINE_R670_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_7XX */ 
{ED_ATI_CAL_MACHINE_R770_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_770 */ {ED_ATI_CAL_MACHINE_R770_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_710 */ {ED_ATI_CAL_MACHINE_R710_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_730 */ {ED_ATI_CAL_MACHINE_R730_ISA, "", "", 0, 0, 0, 0, 0, 0, 0}, - /* CAL_TARGET_CYPRESS */ {ED_ATI_CAL_MACHINE_CYPRESS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, - 400, 4, 0, 0}, - /* CAL_TARGET_JUNIPER */ {ED_ATI_CAL_MACHINE_JUNIPER_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, - 400, 4, 0, 0}, - /* CAL_TARGET_REDWOOD */ {ED_ATI_CAL_MACHINE_REDWOOD_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, - 400, 4, 0, 0}, - /* CAL_TARGET_CEDAR */ {ED_ATI_CAL_MACHINE_CEDAR_ISA, "", "", 1, 8, 5, 256, 32 * Ki, 16, 400, 4, 0, 0}, - /* CAL_TARGET_SUMO */ {ED_ATI_CAL_MACHINE_SUMO_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, 400, 4, 0, 0}, - /* CAL_TARGET_SUPERSUMO*/ {ED_ATI_CAL_MACHINE_SUPERSUMO_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 16, - 400, 4, 0, 0}, - /* CAL_TARGET_WRESTLER*/ {ED_ATI_CAL_MACHINE_WRESTLER_ISA, "", "", 1, 8, 5, 256, 32 * Ki, 16, - 400, 4, 0, 0}, - /* CAL_TARGET_CAYMAN */ {ED_ATI_CAL_MACHINE_CAYMAN_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32, - 500, 5, 0, 0}, - /* CAL_TARGET_KAUAI */ {ED_ATI_CAL_MACHINE_KAUAI_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0}, - /* CAL_TARGET_BARTS */ {ED_ATI_CAL_MACHINE_BARTS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0}, - /* CAL_TARGET_TURKS */ {ED_ATI_CAL_MACHINE_TURKS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, 400, 4, 0, 0}, - /* CAL_TARGET_CAICOS */ {ED_ATI_CAL_MACHINE_CAICOS_ISA, "", "", 1, 16, 5, 256, 32 * Ki, 32, - 400, 4, 0, 0}, - /* CAL_TARGET_TAHITI */ {ED_ATI_CAL_MACHINE_TAHITI_ISA, "Tahiti", "tahiti", 4, 16, 1, 256, - 64 * Ki, 32, gfx600, 6, 0, 0}, - /* CAL_TARGET_PITCAIRN */ {ED_ATI_CAL_MACHINE_PITCAIRN_ISA, "Pitcairn", "pitcairn", 4, 16, 1, - 256, 64 * Ki, 32, gfx601, 6, 0, 1}, - /* CAL_TARGET_CAPEVERDE */ {ED_ATI_CAL_MACHINE_CAPEVERDE_ISA, "Capeverde", "capeverde", 4, 16, - 1, 256, 
64 * Ki, 32, gfx601, 6, 0, 1}, - /* CAL_TARGET_DEVASTATOR */ {ED_ATI_CAL_MACHINE_DEVASTATOR_ISA, "", "", 1, 16, 4, 256, 32 * Ki, - 32, 500, 5, 0, 0}, - /* CAL_TARGET_SCRAPPER */ {ED_ATI_CAL_MACHINE_SCRAPPER_ISA, "", "", 1, 16, 4, 256, 32 * Ki, 32, - 500, 5, 0, 0}, - /* CAL_TARGET_OLAND */ {ED_ATI_CAL_MACHINE_OLAND_ISA, "Oland", "oland", 4, 16, 1, 256, 64 * Ki, - 32, gfx602, 6, 0, 2}, - /* CAL_TARGET_BONAIRE */ {ED_ATI_CAL_MACHINE_BONAIRE_ISA, "Bonaire", "bonaire", 4, 16, 1, 256, - 64 * Ki, 32, gfx704, 7, 0, 4}, - /* CAL_TARGET_SPECTRE */ {ED_ATI_CAL_MACHINE_SPECTRE_ISA, "Spectre", "spectre", 4, 16, 1, 256, - 64 * Ki, 32, gfx700, 7, 0, 0}, - /* CAL_TARGET_SPOOKY */ {ED_ATI_CAL_MACHINE_SPOOKY_ISA, "Spooky", "spooky", 4, 16, 1, 256, - 64 * Ki, 32, gfx700, 7, 0, 0}, - /* CAL_TARGET_KALINDI */ {ED_ATI_CAL_MACHINE_KALINDI_ISA, "Kalindi", "kalindi", 4, 16, 1, 256, - 64 * Ki, 32, gfx700, 7, 0, 0}, - /* CAL_TARGET_HAINAN */ {ED_ATI_CAL_MACHINE_HAINAN_ISA, "Hainan", "hainan", 4, 16, 1, 256, - 64 * Ki, 32, gfx602, 6, 0, 2}, - /* CAL_TARGET_HAWAII */ {ED_ATI_CAL_MACHINE_HAWAII_ISA, "Hawaii", "hawaii", 4, 16, 1, 256, - 64 * Ki, 32, gfx701, 7, 0, 1}, - /* CAL_TARGET_ICELAND */ {ED_ATI_CAL_MACHINE_ICELAND_ISA, "Iceland", "iceland", 4, 16, 1, 256, - 64 * Ki, 32, gfx802, 8, 0, 2}, - /* CAL_TARGET_TONGA */ {ED_ATI_CAL_MACHINE_TONGA_ISA, "Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, - 32, gfx802, 8, 0, 2}, - /* CAL_TARGET_MULLINS */ {ED_ATI_CAL_MACHINE_GODAVARI_ISA, "Mullins", "mullins", 4, 16, 1, 256, - 64 * Ki, 32, gfx705, 7, 0, 5}, - /* CAL_TARGET_FIJI */ {ED_ATI_CAL_MACHINE_FIJI_ISA, "Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, - gfx803, 8, 0, 3}, - /* CAL_TARGET_CARRIZO */ {ED_ATI_CAL_MACHINE_CARRIZO_ISA, "Carrizo", "carrizo", 4, 16, 1, 256, - 64 * Ki, 32, gfx801, 8, 0, 1}, - /* CAL_TARGET_ELLESMERE */ {ED_ATI_CAL_MACHINE_ELLESMERE_ISA, "Ellesmere", "ellesmere", 4, 16, - 1, 256, 64 * Ki, 32, gfx803, 8, 0, 3}, - /* CAL_TARGET_BAFFIN */ {ED_ATI_CAL_MACHINE_BAFFIN_ISA, "Baffin", 
"baffin", 4, 16, 1, 256, - 64 * Ki, 32, gfx803, 8, 0, 3}, - /* CAL_TARGET_GREENLAND */ {ED_ATI_CAL_MACHINE_GREENLAND_ISA, IF(IS_BRAHMA, "", "gfx900"), - IF(IS_BRAHMA, "", "gfx900"), 4, 16, 1, 256, 64 * Ki, 32, gfx900, 9, 0, 0}, - /* CAL_TARGET_STONEY */ {ED_ATI_CAL_MACHINE_STONEY_ISA, "Stoney", "stoney", 4, 16, 1, 256, - 64 * Ki, 32, gfx810, 8, 1, 0}, - /* CAL_TARGET_LEXA */ {ED_ATI_CAL_MACHINE_LEXA_ISA, "gfx803", "gfx803", 4, 16, 1, 256, 64 * Ki, - 32, gfx803, 8, 0, 3}, - /* CAL_TARGET_RAVEN */ {ED_ATI_CAL_MACHINE_RAVEN_ISA, IF(IS_BRAHMA, "", "gfx902"), - IF(IS_BRAHMA, "", "gfx902"), 4, 16, 1, 256, 64 * Ki, 32, gfx902, 9, 0, 2}, - /* CAL_TARGET_RAVEN2 */ {ED_ATI_CAL_MACHINE_RAVEN2_ISA, IF(IS_BRAHMA, "", "gfx909"), - IF(IS_BRAHMA, "", "gfx902"), 4, 16, 1, 256, 64 * Ki, 32, gfx909, 9, 0, 9}, - /* CAL_TARGET_RENOIR */{ ED_ATI_CAL_MACHINE_RENOIR_ISA, IF(IS_BRAHMA, "", "gfx90c"), - IF(IS_BRAHMA, "", "gfx90c"), 4, 16, 1, 256, 64 * Ki, 32, gfx90c, 9, 0, 12}, - /* CAL_TARGET_POLARIS22 */ {ED_ATI_CAL_MACHINE_POLARIS22_ISA, IF(IS_BRAHMA, "", "gfx803"), - IF(IS_BRAHMA, "", "gfx803"), 4, 16, 1, 256, 64 * Ki, 32, gfx803, 8, 0, 3}, - /* CAL_TARGET_VEGA12 */{ ED_ATI_CAL_MACHINE_VEGA12_ISA, IF(IS_BRAHMA, "", "gfx904"), - IF(IS_BRAHMA, "", "gfx904"), 4, 16, 1, 256, 64 * Ki, 32, gfx904, 9, 0, 4}, - /* CAL_TARGET_VEGA20 */{ ED_ATI_CAL_MACHINE_VEGA20_ISA, IF(IS_BRAHMA, "", "gfx906"), - IF(IS_BRAHMA, "", "gfx906"), 4, 16, 1, 256, 64 * Ki, 32, gfx906, 9, 0, 6 }, -}; - -// FIXME: These need updating to new Target ID format. Or is all this code nw -// obsolete and should be deleted? How is XNACK and SRAMECC settings supported? 
-static constexpr const char* Gfx600 = "amdgcn-amd-amdhsa--gfx600"; -static constexpr const char* Gfx601 = "amdgcn-amd-amdhsa--gfx601"; -static constexpr const char* Gfx602 = "amdgcn-amd-amdhsa--gfx602"; -static constexpr const char* Gfx700 = "amdgcn-amd-amdhsa--gfx700"; -static constexpr const char* Gfx701 = "amdgcn-amd-amdhsa--gfx701"; -static constexpr const char* Gfx702 = "amdgcn-amd-amdhsa--gfx702"; -static constexpr const char* Gfx704 = "amdgcn-amd-amdhsa--gfx704"; -static constexpr const char* Gfx705 = "amdgcn-amd-amdhsa--gfx705"; -static constexpr const char* Gfx801 = "amdgcn-amd-amdhsa--gfx801:xnack+"; -static constexpr const char* Gfx802 = "amdgcn-amd-amdhsa--gfx802"; -static constexpr const char* Gfx803 = "amdgcn-amd-amdhsa--gfx803"; -static constexpr const char* Gfx810 = "amdgcn-amd-amdhsa--gfx810:xnack+"; -static constexpr const char* Gfx900 = "amdgcn-amd-amdhsa--gfx900:xnack-"; -static constexpr const char* Gfx902 = "amdgcn-amd-amdhsa--gfx902:xnack+"; -static constexpr const char* Gfx904 = "amdgcn-amd-amdhsa--gfx904:xnack-"; -static constexpr const char* Gfx906 = "amdgcn-amd-amdhsa--gfx906:xnack-"; -static constexpr const char* Gfx909 = "amdgcn-amd-amdhsa--gfx909:xnack+"; -static constexpr const char* Gfx90c = "amdgcn-amd-amdhsa--gfx90c:xnack+"; - // Supported OpenCL versions enum OclVersion { OpenCL10, OpenCL11, OpenCL12, OpenCL20, OpenCL21 }; diff --git a/projects/clr/rocclr/device/gpu/gpudevice.cpp b/projects/clr/rocclr/device/gpu/gpudevice.cpp index 2aed1afc2e..c9946d094e 100644 --- a/projects/clr/rocclr/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/device/gpu/gpudevice.cpp @@ -58,6 +58,93 @@ #include #include +namespace { + +//! Define the mapping from CAL asic enumeration values to the +//! compiler gfx major/minor/stepping version. 
+struct CalDevice { + uint32_t gfxipMajor_; //!< The core engine GFXIP Major version + uint32_t gfxipMinor_; //!< The core engine GFXIP Minor version + uint32_t gfxipStepping_; //!< The core engine GFXIP Stepping version + CALMachineType calMachine_; //!< CAL machine type + const char* calName_; //!< CAL device name + CALtarget calTarget_; //!< CAL target + bool preferPal_; //!< Prefer to use PAL if GPU_ENABLE_PAL=2 + bool nullUseDouble_; //!< Use double precision for a NullDevice + bool nullUseOpenCL200_; //!< Use OpenCL 2.0 for a NullDevice +}; + +static constexpr CalDevice supportedCalDevices[] = { +// Prefer - NullDevice - +// GFX Version GSL Machine CAL Name CAL Target PAL double OCL200 + {6, 0, 0, ED_ATI_CAL_MACHINE_TAHITI_ISA, "Tahiti", CAL_TARGET_TAHITI, false, true, false}, + {6, 0, 1, ED_ATI_CAL_MACHINE_PITCAIRN_ISA, "Pitcairn", CAL_TARGET_PITCAIRN, false, true, false}, + {6, 0, 1, ED_ATI_CAL_MACHINE_CAPEVERDE_ISA, "Capeverde", CAL_TARGET_CAPEVERDE, false, true, false}, + {6, 0, 2, ED_ATI_CAL_MACHINE_OLAND_ISA, "Oland", CAL_TARGET_OLAND, false, true, false}, + {6, 0, 2, ED_ATI_CAL_MACHINE_HAINAN_ISA, "Hainan", CAL_TARGET_HAINAN, false, true, false}, + {7, 0, 0, ED_ATI_CAL_MACHINE_KALINDI_ISA, "Kalindi", CAL_TARGET_KALINDI, false, true, true }, + {7, 0, 0, ED_ATI_CAL_MACHINE_SPECTRE_ISA, "Spectre", CAL_TARGET_SPECTRE, false, true, true }, + {7, 0, 0, ED_ATI_CAL_MACHINE_SPOOKY_ISA, "Spooky", CAL_TARGET_SPOOKY, false, true, true }, + {7, 0, 2, ED_ATI_CAL_MACHINE_HAWAII_ISA, "Hawaii", CAL_TARGET_HAWAII, false, true, true }, // Also Hawaiipro (generated code is for Hawaiipro) + {7, 0, 4, ED_ATI_CAL_MACHINE_BONAIRE_ISA, "Bonaire", CAL_TARGET_BONAIRE, false, true, true }, + {7, 0, 5, ED_ATI_CAL_MACHINE_GODAVARI_ISA, "Mullins", CAL_TARGET_GODAVARI, false, true, true }, // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support Mullins. 
+ {8, 0, 1, ED_ATI_CAL_MACHINE_CARRIZO_ISA, "Carrizo", CAL_TARGET_CARRIZO, false, true, true }, // Also Bristol Ridge + {8, 0, 2, ED_ATI_CAL_MACHINE_ICELAND_ISA, "Iceland", CAL_TARGET_ICELAND, false, true, true }, + {8, 0, 2, ED_ATI_CAL_MACHINE_TONGA_ISA, "Tonga", CAL_TARGET_TONGA, false, true, true }, // Also Tongapro (generated code is for Tonga) + {8, 0, 3, ED_ATI_CAL_MACHINE_FIJI_ISA, "Fiji", CAL_TARGET_FIJI, false, true, true }, + {8, 0, 3, ED_ATI_CAL_MACHINE_ELLESMERE_ISA, "Ellesmere", CAL_TARGET_ELLESMERE, false, true, true }, // Polaris10 + {8, 0, 3, ED_ATI_CAL_MACHINE_BAFFIN_ISA, "Baffin", CAL_TARGET_BAFFIN, false, true, true }, // Polaris11 + {8, 0, 3, ED_ATI_CAL_MACHINE_LEXA_ISA, "gfx803", CAL_TARGET_LEXA, false, true, true }, // Polaris12 +#if !defined(BRAHMA) + {8, 0, 3, ED_ATI_CAL_MACHINE_POLARIS22_ISA, "gfx803", CAL_TARGET_POLARIS22, false, true, true }, +#endif + {8, 1, 0, ED_ATI_CAL_MACHINE_STONEY_ISA, "Stoney", CAL_TARGET_STONEY, false, true, true }, +#if !defined(BRAHMA) + {9, 0, 0, ED_ATI_CAL_MACHINE_GREENLAND_ISA, "gfx900", CAL_TARGET_GREENLAND, true, true, true }, // Vega10 + {9, 0, 2, ED_ATI_CAL_MACHINE_RAVEN_ISA, "gfx902", CAL_TARGET_RAVEN, true, true, true }, + {9, 0, 4, ED_ATI_CAL_MACHINE_VEGA12_ISA, "gfx904", CAL_TARGET_VEGA12, true, true, true }, + {9, 0, 6, ED_ATI_CAL_MACHINE_VEGA20_ISA, "gfx906", CAL_TARGET_VEGA20, true, true, true }, + {9, 0, 9, ED_ATI_CAL_MACHINE_RAVEN2_ISA, "gfx909", CAL_TARGET_RAVEN2, true, true, true }, + {9, 0, 12, ED_ATI_CAL_MACHINE_RENOIR_ISA, "gfx90c", CAL_TARGET_RENOIR, true, true, true }, +#endif +}; +static_assert(CAL_TARGET_LAST == CAL_TARGET_VEGA20, "Add new CAL targets to mapping"); + +static std::tuple findIsa( + CALtarget calTarget, bool sramecc, bool xnack) { + auto calDeviceIter = + std::find_if(std::begin(supportedCalDevices), std::end(supportedCalDevices), + [&](const CalDevice& calDevice) { return calDevice.calTarget_ == calTarget; }); + if (calDeviceIter == std::end(supportedCalDevices)) { + 
return std::make_tuple(nullptr, static_cast(0), nullptr, false, false, false); + } + const amd::Isa* isa = amd::Isa::findIsa( + calDeviceIter->gfxipMajor_, calDeviceIter->gfxipMinor_, calDeviceIter->gfxipStepping_, + sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled, + xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled); + return std::make_tuple(isa, calDeviceIter->calMachine_, calDeviceIter->calName_, + calDeviceIter->preferPal_, calDeviceIter->nullUseDouble_, + calDeviceIter->nullUseOpenCL200_); +} + +static std::tuple findCal( + uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxipStepping) { + auto calDeviceIter = std::find_if(std::begin(supportedCalDevices), std::end(supportedCalDevices), + [&](const CalDevice& calDevice) { + return calDevice.gfxipMajor_ == gfxipMajor && + calDevice.gfxipMinor_ == gfxipMinor && + calDevice.gfxipStepping_ == gfxipStepping; + }); + if (calDeviceIter == std::end(supportedCalDevices)) { + return std::make_tuple(false, static_cast(0), static_cast(0), + nullptr, false, false, false); + } + return std::make_tuple(true, calDeviceIter->calMachine_, calDeviceIter->calTarget_, + calDeviceIter->calName_, calDeviceIter->preferPal_, + calDeviceIter->nullUseDouble_, calDeviceIter->nullUseOpenCL200_); +} + +} // namespace bool DeviceLoad() { bool ret = false; @@ -79,126 +166,102 @@ aclCompiler* NullDevice::hsaCompiler_; AppProfile Device::appProfile_; NullDevice::NullDevice() - : amd::Device(), calTarget_(static_cast(0)), hwInfo_(NULL) {} + : amd::Device(), + calTarget_(static_cast(0)), + calMachine_(static_cast(0)), + calName_(nullptr) {} bool NullDevice::init() { - std::vector devices; - - devices = getDevices(CL_DEVICE_TYPE_GPU, false); - - // Loop through all supported devices and create each of them - for (uint id = CAL_TARGET_TAHITI; id <= CAL_TARGET_LAST; ++id) { - bool foundActive = false; - bool foundDuplicate = false; - - if (gpu::DeviceInfo[id].targetName_[0] == '\0') { + // Create offline devices 
for all ISAs not already associated with an online + // device. This allows code objects to be compiled for all supported ISAs. + std::vector devices = getDevices(CL_DEVICE_TYPE_GPU, false); + for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) { + if (!isa->runtimeGslSupported()) { continue; } - - // Loop through all active devices and see if we match one - for (uint i = 0; i < devices.size(); ++i) { - if (static_cast(devices[i])->calTarget() == static_cast(id)) { - foundActive = true; + bool isOnline = false; + // Check if the particular device is online + for (size_t i = 0; i < devices.size(); i++) { + if (&(devices[i]->isa()) == isa) { + isOnline = true; break; } } - - // Don't report an offline device if it's active - if (foundActive) { + if (isOnline) { continue; } - // Loop through all previous devices in the DeviceInfo list and compare them with the - // current entry to see if the current entry was listed previously in the DeviceInfo, - // if so, then it means the current entry already has been added in the offline device list - for (uint j = 0; j < id; ++j) { - if (gpu::DeviceInfo[j].targetName_[0] == '\0') { - continue; - } - if (strcmp(gpu::DeviceInfo[j].targetName_, gpu::DeviceInfo[id].targetName_) == 0) { - foundDuplicate = true; - break; - } + bool found; + CALMachineType calMachine; + CALtarget calTarget; + const char* calName; + bool preferPal; + bool nullUseDouble; + bool nullUseOpenCL200; + std::tie(found, calMachine, calTarget, calName, preferPal, nullUseDouble, nullUseOpenCL200) = + findCal(isa->versionMajor(), isa->versionMinor(), isa->versionStepping()); + if (!found) { + // GSL does not support this asic. 
+ continue; } - // Don't report an offline device twice - if (foundDuplicate) { - continue; + std::unique_ptr nullDevice(new NullDevice()); + if (!nullDevice) { + LogPrintfError("Error allocating new instance of offline CAL Device %s", isa->targetId()); + return false; } - - NullDevice* dev = new NullDevice(); - if (NULL != dev) { - if (!dev->create(static_cast(id))) { - delete dev; - } else { - dev->registerDevice(); - } + if (!nullDevice->create(calName, *isa, calTarget, preferPal, nullUseDouble, nullUseOpenCL200)) { + // Skip over unsupported devices + LogPrintfError("Skipping creating new instance of offline CAL Device %s", isa->targetId()); + continue; } + nullDevice.release()->registerDevice(); } - return true; } -bool NullDevice::create(CALtarget target) { - CALdeviceattribs calAttr = {0}; - gslMemInfo memInfo = {0}; - - online_ = false; - - calTarget_ = calAttr.target = target; - hwInfo_ = &DeviceInfo[calTarget_]; - - assert((target >= CAL_TARGET_TAHITI) && (target != CAL_TARGET_SCRAPPER) && - (target != CAL_TARGET_DEVASTATOR)); - - if ((GPU_ENABLE_PAL == 2) && usePal()) { +bool NullDevice::create(const char* calName, const amd::Isa& isa, CALtarget target, + bool preferPal, bool doublePrecision, bool openCL200) { + if (!isa.runtimeGslSupported()) { + LogPrintfError("Offline CAL device %s is not supported", isa.targetId()); + return false; + } + if ((GPU_ENABLE_PAL == 2) && isa.runtimePalSupported() && preferPal) { + LogPrintfError("Skipping as GPU_ENABLE_PAL=2 indicating to use PAL for offline CAL device %s", + isa.targetId()); return false; } + online_ = false; + calTarget_ = target; + calName_ = calName; + + // sets up vaCacheAccess_ and vaCacheMap_. 
+ if (!amd::Device::create(isa)) { + LogPrintfError("Unable to setup offline device for CAL device %s", isa.targetId()); + return false; + } + + CALdeviceattribs calAttr = {0}; + calAttr.target = calTarget(); // Force double if it could be supported - switch (target) { - case CAL_TARGET_PITCAIRN: - case CAL_TARGET_CAPEVERDE: - case CAL_TARGET_TAHITI: - case CAL_TARGET_OLAND: - case CAL_TARGET_HAINAN: - calAttr.doublePrecision = CAL_TRUE; - break; - case CAL_TARGET_BONAIRE: - case CAL_TARGET_SPECTRE: - case CAL_TARGET_SPOOKY: - case CAL_TARGET_KALINDI: - case CAL_TARGET_HAWAII: - case CAL_TARGET_ICELAND: - case CAL_TARGET_TONGA: - case CAL_TARGET_FIJI: - case CAL_TARGET_GODAVARI: - case CAL_TARGET_CARRIZO: - case CAL_TARGET_ELLESMERE: - case CAL_TARGET_BAFFIN: - case CAL_TARGET_GREENLAND: - case CAL_TARGET_STONEY: - case CAL_TARGET_LEXA: - case CAL_TARGET_RAVEN: - case CAL_TARGET_RAVEN2: - case CAL_TARGET_RENOIR: - case CAL_TARGET_POLARIS22: - case CAL_TARGET_VEGA12: - case CAL_TARGET_VEGA20: - calAttr.doublePrecision = CAL_TRUE; - calAttr.isOpenCL200Device = CAL_TRUE; - break; - default: - break; + if (doublePrecision) { + calAttr.doublePrecision = CAL_TRUE; + } + // Use OpenCL 2.0 if supported + if (openCL200) { + calAttr.isOpenCL200Device = CAL_TRUE; } settings_ = new gpu::Settings(); gpu::Settings* gpuSettings = reinterpret_cast(settings_); // Create setting for the offline target if ((gpuSettings == NULL) || !gpuSettings->create(calAttr)) { + LogPrintfError("GPU settings failed for offline device for CAL device %s", isa.targetId()); return false; } + gslMemInfo memInfo = {0}; // Report 512MB for all offline devices memInfo.cardMemAvailableBytes = 512 * Mi; memInfo.cardLargestFreeBlockBytes = 512 * Mi; @@ -243,7 +306,7 @@ bool NullDevice::create(CALtarget target) { acl_error error; hsaCompiler_ = aclCompilerInit(&opts, &error); if (error != ACL_SUCCESS) { - LogError("Error initializing the compiler"); + LogPrintfError("Error initializing the compiler for offline 
CAL device %s", isa.targetId()); return false; } } @@ -494,14 +557,11 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.platform_ = AMD_PLATFORM; - if ((calTarget() == CAL_TARGET_CARRIZO) && ASICREV_IS_CARRIZO_BRISTOL(calAttr.asicRevision)) { - const static char* bristol = "Bristol Ridge"; - ::strncpy(info_.name_, bristol, sizeof(info_.name_) - 1); - } else { - ::strncpy(info_.name_, hwInfo()->targetName_, sizeof(info_.name_) - 1); - } + ::strncpy(info_.name_, calName_, sizeof(info_.name_) - 1); + ::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1); ::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1); - ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING); + ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (GSL)%s", + isOnline() ? "" : " [Offline]"); info_.profile_ = "FULL_PROFILE"; if (settings().oclVersion_ >= OpenCL20) { @@ -584,19 +644,19 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.deviceTopology_.pcie.device = (calAttr.pciTopologyInformation & (0x1F << 3)) >> 3; info_.deviceTopology_.pcie.function = (calAttr.pciTopologyInformation & 0x07); - info_.simdPerCU_ = hwInfo()->simdPerCU_; + info_.simdPerCU_ = isa().simdPerCU(); info_.cuPerShaderArray_ = calAttr.numberOfCUsperShaderArray; - info_.simdWidth_ = hwInfo()->simdWidth_; - info_.simdInstructionWidth_ = hwInfo()->simdInstructionWidth_; + info_.simdWidth_ = isa().simdWidth(); + info_.simdInstructionWidth_ = isa().simdInstructionWidth(); info_.wavefrontWidth_ = calAttr.wavefrontSize; info_.globalMemChannelBanks_ = calAttr.numMemBanks; - info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_; - info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_; - info_.localMemBanks_ = hwInfo()->localMemBanks_; - info_.gfxipMajor_ = hwInfo()->gfxipMajor_; - info_.gfxipMinor_ = hwInfo()->gfxipMinor_; 
- info_.gfxipStepping_ = hwInfo()->gfxipStepping_; + info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth(); + info_.localMemSizePerCU_ = isa().localMemSizePerCU(); + info_.localMemBanks_ = isa().localMemBanks(); + info_.gfxipMajor_ = isa().versionMajor(); + info_.gfxipMinor_ = isa().versionMinor(); + info_.gfxipStepping_ = isa().versionStepping(); info_.numAsyncQueues_ = numComputeRings; @@ -607,7 +667,7 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.pcieDeviceId_ = calAttr.pcieDeviceID; info_.pcieRevisionId_ = calAttr.pcieRevisionID; - info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * hwInfo()->simdPerCU_ * 10; + info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * isa().simdPerCU() * 10; } } @@ -849,10 +909,6 @@ Device::~Device() { extern const char* SchedulerSourceCode; bool Device::create(CALuint ordinal, CALuint numOfDevices) { - if (!amd::Device::create()) { - return false; - } - appProfile_.init(); bool smallMemSystem = false; @@ -882,19 +938,40 @@ bool Device::create(CALuint ordinal, CALuint numOfDevices) { // Update CAL target calTarget_ = getAttribs().target; - hwInfo_ = &DeviceInfo[calTarget_]; - if ((GPU_ENABLE_PAL == 2) && usePal()) { + // XNACK should be set for PageMigration or IOMMUv2 support. + bool isXNACKSupported = false; + + // SRAMECC should be set for ecc protected GPRs. 
+ bool isSRAMECCSupported = false; + + const amd::Isa* isa; + bool preferPal; + std::tie(isa, calMachine_, calName_, preferPal, std::ignore, std::ignore) = + findIsa(calTarget(), isSRAMECCSupported, isXNACKSupported); + + if ((calTarget() == CAL_TARGET_CARRIZO) && ASICREV_IS_CARRIZO_BRISTOL(getAttribs().asicRevision)) { + calName_ = "Bristol Ridge"; + } + + if (!isa) { + LogPrintfError("Unsupported CAL device #%d", calTarget()); + return false; + } + if (!isa->runtimeGslSupported()) { + LogPrintfError("Unsupported CAL device with ISA %s", isa->targetId()); + return false; + } + if ((GPU_ENABLE_PAL == 2) && isa->runtimePalSupported() && preferPal) { + LogPrintfError("Skipping as GPU_ENABLE_PAL=2 indicating to use PAL for CAL device %s", + isa->targetId()); return false; } -#if defined(BRAHMA) - if (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN || - calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_POLARIS22 || - calTarget_ == CAL_TARGET_RENOIR) { + if (!amd::Device::create(*isa)) { + LogPrintfError("Unable to setup device for CAL device %s", isa->targetId()); return false; } -#endif // Creates device settings settings_ = new gpu::Settings(); diff --git a/projects/clr/rocclr/device/gpu/gpudevice.hpp b/projects/clr/rocclr/device/gpu/gpudevice.hpp index 5b2b11373a..91543293aa 100644 --- a/projects/clr/rocclr/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/device/gpu/gpudevice.hpp @@ -67,7 +67,12 @@ class NullDevice : public amd::Device { NullDevice(); //! Creates an offline device with the specified target - bool create(CALtarget target //!< GPU device identifier + bool create(const char* calName, //!< GPU device name + const amd::Isa& isa, //!< GPU device isa + CALtarget target, //!< GPU device identifier + bool preferPal, //!< GPU prefer to use PAL if GPU_ENABLE_PAL=2 + bool doublePrecision, //!< Use double precision + bool openCL200 //!< Use OpenCL 2.0 ); //! 
Instantiate a new virtual device @@ -115,7 +120,7 @@ class NullDevice : public amd::Device { CALtarget calTarget() const { return calTarget_; } - const AMDDeviceInfo* hwInfo() const { return hwInfo_; } + CALMachineType calMachine() const { return calMachine_; } //! Empty implementation on Null device virtual bool globalFreeMemory(size_t* freeMemory) const { return false; } @@ -131,12 +136,6 @@ class NullDevice : public amd::Device { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } protected: - bool usePal() const { - return (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN || - calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_RENOIR || - calTarget_ >= CAL_TARGET_VEGA12); - } - //! Answer the question: "Should HSAIL Program be created?", //! based on the given options. bool isHsailProgram(amd::option::Options* options = NULL); @@ -150,7 +149,8 @@ class NullDevice : public amd::Device { ); CALtarget calTarget_; //!< GPU device identifier - const AMDDeviceInfo* hwInfo_; //!< Device HW info structure + CALMachineType calMachine_; //!< GPU machine identifier + const char* calName_; //!< GPU device name }; //! Forward declarations diff --git a/projects/clr/rocclr/device/gpu/gpukernel.cpp b/projects/clr/rocclr/device/gpu/gpukernel.cpp index 6059cb3372..b6160a2010 100644 --- a/projects/clr/rocclr/device/gpu/gpukernel.cpp +++ b/projects/clr/rocclr/device/gpu/gpukernel.cpp @@ -643,11 +643,8 @@ bool NullKernel::create(const std::string& code, const std::string& metadata, if ((binaryCode == NULL) && (binarySize == 0) && !code.empty()) { acl_error err; - std::string arch = "amdil"; - if (nullDev().settings().use64BitPtr_) { - arch += "64"; - } - aclTargetInfo info = aclGetTargetInfo(arch.c_str(), nullDev().hwInfo()->targetName_, &err); + aclTargetInfo info = aclGetTargetInfo(nullDev().settings().use64BitPtr_ ? 
"amdil64" : "amdil", + nullDev().isa().amdIlName(), &err); if (err != ACL_SUCCESS) { LogWarning("aclGetTargetInfo failed"); return false; @@ -1007,7 +1004,7 @@ bool NullKernel::createMultiBinary(uint* imageSize, void** image, const void* is constBuffers[constBufferCount++].index = nullProg().glbCb()[i]; } - encoding.machine = nullDev().hwInfo()->machine_; + encoding.machine = nullDev().calMachine(); encoding.type = ED_ATI_CAL_TYPE_COMPUTE; encoding.inputCount = inputResourceCount; encoding.outputCount = outputCount; diff --git a/projects/clr/rocclr/device/gpu/gpuprogram.cpp b/projects/clr/rocclr/device/gpu/gpuprogram.cpp index d57f07624d..baeb800263 100644 --- a/projects/clr/rocclr/device/gpu/gpuprogram.cpp +++ b/projects/clr/rocclr/device/gpu/gpuprogram.cpp @@ -41,14 +41,10 @@ namespace gpu { -const aclTargetInfo& NullProgram::info(const char* str) { +const aclTargetInfo& NullProgram::info() { acl_error err; - std::string arch = "amdil"; - if (dev().settings().use64BitPtr_) { - arch += "64"; - } - info_ = aclGetTargetInfo(arch.c_str(), - (str && str[0] == '\0' ? dev().hwInfo()->targetName_ : str), &err); + info_ = aclGetTargetInfo(gpuNullDevice().settings().use64BitPtr_ ? "amdil64" : "amdil", + device().isa().amdIlName(), &err); if (err != ACL_SUCCESS) { LogWarning("aclGetTargetInfo failed"); } @@ -1507,7 +1503,6 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) executable_(NULL), loaderContext_(this) { assert(device.isOnline()); - machineTarget_ = gpuNullDevice().hwInfo()->targetName_; loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } @@ -1520,7 +1515,6 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) loaderContext_(this) { assert(!device.isOnline()); isNull_ = true; - machineTarget_ = gpuNullDevice().hwInfo()->targetName_; // Cannot load onto a NullDevice. 
loader_ = nullptr; @@ -1769,14 +1763,10 @@ void HSAILProgram::fillResListWithKernels(std::vector& memList) c } } -const aclTargetInfo& HSAILProgram::info(const char* str) { +const aclTargetInfo& HSAILProgram::info() { acl_error err; - std::string arch = "hsail"; - if (dev().settings().use64BitPtr_) { - arch = "hsail64"; - } - info_ = aclGetTargetInfo(arch.c_str(), - (str && str[0] == '\0' ? gpuNullDevice().hwInfo()->targetName_ : str), &err); + info_ = aclGetTargetInfo(gpuNullDevice().settings().use64BitPtr_ ? "hsail64" : "hsail", + device().isa().hsailName(), &err); if (err != ACL_SUCCESS) { LogWarning("aclGetTargetInfo failed"); } @@ -1802,107 +1792,23 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) { } hsa_isa_t ORCAHSALoaderContext::IsaFromName(const char* name) { - hsa_isa_t isa = {0}; - if (!strcmp(Gfx600, name)) { - isa.handle = gfx600; - return isa; - } - if (!strcmp(Gfx601, name)) { - isa.handle = gfx601; - return isa; - } - if (!strcmp(Gfx602, name)) { - isa.handle = gfx602; - return isa; - } - if (!strcmp(Gfx700, name)) { - isa.handle = gfx700; - return isa; - } - if (!strcmp(Gfx701, name)) { - isa.handle = gfx701; - return isa; - } - if (!strcmp(Gfx702, name)) { - isa.handle = gfx702; - return isa; - } - if (!strcmp(Gfx705, name)) { - isa.handle = gfx702; - return isa; - } - if (!strcmp(Gfx801, name)) { - isa.handle = gfx801; - return isa; - } - if (!strcmp(Gfx802, name)) { - isa.handle = gfx802; - return isa; - } - if (!strcmp(Gfx803, name)) { - isa.handle = gfx803; - return isa; - } - if (!strcmp(Gfx810, name)) { - isa.handle = gfx810; - return isa; - } - if (!strcmp(Gfx900, name)) { - isa.handle = gfx900; - return isa; - } - if (!strcmp(Gfx902, name)) { - isa.handle = gfx902; - return isa; - } - if (!strcmp(Gfx904, name)) { - isa.handle = gfx904; - return isa; - } - if (!strcmp(Gfx906, name)) { - isa.handle = gfx906; - return isa; - } - if (!strcmp(Gfx909, name)) { - isa.handle = gfx909; - return isa; - } - if (!strcmp(Gfx90c, name)) { - 
isa.handle = gfx90c; - return isa; - } - - return isa; + const amd::Isa* isa_p = amd::Isa::findIsa(name); + return {amd::Isa::toHandle(isa_p)}; } bool ORCAHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) { - uint dev_gfxip = program_->gpuNullDevice().hwInfo()->gfxipVersion_; - uint isa_gfxip = isa.handle; - switch (dev_gfxip) { - case gfx700: - case gfx704: - case gfx801: - case gfx802: - case gfx803: - case gfx810: - case gfx900: - case gfx902: - case gfx904: - case gfx906: - case gfx909: - case gfx90c: - return isa_gfxip == dev_gfxip; - case gfx701: - case gfx702: - // gfx701 only differs from gfx702 by faster fp operations and can be loaded on either device. - return isa_gfxip == gfx701|| isa_gfxip == gfx702; - case gfx600: - case gfx601: - case gfx602: - default: - LogPrintfError("Unsupported gfxip version gfx%d", dev_gfxip); + // The HSA loader uses a handle value of 0 to indicate the ISA is invalid. + const amd::Isa* code_object_isa_p = amd::Isa::fromHandle(isa.handle); + if (!code_object_isa_p || !code_object_isa_p->runtimeGslSupported()) { + // The ISA is either not supported because ORCAHSALoaderContext::IsaFromName + // could not find it, or the GSL runtime does not support it. return false; } + if (program_->isNull()) { + // Cannot load code onto offline devices.
+ return false; + } + return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa()); } void* ORCAHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, diff --git a/projects/clr/rocclr/device/gpu/gpuprogram.hpp b/projects/clr/rocclr/device/gpu/gpuprogram.hpp index ad25f3e3b0..f78eec7e89 100644 --- a/projects/clr/rocclr/device/gpu/gpuprogram.hpp +++ b/projects/clr/rocclr/device/gpu/gpuprogram.hpp @@ -272,7 +272,7 @@ class NullProgram : public device::Program { std::vector printf_; //!< Format strings for GPU printf support std::vector glbCb_; //!< Global constant buffers - virtual const aclTargetInfo& info(const char* str = ""); + virtual const aclTargetInfo& info(); virtual bool saveBinaryAndSetType(type_t type) { return true; } @@ -503,7 +503,7 @@ class HSAILProgram : public device::Program { virtual bool createBinary(amd::option::Options* options); - virtual const aclTargetInfo& info(const char* str = ""); + virtual const aclTargetInfo& info(); private: //! Disable default copy constructor diff --git a/projects/clr/rocclr/device/gpu/gpuscsi.cpp b/projects/clr/rocclr/device/gpu/gpuscsi.cpp index 6333c23ded..8eb502e786 100644 --- a/projects/clr/rocclr/device/gpu/gpuscsi.cpp +++ b/projects/clr/rocclr/device/gpu/gpuscsi.cpp @@ -122,7 +122,7 @@ bool NullKernel::siCreateHwInfo(const void* shader, AMUabiAddEncoding& encoding) i++; newInfos[i].address = AMU_ABI_WAVEFRONT_SIZE; - newInfos[i].value = nullDev().hwInfo()->simdWidth_ * 4; // options.WavefrontSize; + newInfos[i].value = nullDev().isa().simdWidth() * 4; // options.WavefrontSize; i++; newInfos[i].address = AMU_ABI_LDS_SIZE_AVAIL; diff --git a/projects/clr/rocclr/device/pal/paldefs.hpp b/projects/clr/rocclr/device/pal/paldefs.hpp index 65c2e34ca4..98c83c71ce 100644 --- a/projects/clr/rocclr/device/pal/paldefs.hpp +++ b/projects/clr/rocclr/device/pal/paldefs.hpp @@ -132,76 +132,6 @@ static constexpr uint HsaSamplerObjectAlignment = 16; //! 
HSA path specific defines for images static constexpr uint DeviceQueueMaskSize = 32; -struct AMDDeviceInfo { - const char* machineTarget_; //!< Machine target - const char* machineTargetLC_; //!< Machine target for LC - uint simdWidth_; //!< Number of workitems processed per SIMD - uint memChannelBankWidth_; //!< Memory channel bank width - uint localMemBanks_; //!< Number of banks of local memory - uint gfxipVersionLC_; //!< The core engine GFXIP version for LC - uint gfxipVersion_; //!< The core engine GFXIP version - bool xnackEnabled_; //!< Enable XNACK feature - Pal::AsicRevision asicRevision_; //!< PAL AsicRevision - bool xnackSupported_; //!< XNACK is supported - bool srameccSumpported_; //!< SRAMECC is supported -}; - -static constexpr AMDDeviceInfo UnknownDevice = {"", "", 16, 256, 32, 0, 0, false}; - -static constexpr AMDDeviceInfo DeviceInfo[] = { - /* Unknown */ UnknownDevice, - /* Tahiti */ {"", "", 16, 256, 32, 600, 600, false}, - /* Pitcairn */ {"", "", 16, 256, 32, 600, 600, false}, - /* Capeverde */ {"", "", 16, 256, 32, 700, 700, false}, - /* Oland */ {"", "", 16, 256, 32, 600, 600, false}, - /* Hainan */ {"", "", 16, 256, 32, 600, 600, false}, - - /* Bonaire */ {"Bonaire", "", 16, 256, 32, 700, 700, false}, - /* Hawaii */ {"Hawaii", "", 16, 256, 32, 701, 701, false}, - /* Hawaii */ {"", "", 16, 256, 32, 701, 701, false}, - /* Hawaii */ {"", "", 16, 256, 32, 701, 701, false}, - - /* Kalindi */ {"Kalindi", "", 16, 256, 32, 702, 702, false}, - /* Godavari */ {"Mullins", "", 16, 256, 32, 702, 702, false}, - /* Spectre */ {"Spectre", "", 16, 256, 32, 701, 701, false}, - /* Spooky */ {"Spooky", "", 16, 256, 32, 701, 701, false}, - - /* Carrizo */ {"Carrizo", "", 16, 256, 32, 801, 801, false}, - /* Bristol */ {"Bristol Ridge", "", 16, 256, 32, 801, 801, false}, - /* Stoney */ {"Stoney", "", 16, 256, 32, 810, 810, false}, - - /* Iceland */ {"Iceland", "gfx802", 16, 256, 32, 802, 800, false}, - /* Tonga */ {"Tonga", "gfx802", 16, 256, 32, 802, 800, 
false}, - /* Fiji */ {"Fiji", "gfx803", 16, 256, 32, 803, 804, false}, - /* Ellesmere */ {"Ellesmere", "gfx803", 16, 256, 32, 803, 804, false}, - /* Baffin */ {"Baffin", "gfx803", 16, 256, 32, 803, 804, false}, - /* Lexa */ {"gfx804", "gfx803", 16, 256, 32, 803, 804, false}, -}; - -static constexpr AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = { - /* Vega10 */ {"gfx900", "gfx900", 16, 256, 32, 900, 900, false, Pal::AsicRevision::Vega10, true, false}, - /* Vega10 XNACK */ {"gfx901", "gfx900", 16, 256, 32, 900, 901, true, Pal::AsicRevision::Vega10, true, false}, - /* Vega12 */ {"gfx904", "gfx904", 16, 256, 32, 904, 904, false, Pal::AsicRevision::Vega12, true, false}, - /* Vega12 XNACK */ {"gfx905", "gfx904", 16, 256, 32, 904, 905, true, Pal::AsicRevision::Vega12, true, false}, - /* Vega20 */ {"gfx906", "gfx906", 16, 256, 32, 906, 906, false, Pal::AsicRevision::Vega20, true, true}, - /* Vega20 XNACK */ {"gfx907", "gfx906", 16, 256, 32, 906, 907, true, Pal::AsicRevision::Vega20, true, true}, - /* Raven */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Raven, true, false}, - /* Raven XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Raven, true, false}, - /* Raven2 */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Raven2, true, false}, - /* Raven2 XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Raven2, true, false}, - /* Renoir */ {"gfx902", "gfx902", 16, 256, 32, 902, 902, false, Pal::AsicRevision::Renoir, true, false}, - /* Renoir XNACK */ {"gfx903", "gfx902", 16, 256, 32, 902, 903, true, Pal::AsicRevision::Renoir, true, false}, - /* Navi10 */ {"gfx1010", "gfx1010", 32, 256, 32, 1010, 1010, false, Pal::AsicRevision::Navi10, true, false}, - /* Navi10 XNACK */ {"gfx1010", "gfx1010", 32, 256, 32, 1010, 1010, true, Pal::AsicRevision::Navi10, true, false}, - /* Navi12 */ {"gfx1011", "gfx1011", 32, 256, 32, 1011, 1011, false, Pal::AsicRevision::Navi12, true, false}, - /* Navi12 
XNACK */ {"gfx1011", "gfx1011", 32, 256, 32, 1011, 1011, true, Pal::AsicRevision::Navi12, true, false}, - /* Navi14 */ {"gfx1012", "gfx1012", 32, 256, 32, 1012, 1012, false, Pal::AsicRevision::Navi14, true, false}, - /* Navi14 XNACK */ {"gfx1012", "gfx1012", 32, 256, 32, 1012, 1012, true, Pal::AsicRevision::Navi14, true, false}, - /* Navi21 */ {"gfx1030", "gfx1030", 32, 256, 32, 1030, 1030, false, Pal::AsicRevision::Navi21, false, false}, - /* Navi22 */ {"gfx1031", "gfx1031", 32, 256, 32, 1031, 1031, false, Pal::AsicRevision::Navi22, false, false}, - /* Navi23 */ {"gfx1032", "gfx1032", 32, 256, 32, 1032, 1032, false, Pal::AsicRevision::Navi23, false, false}, -}; - // Supported OpenCL versions enum OclVersion { OpenCL10 = 0x10, diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 6399f64bb8..a22d83d0c0 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -48,11 +48,100 @@ #endif // _WIN32 #include +#include #include #include #include #include #include +#include + +namespace { + +//! Define the mapping from PAL asic revision enumeration values to the +//! compiler gfx major/minor/stepping version. 
+struct PalDevice { + uint32_t gfxipMajor_; //!< The core engine GFXIP Major version + uint32_t gfxipMinor_; //!< The core engine GFXIP Minor version + uint32_t gfxipStepping_; //!< The core engine GFXIP Stepping version + Pal::GfxIpLevel gfxIpLevel_; //!< PAL gfx IP level + const char* palName_; //!< PAL device name + Pal::AsicRevision asicRevision_; //!< PAL AsicRevision +}; + +static constexpr PalDevice supportedPalDevices[] = { +// GFX Version PAL GFX IP Level PAL Name PAL ASIC Revision + {6, 0, 0, Pal::GfxIpLevel::GfxIp6, "Tahiti", Pal::AsicRevision::Tahiti}, + {6, 0, 1, Pal::GfxIpLevel::GfxIp6, "Pitcairn", Pal::AsicRevision::Pitcairn}, + {6, 0, 1, Pal::GfxIpLevel::GfxIp6, "Capeverde", Pal::AsicRevision::Capeverde}, + {6, 0, 2, Pal::GfxIpLevel::GfxIp6, "Oland", Pal::AsicRevision::Oland}, + {6, 0, 2, Pal::GfxIpLevel::GfxIp6, "Hainan", Pal::AsicRevision::Hainan}, + {7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Kalindi", Pal::AsicRevision::Kalindi}, + {7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spectre", Pal::AsicRevision::Spectre}, + {7, 0, 0, Pal::GfxIpLevel::GfxIp7, "Spooky", Pal::AsicRevision::Spooky}, + {7, 0, 1, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::HawaiiPro}, + {7, 0, 2, Pal::GfxIpLevel::GfxIp7, "Hawaii", Pal::AsicRevision::Hawaii}, + {7, 0, 4, Pal::GfxIpLevel::GfxIp7, "Bonaire", Pal::AsicRevision::Bonaire}, + {7, 0, 5, Pal::GfxIpLevel::GfxIp7, "Mullins", Pal::AsicRevision::Godavari}, // FIXME: Why is this compiled as Mullins yet reported as Godavari? Add gfx703 to support Mullins. 
+ {8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Carrizo", Pal::AsicRevision::Carrizo}, + {8, 0, 1, Pal::GfxIpLevel::GfxIp8, "Bristol Ridge", Pal::AsicRevision::Bristol}, + {8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Iceland", Pal::AsicRevision::Iceland}, + {8, 0, 2, Pal::GfxIpLevel::GfxIp8, "Tonga", Pal::AsicRevision::Tonga}, // Also Tongapro (generated code is for Tonga) + {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Fiji", Pal::AsicRevision::Fiji}, + {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Ellesmere", Pal::AsicRevision::Polaris10}, // Ellesmere + {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "Baffin", Pal::AsicRevision::Polaris11}, // Baffin + {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris12}, // Lexa + {8, 0, 3, Pal::GfxIpLevel::GfxIp8, "gfx803", Pal::AsicRevision::Polaris22}, + {8, 1, 0, Pal::GfxIpLevel::GfxIp8_1, "Stoney", Pal::AsicRevision::Stoney}, + {9, 0, 0, Pal::GfxIpLevel::GfxIp9, "gfx900", Pal::AsicRevision::Vega10}, + {9, 0, 2, Pal::GfxIpLevel::GfxIp9, "gfx902", Pal::AsicRevision::Raven}, + {9, 0, 4, Pal::GfxIpLevel::GfxIp9, "gfx904", Pal::AsicRevision::Vega12}, + {9, 0, 6, Pal::GfxIpLevel::GfxIp9, "gfx906", Pal::AsicRevision::Vega20}, + {9, 0, 9, Pal::GfxIpLevel::GfxIp9, "gfx909", Pal::AsicRevision::Raven2}, + {9, 0, 12, Pal::GfxIpLevel::GfxIp9, "gfx90c", Pal::AsicRevision::Renoir}, + {10, 1, 0, Pal::GfxIpLevel::GfxIp10_1, "gfx1010", Pal::AsicRevision::Navi10}, + {10, 1, 1, Pal::GfxIpLevel::GfxIp10_1, "gfx1011", Pal::AsicRevision::Navi12}, + {10, 1, 2, Pal::GfxIpLevel::GfxIp10_1, "gfx1012", Pal::AsicRevision::Navi14}, + {10, 3, 0, Pal::GfxIpLevel::GfxIp10_3, "gfx1030", Pal::AsicRevision::Navi21}, + {10, 3, 1, Pal::GfxIpLevel::GfxIp10_3, "gfx1031", Pal::AsicRevision::Navi22}, + {10, 3, 2, Pal::GfxIpLevel::GfxIp10_3, "gfx1032", Pal::AsicRevision::Navi23}, +#if PAL_BUILD_VAN_GOGH + {10, 3, 3, Pal::GfxIpLevel::GfxIp10_3, "", Pal::AsicRevision::VanGogh}, +#endif +}; + +static std::tuple findIsa(Pal::AsicRevision asicRevision, + bool sramecc, bool xnack) { + auto 
palDeviceIter = std::find_if( + std::begin(supportedPalDevices), std::end(supportedPalDevices), + [&](const PalDevice& palDevice) { return palDevice.asicRevision_ == asicRevision; }); + if (palDeviceIter == std::end(supportedPalDevices)) { + return std::make_tuple(nullptr, nullptr); + } + const amd::Isa* isa = amd::Isa::findIsa( + palDeviceIter->gfxipMajor_, palDeviceIter->gfxipMinor_, palDeviceIter->gfxipStepping_, + sramecc ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled, + xnack ? amd::Isa::Feature::Enabled : amd::Isa::Feature::Disabled); + return std::make_tuple(isa, palDeviceIter->palName_); +} + +static std::tuple findPal(uint32_t gfxipMajor, + uint32_t gfxipMinor, + uint32_t gfxipStepping) { + auto palDeviceIter = std::find_if(std::begin(supportedPalDevices), std::end(supportedPalDevices), + [&](const PalDevice& palDevice) { + return palDevice.gfxipMajor_ == gfxipMajor && + palDevice.gfxipMinor_ == gfxipMinor && + palDevice.gfxipStepping_ == gfxipStepping; + }); + if (palDeviceIter == std::end(supportedPalDevices)) { + return std::make_tuple(Pal::GfxIpLevel::None, Pal::AsicRevision::Unknown, nullptr); + } + return std::make_tuple(palDeviceIter->gfxIpLevel_, palDeviceIter->asicRevision_, + palDeviceIter->palName_); +} + +} // namespace bool PalDeviceLoad() { bool ret = false; @@ -76,185 +165,63 @@ Pal::IPlatform* Device::platform_; NullDevice::Compiler* NullDevice::compiler_; AppProfile Device::appProfile_; -NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), hwInfo_(nullptr) {} +NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), palName_(nullptr) {} bool NullDevice::init() { - std::vector devices; - std::string driverVersion; - devices = getDevices(CL_DEVICE_TYPE_GPU, false); - -// TODO: Currently PAL only supports for GFXIP9+. -// Comment out this section for SWDEV-146950 since Kalindi and Mullins -// does not works for LC offline compilation without knowing which GFXIP -// should be used for them. 
-#if defined(WITH_COMPILER_LIB) - - // Loop through all supported devices and create each of them - for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++id) { - bool foundActive = false; - Pal::AsicRevision revision = static_cast(id); - - if (pal::DeviceInfo[id].machineTarget_[0] == '\0') { + // Create offline devices for all ISAs not already associated with an online + // device. This allows code objects to be compiled for all supported ISAs. + std::vector devices = getDevices(CL_DEVICE_TYPE_GPU, false); + for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) { + if (!isa->runtimePalSupported()) { + continue; + } + bool isOnline = false; + // Check if the particular device is online + for (size_t i = 0; i < devices.size(); i++) { + if (&(devices[i]->isa()) == isa) { + isOnline = true; + break; + } + } + if (isOnline) { continue; } - // Loop through all active PAL devices and see if we match one - for (uint i = 0; i < devices.size(); ++i) { - driverVersion = static_cast(devices[i])->info().driverVersion_; - if (driverVersion.find("PAL") != std::string::npos) { - if (static_cast(devices[i])->asicRevision() == revision) { - foundActive = true; - break; - } - } - } - - // Don't report an offline device if it's active - if (foundActive) { + Pal::GfxIpLevel gfxIpLevel; + Pal::AsicRevision asicRevision; + const char* palName; + std::tie(gfxIpLevel, asicRevision, palName) = + findPal(isa->versionMajor(), isa->versionMinor(), isa->versionStepping()); + if (asicRevision == Pal::AsicRevision::Unknown) { + // PAL does not support this asic. 
continue; } - NullDevice* dev = new NullDevice(); - if (nullptr != dev) { - if (!dev->create(id, Pal::GfxIpLevel::_None)) { - delete dev; - } else { - dev->registerDevice(); - } + std::unique_ptr nullDevice(new NullDevice()); + if (!nullDevice) { + LogPrintfError("Error allocating new instance of offline PAL Device %s", isa->targetId()); + return false; } + if (!nullDevice->create(palName, *isa, gfxIpLevel, asicRevision)) { + // Skip over unsupported devices + LogPrintfError("Skipping creating new instance of offline PAL Device %s", isa->targetId()); + continue; + } + nullDevice.release()->registerDevice(); } -#endif // defined(WITH_COMPILER_LIB) - - // Loop through all supported devices and create each of them - for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) { - bool foundActive = false; - bool foundDuplicate = false; - uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_; - - if (pal::Gfx9PlusSubDeviceInfo[id].machineTarget_[0] == '\0') { - continue; - } - - // Loop through all active PAL devices and see if we match one - for (uint i = 0; i < devices.size(); ++i) { - driverVersion = static_cast(devices[i])->info().driverVersion_; - if (driverVersion.find("PAL") != std::string::npos) { - gfxipVersion = devices[i]->settings().useLightning_ - ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ - : pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_; - uint gfxIpCurrent = devices[i]->settings().useLightning_ - ? 
static_cast(devices[i])->hwInfo()->gfxipVersionLC_ - : static_cast(devices[i])->hwInfo()->gfxipVersion_; - if (gfxIpCurrent == gfxipVersion) { - foundActive = true; - break; - } - } - } - - // Don't report an offline device if it's active - if (foundActive) { - continue; - } - - // Loop through all previous devices in the Gfx9PlusSubDeviceInfo list - // and compare them with the current entry to see if the current entry - // was listed previously in the Gfx9PlusSubDeviceInfo, if so, then it - // means the current entry already has been added in the offline device list - for (uint j = 0; j < id; ++j) { - if (pal::Gfx9PlusSubDeviceInfo[j].machineTarget_[0] == '\0') { - continue; - } - if ((strcmp(pal::Gfx9PlusSubDeviceInfo[j].machineTarget_, - pal::Gfx9PlusSubDeviceInfo[id].machineTarget_) == 0) && - (pal::Gfx9PlusSubDeviceInfo[j].xnackEnabled_ == - pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_)) { - foundDuplicate = true; - break; - } - } - - // Don't report an offline device twice - if (foundDuplicate) { - continue; - } - - Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None; - uint ipLevelMajor = round(gfxipVersion / 100); - uint ipLevelMinor = round(gfxipVersion / 10 % 10); - switch (ipLevelMajor) { - case 9: - ipLevel = Pal::GfxIpLevel::GfxIp9; - break; - case 10: - switch (ipLevelMinor) { - case 0: - ShouldNotReachHere(); - break; - case 1: - ipLevel = Pal::GfxIpLevel::GfxIp10_1; - break; - case 2: - ShouldNotReachHere(); - break; - case 3: - ipLevel = Pal::GfxIpLevel::GfxIp10_3; - break; - case 4: - ShouldNotReachHere(); - break; - default: - ShouldNotReachHere(); - break; - } - break; - case 11: - switch (ipLevelMinor) { - case 0: - ShouldNotReachHere(); - break; - default: - ShouldNotReachHere(); - break; - } - break; - default: - ShouldNotReachHere(); - break; - } - - NullDevice* dev = new NullDevice(); - if (nullptr != dev) { - if (!dev->create(id, ipLevel)) { - delete dev; - } else { - dev->registerDevice(); - } - } - } - return true; } -bool 
NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) { - // Update HW info for the device - if ((GPU_ENABLE_PAL == 1) && (ipLevel == Pal::GfxIpLevel::_None)) { - hwInfo_ = &DeviceInfo[id]; - } else if (ipLevel >= Pal::GfxIpLevel::GfxIp9) { - hwInfo_ = &Gfx9PlusSubDeviceInfo[id]; - } else { - return false; - } - - Pal::AsicRevision asicRevision = hwInfo_->asicRevision_; - - if (amd::IS_HIP && IS_MAINLINE && - (asicRevision != Pal::AsicRevision::Vega20)) { +bool NullDevice::create(const char* palName, const amd::Isa& isa, Pal::GfxIpLevel ipLevel, + Pal::AsicRevision asicRevision) { + if (!isa.runtimePalSupported()) { + LogPrintfError("Offline PAL device %s is not supported", isa.targetId()); return false; } online_ = false; + palName_ = palName; Pal::DeviceProperties properties = {}; // Use fake GFX IP for the device init @@ -274,12 +241,19 @@ bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) { Pal::WorkStationCaps wscaps = {}; // Create setting for the offline target - if ((palSettings == nullptr) || !palSettings->create(properties, heaps, wscaps)) { + if ((palSettings == nullptr) || + !palSettings->create(properties, heaps, wscaps, isa.xnack() == amd::Isa::Feature::Enabled)) { + LogPrintfError("Unable to create PAL setting for offline PAL device %s", isa.targetId()); return false; } if (!ValidateComgr()) { - LogError("Code object manager initialization failed!"); + LogPrintfError("Code object manager initialization failed for offline PAL device %s", isa.targetId()); + return false; + } + + if (!amd::Device::create(isa)) { + LogPrintfError("Unable to setup device for PAL offline device %s", isa.targetId()); return false; } @@ -306,7 +280,7 @@ bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) { acl_error error; compiler_ = aclCompilerInit(&opts, &error); if (error != ACL_SUCCESS) { - LogError("Error initializing the compiler"); + LogPrintfError("Error initializing the compiler for offline PAL device %s", isa.targetId()); return false; } #endif // 
defined(WITH_COMPILER_LIB) @@ -511,34 +485,12 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.platform_ = AMD_PLATFORM; - if (settings().useLightning_) { - ::strncpy(info_.name_, hwInfo()->machineTargetLC_, sizeof(info_.name_) - 1); - - if (hwInfo()->srameccSumpported_) { - if (palProp.gfxipProperties.shaderCore.flags.eccProtectedGprs) { - ::strcat(info_.name_, ":sramecc+"); - } else { - ::strcat(info_.name_, ":sramecc-"); - } - } - - if (hwInfo()->xnackSupported_) { - if (hwInfo()->xnackEnabled_) { - ::strcat(info_.name_, ":xnack+"); - } else { - ::strcat(info_.name_, ":xnack-"); - } - } - - ::strncpy(info_.targetId_, "amdgcn-amd-amdhsa--", sizeof(info_.targetId_) - 1); - ::strcat(info_.targetId_, info_.name_); - } else { - ::strncpy(info_.name_, hwInfo()->machineTarget_, sizeof(info_.name_) - 1); - } - + ::strncpy(info_.name_, settings().useLightning_ ? isa().targetId() : palName_, + sizeof(info_.name_) - 1); + ::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1); ::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1); - ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)", - settings().useLightning_ ? ",LC" : ",HSAIL"); + ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)%s", + settings().useLightning_ ? ",LC" : ",HSAIL", isOnline() ? "" : " [Offline]"); info_.profile_ = "FULL_PROFILE"; if (settings().oclVersion_ >= OpenCL20) { @@ -625,23 +577,20 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, ? (2 * palProp.gfxipProperties.shaderCore.numSimdsPerCu) : palProp.gfxipProperties.shaderCore.numSimdsPerCu; info_.cuPerShaderArray_ = palProp.gfxipProperties.shaderCore.numCusPerShaderArray; - info_.simdWidth_ = hwInfo()->simdWidth_; + info_.simdWidth_ = isa().simdWidth(); info_.simdInstructionWidth_ = 1; info_.wavefrontWidth_ = settings().enableWave32Mode_ ?
32 : palProp.gfxipProperties.shaderCore.nativeWavefrontSize; info_.availableSGPRs_ = palProp.gfxipProperties.shaderCore.numAvailableSgprs; info_.globalMemChannelBanks_ = 4; - info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_; + info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth(); info_.localMemSizePerCU_ = palProp.gfxipProperties.shaderCore.ldsSizePerCu; - info_.localMemBanks_ = hwInfo()->localMemBanks_; + info_.localMemBanks_ = isa().localMemBanks(); - uint gfxipVersion = - settings().useLightning_ ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_; - - info_.gfxipMajor_ = gfxipVersion / 100; - info_.gfxipMinor_ = gfxipVersion / 10 % 10; - info_.gfxipStepping_ = gfxipVersion % 10; + info_.gfxipMajor_ = isa().versionMajor(); + info_.gfxipMinor_ = isa().versionMinor(); + info_.gfxipStepping_ = isa().versionStepping(); info_.timeStampFrequency_ = 1000000; info_.numAsyncQueues_ = numComputeRings; @@ -860,9 +809,6 @@ uint32_t gStartDevice = 0; uint32_t gNumDevices = 0; bool Device::create(Pal::IDevice* device) { - if (!amd::Device::create()) { - return false; - } resourceList_ = new std::unordered_set(); if (nullptr == resourceList_) { return false; @@ -884,23 +830,25 @@ bool Device::create(Pal::IDevice* device) { // XNACK flag should be set for PageMigration or IOMMUv2 support. // Note: Navi2x should have a fix in HW. 
- bool isXNACKSupported = (ipLevel_ <= Pal::GfxIpLevel::GfxIp10_1) && + bool isXNACKEnabled = (static_cast(properties().gpuMemoryProperties.flags.pageMigrationEnabled || properties().gpuMemoryProperties.flags.iommuv2Support)); - // Update HW info for the device - if ((GPU_ENABLE_PAL == 1) && (properties().revision <= Pal::AsicRevision::Polaris12)) { - hwInfo_ = &DeviceInfo[static_cast(properties().revision)]; - } else if (ipLevel_ >= Pal::GfxIpLevel::GfxIp9) { - // For compiler sub targets - for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) { - if ((Gfx9PlusSubDeviceInfo[id].asicRevision_ == asicRevision_) && - (Gfx9PlusSubDeviceInfo[id].xnackEnabled_ == isXNACKSupported)) { - hwInfo_ = &Gfx9PlusSubDeviceInfo[id]; - break; - } - } - } else { + bool isSRAMECCEnabled = properties().gfxipProperties.shaderCore.flags.eccProtectedGprs; + + const amd::Isa* isa; + std::tie(isa, palName_) = findIsa(asicRevision_, isSRAMECCEnabled, isXNACKEnabled); + if (!isa) { + LogPrintfError("Unsupported PAL device with ASIC revision #%d", asicRevision_); + return false; + } + if (!isa->runtimePalSupported()) { + LogPrintfError("Unsupported PAL device with ISA %s", isa->targetId()); + return false; + } + + if (!amd::Device::create(*isa)) { + LogPrintfError("Unable to setup device for PAL device %s", isa->targetId()); return false; } @@ -953,8 +901,9 @@ bool Device::create(Pal::IDevice* device) { iDev()->QueryWorkStationCaps(&wscaps); pal::Settings* gpuSettings = reinterpret_cast(settings_); - if ((gpuSettings == nullptr) || - !gpuSettings->create(properties(), heaps_, wscaps, appProfile_.reportAsOCL12Device())) { + if (!gpuSettings || + !gpuSettings->create(properties(), heaps_, wscaps, isa->xnack() == amd::Isa::Feature::Enabled, + appProfile_.reportAsOCL12Device())) { return false; } diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp index 915250e5ef..4bfa687770 100644 --- 
a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -64,8 +64,10 @@ class NullDevice : public amd::Device { NullDevice(); //! Creates an offline device with the specified target - bool create(uint id, //!< index in the AMDDeviceInfo[] - Pal::GfxIpLevel ipLevel //!< GPU ip level + bool create(const char* palName, //!< Device name + const amd::Isa& isa, //!< Device ISA + Pal::GfxIpLevel ipLevel, //!< GPU ip level + Pal::AsicRevision asicRevision //!< PAL ASIC revision ); //! Instantiate a new virtual device @@ -114,8 +116,6 @@ class NullDevice : public amd::Device { Pal::GfxIpLevel ipLevel() const { return ipLevel_; } Pal::AsicRevision asicRevision() const { return asicRevision_; } - const AMDDeviceInfo* hwInfo() const { return hwInfo_; } - //! Empty implementation on Null device virtual bool globalFreeMemory(size_t* freeMemory) const { return false; } @@ -139,7 +139,7 @@ class NullDevice : public amd::Device { Pal::AsicRevision asicRevision_; //!< ASIC revision Pal::GfxIpLevel ipLevel_; //!< Device IP level - const AMDDeviceInfo* hwInfo_; //!< Device HW info structure + const char* palName_; //!< Device name //! 
Fills OpenCL device info structure void fillDeviceInfo(const Pal::DeviceProperties& palProp, //!< PAL device properties diff --git a/projects/clr/rocclr/device/pal/palprogram.cpp b/projects/clr/rocclr/device/pal/palprogram.cpp index 2a4f0c51ea..9765dd5ece 100644 --- a/projects/clr/rocclr/device/pal/palprogram.cpp +++ b/projects/clr/rocclr/device/pal/palprogram.cpp @@ -174,7 +174,6 @@ bool Segment::freeze(bool destroySysmem) { return result; } -static constexpr const char* Carrizo = "Carrizo"; HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) : Program(device, owner), rawBinary_(nullptr), @@ -185,11 +184,6 @@ HSAILProgram::HSAILProgram(Device& device, amd::Program& owner) executable_(nullptr), loaderContext_(this) { assert(device.isOnline()); - if (dev().asicRevision() == Pal::AsicRevision::Bristol) { - machineTarget_ = Carrizo; - } else { - machineTarget_ = dev().hwInfo()->machineTarget_; - } loader_ = amd::hsa::loader::Loader::Create(&loaderContext_); } @@ -204,11 +198,6 @@ HSAILProgram::HSAILProgram(NullDevice& device, amd::Program& owner) loaderContext_(this) { assert(!device.isOnline()); isNull_ = true; - if (dev().asicRevision() == Pal::AsicRevision::Bristol) { - machineTarget_ = Carrizo; - } else { - machineTarget_ = dev().hwInfo()->machineTarget_; - } // Cannot load onto a NullDevice. loader_ = nullptr; } @@ -368,15 +357,11 @@ bool HSAILProgram::allocKernelTable() { void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); } -const aclTargetInfo& HSAILProgram::info(const char* str) { +const aclTargetInfo& HSAILProgram::info() { #if defined(WITH_COMPILER_LIB) acl_error err; - std::string arch = "hsail"; - if (dev().settings().use64BitPtr_) { - arch = "hsail64"; - } - info_ = aclGetTargetInfo(arch.c_str(), - (str && str[0] == '\0' ? palNullDevice().hwInfo()->machineTarget_ : str), &err); + info_ = aclGetTargetInfo(palNullDevice().settings().use64BitPtr_ ? 
"hsail64" : "hsail", + device().isa().hsailName(), &err); if (err != ACL_SUCCESS) { LogWarning("aclGetTargetInfo failed"); } @@ -531,49 +516,23 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p } hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) { - hsa_isa_t isa = {0}; - uint32_t gfxip = 0; - std::string gfx_target(name); - if (gfx_target.find("amdgcn-") == 0) { - std::string gfxip_version_str = gfx_target.substr(gfx_target.find("gfx") + 3); - gfxip = std::atoi(gfxip_version_str.c_str()); - } else { - // FIXME: Old way. To be remove. - uint32_t shift = 1; - size_t last = gfx_target.length(); - std::string ver; - do { - size_t first = gfx_target.find_last_of(':', last); - ver = gfx_target.substr(first + 1, last - first); - last = first - 1; - gfxip += static_cast(atoi(ver.c_str())) * shift; - shift *= 10; - } while (shift <= 100); - } - isa.handle = gfxip; - return isa; + const amd::Isa* isa_p = amd::Isa::findIsa(name); + return {amd::Isa::toHandle(isa_p)}; } bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) { - uint32_t gfxipVersion = program_->palNullDevice().settings().useLightning_ - ? program_->palNullDevice().hwInfo()->gfxipVersionLC_ - : program_->palNullDevice().hwInfo()->gfxipVersion_; - uint32_t majorSrc = gfxipVersion / 10; - uint32_t minorSrc = gfxipVersion % 10; - - uint32_t majorTrg = isa.handle / 10; - uint32_t minorTrg = isa.handle % 10; - - if (majorSrc != majorTrg) { + // The HSA loader uses a handle value of 0 to indicate the ISA is invalid. + const amd::Isa* code_object_isa_p = amd::Isa::fromHandle(isa.handle); + if (!code_object_isa_p || !code_object_isa_p->runtimePalSupported()) { + // The ISA is either not supported because PALHSALoaderContext::IsaFromName + // could not find it, or the PAL runtime does not support it. 
return false; - } else if (minorTrg == minorSrc) { - return true; - } else if (minorTrg < minorSrc) { - LogWarning("ISA downgrade for execution!"); - return true; } - - return false; + if (program_->isNull()) { + // Cannot load code onto offline devices. + return false; + } + return amd::Isa::isCompatible(*code_object_isa_p, program_->device().isa()); } void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_agent_t agent, @@ -584,7 +543,7 @@ void* PALHSALoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, hsa_ag // Note: In Linux ::posix_memalign() requires at least 16 bytes for the alignment. align = amd::alignUp(align, 16); void* ptr = amd::Os::alignedMalloc(size, align); - if ((ptr != nullptr) && zero) { + if (ptr && zero) { memset(ptr, 0, size); } return ptr; @@ -778,9 +737,6 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s return true; } - hsa_agent_t agent; - agent.handle = 1; - executable_ = loader_->CreateExecutable(HSA_PROFILE_FULL, nullptr); if (executable_ == nullptr) { buildLog_ += "Error: Executable for AMD HSA Code Object isn't created.\n"; diff --git a/projects/clr/rocclr/device/pal/palprogram.hpp b/projects/clr/rocclr/device/pal/palprogram.hpp index 58bd76ae4d..14ae14e510 100644 --- a/projects/clr/rocclr/device/pal/palprogram.hpp +++ b/projects/clr/rocclr/device/pal/palprogram.hpp @@ -202,7 +202,7 @@ class HSAILProgram : public device::Program { virtual bool createBinary(amd::option::Options* options); - virtual const aclTargetInfo& info(const char* str = ""); + virtual const aclTargetInfo& info(); virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize, amd::Os::FileDesc fdesc = amd::Os::FDescInit(), size_t foffset = 0, @@ -250,7 +250,6 @@ class LightningProgram : public HSAILProgram { LightningProgram(NullDevice& device, amd::Program& owner) : HSAILProgram(device, owner) { isLC_ = true; isHIP_ = (owner.language() == amd::Program::HIP); - machineTarget_ = 
palNullDevice().hwInfo()->machineTargetLC_; } LightningProgram(Device& device, amd::Program& owner) : HSAILProgram(device, owner) { diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index efa0e03c80..ae3f0e9c22 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -174,7 +174,7 @@ Settings::Settings() { bool Settings::create(const Pal::DeviceProperties& palProp, const Pal::GpuMemoryHeapProperties* heaps, const Pal::WorkStationCaps& wscaps, - bool reportAsOCL12Device) { + bool enableXNACK, bool reportAsOCL12Device) { uint32_t osVer = 0x0; // Disable thread trace by default for all devices @@ -202,19 +202,19 @@ bool Settings::create(const Pal::DeviceProperties& palProp, apuSystem_ = true; } + enableXNACK_ = enableXNACK; + hsailExplicitXnack_ = enableXNACK; + switch (palProp.revision) { - case Pal::AsicRevision::Navi23: - case Pal::AsicRevision::Navi22: - case Pal::AsicRevision::Navi21: case Pal::AsicRevision::Navi14: case Pal::AsicRevision::Navi12: case Pal::AsicRevision::Navi10: case Pal::AsicRevision::Navi10_A0: + case Pal::AsicRevision::Navi23: + case Pal::AsicRevision::Navi22: + case Pal::AsicRevision::Navi21: gfx10Plus_ = true; useLightning_ = GPU_ENABLE_LC; - hsailExplicitXnack_ = - static_cast(palProp.gpuMemoryProperties.flags.pageMigrationEnabled || - palProp.gpuMemoryProperties.flags.iommuv2Support); enableWgpMode_ = GPU_ENABLE_WGP_MODE; if (useLightning_) { enableWave32Mode_ = true; @@ -264,6 +264,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp, case Pal::AsicRevision::Polaris10: case Pal::AsicRevision::Polaris11: case Pal::AsicRevision::Polaris12: + case Pal::AsicRevision::Polaris22: // Disable tiling aperture on VI+ linearPersistentImage_ = true; // Keep this false even though we have support @@ -289,6 +290,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp, // Fall through ... 
case Pal::AsicRevision::Bonaire: case Pal::AsicRevision::Hawaii: + case Pal::AsicRevision::HawaiiPro: threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE; reportFMAF_ = false; if ((palProp.revision == Pal::AsicRevision::Hawaii) || aiPlus_) { diff --git a/projects/clr/rocclr/device/pal/palsettings.hpp b/projects/clr/rocclr/device/pal/palsettings.hpp index f37bfdbe18..b546dcf2db 100644 --- a/projects/clr/rocclr/device/pal/palsettings.hpp +++ b/projects/clr/rocclr/device/pal/palsettings.hpp @@ -127,6 +127,7 @@ class Settings : public device::Settings { bool create(const Pal::DeviceProperties& palProp, //!< PAL device properties const Pal::GpuMemoryHeapProperties* heaps, //!< PAL heap settings const Pal::WorkStationCaps& wscaps, //!< PAL workstation settings + bool enableXNACK, //!< XNACK is enabled on this device bool reportAsOCL12Device = false //!< Report As OpenCL1.2 Device ); diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index e983af887a..f5e1d5882a 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -934,7 +934,7 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory, amd::Image* srcImage = static_cast(srcMemory.owner()); amd::Image::Format newFormat(dstImage->getImageFormat()); bool swapLayer = - (dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10); + (dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10); // Find unsupported formats for (uint i = 0; i < RejectedFormatDataTotal; ++i) { @@ -1126,7 +1126,7 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory, amd::Image* srcImage = static_cast(srcMemory.owner()); amd::Image::Format newFormat(srcImage->getImageFormat()); bool swapLayer = - (srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10); + (srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && 
(dev().isa().versionMajor() >= 10); // Find unsupported formats for (uint i = 0; i < RejectedFormatDataTotal; ++i) { @@ -1364,14 +1364,14 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst // Program source origin int32_t srcOrg[4] = {(int32_t)srcOrigin[0], (int32_t)srcOrigin[1], (int32_t)srcOrigin[2], 0}; - if ((srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10)) { + if ((srcImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10)) { srcOrg[3] = 1; } setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg); // Program destinaiton origin int32_t dstOrg[4] = {(int32_t)dstOrigin[0], (int32_t)dstOrigin[1], (int32_t)dstOrigin[2], 0}; - if ((dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10)) { + if ((dstImage->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10)) { dstOrg[3] = 1; } setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg); @@ -2072,7 +2072,7 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern, amd::Image* image = static_cast(memory.owner()); amd::Image::Format newFormat(image->getImageFormat()); bool swapLayer = - (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().info().gfxipMajor_ >= 10); + (image->getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (dev().isa().versionMajor() >= 10); // Program the kernels workload depending on the fill dimensions fillType = FillImage; diff --git a/projects/clr/rocclr/device/rocm/roccounters.cpp b/projects/clr/rocclr/device/rocm/roccounters.cpp index 19a7080f33..3bad8898af 100644 --- a/projects/clr/rocclr/device/rocm/roccounters.cpp +++ b/projects/clr/rocclr/device/rocm/roccounters.cpp @@ -430,7 +430,7 @@ PerfCounter::PerfCounter(const Device& device, //!< A ROC device object info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id) // these block indices are valid for the SI (Gfx8) & Gfx9 devices - switch 
(roc_device_.deviceInfo().gfxipMajor_) { + switch (roc_device_.isa().versionMajor()) { case (8): gfxVersion_ = ROC_GFX8; if (blockIndex < viBlockIdOrcaToRocr.size()) { diff --git a/projects/clr/rocclr/device/rocm/rocdefs.hpp b/projects/clr/rocclr/device/rocm/rocdefs.hpp index 53fbbd5ac7..889d95fc4a 100644 --- a/projects/clr/rocclr/device/rocm/rocdefs.hpp +++ b/projects/clr/rocclr/device/rocm/rocdefs.hpp @@ -33,49 +33,6 @@ static constexpr uint DeviceQueueMaskSize = 32; //! Set to match the number of pipes, which is 8. static constexpr uint kMaxAsyncQueues = 8; -typedef uint HsaDeviceId; - -struct AMDDeviceInfo { - const char* machineTarget_; //!< Machine target - const char* machineTargetLC_;//!< Machine target for LC - uint simdPerCU_; //!< Number of SIMDs per CU - uint simdWidth_; //!< Number of workitems processed per SIMD - uint simdInstructionWidth_; //!< Number of instructions processed per SIMD - uint memChannelBankWidth_; //!< Memory channel bank width - uint localMemSizePerCU_; //!< Local memory size per CU - uint localMemBanks_; //!< Number of banks of local memory - uint gfxipMajor_; //!< The core engine GFXIP Major version - uint gfxipMinor_; //!< The core engine GFXIP Minor version - uint gfxipStepping_; //!< The core engine GFXIP Stepping version - uint pciDeviceId_; //!< PCIe device id -}; - -constexpr HsaDeviceId HSA_INVALID_DEVICE_ID = -1; - -static constexpr AMDDeviceInfo DeviceInfo[] = { - /* KAVERI_SPECTRE */ {"Spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0}, - /* KAVERI_SPOOKY */ {"Spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0}, - /* HAWAII */ {"Hawaii", "gfx701", 4, 16, 1, 256, 64 * Ki, 32, 7, 0, 1, 0}, - /* CARRIZO */ {"Carrizo", "gfx801", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 1, 0}, - /* TONGA */ {"Tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 2, 0}, - /* ICELAND */ {"Iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 2, 0}, - /* FIJI */ {"Fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0}, - /* ELLESMERE */ 
{"Ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0}, - /* BAFFIN */ {"Baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 8, 0, 3, 0}, - /* VEGA10 */ {"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 0, 0}, - /* VEGA10_HBCC */ {"gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 1, 0}, - /* RAVEN */ {"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 2, 0}, - /* VEGA12 */ {"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 4, 0}, - /* VEGA20 */ {"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 6, 0}, - /* ARCTURUS */ {"gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 9, 0, 8, 0}, - /* NAVI10 */ {"gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 0, 0}, - /* NAVI12 */ {"gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 1, 0}, - /* NAVI14 */ {"gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 10, 1, 2, 0}, - /* SIENNA_CICHILD */ {"gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 0, 0}, - /* NAVY_FLOUNDER */ {"gfx1031", "gfx1031", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 1, 0}, - /* DIMGREY CAVEFISH*/{"gfx1032", "gfx1032", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 2, 0} -}; - } // namespace roc #endif diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index b5ea2da46b..2a00cf2485 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -48,6 +48,7 @@ #include #include #include +#include #ifdef ROCCLR_SUPPORT_NUMA_POLICY #include #endif // ROCCLR_SUPPORT_NUMA_POLICY @@ -61,9 +62,9 @@ #ifndef WITHOUT_HSA_BACKEND namespace { -inline bool getIsaMeta(const char* targetId, amd_comgr_metadata_node_t& isaMeta) { +inline bool getIsaMeta(std::string isaName, amd_comgr_metadata_node_t& isaMeta) { amd_comgr_status_t status; - status = amd::Comgr::get_isa_metadata(targetId, &isaMeta); + status = amd::Comgr::get_isa_metadata(isaName.c_str(), &isaMeta); return (status == AMD_COMGR_STATUS_SUCCESS) ? 
true : false; } @@ -99,34 +100,13 @@ std::vector roc::Device::cpu_agents_; address Device::mg_sync_ = nullptr; -static HsaDeviceId getHsaDeviceId(hsa_agent_t device, uint32_t& pci_id) { - if (HSA_STATUS_SUCCESS != - hsa_agent_get_info(device, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, &pci_id)) { - return HSA_INVALID_DEVICE_ID; +bool NullDevice::create(const amd::Isa &isa) { + if (!isa.runtimeRocSupported()) { + LogPrintfError("Offline HSA device %s is not supported", isa.targetId()); + return false; } - char agent_name[64] = {0}; - - if (HSA_STATUS_SUCCESS != hsa_agent_get_info(device, HSA_AGENT_INFO_NAME, agent_name)) { - return HSA_INVALID_DEVICE_ID; - } - - if (::strncmp(agent_name, "gfx", 3) != 0) { - return HSA_INVALID_DEVICE_ID; - } - - for (uint i = 0; i < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); ++i) { - if (::strcmp(agent_name, DeviceInfo[i].machineTargetLC_) == 0) { - return i; - } - } - - return HSA_INVALID_DEVICE_ID; -} - -bool NullDevice::create(const AMDDeviceInfo& deviceInfo) { online_ = false; - deviceInfo_ = deviceInfo; // Mark the device as GPU type info_.type_ = CL_DEVICE_TYPE_GPU; info_.vendorId_ = 0x1002; @@ -134,24 +114,38 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) { roc::Settings* hsaSettings = new roc::Settings(); settings_ = hsaSettings; if (!hsaSettings || - !hsaSettings->create(false, deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_)) { - LogError("Error creating settings for nullptr HSA device"); + !hsaSettings->create(false, isa.versionMajor(), isa.versionMinor(), + isa.xnack() == amd::Isa::Feature::Enabled)) { + LogPrintfError("Error creating settings for offline HSA device %s", isa.targetId()); return false; } if (!ValidateComgr()) { - LogError("Code object manager initialization failed!"); + LogPrintfError("Code object manager initialization failed for offline HSA device %s", + isa.targetId()); + return false; + } + + if (!amd::Device::create(isa)) { + LogPrintfError("Unable to setup offline HSA device %s", 
isa.targetId()); return false; } // Report the device name - ::strncpy(info_.name_, "AMD HSA Device", sizeof(info_.name_) - 1); + ::strncpy(info_.name_, isa.targetId(), sizeof(info_.name_) - 1); + info_.gfxipMajor_ = isa.versionMajor(); + info_.gfxipMinor_ = isa.versionMinor(); + info_.gfxipStepping_ = isa.versionStepping(); + ::strncpy(info_.targetId_, isa.isaName().c_str(), sizeof(info_.targetId_) - 1); info_.extensions_ = getExtensionString(); info_.maxWorkGroupSize_ = hsaSettings->maxWorkGroupSize_; ::strncpy(info_.vendor_, "Advanced Micro Devices, Inc.", sizeof(info_.vendor_) - 1); info_.oclcVersion_ = "OpenCL C " OPENCL_C_VERSION_STR " "; info_.spirVersions_ = ""; - ::strncpy(info_.driverVersion_, "1.0 Provisional (hsa)", sizeof(info_.driverVersion_) - 1); + std::stringstream ss; + ss << AMD_BUILD_STRING " (HSA," << (settings().useLightning_ ? "LC" : "HSAIL"); + ss << ") [Offline]"; + ::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1); info_.version_ = "OpenCL " OPENCL_VERSION_STR " "; return true; } @@ -160,6 +154,7 @@ Device::Device(hsa_agent_t bkendDevice) : mapCacheOps_(nullptr) , mapCache_(nullptr) , _bkendDevice(bkendDevice) + , pciDeviceId_(0) , gpuvm_segment_max_alloc_(0) , alloc_granularity_(0) , context_(nullptr) @@ -311,37 +306,35 @@ bool NullDevice::init() { return false; } - // Return without initializing offline device list - return true; - -#if defined(WITH_COMPILER_LIB) - // If there is an HSA enabled device online then skip any offline device - std::vector devices; - devices = getDevices(CL_DEVICE_TYPE_GPU, false); - - // Load the offline devices - // Iterate through the set of available offline devices - for (uint id = 0; id < sizeof(DeviceInfo) / sizeof(AMDDeviceInfo); id++) { + // Create offline devices for all ISAs not already associated with an online + // device. This allows code objects to be compiled for all supported ISAs. 
+ std::vector devices = getDevices(CL_DEVICE_TYPE_GPU, false); + for (const amd::Isa *isa = amd::Isa::begin(); isa != amd::Isa::end(); isa++) { + if (!isa->runtimeRocSupported()) { + continue; + } bool isOnline = false; // Check if the particular device is online - for (unsigned int i = 0; i < devices.size(); i++) { - if (::strcmp(static_cast(devices[i])->deviceInfo_.machineTarget_, - DeviceInfo[id].machineTarget_) == 0) { + for (size_t i = 0; i < devices.size(); i++) { + if (&(devices[i]->isa()) == isa) { isOnline = true; + break; } } if (isOnline) { continue; } - NullDevice* nullDevice = new NullDevice(); - if (!nullDevice->create(DeviceInfo[id])) { - LogError("Error creating new instance of Device."); - delete nullDevice; + std::unique_ptr nullDevice(new NullDevice()); + if (!nullDevice) { + LogPrintfError("Error allocating new instance of offline HSA device %s", isa->targetId()); return false; } - nullDevice->registerDevice(); + if (!nullDevice->create(*isa)) { + LogPrintfError("Skipping creating new instance of offline HSA device %s", isa->targetId()); + continue; + } + nullDevice.release()->registerDevice(); } -#endif // defined(WITH_COMPILER_LIB) return true; } @@ -516,22 +509,11 @@ bool Device::init() { for (auto agent : gpu_agents_) { std::unique_ptr roc_device(new Device(agent)); - if (!roc_device) { LogError("Error creating new instance of Device on then heap."); - return false; - } - - uint32_t pci_id; - HsaDeviceId deviceId = getHsaDeviceId(agent, pci_id); - if (deviceId == HSA_INVALID_DEVICE_ID) { - LogPrintfError("Invalid HSA device %x", pci_id); continue; } - roc_device->deviceInfo_ = DeviceInfo[deviceId]; - roc_device->deviceInfo_.pciDeviceId_ = pci_id; - if (!roc_device->create()) { LogError("Error creating new instance of Device."); continue; @@ -585,16 +567,84 @@ void Device::tearDown() { } bool Device::create() { + char agent_name[64] = {0}; + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_NAME, agent_name)) { +
LogError("Unable to get HSA device name"); + return false; + } + + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CHIP_ID, + &pciDeviceId_)) { + LogPrintfError("Unable to get PCI ID of HSA device %s", agent_name); + return false; + } + + struct agent_isas_t { + uint count; + hsa_isa_t first_isa; + } agent_isas = {0, {0}}; + if (HSA_STATUS_SUCCESS != + hsa_agent_iterate_isas(_bkendDevice, + [](hsa_isa_t isa, void* data) { + agent_isas_t* agent_isas = static_cast(data); + if (agent_isas->count++ == 0) { + agent_isas->first_isa = isa; + } + return HSA_STATUS_SUCCESS; + }, + &agent_isas)) { + LogPrintfError("Unable to iterate supported ISAs for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); + return false; + } + if (agent_isas.count != 1) { + LogPrintfError("HSA device %s (PCI ID %x) has %u ISAs but can only support a single ISA", + agent_name, pciDeviceId_, agent_isas.count); + return false; + } + + uint32_t isa_name_length = 0; + if (HSA_STATUS_SUCCESS != + hsa_isa_get_info_alt(agent_isas.first_isa, (hsa_isa_info_t)HSA_ISA_INFO_NAME_LENGTH, + &isa_name_length)) { + LogPrintfError("Unable to get ISA name length for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); + return false; + } + + std::vector isa_name(isa_name_length + 1, '\0'); + if (HSA_STATUS_SUCCESS != + hsa_isa_get_info_alt(agent_isas.first_isa, (hsa_isa_info_t)HSA_ISA_INFO_NAME, + isa_name.data())) { + LogPrintfError("Unable to get ISA name for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); + return false; + } + + const amd::Isa *isa = amd::Isa::findIsa(isa_name.data()); + if (!isa || !isa->runtimeRocSupported()) { + LogPrintfError("Unsupported HSA device %s (PCI ID %x) for ISA %s", agent_name, pciDeviceId_, + isa_name.data()); + return false; + } + if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_PROFILE, &agent_profile_)) { + LogPrintfError("Unable to get profile for HSA device %s (PCI ID %x)", 
agent_name, pciDeviceId_); return false; } uint32_t coop_groups = 0; // Check cooperative groups for HIP only - if (amd::IS_HIP && (HSA_STATUS_SUCCESS != - hsa_agent_get_info(_bkendDevice, - static_cast(HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES), &coop_groups))) { + if (amd::IS_HIP && + (HSA_STATUS_SUCCESS != + hsa_agent_get_info(_bkendDevice, + static_cast(HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES), + &coop_groups))) { + LogPrintfError( + "Unable to determine if cooperative queues are supported for HSA device %s (PCI ID %x)", + agent_name, pciDeviceId_); return false; } @@ -603,17 +653,23 @@ bool Device::create() { roc::Settings* hsaSettings = new roc::Settings(); settings_ = hsaSettings; if (!hsaSettings || - !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), deviceInfo_.gfxipMajor_, - deviceInfo_.gfxipMinor_, coop_groups)) { + !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(), + isa->versionMinor(), isa->xnack() == amd::Isa::Feature::Enabled, + coop_groups)) { + LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); return false; } if (!ValidateComgr()) { - LogError("Code object manager initialization failed!"); + LogPrintfError("Code object manager initialization failed for HSA device %s (PCI ID %x)", + agent_name, pciDeviceId_); return false; } - if (!amd::Device::create()) { + if (!amd::Device::create(*isa)) { + LogPrintfError("Unable to setup device for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); return false; } @@ -621,6 +677,8 @@ bool Device::create() { if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, static_cast(HSA_AMD_AGENT_INFO_BDFID), &hsa_bdf_id)) { + LogPrintfError("Unable to determine BDF ID for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); return false; } @@ -632,6 +690,8 @@ bool Device::create() { if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, static_cast(HSA_AMD_AGENT_INFO_DOMAIN), &pci_domain_id)) { + LogPrintfError("Unable
to determine domain ID for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); return false; } info_.pciDomainID = pci_domain_id; @@ -650,7 +710,8 @@ bool Device::create() { #endif if (populateOCLDeviceConstants() == false) { - LogError("populateOCLDeviceConstants failed!"); + LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name, + pciDeviceId_); return false; } @@ -995,35 +1056,11 @@ Memory* Device::getGpuMemory(amd::Memory* mem) const { bool Device::populateOCLDeviceConstants() { info_.available_ = true; - hsa_isa_t isa = {0}; - if (hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_ISA, &isa) != HSA_STATUS_SUCCESS) { - return false; - } - - uint32_t isaNameLength = 0; - if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &isaNameLength) != HSA_STATUS_SUCCESS) { - return false; - } - - if ((isaNameLength + 1) > sizeof(info_.targetId_)) { - return false; - } - - if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, info_.targetId_) != HSA_STATUS_SUCCESS) { - return false; - } - info_.targetId_[isaNameLength] = '\0'; - - char *gfxSubString = ::strstr(info_.targetId_, "gfx"); - if (nullptr == gfxSubString) { - return false; - } - ::strncpy(info_.name_, gfxSubString, sizeof(info_.name_) - 1); - - info_.gfxipMajor_ = deviceInfo_.gfxipMajor_; - info_.gfxipMinor_ = deviceInfo_.gfxipMinor_; - info_.gfxipStepping_ = deviceInfo_.gfxipStepping_; - + ::strncpy(info_.name_, isa().targetId(), sizeof(info_.name_) - 1); + info_.gfxipMajor_ = isa().versionMajor(); + info_.gfxipMinor_ = isa().versionMinor(); + info_.gfxipStepping_ = isa().versionStepping(); + ::strncpy(info_.targetId_, isa().isaName().c_str(), sizeof(info_.targetId_) - 1); char device_name[64] = {0}; if (HSA_STATUS_SUCCESS == hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, @@ -1072,7 +1109,7 @@ bool Device::populateOCLDeviceConstants() { } //TODO: add the assert statement for Raven - if ((info_.gfxipMajor_*100 + info_.gfxipMinor_*10 + 
info_.gfxipStepping_) != 902) { + if (!(isa().versionMajor() == 9 && isa().versionMinor() == 0 && isa().versionStepping() == 2)) { assert(info_.maxEngineClockFrequency_ > 0); } @@ -1258,7 +1295,7 @@ bool Device::populateOCLDeviceConstants() { ::strncpy(info_.driverVersion_, ss.str().c_str(), sizeof(info_.driverVersion_) - 1); // Enable OpenCL 2.0 for Vega10+ - if (deviceInfo_.gfxipMajor_ >= 9) { + if (isa().versionMajor() >= 9) { info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " "; } else { info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " "; @@ -1394,14 +1431,14 @@ bool Device::populateOCLDeviceConstants() { } if (amd::IS_HIP) { // Report atomics capability based on GFX IP, control on Hawaii - if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipMajor_ >= 8) { + if (info_.hostUnifiedMemory_ || isa().versionMajor() >= 8) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } else if (!settings().useLightning_) { // Report atomics capability based on GFX IP, control on Hawaii // and Vega10. - if (info_.hostUnifiedMemory_ || (deviceInfo_.gfxipMajor_ == 8)) { + if (info_.hostUnifiedMemory_ || (isa().versionMajor() == 8)) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } @@ -1409,10 +1446,10 @@ bool Device::populateOCLDeviceConstants() { if (settings().checkExtension(ClAmdDeviceAttributeQuery)) { info_.simdPerCU_ = settings().enableWgpMode_ - ? (2 * deviceInfo_.simdPerCU_) - : deviceInfo_.simdPerCU_; - info_.simdWidth_ = deviceInfo_.simdWidth_; - info_.simdInstructionWidth_ = deviceInfo_.simdInstructionWidth_; + ? 
(2 * isa().simdPerCU()) + : isa().simdPerCU(); + info_.simdWidth_ = isa().simdWidth(); + info_.simdInstructionWidth_ = isa().simdInstructionWidth(); if (HSA_STATUS_SUCCESS != hsa_agent_get_info(_bkendDevice, HSA_AGENT_INFO_WAVEFRONT_SIZE, &info_.wavefrontWidth_)) { return false; @@ -1454,16 +1491,16 @@ bool Device::populateOCLDeviceConstants() { info_.l2CacheSize_ = cache_sizes[1]; info_.timeStampFrequency_ = 1000000; info_.globalMemChannelBanks_ = 4; - info_.globalMemChannelBankWidth_ = deviceInfo_.memChannelBankWidth_; - info_.localMemSizePerCU_ = deviceInfo_.localMemSizePerCU_; - info_.localMemBanks_ = deviceInfo_.localMemBanks_; + info_.globalMemChannelBankWidth_ = isa().memChannelBankWidth(); + info_.localMemSizePerCU_ = isa().localMemSizePerCU(); + info_.localMemBanks_ = isa().localMemBanks(); info_.numAsyncQueues_ = kMaxAsyncQueues; info_.numRTQueues_ = info_.numAsyncQueues_; info_.numRTCUs_ = info_.maxComputeUnits_; //TODO: set to true once thread trace support is available info_.threadTraceEnable_ = false; - info_.pcieDeviceId_ = deviceInfo_.pciDeviceId_; + info_.pcieDeviceId_ = pciDeviceId_; info_.cooperativeGroups_ = settings().enableCoopGroups_; info_.cooperativeMultiDeviceGroups_ = settings().enableCoopMultiDeviceGroups_; } @@ -1481,7 +1518,7 @@ bool Device::populateOCLDeviceConstants() { // Get Values from from Comgr amd_comgr_metadata_node_t isaMeta; - if (getIsaMeta(info_.targetId_, isaMeta)) { + if (getIsaMeta(std::move(isa().isaName()), isaMeta)) { std::string vgprValue; info_.availableVGPRs_ = (getValueFromIsaMeta(isaMeta, "AddressableNumVGPRs", vgprValue)) ? 
(atoi(vgprValue.c_str()) * info_.simdPerCU_) @@ -1595,14 +1632,11 @@ bool Device::bindExternalDevice(uint flags, void* const gfxDevice[], void* gfxCo return false; } - bool match = true; - match &= info_.deviceTopology_.pcie.bus == info.pci_bus; - match &= info_.deviceTopology_.pcie.device == info.pci_device; - match &= info_.deviceTopology_.pcie.function == info.pci_function; - match &= info_.vendorId_ == info.vendor_id; - match &= deviceInfo_.pciDeviceId_ == info.device_id; + return info_.deviceTopology_.pcie.bus == info.pci_bus && + info_.deviceTopology_.pcie.device == info.pci_device && + info_.deviceTopology_.pcie.function == info.pci_function && + info_.vendorId_ == info.vendor_id && pciDeviceId_ == info.device_id; - return match; #endif } diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 30db05e573..d33386481b 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -110,7 +110,7 @@ class NullDevice : public amd::Device { NullDevice(){}; //! create the device - bool create(const AMDDeviceInfo& deviceInfo); + bool create(const amd::Isa &isa); //! Initialise all the offline devices that can be used for compilation static bool init(); @@ -126,7 +126,6 @@ class NullDevice : public amd::Device { //! Construct an HSAIL program object from the ELF assuming it is valid virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = nullptr); - const AMDDeviceInfo& deviceInfo() const { return deviceInfo_; } // List of dummy functions which are disabled for NullDevice @@ -232,8 +231,6 @@ class NullDevice : public amd::Device { static bool destroyCompiler(); //! Handle to the the compiler static Compiler* compilerHandle_; - //! 
Device Id for an HsaDevice - AMDDeviceInfo deviceInfo_; private: static constexpr bool offlineDevice_ = true; @@ -515,6 +512,7 @@ class Device : public NullDevice { std::vector enabled_p2p_devices_; //!< List of user enabled P2P devices for this device mutable std::mutex lock_allow_access_; //!< To serialize allow_access calls hsa_agent_t _bkendDevice; + uint32_t pciDeviceId_; hsa_agent_t* p2p_agents_list_; hsa_profile_t agent_profile_; hsa_amd_memory_pool_t group_segment_; diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp index d8f4a0a6ef..14b58bb3ea 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.cpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp @@ -1049,7 +1049,7 @@ bool Image::createInteropImage() { } if (obj->getGLTarget() == GL_TEXTURE_CUBE_MAP) { - desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipMajor_); + desc.setFace(obj->getCubemapFace(), dev().isa().versionMajor()); } hsa_status_t err = diff --git a/projects/clr/rocclr/device/rocm/rocprogram.cpp b/projects/clr/rocclr/device/rocm/rocprogram.cpp index 6d4e0850c0..2d547f953c 100644 --- a/projects/clr/rocclr/device/rocm/rocprogram.cpp +++ b/projects/clr/rocclr/device/rocm/rocprogram.cpp @@ -234,9 +234,8 @@ bool Program::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, return true; } -HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) { - machineTarget_ = rocNullDevice().deviceInfo().machineTarget_; -} +HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) + : roc::Program(device, owner) {} HSAILProgram::~HSAILProgram() { #if defined(WITH_COMPILER_LIB) @@ -440,7 +439,6 @@ LightningProgram::LightningProgram(roc::NullDevice& device, amd::Program& owner) : roc::Program(device, owner) { isLC_ = true; isHIP_ = (owner.language() == amd::Program::HIP); - machineTarget_ = rocNullDevice().deviceInfo().machineTargetLC_; } bool 
LightningProgram::createBinary(amd::option::Options* options) { diff --git a/projects/clr/rocclr/device/rocm/rocprogram.hpp b/projects/clr/rocclr/device/rocm/rocprogram.hpp index 2cc4bb6ab4..36b5aa0bfb 100644 --- a/projects/clr/rocclr/device/rocm/rocprogram.hpp +++ b/projects/clr/rocclr/device/rocm/rocprogram.hpp @@ -74,7 +74,7 @@ class Program : public device::Program { ); virtual bool createBinary(amd::option::Options* options) = 0; - virtual const aclTargetInfo& info(const char* str = "") { return info_; } + virtual const aclTargetInfo& info() { return info_; } protected: //! Disable default copy constructor diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp index a008558551..4222061abb 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp @@ -93,7 +93,8 @@ Settings::Settings() { barrier_sync_ = (!flagIsDefault(ROC_BARRIER_SYNC)) ? ROC_BARRIER_SYNC : true; } -bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups) { +bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, bool enableXNACK, + bool coop_groups) { customHostAllocator_ = false; if (fullProfile) { @@ -105,7 +106,8 @@ bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coo pinnedXferSize_ = std::max(pinnedXferSize_, pinnedMinXferSize_); stagedXferSize_ = std::max(stagedXferSize_, pinnedMinXferSize_ + 4 * Ki); } - enableXNACK_ = apuSystem_ ? 
1 : 0 ; // enable xnack for APU system + enableXNACK_ = enableXNACK; + hsailExplicitXnack_ = enableXNACK; // Enable extensions enableExtension(ClKhrByteAddressableStore); diff --git a/projects/clr/rocclr/device/rocm/rocsettings.hpp b/projects/clr/rocclr/device/rocm/rocsettings.hpp index c12aaf2aa4..e0e29c0d27 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.hpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.hpp @@ -89,7 +89,8 @@ class Settings : public device::Settings { Settings(); //! Creates settings - bool create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups = false); + bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, bool enableXNACK, + bool coop_groups = false); private: //! Disable copy constructor