diff --git a/rocclr/runtime/device/pal/paldefs.hpp b/rocclr/runtime/device/pal/paldefs.hpp
index 52e7ac723f..5cfecc2721 100644
--- a/rocclr/runtime/device/pal/paldefs.hpp
+++ b/rocclr/runtime/device/pal/paldefs.hpp
@@ -115,69 +115,76 @@ const static uint DeviceQueueMaskSize = 32;
 struct AMDDeviceInfo {
   const char* targetName_;     //!< Target name
   const char* machineTarget_;  //!< Machine target
+  const char* machineTargetLC_;  //!< Machine target for LC (read by LightningProgram)
   uint simdPerCU_;             //!< Number of SIMDs per CU
   uint simdWidth_;             //!< Number of workitems processed per SIMD
   uint simdInstructionWidth_;  //!< Number of instructions processed per SIMD
   uint memChannelBankWidth_;   //!< Memory channel bank width
   uint localMemSizePerCU_;     //!< Local memory size per CU
   uint localMemBanks_;         //!< Number of banks of local memory
+  uint gfxipVersionLC_;        //!< The core engine GFXIP version for LC (selected when IS_LIGHTNING)
   uint gfxipVersion_;          //!< The core engine GFXIP version
   bool xnackEnabled_;          //!< Enable XNACK feature
 };
 
+// Rows follow AMDDeviceInfo field order. Always give both gfxipVersionLC_ and gfxipVersion_:
+// with only one value the trailing `false` lands in gfxipVersion_ (making it 0) and still compiles.
 static const AMDDeviceInfo DeviceInfo[] = {
-  /* Unknown */ {"", "unknown", 4, 16, 1, 256, 64 * Ki, 32, 0, false},
-  /* Tahiti */ {"", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
-  /* Pitcairn */ {"", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
-  /* Capeverde */ {"", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
-  /* Oland */ {"", "oland", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
-  /* Hainan */ {"", "hainan", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
+  /* Unknown */ {"", "unknown", "", 4, 16, 1, 256, 64 * Ki, 32, 0, 0, false},
+  /* Tahiti */ {"", "tahiti", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+  /* Pitcairn */ {"", "pitcairn", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+  /* Capeverde */ {"", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
+  /* Oland */ {"", "oland", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+  /* Hainan */ {"", "hainan", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
 
-  /* Bonaire */ {"Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
-  /* Hawaii */ {"Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
-  /* Hawaii */ {"", "grenada", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
-  /* Hawaii */ {"", "maui", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
+  /* Bonaire */ {"Bonaire", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
+  /* Hawaii */ {"Hawaii", "hawaii", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+  /* Hawaii */ {"", "grenada", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+  /* Hawaii */ {"", "maui", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
 
-  /* Kalindi */ {"Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
-  /* Godavari */ {"Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
-  /* Spectre */ {"Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
-  /* Spooky */ {"Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
+  /* Kalindi */ {"Kalindi", "kalindi", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
+  /* Godavari */ {"Mullins", "mullins", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
+  /* Spectre */ {"Spectre", "spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+  /* Spooky */ {"Spooky", "spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
 
-  /* Carrizo */ {"Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
-  /* Bristol */ {"Bristol Ridge", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
-  /* Stoney */ {"Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, 810, false},
+  /* Carrizo */ {"Carrizo", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
+  /* Bristol */ {"Bristol Ridge", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
+  /* Stoney */ {"Stoney", "stoney", "", 4, 16, 1, 256, 64 * Ki, 32, 810, 810, false},
 
-  /* Iceland */ {"Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
-  /* Tonga */ {"Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
-  /* Fiji */ {"Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
-  /* Ellesmere */ {"Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
-  /* Baffin */ {"Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
-  /* Lexa */ {"gfx804", "gfx804", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(804, 804), false},
+  /* Iceland */ {"Iceland", "iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
+  /* Tonga */ {"Tonga", "tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
+  /* Fiji */ {"Fiji", "fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+  /* Ellesmere */ {"Ellesmere", "ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+  /* Baffin */ {"Baffin", "baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+  /* Lexa */ {"gfx804", "gfx804", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
 };
 
 // Ordering as per AsicRevision# in //depot/stg/pal/inc/core/palDevice.h and
 // http://confluence.amd.com/pages/viewpage.action?spaceKey=ASLC&title=AMDGPU+Target+Names
 static const AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
-  /* Vega10 */{"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, false},
-  /* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), LIGHTNING_SWITCH("gfx900","gfx901"),
-      4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(900, 901), true},
-  /* Vega12 */{"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, false},
-  /* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), LIGHTNING_SWITCH("gfx904","gfx905"),
-      4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(904, 905), true},
-  /* Vega20 */{"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, false},
-  /* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), LIGHTNING_SWITCH("gfx906","gfx907"),
-      4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(906, 907), true},
-  /* Raven */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
-  /* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
-      4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
-  /* Raven2 */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
-  /* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
-      4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
-  /* Navi10 */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, false},
-  /* Navi10 XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, true},
-  /* Navi10Lite */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, false},
-  /* Navi10Lite XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, true},
-
+  /* Vega10 */{"gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 900, false},
+  /* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), "gfx901", "gfx900",
+      4, 16, 1, 256, 64 * Ki, 32, 900, 901, true},
+  /* Vega12 */{"gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 904, false},
+  /* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), "gfx905", "gfx904",
+      4, 16, 1, 256, 64 * Ki, 32, 904, 905, true},
+  /* Vega20 */{"gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 906, false},
+  /* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), "gfx907", "gfx906",
+      4, 16, 1, 256, 64 * Ki, 32, 906, 907, true},
+  /* Raven */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+  /* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
+      4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
+  /* Raven2 */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+  /* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
+      4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
+  /* Navi10 */{ "", "",
+      "", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, false},
+  /* Navi10 XNACK */{ "", "",
+      "", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, true},
+  /* Navi10Lite */{ "", "",
+      "", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, false},
+  /* Navi10Lite XNACK */{ "", "",
+      "", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, true},
 };
 
 // Supported OpenCL versions
diff --git
a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp index a16eef782b..49ba683736 100644 --- a/rocclr/runtime/device/pal/paldevice.cpp +++ b/rocclr/runtime/device/pal/paldevice.cpp @@ -117,7 +117,8 @@ bool NullDevice::init() { id < sizeof(Gfx9PlusSubDeviceInfo)/sizeof(AMDDeviceInfo); ++id) { bool foundActive = false; bool foundDuplicate = false; - uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_; + uint gfxipVersion = IS_LIGHTNING ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ : + pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_; if (pal::Gfx9PlusSubDeviceInfo[id].targetName_[0] == '\0') { continue; @@ -127,8 +128,10 @@ bool NullDevice::init() { for (uint i = 0; i < devices.size(); ++i) { driverVersion = static_cast(devices[i])->info().driverVersion_; if (driverVersion.find("PAL") != std::string::npos) { - if (static_cast(devices[i])->hwInfo()->gfxipVersion_ == - gfxipVersion) { + uint gfxIpCurrent = IS_LIGHTNING ? + static_cast(devices[i])->hwInfo()->gfxipVersionLC_ : + static_cast(devices[i])->hwInfo()->gfxipVersion_; + if (gfxIpCurrent == gfxipVersion) { foundActive = true; break; } @@ -161,8 +164,8 @@ bool NullDevice::init() { } Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None; - uint ipLevelMajor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 100); - uint ipLevelMinor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 10 % 10); + uint ipLevelMajor = round(gfxipVersion / 100); + uint ipLevelMinor = round(gfxipVersion / 10 % 10); switch (ipLevelMajor) { case 9: ipLevel = Pal::GfxIpLevel::GfxIp9; @@ -181,7 +184,7 @@ bool NullDevice::init() { Pal::AsicRevision revision = Pal::AsicRevision::Unknown; uint xNACKSupported = pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_ ? 
1 : 0; - switch (pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_) { + switch (gfxipVersion) { case 901: case 900: revision = Pal::AsicRevision::Vega10; @@ -271,9 +274,9 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel, #if defined(WITH_LIGHTNING_COMPILER) // create compilation object with cache support - int gfxipMajor = hwInfo_->gfxipVersion_ / 100; - int gfxipMinor = hwInfo_->gfxipVersion_ / 10 % 10; - int gfxipStepping = hwInfo_->gfxipVersion_ % 10; + int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100; + int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10; + int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10; // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) // with dash as delimiter to be compatible with Windows directory name @@ -594,7 +597,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_; info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_; info_.localMemBanks_ = hwInfo()->localMemBanks_; - info_.gfxipVersion_ = hwInfo()->gfxipVersion_; + info_.gfxipVersion_ = IS_LIGHTNING ? 
hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_; info_.timeStampFrequency_ = 1000000; info_.numAsyncQueues_ = numComputeRings; @@ -932,9 +935,9 @@ bool Device::create(Pal::IDevice* device) { #if defined(WITH_LIGHTNING_COMPILER) // create compilation object with cache support - int gfxipMajor = hwInfo()->gfxipVersion_ / 100; - int gfxipMinor = hwInfo()->gfxipVersion_ / 10 % 10; - int gfxipStepping = hwInfo()->gfxipVersion_ % 10; + int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100; + int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10; + int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10; // Use compute capability as target (AMD:AMDGPU:major:minor:stepping) // with dash as delimiter to be compatible with Windows directory name diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp index c2d85194f4..299d9d783b 100644 --- a/rocclr/runtime/device/pal/palprogram.cpp +++ b/rocclr/runtime/device/pal/palprogram.cpp @@ -592,8 +592,11 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) { } bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) { - uint32_t majorSrc = program_->dev().hwInfo()->gfxipVersion_ / 10; - uint32_t minorSrc = program_->dev().hwInfo()->gfxipVersion_ % 10; + uint32_t gfxipVersion = IS_LIGHTNING ? 
+ program_->dev().hwInfo()->gfxipVersionLC_ : + program_->dev().hwInfo()->gfxipVersion_; + uint32_t majorSrc = gfxipVersion / 10; + uint32_t minorSrc = gfxipVersion % 10; uint32_t majorTrg = isa.handle / 10; uint32_t minorTrg = isa.handle % 10; @@ -969,7 +972,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) { inputs.push_back(ocml_bc); // open the control functions - auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersion_); + auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersionLC_); if (!isa_version.first) { buildLog_ += "Error: Linking for this device is not supported\n"; return false; @@ -992,7 +995,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) { auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero || AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 || - (dev().hwInfo()->gfxipVersion_ < 900 && + (dev().hwInfo()->gfxipVersionLC_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)); Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC, daz_opt.first, daz_opt.second); @@ -1057,7 +1060,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) { // Set the machine target std::ostringstream mCPU; - mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersion_; + mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersionLC_; codegenOptions.append(mCPU.str()); // Set xnack option if needed diff --git a/rocclr/runtime/device/pal/palprogram.hpp b/rocclr/runtime/device/pal/palprogram.hpp index da50786dbc..7bd70bbce1 100644 --- a/rocclr/runtime/device/pal/palprogram.hpp +++ b/rocclr/runtime/device/pal/palprogram.hpp @@ -234,14 +234,14 @@ class LightningProgram : public HSAILProgram { : HSAILProgram(device) { isLC_ = true; xnackEnabled_ = dev().hwInfo()->xnackEnabled_; - machineTarget_ = dev().hwInfo()->machineTarget_; + machineTarget_ = dev().hwInfo()->machineTargetLC_; } LightningProgram(Device& device) : HSAILProgram(device) { isLC_ = true; xnackEnabled_ = dev().hwInfo()->xnackEnabled_; - machineTarget_ = 
dev().hwInfo()->machineTarget_; + machineTarget_ = dev().hwInfo()->machineTargetLC_; } virtual ~LightningProgram() {}