From 42e7f37f69631e0603841dc6e4cfc64c3a67a4b5 Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 26 Sep 2018 17:10:57 -0400
Subject: [PATCH] P4 to Git Change 1611180 by gandryey@gera-ocl-lc on
2018/09/26 16:54:02
SWDEV-79445 - OCL generic changes and code clean-up
- Fix Ellesmere compilation with LC path. Switch to multiple device info fields to support LC and HSAIL at the same time
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#110 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#33 edit
---
rocclr/runtime/device/pal/paldefs.hpp | 91 +++++++++++++-----------
rocclr/runtime/device/pal/paldevice.cpp | 29 ++++----
rocclr/runtime/device/pal/palprogram.cpp | 13 ++--
rocclr/runtime/device/pal/palprogram.hpp | 4 +-
4 files changed, 74 insertions(+), 63 deletions(-)
diff --git a/rocclr/runtime/device/pal/paldefs.hpp b/rocclr/runtime/device/pal/paldefs.hpp
index 52e7ac723f..5cfecc2721 100644
--- a/rocclr/runtime/device/pal/paldefs.hpp
+++ b/rocclr/runtime/device/pal/paldefs.hpp
@@ -115,69 +115,74 @@ const static uint DeviceQueueMaskSize = 32;
struct AMDDeviceInfo {
const char* targetName_; //!< Target name
const char* machineTarget_; //!< Machine target
+ const char* machineTargetLC_;//!< Machine target name used by the Lightning Compiler (LC) path
uint simdPerCU_; //!< Number of SIMDs per CU
uint simdWidth_; //!< Number of workitems processed per SIMD
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemSizePerCU_; //!< Local memory size per CU
uint localMemBanks_; //!< Number of banks of local memory
+ uint gfxipVersionLC_; //!< The core engine GFXIP version for LC (precedes gfxipVersion_ in positional initializers)
uint gfxipVersion_; //!< The core engine GFXIP version
bool xnackEnabled_; //!< Enable XNACK feature
};
static const AMDDeviceInfo DeviceInfo[] = {
- /* Unknown */ {"", "unknown", 4, 16, 1, 256, 64 * Ki, 32, 0, false},
- /* Tahiti */ {"", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
- /* Pitcairn */ {"", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
- /* Capeverde */ {"", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
- /* Oland */ {"", "oland", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
- /* Hainan */ {"", "hainan", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
+ /* Unknown */ {"", "unknown", "", 4, 16, 1, 256, 64 * Ki, 32, 0, 0, false}, // last three fields: gfxipVersionLC_, gfxipVersion_, xnackEnabled_
+ /* Tahiti */ {"", "tahiti", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+ /* Pitcairn */ {"", "pitcairn", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+ /* Capeverde */ {"", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
+ /* Oland */ {"", "oland", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+ /* Hainan */ {"", "hainan", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
- /* Bonaire */ {"Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
- /* Hawaii */ {"Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
- /* Hawaii */ {"", "grenada", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
- /* Hawaii */ {"", "maui", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
+ /* Bonaire */ {"Bonaire", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
+ /* Hawaii */ {"Hawaii", "hawaii", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+ /* Hawaii */ {"", "grenada", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+ /* Hawaii */ {"", "maui", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
- /* Kalindi */ {"Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
- /* Godavari */ {"Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
- /* Spectre */ {"Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
- /* Spooky */ {"Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
+ /* Kalindi */ {"Kalindi", "kalindi", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
+ /* Godavari */ {"Mullins", "mullins", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
+ /* Spectre */ {"Spectre", "spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+ /* Spooky */ {"Spooky", "spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
- /* Carrizo */ {"Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
- /* Bristol */ {"Bristol Ridge", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
- /* Stoney */ {"Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, 810, false},
+ /* Carrizo */ {"Carrizo", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
+ /* Bristol */ {"Bristol Ridge", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
+ /* Stoney */ {"Stoney", "stoney", "", 4, 16, 1, 256, 64 * Ki, 32, 810, 810, false},
- /* Iceland */ {"Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
- /* Tonga */ {"Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
- /* Fiji */ {"Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
- /* Ellesmere */ {"Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
- /* Baffin */ {"Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
- /* Lexa */ {"gfx804", "gfx804", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(804, 804), false},
+ /* Iceland */ {"Iceland", "iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
+ /* Tonga */ {"Tonga", "tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
+ /* Fiji */ {"Fiji", "fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+ /* Ellesmere */ {"Ellesmere", "ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+ /* Baffin */ {"Baffin", "baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+ /* Lexa */ {"gfx804", "gfx804", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
};
// Ordering as per AsicRevision# in //depot/stg/pal/inc/core/palDevice.h and
// http://confluence.amd.com/pages/viewpage.action?spaceKey=ASLC&title=AMDGPU+Target+Names
static const AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
- /* Vega10 */{"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, false},
- /* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), LIGHTNING_SWITCH("gfx900","gfx901"),
- 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(900, 901), true},
- /* Vega12 */{"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, false},
- /* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), LIGHTNING_SWITCH("gfx904","gfx905"),
- 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(904, 905), true},
- /* Vega20 */{"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, false},
- /* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), LIGHTNING_SWITCH("gfx906","gfx907"),
- 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(906, 907), true},
- /* Raven */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
- /* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
- 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
- /* Raven2 */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
- /* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
- 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
- /* Navi10 */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, false},
- /* Navi10 XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, true},
- /* Navi10Lite */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, false},
- /* Navi10Lite XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, true},
-
+ /* Vega10 */{"gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 900, false}, // last three fields: gfxipVersionLC_, gfxipVersion_, xnackEnabled_
+ /* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), "gfx901", "gfx900",
+ 4, 16, 1, 256, 64 * Ki, 32, 900, 901, true},
+ /* Vega12 */{"gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 904, false},
+ /* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), "gfx905", "gfx904",
+ 4, 16, 1, 256, 64 * Ki, 32, 904, 905, true},
+ /* Vega20 */{"gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 906, false},
+ /* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), "gfx907", "gfx906",
+ 4, 16, 1, 256, 64 * Ki, 32, 906, 907, true},
+ /* Raven */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+ /* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
+ 4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
+ /* Raven2 */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+ /* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
+ 4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
+ /* Navi10 */{ "", "",
+ "", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, false},
+ /* Navi10 XNACK */{ "", "",
+ "", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, true},
+ /* Navi10Lite */{ "", "",
+ "", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, false},
+ /* Navi10Lite XNACK */{ "", "",
+ "", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, true},
};
// Supported OpenCL versions
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index a16eef782b..49ba683736 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -117,7 +117,8 @@ bool NullDevice::init() {
id < sizeof(Gfx9PlusSubDeviceInfo)/sizeof(AMDDeviceInfo); ++id) {
bool foundActive = false;
bool foundDuplicate = false;
- uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
+ uint gfxipVersion = IS_LIGHTNING ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ :
+ pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
if (pal::Gfx9PlusSubDeviceInfo[id].targetName_[0] == '\0') {
continue;
@@ -127,8 +128,10 @@ bool NullDevice::init() {
for (uint i = 0; i < devices.size(); ++i) {
driverVersion = static_cast(devices[i])->info().driverVersion_;
if (driverVersion.find("PAL") != std::string::npos) {
- if (static_cast(devices[i])->hwInfo()->gfxipVersion_ ==
- gfxipVersion) {
+ uint gfxIpCurrent = IS_LIGHTNING ?
+ static_cast(devices[i])->hwInfo()->gfxipVersionLC_ :
+ static_cast(devices[i])->hwInfo()->gfxipVersion_;
+ if (gfxIpCurrent == gfxipVersion) {
foundActive = true;
break;
}
@@ -161,8 +164,8 @@ bool NullDevice::init() {
}
Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
- uint ipLevelMajor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 100);
- uint ipLevelMinor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 10 % 10);
+ uint ipLevelMajor = round(gfxipVersion / 100);
+ uint ipLevelMinor = round(gfxipVersion / 10 % 10);
switch (ipLevelMajor) {
case 9:
ipLevel = Pal::GfxIpLevel::GfxIp9;
@@ -181,7 +184,7 @@ bool NullDevice::init() {
Pal::AsicRevision revision = Pal::AsicRevision::Unknown;
uint xNACKSupported = pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_ ? 1 : 0;
- switch (pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_) {
+ switch (gfxipVersion) {
case 901:
case 900:
revision = Pal::AsicRevision::Vega10;
@@ -271,9 +274,9 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
- int gfxipMajor = hwInfo_->gfxipVersion_ / 100;
- int gfxipMinor = hwInfo_->gfxipVersion_ / 10 % 10;
- int gfxipStepping = hwInfo_->gfxipVersion_ % 10;
+ int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
+ int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
+ int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
@@ -594,7 +597,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_;
info_.localMemBanks_ = hwInfo()->localMemBanks_;
- info_.gfxipVersion_ = hwInfo()->gfxipVersion_;
+ info_.gfxipVersion_ = IS_LIGHTNING ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
info_.timeStampFrequency_ = 1000000;
info_.numAsyncQueues_ = numComputeRings;
@@ -932,9 +935,9 @@ bool Device::create(Pal::IDevice* device) {
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
- int gfxipMajor = hwInfo()->gfxipVersion_ / 100;
- int gfxipMinor = hwInfo()->gfxipVersion_ / 10 % 10;
- int gfxipStepping = hwInfo()->gfxipVersion_ % 10;
+ int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
+ int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
+ int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp
index c2d85194f4..299d9d783b 100644
--- a/rocclr/runtime/device/pal/palprogram.cpp
+++ b/rocclr/runtime/device/pal/palprogram.cpp
@@ -592,8 +592,11 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
}
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
- uint32_t majorSrc = program_->dev().hwInfo()->gfxipVersion_ / 10;
- uint32_t minorSrc = program_->dev().hwInfo()->gfxipVersion_ % 10;
+ uint32_t gfxipVersion = IS_LIGHTNING ?
+ program_->dev().hwInfo()->gfxipVersionLC_ :
+ program_->dev().hwInfo()->gfxipVersion_;
+ uint32_t majorSrc = gfxipVersion / 10;
+ uint32_t minorSrc = gfxipVersion % 10;
uint32_t majorTrg = isa.handle / 10;
uint32_t minorTrg = isa.handle % 10;
@@ -969,7 +972,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
inputs.push_back(ocml_bc);
// open the control functions
- auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersion_);
+ auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersionLC_);
if (!isa_version.first) {
buildLog_ += "Error: Linking for this device is not supported\n";
return false;
@@ -992,7 +995,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
- (dev().hwInfo()->gfxipVersion_ < 900 &&
+ (dev().hwInfo()->gfxipVersionLC_ < 900 &&
AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC, daz_opt.first, daz_opt.second);
@@ -1057,7 +1060,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
// Set the machine target
std::ostringstream mCPU;
- mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersion_;
+ mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersionLC_;
codegenOptions.append(mCPU.str());
// Set xnack option if needed
diff --git a/rocclr/runtime/device/pal/palprogram.hpp b/rocclr/runtime/device/pal/palprogram.hpp
index da50786dbc..7bd70bbce1 100644
--- a/rocclr/runtime/device/pal/palprogram.hpp
+++ b/rocclr/runtime/device/pal/palprogram.hpp
@@ -234,14 +234,14 @@ class LightningProgram : public HSAILProgram {
: HSAILProgram(device) {
isLC_ = true;
xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
- machineTarget_ = dev().hwInfo()->machineTarget_;
+ machineTarget_ = dev().hwInfo()->machineTargetLC_;
}
LightningProgram(Device& device)
: HSAILProgram(device) {
isLC_ = true;
xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
- machineTarget_ = dev().hwInfo()->machineTarget_;
+ machineTarget_ = dev().hwInfo()->machineTargetLC_;
}
virtual ~LightningProgram() {}