P4 to Git Change 1611180 by gandryey@gera-ocl-lc on 2018/09/26 16:54:02
SWDEV-79445 - OCL generic changes and code clean-up - Fix Ellesmere compilation with LC path. Switch to multiple device info fields to support LC and HSAIL at the same time Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#40 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#110 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#77 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#33 edit
Этот коммит содержится в:
@@ -115,69 +115,74 @@ const static uint DeviceQueueMaskSize = 32;
|
||||
//! Static per-ASIC properties. One entry per row of the DeviceInfo[] and
//! Gfx9PlusSubDeviceInfo[] tables. Rows are brace-initialized positionally,
//! so the initializer order in those tables must follow the member order below.
//! Fields with an LC suffix are read by the Lightning Compiler (LC) code paths;
//! their unsuffixed counterparts are read by the non-LC path
//! (HSAIL, per the change description - TODO confirm).
struct AMDDeviceInfo {
|
||||
const char* targetName_; //!< Target name; Gfx9+ entries with an empty name are skipped by NullDevice::init()
|
||||
const char* machineTarget_; //!< Machine target for the non-LC path
|
||||
const char* machineTargetLC_;//!< Machine target for LC (read by LightningProgram)
|
||||
uint simdPerCU_; //!< Number of SIMDs per CU
|
||||
uint simdWidth_; //!< Number of workitems processed per SIMD
|
||||
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemSizePerCU_; //!< Local memory size per CU
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
// NOTE: the LC version precedes the non-LC one, so a table row lists them
|
||||
// as "<LC>, <non-LC>" (e.g. "802, 800").
|
||||
uint gfxipVersionLC_; //!< The core engine GFXIP version for LC (major*100 + minor*10 + stepping)
|
||||
uint gfxipVersion_; //!< The core engine GFXIP version for the non-LC path
|
||||
bool xnackEnabled_; //!< Enable XNACK feature
|
||||
};
|
||||
|
||||
static const AMDDeviceInfo DeviceInfo[] = {
|
||||
/* Unknown */ {"", "unknown", 4, 16, 1, 256, 64 * Ki, 32, 0, false},
|
||||
/* Tahiti */ {"", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Pitcairn */ {"", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Capeverde */ {"", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
|
||||
/* Oland */ {"", "oland", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Hainan */ {"", "hainan", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Unknown */ {"", "unknown", "", 4, 16, 1, 256, 64 * Ki, 32, 0, false},
|
||||
/* Tahiti */ {"", "tahiti", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Pitcairn */ {"", "pitcairn", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Capeverde */ {"", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
|
||||
/* Oland */ {"", "oland", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
/* Hainan */ {"", "hainan", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
|
||||
|
||||
/* Bonaire */ {"Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
|
||||
/* Hawaii */ {"Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Hawaii */ {"", "grenada", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Hawaii */ {"", "maui", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Bonaire */ {"Bonaire", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
|
||||
/* Hawaii */ {"Hawaii", "hawaii", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Hawaii */ {"", "grenada", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Hawaii */ {"", "maui", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
|
||||
/* Kalindi */ {"Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
|
||||
/* Godavari */ {"Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
|
||||
/* Spectre */ {"Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Spooky */ {"Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Kalindi */ {"Kalindi", "kalindi", "", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
|
||||
/* Godavari */ {"Mullins", "mullins", "", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
|
||||
/* Spectre */ {"Spectre", "spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
/* Spooky */ {"Spooky", "spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
|
||||
|
||||
/* Carrizo */ {"Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
|
||||
/* Bristol */ {"Bristol Ridge", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
|
||||
/* Stoney */ {"Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, 810, false},
|
||||
/* Carrizo */ {"Carrizo", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
|
||||
/* Bristol */ {"Bristol Ridge", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
|
||||
/* Stoney */ {"Stoney", "stoney", "", 4, 16, 1, 256, 64 * Ki, 32, 810, false},
|
||||
|
||||
/* Iceland */ {"Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
|
||||
/* Tonga */ {"Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
|
||||
/* Fiji */ {"Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
|
||||
/* Ellesmere */ {"Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
|
||||
/* Baffin */ {"Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
|
||||
/* Lexa */ {"gfx804", "gfx804", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(804, 804), false},
|
||||
/* Iceland */ {"Iceland", "iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
|
||||
/* Tonga */ {"Tonga", "tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
|
||||
/* Fiji */ {"Fiji", "fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
|
||||
/* Ellesmere */ {"Ellesmere", "ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
|
||||
/* Baffin */ {"Baffin", "baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
|
||||
/* Lexa */ {"gfx804", "gfx804", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
|
||||
};
|
||||
|
||||
// Ordering as per AsicRevision# in //depot/stg/pal/inc/core/palDevice.h and
|
||||
// http://confluence.amd.com/pages/viewpage.action?spaceKey=ASLC&title=AMDGPU+Target+Names
|
||||
static const AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
|
||||
/* Vega10 */{"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, false},
|
||||
/* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), LIGHTNING_SWITCH("gfx900","gfx901"),
|
||||
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(900, 901), true},
|
||||
/* Vega12 */{"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, false},
|
||||
/* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), LIGHTNING_SWITCH("gfx904","gfx905"),
|
||||
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(904, 905), true},
|
||||
/* Vega20 */{"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, false},
|
||||
/* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), LIGHTNING_SWITCH("gfx906","gfx907"),
|
||||
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(906, 907), true},
|
||||
/* Raven */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
|
||||
/* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
|
||||
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
|
||||
/* Raven2 */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
|
||||
/* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
|
||||
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
|
||||
/* Navi10 */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, false},
|
||||
/* Navi10 XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, true},
|
||||
/* Navi10Lite */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, false},
|
||||
/* Navi10Lite XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, true},
|
||||
|
||||
/* Vega10 */{"gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 900, false},
|
||||
/* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), "gfx901", "gfx900",
|
||||
4, 16, 1, 256, 64 * Ki, 32, 900, 901, true},
|
||||
/* Vega12 */{"gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 904, false},
|
||||
/* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), "gfx905", "gfx904",
|
||||
4, 16, 1, 256, 64 * Ki, 32, 904, 905, true},
|
||||
/* Vega20 */{"gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 906, false},
|
||||
/* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), "gfx907", "gfx906",
|
||||
4, 16, 1, 256, 64 * Ki, 32, 906, 907, true},
|
||||
/* Raven */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
|
||||
/* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
|
||||
4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
|
||||
/* Raven2 */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
|
||||
/* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
|
||||
4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
|
||||
/* Navi10 */{ "", "",
|
||||
"", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, false},
|
||||
/* Navi10 XNACK */{ "", "",
|
||||
"", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, true},
|
||||
/* Navi10Lite */{ "", "",
|
||||
"", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, false},
|
||||
/* Navi10Lite XNACK */{ "", "",
|
||||
"", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, true},
|
||||
};
|
||||
|
||||
// Supported OpenCL versions
|
||||
|
||||
@@ -117,7 +117,8 @@ bool NullDevice::init() {
|
||||
id < sizeof(Gfx9PlusSubDeviceInfo)/sizeof(AMDDeviceInfo); ++id) {
|
||||
bool foundActive = false;
|
||||
bool foundDuplicate = false;
|
||||
uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
|
||||
uint gfxipVersion = IS_LIGHTNING ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ :
|
||||
pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
|
||||
|
||||
if (pal::Gfx9PlusSubDeviceInfo[id].targetName_[0] == '\0') {
|
||||
continue;
|
||||
@@ -127,8 +128,10 @@ bool NullDevice::init() {
|
||||
for (uint i = 0; i < devices.size(); ++i) {
|
||||
driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
|
||||
if (driverVersion.find("PAL") != std::string::npos) {
|
||||
if (static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_ ==
|
||||
gfxipVersion) {
|
||||
uint gfxIpCurrent = IS_LIGHTNING ?
|
||||
static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersionLC_ :
|
||||
static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_;
|
||||
if (gfxIpCurrent == gfxipVersion) {
|
||||
foundActive = true;
|
||||
break;
|
||||
}
|
||||
@@ -161,8 +164,8 @@ bool NullDevice::init() {
|
||||
}
|
||||
|
||||
Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
|
||||
uint ipLevelMajor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 100);
|
||||
uint ipLevelMinor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 10 % 10);
|
||||
uint ipLevelMajor = round(gfxipVersion / 100);
|
||||
uint ipLevelMinor = round(gfxipVersion / 10 % 10);
|
||||
switch (ipLevelMajor) {
|
||||
case 9:
|
||||
ipLevel = Pal::GfxIpLevel::GfxIp9;
|
||||
@@ -181,7 +184,7 @@ bool NullDevice::init() {
|
||||
Pal::AsicRevision revision = Pal::AsicRevision::Unknown;
|
||||
uint xNACKSupported = pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_ ? 1 : 0;
|
||||
|
||||
switch (pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_) {
|
||||
switch (gfxipVersion) {
|
||||
case 901:
|
||||
case 900:
|
||||
revision = Pal::AsicRevision::Vega10;
|
||||
@@ -271,9 +274,9 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
// create compilation object with cache support
|
||||
int gfxipMajor = hwInfo_->gfxipVersion_ / 100;
|
||||
int gfxipMinor = hwInfo_->gfxipVersion_ / 10 % 10;
|
||||
int gfxipStepping = hwInfo_->gfxipVersion_ % 10;
|
||||
int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
|
||||
int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
|
||||
int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
|
||||
|
||||
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
|
||||
// with dash as delimiter to be compatible with Windows directory name
|
||||
@@ -594,7 +597,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
|
||||
info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_;
|
||||
info_.localMemBanks_ = hwInfo()->localMemBanks_;
|
||||
info_.gfxipVersion_ = hwInfo()->gfxipVersion_;
|
||||
info_.gfxipVersion_ = IS_LIGHTNING ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
|
||||
|
||||
info_.timeStampFrequency_ = 1000000;
|
||||
info_.numAsyncQueues_ = numComputeRings;
|
||||
@@ -932,9 +935,9 @@ bool Device::create(Pal::IDevice* device) {
|
||||
|
||||
#if defined(WITH_LIGHTNING_COMPILER)
|
||||
// create compilation object with cache support
|
||||
int gfxipMajor = hwInfo()->gfxipVersion_ / 100;
|
||||
int gfxipMinor = hwInfo()->gfxipVersion_ / 10 % 10;
|
||||
int gfxipStepping = hwInfo()->gfxipVersion_ % 10;
|
||||
int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
|
||||
int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
|
||||
int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
|
||||
|
||||
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
|
||||
// with dash as delimiter to be compatible with Windows directory name
|
||||
|
||||
@@ -592,8 +592,11 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
|
||||
}
|
||||
|
||||
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
|
||||
uint32_t majorSrc = program_->dev().hwInfo()->gfxipVersion_ / 10;
|
||||
uint32_t minorSrc = program_->dev().hwInfo()->gfxipVersion_ % 10;
|
||||
uint32_t gfxipVersion = IS_LIGHTNING ?
|
||||
program_->dev().hwInfo()->gfxipVersionLC_ :
|
||||
program_->dev().hwInfo()->gfxipVersion_;
|
||||
uint32_t majorSrc = gfxipVersion / 10;
|
||||
uint32_t minorSrc = gfxipVersion % 10;
|
||||
|
||||
uint32_t majorTrg = isa.handle / 10;
|
||||
uint32_t minorTrg = isa.handle % 10;
|
||||
@@ -969,7 +972,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
|
||||
inputs.push_back(ocml_bc);
|
||||
|
||||
// open the control functions
|
||||
auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersion_);
|
||||
auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersionLC_);
|
||||
if (!isa_version.first) {
|
||||
buildLog_ += "Error: Linking for this device is not supported\n";
|
||||
return false;
|
||||
@@ -992,7 +995,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
|
||||
|
||||
auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
|
||||
AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
|
||||
(dev().hwInfo()->gfxipVersion_ < 900 &&
|
||||
(dev().hwInfo()->gfxipVersionLC_ < 900 &&
|
||||
AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
|
||||
Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC, daz_opt.first, daz_opt.second);
|
||||
|
||||
@@ -1057,7 +1060,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
|
||||
|
||||
// Set the machine target
|
||||
std::ostringstream mCPU;
|
||||
mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersion_;
|
||||
mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersionLC_;
|
||||
codegenOptions.append(mCPU.str());
|
||||
|
||||
// Set xnack option if needed
|
||||
|
||||
@@ -234,14 +234,14 @@ class LightningProgram : public HSAILProgram {
|
||||
: HSAILProgram(device) {
|
||||
isLC_ = true;
|
||||
xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
|
||||
machineTarget_ = dev().hwInfo()->machineTarget_;
|
||||
machineTarget_ = dev().hwInfo()->machineTargetLC_;
|
||||
}
|
||||
|
||||
LightningProgram(Device& device)
|
||||
: HSAILProgram(device) {
|
||||
isLC_ = true;
|
||||
xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
|
||||
machineTarget_ = dev().hwInfo()->machineTarget_;
|
||||
machineTarget_ = dev().hwInfo()->machineTargetLC_;
|
||||
}
|
||||
virtual ~LightningProgram() {}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user