P4 to Git Change 1611180 by gandryey@gera-ocl-lc on 2018/09/26 16:54:02

SWDEV-79445 - OCL generic changes and code clean-up
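	- Fix Ellesmere compilation with the LC (Lightning Compiler) path. Switch to separate device info fields for the machine target and GFXIP version (machineTargetLC_, gfxipVersionLC_) to support LC and HSAIL at the same time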

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#110 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#33 edit
Этот коммит содержится в:
foreman
2018-09-26 17:10:57 -04:00
родитель e17b448874
Коммит 42e7f37f69
4 изменённых файлов: 74 добавлений и 63 удалений
+48 -43
Просмотреть файл
@@ -115,69 +115,74 @@ const static uint DeviceQueueMaskSize = 32;
//! Static per-ASIC properties used by the PAL device backend.
//!
//! Entries are built by positional aggregate initialization in the
//! DeviceInfo[] and Gfx9PlusSubDeviceInfo[] tables, so the member order
//! declared here must stay in sync with every row of those tables.
//!
//! GFXIP versions are stored as a decimal number that callers decode as
//! major = v / 100, minor = v / 10 % 10, stepping = v % 10 (e.g. 803).
//!
//! The *LC_ members carry the values used on the Lightning Compiler (LC)
//! path; their unsuffixed counterparts carry the values used otherwise, so a
//! single table row can describe one ASIC for both compiler paths.
struct AMDDeviceInfo {
const char* targetName_; //!< Target name; an empty string makes the Gfx9+ offline-device loop skip the entry
const char* machineTarget_; //!< Machine target (non-LC path)
const char* machineTargetLC_;//!< Machine target for LC; LightningProgram copies it into its machineTarget_
uint simdPerCU_; //!< Number of SIMDs per CU
uint simdWidth_; //!< Number of workitems processed per SIMD
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemSizePerCU_; //!< Local memory size per CU
uint localMemBanks_; //!< Number of banks of local memory
uint gfxipVersionLC_; //!< The core engine GFXIP version for LC (selected when IS_LIGHTNING; also emitted as -mcpu=gfx<N>)
uint gfxipVersion_; //!< The core engine GFXIP version (non-LC path)
bool xnackEnabled_; //!< Enable XNACK feature
};
static const AMDDeviceInfo DeviceInfo[] = {
/* Unknown */ {"", "unknown", 4, 16, 1, 256, 64 * Ki, 32, 0, false},
/* Tahiti */ {"", "tahiti", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Pitcairn */ {"", "pitcairn", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Capeverde */ {"", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
/* Oland */ {"", "oland", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Hainan */ {"", "hainan", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Unknown */ {"", "unknown", "", 4, 16, 1, 256, 64 * Ki, 32, 0, false},
/* Tahiti */ {"", "tahiti", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Pitcairn */ {"", "pitcairn", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Capeverde */ {"", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
/* Oland */ {"", "oland", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Hainan */ {"", "hainan", "", 4, 16, 1, 256, 64 * Ki, 32, 600, false},
/* Bonaire */ {"Bonaire", "bonaire", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
/* Hawaii */ {"Hawaii", "hawaii", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Hawaii */ {"", "grenada", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Hawaii */ {"", "maui", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Bonaire */ {"Bonaire", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, false},
/* Hawaii */ {"Hawaii", "hawaii", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Hawaii */ {"", "grenada", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Hawaii */ {"", "maui", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Kalindi */ {"Kalindi", "kalindi", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
/* Godavari */ {"Mullins", "mullins", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
/* Spectre */ {"Spectre", "spectre", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Spooky */ {"Spooky", "spooky", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Kalindi */ {"Kalindi", "kalindi", "", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
/* Godavari */ {"Mullins", "mullins", "", 4, 16, 1, 256, 64 * Ki, 32, 702, false},
/* Spectre */ {"Spectre", "spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Spooky */ {"Spooky", "spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 701, false},
/* Carrizo */ {"Carrizo", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
/* Bristol */ {"Bristol Ridge", "carrizo", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
/* Stoney */ {"Stoney", "stoney", 4, 16, 1, 256, 64 * Ki, 32, 810, false},
/* Carrizo */ {"Carrizo", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
/* Bristol */ {"Bristol Ridge", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, false},
/* Stoney */ {"Stoney", "stoney", "", 4, 16, 1, 256, 64 * Ki, 32, 810, false},
/* Iceland */ {"Iceland", "iceland", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
/* Tonga */ {"Tonga", "tonga", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(802, 800), false},
/* Fiji */ {"Fiji", "fiji", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
/* Ellesmere */ {"Ellesmere", "ellesmere", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
/* Baffin */ {"Baffin", "baffin", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(803, 804), false},
/* Lexa */ {"gfx804", "gfx804", 4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(804, 804), false},
/* Iceland */ {"Iceland", "iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
/* Tonga */ {"Tonga", "tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
/* Fiji */ {"Fiji", "fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
/* Ellesmere */ {"Ellesmere", "ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
/* Baffin */ {"Baffin", "baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
/* Lexa */ {"gfx804", "gfx804", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
};
// Ordering as per AsicRevision# in //depot/stg/pal/inc/core/palDevice.h and
// http://confluence.amd.com/pages/viewpage.action?spaceKey=ASLC&title=AMDGPU+Target+Names
static const AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
/* Vega10 */{"gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, false},
/* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), LIGHTNING_SWITCH("gfx900","gfx901"),
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(900, 901), true},
/* Vega12 */{"gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, false},
/* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), LIGHTNING_SWITCH("gfx904","gfx905"),
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(904, 905), true},
/* Vega20 */{"gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, false},
/* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), LIGHTNING_SWITCH("gfx906","gfx907"),
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(906, 907), true},
/* Raven */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
/* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
/* Raven2 */{"gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, false},
/* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), LIGHTNING_SWITCH("gfx902","gfx903"),
4, 16, 1, 256, 64 * Ki, 32, LIGHTNING_SWITCH(902, 903), true},
/* Navi10 */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, false},
/* Navi10 XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1010, true},
/* Navi10Lite */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, false},
/* Navi10Lite XNACK */{ "", "", 4, 16, 1, 256, 64 * Ki, 32, 1000, true},
/* Vega10 */{"gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 900, false},
/* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), "gfx901", "gfx900",
4, 16, 1, 256, 64 * Ki, 32, 900, 901, true},
/* Vega12 */{"gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 904, false},
/* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), "gfx905", "gfx904",
4, 16, 1, 256, 64 * Ki, 32, 904, 905, true},
/* Vega20 */{"gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 906, false},
/* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), "gfx907", "gfx906",
4, 16, 1, 256, 64 * Ki, 32, 906, 907, true},
/* Raven */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
/* Raven XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
/* Raven2 */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
/* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
/* Navi10 */{ "", "",
"", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, false},
/* Navi10 XNACK */{ "", "",
"", 4, 16, 1, 256, 64 * Ki, 32, 1010, 1010, true},
/* Navi10Lite */{ "", "",
"", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, false},
/* Navi10Lite XNACK */{ "", "",
"", 4, 16, 1, 256, 64 * Ki, 32, 1000, 1000, true},
};
// Supported OpenCL versions
+16 -13
Просмотреть файл
@@ -117,7 +117,8 @@ bool NullDevice::init() {
id < sizeof(Gfx9PlusSubDeviceInfo)/sizeof(AMDDeviceInfo); ++id) {
bool foundActive = false;
bool foundDuplicate = false;
uint gfxipVersion = pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
uint gfxipVersion = IS_LIGHTNING ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ :
pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
if (pal::Gfx9PlusSubDeviceInfo[id].targetName_[0] == '\0') {
continue;
@@ -127,8 +128,10 @@ bool NullDevice::init() {
for (uint i = 0; i < devices.size(); ++i) {
driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
if (driverVersion.find("PAL") != std::string::npos) {
if (static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_ ==
gfxipVersion) {
uint gfxIpCurrent = IS_LIGHTNING ?
static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersionLC_ :
static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_;
if (gfxIpCurrent == gfxipVersion) {
foundActive = true;
break;
}
@@ -161,8 +164,8 @@ bool NullDevice::init() {
}
Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
uint ipLevelMajor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 100);
uint ipLevelMinor = round(pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_ / 10 % 10);
uint ipLevelMajor = round(gfxipVersion / 100);
uint ipLevelMinor = round(gfxipVersion / 10 % 10);
switch (ipLevelMajor) {
case 9:
ipLevel = Pal::GfxIpLevel::GfxIp9;
@@ -181,7 +184,7 @@ bool NullDevice::init() {
Pal::AsicRevision revision = Pal::AsicRevision::Unknown;
uint xNACKSupported = pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_ ? 1 : 0;
switch (pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_) {
switch (gfxipVersion) {
case 901:
case 900:
revision = Pal::AsicRevision::Vega10;
@@ -271,9 +274,9 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
int gfxipMajor = hwInfo_->gfxipVersion_ / 100;
int gfxipMinor = hwInfo_->gfxipVersion_ / 10 % 10;
int gfxipStepping = hwInfo_->gfxipVersion_ % 10;
int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
int gfxipStepping = hwInfo_->gfxipVersionLC_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
@@ -594,7 +597,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_;
info_.localMemBanks_ = hwInfo()->localMemBanks_;
info_.gfxipVersion_ = hwInfo()->gfxipVersion_;
info_.gfxipVersion_ = IS_LIGHTNING ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
info_.timeStampFrequency_ = 1000000;
info_.numAsyncQueues_ = numComputeRings;
@@ -932,9 +935,9 @@ bool Device::create(Pal::IDevice* device) {
#if defined(WITH_LIGHTNING_COMPILER)
// create compilation object with cache support
int gfxipMajor = hwInfo()->gfxipVersion_ / 100;
int gfxipMinor = hwInfo()->gfxipVersion_ / 10 % 10;
int gfxipStepping = hwInfo()->gfxipVersion_ % 10;
int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
int gfxipStepping = hwInfo()->gfxipVersionLC_ % 10;
// Use compute capability as target (AMD:AMDGPU:major:minor:stepping)
// with dash as delimiter to be compatible with Windows directory name
+8 -5
Просмотреть файл
@@ -592,8 +592,11 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
}
bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
uint32_t majorSrc = program_->dev().hwInfo()->gfxipVersion_ / 10;
uint32_t minorSrc = program_->dev().hwInfo()->gfxipVersion_ % 10;
uint32_t gfxipVersion = IS_LIGHTNING ?
program_->dev().hwInfo()->gfxipVersionLC_ :
program_->dev().hwInfo()->gfxipVersion_;
uint32_t majorSrc = gfxipVersion / 10;
uint32_t minorSrc = gfxipVersion % 10;
uint32_t majorTrg = isa.handle / 10;
uint32_t minorTrg = isa.handle % 10;
@@ -969,7 +972,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
inputs.push_back(ocml_bc);
// open the control functions
auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersion_);
auto isa_version = get_oclc_isa_version(dev().hwInfo()->gfxipVersionLC_);
if (!isa_version.first) {
buildLog_ += "Error: Linking for this device is not supported\n";
return false;
@@ -992,7 +995,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
auto daz_opt = get_oclc_daz_opt(options->oVariables->DenormsAreZero ||
AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 ||
(dev().hwInfo()->gfxipVersion_ < 900 &&
(dev().hwInfo()->gfxipVersionLC_ < 900 &&
AMD_GPU_FORCE_SINGLE_FP_DENORM < 0));
Data* daz_opt_bc = C->NewBufferReference(DT_LLVM_BC, daz_opt.first, daz_opt.second);
@@ -1057,7 +1060,7 @@ bool LightningProgram::linkImpl(amd::option::Options* options) {
// Set the machine target
std::ostringstream mCPU;
mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersion_;
mCPU << " -mcpu=gfx" << dev().hwInfo()->gfxipVersionLC_;
codegenOptions.append(mCPU.str());
// Set xnack option if needed
+2 -2
Просмотреть файл
@@ -234,14 +234,14 @@ class LightningProgram : public HSAILProgram {
: HSAILProgram(device) {
isLC_ = true;
xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
machineTarget_ = dev().hwInfo()->machineTarget_;
machineTarget_ = dev().hwInfo()->machineTargetLC_;
}
LightningProgram(Device& device)
: HSAILProgram(device) {
isLC_ = true;
xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
machineTarget_ = dev().hwInfo()->machineTarget_;
machineTarget_ = dev().hwInfo()->machineTargetLC_;
}
virtual ~LightningProgram() {}