device/rocm: split gfxVersion to major/minor/stepping

Change-Id: I1e437eaee30794147713d9516229211670f01d90


[ROCm/clr commit: b4f1239f34]
This commit is contained in:
Jason Tang
2020-05-04 16:34:19 -04:00
committed by Jason Tang
parent 08036e1ffe
commit e49bddae07
6 changed files with 44 additions and 49 deletions
@@ -430,7 +430,7 @@ PerfCounter::PerfCounter(const Device& device, //!< A ROC device object
info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id)
// these block indices are valid for the VI (Gfx8) & Gfx9 devices
switch (roc_device_.deviceInfo().gfxipVersion_ / 100) {
switch (roc_device_.deviceInfo().gfxipMajor_) {
case (8):
gfxVersion_ = ROC_GFX8;
if (blockIndex < viBlockIdOrcaToRocr.size()) {
@@ -44,7 +44,9 @@ struct AMDDeviceInfo {
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemSizePerCU_; //!< Local memory size per CU
uint localMemBanks_; //!< Number of banks of local memory
uint gfxipVersion_; //!< The core engine GFXIP version
uint gfxipMajor_; //!< The core engine GFXIP Major version
uint gfxipMinor_; //!< The core engine GFXIP Minor version
uint gfxipStepping_; //!< The core engine GFXIP Stepping version
uint pciDeviceId_; //!< PCIe device id
};
@@ -133,7 +133,7 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
settings_ = new Settings();
roc::Settings* hsaSettings = static_cast<roc::Settings*>(settings_);
if ((hsaSettings == nullptr) || !hsaSettings->create(false, deviceInfo_.gfxipVersion_)) {
if ((hsaSettings == nullptr) || !hsaSettings->create(false, deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_)) {
LogError("Error creating settings for nullptr HSA device");
return false;
}
@@ -526,7 +526,6 @@ bool Device::init() {
std::string str(isaName);
unsigned gfxipVersionNum = (unsigned)-1;
if (str.find("amdgcn-") == 0) {
// New way.
std::vector<std::string> tokens;
@@ -548,38 +547,35 @@ bool Device::init() {
}
std::string gfxipVersionStr = tokens[4].substr(tokens[4].find("gfx") + 3);
gfxipVersionNum = std::atoi(gfxipVersionStr.c_str());
std::string steppingStr(&gfxipVersionStr.back());
roc_device->deviceInfo_.gfxipStepping_ = std::stoi(steppingStr, nullptr, 16);
gfxipVersionStr.pop_back();
std::string minorStr(&gfxipVersionStr.back());
roc_device->deviceInfo_.gfxipMinor_ = std::stoi(minorStr);
gfxipVersionStr.pop_back();
roc_device->deviceInfo_.gfxipMajor_ = std::stoi(gfxipVersionStr);
} else {
// FIXME(kzhuravl): Old way. Remove.
std::vector<std::string> tokens;
size_t end, pos = 0;
do {
end = str.find_first_of(':', pos);
tokens.push_back(str.substr(pos, end - pos));
pos = end + 1;
} while (end != std::string::npos);
if (tokens.size() != 5 || tokens[0] != "AMD" || tokens[1] != "AMDGPU") {
LogError("Not an AMD:AMDGPU ISA name");
continue;
}
uint major = atoi(tokens[2].c_str());
uint minor = atoi(tokens[3].c_str());
uint stepping = atoi(tokens[4].c_str());
if (minor >= 10 && stepping >= 10) {
LogError("Invalid ISA string");
continue;
}
gfxipVersionNum = major * 100 + minor * 10 + stepping;
ShouldNotReachHere();
}
assert(gfxipVersionNum != (unsigned)-1);
roc_device->deviceInfo_.gfxipVersion_ = gfxipVersionNum;
// TODO: set sramEccEnabled flag based on target string suffix
// when ROCr resumes reporting sram-ecc support
bool sramEccEnabled = (gfxipVersionNum == 906 || gfxipVersionNum == 908) ? true : false;
bool sramEccEnabled = false;
if ((roc_device->deviceInfo_.gfxipMajor_ == 9) && (roc_device->deviceInfo_.gfxipMinor_ == 0)) {
switch (roc_device->deviceInfo_.gfxipStepping_) {
case 6:
case 8:
sramEccEnabled = true;
break;
default:
break;
}
}
if (!roc_device->create(sramEccEnabled)) {
LogError("Error creating new instance of Device.");
continue;
@@ -646,7 +642,7 @@ bool Device::create(bool sramEccEnabled) {
roc::Settings* hsaSettings = static_cast<roc::Settings*>(settings_);
if ((hsaSettings == nullptr) ||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL),
deviceInfo_.gfxipVersion_, coop_groups)) {
deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_, coop_groups)) {
return false;
}
@@ -1026,12 +1022,10 @@ bool Device::populateOCLDeviceConstants() {
roc::Settings* hsa_settings = static_cast<roc::Settings*>(settings_);
int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
std::ostringstream oss;
oss << "gfx" << gfxipMajor << gfxipMinor << gfxipStepping;
oss << "gfx" << deviceInfo_.gfxipMajor_ << deviceInfo_.gfxipMinor_ << std::hex << deviceInfo_.gfxipStepping_;
if (settings().useLightning_ && hsa_settings->enableXNACK_) {
oss << "+xnack";
}
@@ -1088,7 +1082,7 @@ bool Device::populateOCLDeviceConstants() {
}
//TODO: add the assert statement for Raven
if (deviceInfo_.gfxipVersion_ != 902) {
if ((deviceInfo_.gfxipMajor_*100 + deviceInfo_.gfxipMinor_*10 + deviceInfo_.gfxipStepping_) != 902) {
assert(info_.maxEngineClockFrequency_ > 0);
}
@@ -1270,7 +1264,7 @@ bool Device::populateOCLDeviceConstants() {
strcpy(info_.driverVersion_, ss.str().c_str());
// Enable OpenCL 2.0 for Vega10+
if (deviceInfo_.gfxipVersion_ >= 900) {
if (deviceInfo_.gfxipMajor_ >= 9) {
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " ";
} else {
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
@@ -1397,15 +1391,14 @@ bool Device::populateOCLDeviceConstants() {
}
if (amd::IS_HIP) {
// Report atomics capability based on GFX IP, control on Hawaii
if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipVersion_ >= 800) {
if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipMajor_ >= 8) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
}
else if (!settings().useLightning_) {
// Report atomics capability based on GFX IP, control on Hawaii
// and Vega10.
if (info_.hostUnifiedMemory_ ||
((deviceInfo_.gfxipVersion_ >= 800) && (deviceInfo_.gfxipVersion_ < 900))) {
if (info_.hostUnifiedMemory_ || (deviceInfo_.gfxipMajor_ == 8)) {
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
}
}
@@ -1445,7 +1438,7 @@ bool Device::populateOCLDeviceConstants() {
info_.globalMemChannelBankWidth_ = deviceInfo_.memChannelBankWidth_;
info_.localMemSizePerCU_ = deviceInfo_.localMemSizePerCU_;
info_.localMemBanks_ = deviceInfo_.localMemBanks_;
info_.gfxipVersion_ = deviceInfo_.gfxipVersion_;
info_.gfxipVersion_ = deviceInfo_.gfxipMajor_ * 100 + deviceInfo_.gfxipMinor_ * 10 + deviceInfo_.gfxipStepping_;
info_.numAsyncQueues_ = kMaxAsyncQueues;
info_.numRTQueues_ = info_.numAsyncQueues_;
info_.numRTCUs_ = info_.maxComputeUnits_;
@@ -1043,7 +1043,7 @@ bool Image::createInteropImage() {
}
if (obj->getGLTarget() == GL_TEXTURE_CUBE_MAP) {
desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipVersion_ / 100);
desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipMajor_);
}
hsa_status_t err =
@@ -88,7 +88,7 @@ Settings::Settings() {
imageBufferWar_ = false;
}
bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) {
bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups) {
customHostAllocator_ = false;
if (fullProfile) {
@@ -131,7 +131,7 @@ bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) {
if (useLightning_) {
enableExtension(ClAmdAssemblyProgram);
// enable subnormals for gfx900 and later
if (gfxipVersion >= 900) {
if (gfxipMajor >= 9) {
singleFpDenorm_ = true;
enableCoopGroups_ = coop_groups;
enableCoopMultiDeviceGroups_ = coop_groups;
@@ -141,10 +141,10 @@ bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) {
enableExtension(ClAmdFp64);
}
if (gfxipVersion >= 1000) {
if (gfxipMajor >= 10) {
enableWave32Mode_ = true;
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
if (gfxipVersion >= 1010) {
if (gfxipMinor >= 10) {
// GFX10.1 HW doesn't support custom pitch. Enable double copy workaround
// TODO: This should be updated when ROCr supports custom pitch
imageBufferWar_ = GPU_IMAGE_BUFFER_WAR;
@@ -79,7 +79,7 @@ class Settings : public device::Settings {
Settings();
//! Creates settings
bool create(bool fullProfile, int gfxipVersion, bool coop_groups = false);
bool create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups = false);
private:
//! Disable copy constructor