device/rocm: split gfxVersion to major/minor/stepping
Change-Id: I1e437eaee30794147713d9516229211670f01d90
[ROCm/clr commit: b4f1239f34]
This commit is contained in:
committed by
Jason Tang
parent
08036e1ffe
commit
e49bddae07
@@ -430,7 +430,7 @@ PerfCounter::PerfCounter(const Device& device, //!< A ROC device object
|
||||
info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id)
|
||||
|
||||
// these block indices are valid for the SI (Gfx8) & Gfx9 devices
|
||||
switch (roc_device_.deviceInfo().gfxipVersion_ / 100) {
|
||||
switch (roc_device_.deviceInfo().gfxipMajor_) {
|
||||
case (8):
|
||||
gfxVersion_ = ROC_GFX8;
|
||||
if (blockIndex < viBlockIdOrcaToRocr.size()) {
|
||||
|
||||
@@ -44,7 +44,9 @@ struct AMDDeviceInfo {
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemSizePerCU_; //!< Local memory size per CU
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
uint gfxipVersion_; //!< The core engine GFXIP version
|
||||
uint gfxipMajor_; //!< The core engine GFXIP Major version
|
||||
uint gfxipMinor_; //!< The core engine GFXIP Minor version
|
||||
uint gfxipStepping_; //!< The core engine GFXIP Stepping version
|
||||
uint pciDeviceId_; //!< PCIe device id
|
||||
};
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) {
|
||||
|
||||
settings_ = new Settings();
|
||||
roc::Settings* hsaSettings = static_cast<roc::Settings*>(settings_);
|
||||
if ((hsaSettings == nullptr) || !hsaSettings->create(false, deviceInfo_.gfxipVersion_)) {
|
||||
if ((hsaSettings == nullptr) || !hsaSettings->create(false, deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_)) {
|
||||
LogError("Error creating settings for nullptr HSA device");
|
||||
return false;
|
||||
}
|
||||
@@ -526,7 +526,6 @@ bool Device::init() {
|
||||
|
||||
std::string str(isaName);
|
||||
|
||||
unsigned gfxipVersionNum = (unsigned)-1;
|
||||
if (str.find("amdgcn-") == 0) {
|
||||
// New way.
|
||||
std::vector<std::string> tokens;
|
||||
@@ -548,38 +547,35 @@ bool Device::init() {
|
||||
}
|
||||
|
||||
std::string gfxipVersionStr = tokens[4].substr(tokens[4].find("gfx") + 3);
|
||||
gfxipVersionNum = std::atoi(gfxipVersionStr.c_str());
|
||||
|
||||
std::string steppingStr(&gfxipVersionStr.back());
|
||||
roc_device->deviceInfo_.gfxipStepping_ = std::stoi(steppingStr, nullptr, 16);
|
||||
gfxipVersionStr.pop_back();
|
||||
|
||||
std::string minorStr(&gfxipVersionStr.back());
|
||||
roc_device->deviceInfo_.gfxipMinor_ = std::stoi(minorStr);
|
||||
gfxipVersionStr.pop_back();
|
||||
|
||||
roc_device->deviceInfo_.gfxipMajor_ = std::stoi(gfxipVersionStr);
|
||||
|
||||
} else {
|
||||
// FIXME(kzhuravl): Old way. Remove.
|
||||
std::vector<std::string> tokens;
|
||||
size_t end, pos = 0;
|
||||
do {
|
||||
end = str.find_first_of(':', pos);
|
||||
tokens.push_back(str.substr(pos, end - pos));
|
||||
pos = end + 1;
|
||||
} while (end != std::string::npos);
|
||||
|
||||
if (tokens.size() != 5 || tokens[0] != "AMD" || tokens[1] != "AMDGPU") {
|
||||
LogError("Not an AMD:AMDGPU ISA name");
|
||||
continue;
|
||||
}
|
||||
|
||||
uint major = atoi(tokens[2].c_str());
|
||||
uint minor = atoi(tokens[3].c_str());
|
||||
uint stepping = atoi(tokens[4].c_str());
|
||||
if (minor >= 10 && stepping >= 10) {
|
||||
LogError("Invalid ISA string");
|
||||
continue;
|
||||
}
|
||||
gfxipVersionNum = major * 100 + minor * 10 + stepping;
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
assert(gfxipVersionNum != (unsigned)-1);
|
||||
|
||||
roc_device->deviceInfo_.gfxipVersion_ = gfxipVersionNum;
|
||||
|
||||
// TODO: set sramEccEnabled flag based on target string suffix
|
||||
// when ROCr resumes reporting sram-ecc support
|
||||
bool sramEccEnabled = (gfxipVersionNum == 906 || gfxipVersionNum == 908) ? true : false;
|
||||
bool sramEccEnabled = false;
|
||||
if ((roc_device->deviceInfo_.gfxipMajor_ == 9) && (roc_device->deviceInfo_.gfxipMinor_ == 0)) {
|
||||
switch (roc_device->deviceInfo_.gfxipStepping_) {
|
||||
case 6:
|
||||
case 8:
|
||||
sramEccEnabled = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!roc_device->create(sramEccEnabled)) {
|
||||
LogError("Error creating new instance of Device.");
|
||||
continue;
|
||||
@@ -646,7 +642,7 @@ bool Device::create(bool sramEccEnabled) {
|
||||
roc::Settings* hsaSettings = static_cast<roc::Settings*>(settings_);
|
||||
if ((hsaSettings == nullptr) ||
|
||||
!hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL),
|
||||
deviceInfo_.gfxipVersion_, coop_groups)) {
|
||||
deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_, coop_groups)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1026,12 +1022,10 @@ bool Device::populateOCLDeviceConstants() {
|
||||
|
||||
roc::Settings* hsa_settings = static_cast<roc::Settings*>(settings_);
|
||||
|
||||
int gfxipMajor = deviceInfo_.gfxipVersion_ / 100;
|
||||
int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10;
|
||||
int gfxipStepping = deviceInfo_.gfxipVersion_ % 10;
|
||||
|
||||
std::ostringstream oss;
|
||||
oss << "gfx" << gfxipMajor << gfxipMinor << gfxipStepping;
|
||||
|
||||
oss << "gfx" << deviceInfo_.gfxipMajor_ << deviceInfo_.gfxipMinor_ << std::hex << deviceInfo_.gfxipStepping_;
|
||||
|
||||
if (settings().useLightning_ && hsa_settings->enableXNACK_) {
|
||||
oss << "+xnack";
|
||||
}
|
||||
@@ -1088,7 +1082,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
|
||||
//TODO: add the assert statement for Raven
|
||||
if (deviceInfo_.gfxipVersion_ != 902) {
|
||||
if ((deviceInfo_.gfxipMajor_*100 + deviceInfo_.gfxipMinor_*10 + deviceInfo_.gfxipStepping_) != 902) {
|
||||
assert(info_.maxEngineClockFrequency_ > 0);
|
||||
}
|
||||
|
||||
@@ -1270,7 +1264,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
strcpy(info_.driverVersion_, ss.str().c_str());
|
||||
|
||||
// Enable OpenCL 2.0 for Vega10+
|
||||
if (deviceInfo_.gfxipVersion_ >= 900) {
|
||||
if (deviceInfo_.gfxipMajor_ >= 9) {
|
||||
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " ";
|
||||
} else {
|
||||
info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " ";
|
||||
@@ -1397,15 +1391,14 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
if (amd::IS_HIP) {
|
||||
// Report atomics capability based on GFX IP, control on Hawaii
|
||||
if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipVersion_ >= 800) {
|
||||
if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipMajor_ >= 8) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
}
|
||||
else if (!settings().useLightning_) {
|
||||
// Report atomics capability based on GFX IP, control on Hawaii
|
||||
// and Vega10.
|
||||
if (info_.hostUnifiedMemory_ ||
|
||||
((deviceInfo_.gfxipVersion_ >= 800) && (deviceInfo_.gfxipVersion_ < 900))) {
|
||||
if (info_.hostUnifiedMemory_ || (deviceInfo_.gfxipMajor_ == 8)) {
|
||||
info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS;
|
||||
}
|
||||
}
|
||||
@@ -1445,7 +1438,7 @@ bool Device::populateOCLDeviceConstants() {
|
||||
info_.globalMemChannelBankWidth_ = deviceInfo_.memChannelBankWidth_;
|
||||
info_.localMemSizePerCU_ = deviceInfo_.localMemSizePerCU_;
|
||||
info_.localMemBanks_ = deviceInfo_.localMemBanks_;
|
||||
info_.gfxipVersion_ = deviceInfo_.gfxipVersion_;
|
||||
info_.gfxipVersion_ = deviceInfo_.gfxipMajor_ * 100 + deviceInfo_.gfxipMinor_ * 10 + deviceInfo_.gfxipStepping_;
|
||||
info_.numAsyncQueues_ = kMaxAsyncQueues;
|
||||
info_.numRTQueues_ = info_.numAsyncQueues_;
|
||||
info_.numRTCUs_ = info_.maxComputeUnits_;
|
||||
|
||||
@@ -1043,7 +1043,7 @@ bool Image::createInteropImage() {
|
||||
}
|
||||
|
||||
if (obj->getGLTarget() == GL_TEXTURE_CUBE_MAP) {
|
||||
desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipVersion_ / 100);
|
||||
desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipMajor_);
|
||||
}
|
||||
|
||||
hsa_status_t err =
|
||||
|
||||
@@ -88,7 +88,7 @@ Settings::Settings() {
|
||||
imageBufferWar_ = false;
|
||||
}
|
||||
|
||||
bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) {
|
||||
bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups) {
|
||||
customHostAllocator_ = false;
|
||||
|
||||
if (fullProfile) {
|
||||
@@ -131,7 +131,7 @@ bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) {
|
||||
if (useLightning_) {
|
||||
enableExtension(ClAmdAssemblyProgram);
|
||||
// enable subnormals for gfx900 and later
|
||||
if (gfxipVersion >= 900) {
|
||||
if (gfxipMajor >= 9) {
|
||||
singleFpDenorm_ = true;
|
||||
enableCoopGroups_ = coop_groups;
|
||||
enableCoopMultiDeviceGroups_ = coop_groups;
|
||||
@@ -141,10 +141,10 @@ bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) {
|
||||
enableExtension(ClAmdFp64);
|
||||
}
|
||||
|
||||
if (gfxipVersion >= 1000) {
|
||||
if (gfxipMajor >= 10) {
|
||||
enableWave32Mode_ = true;
|
||||
enableWgpMode_ = GPU_ENABLE_WGP_MODE;
|
||||
if (gfxipVersion >= 1010) {
|
||||
// Equivalent of the former "gfxipVersion >= 1010" test: GFX 10.1 or anything newer.
// The minor version is a single digit, so comparing it against 10 could never match.
if ((gfxipMajor > 10) || (gfxipMinor >= 1)) {
|
||||
// GFX10.1 HW doesn't support custom pitch. Enable double copy workaround
|
||||
// TODO: This should be updated when ROCr support custom pitch
|
||||
imageBufferWar_ = GPU_IMAGE_BUFFER_WAR;
|
||||
|
||||
@@ -79,7 +79,7 @@ class Settings : public device::Settings {
|
||||
Settings();
|
||||
|
||||
//! Creates settings
|
||||
bool create(bool fullProfile, int gfxipVersion, bool coop_groups = false);
|
||||
bool create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups = false);
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
|
||||
Reference in New Issue
Block a user