diff --git a/projects/clr/rocclr/device/rocm/roccounters.cpp b/projects/clr/rocclr/device/rocm/roccounters.cpp index b0856018b3..acd57c8169 100644 --- a/projects/clr/rocclr/device/rocm/roccounters.cpp +++ b/projects/clr/rocclr/device/rocm/roccounters.cpp @@ -430,7 +430,7 @@ PerfCounter::PerfCounter(const Device& device, //!< A ROC device object info_.eventIndex_ = eventIndex; // Counter Event Selection (counter_id) // these block indices are valid for the SI (Gfx8) & Gfx9 devices - switch (roc_device_.deviceInfo().gfxipVersion_ / 100) { + switch (roc_device_.deviceInfo().gfxipMajor_) { case (8): gfxVersion_ = ROC_GFX8; if (blockIndex < viBlockIdOrcaToRocr.size()) { diff --git a/projects/clr/rocclr/device/rocm/rocdefs.hpp b/projects/clr/rocclr/device/rocm/rocdefs.hpp index ed894e743f..4cfe819868 100644 --- a/projects/clr/rocclr/device/rocm/rocdefs.hpp +++ b/projects/clr/rocclr/device/rocm/rocdefs.hpp @@ -44,7 +44,9 @@ struct AMDDeviceInfo { uint memChannelBankWidth_; //!< Memory channel bank width uint localMemSizePerCU_; //!< Local memory size per CU uint localMemBanks_; //!< Number of banks of local memory - uint gfxipVersion_; //!< The core engine GFXIP version + uint gfxipMajor_; //!< The core engine GFXIP Major version + uint gfxipMinor_; //!< The core engine GFXIP Minor version + uint gfxipStepping_; //!< The core engine GFXIP Stepping version uint pciDeviceId_; //!< PCIe device id }; diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 84600fb9a7..284023526d 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -133,7 +133,7 @@ bool NullDevice::create(const AMDDeviceInfo& deviceInfo) { settings_ = new Settings(); roc::Settings* hsaSettings = static_cast(settings_); - if ((hsaSettings == nullptr) || !hsaSettings->create(false, deviceInfo_.gfxipVersion_)) { + if ((hsaSettings == nullptr) || !hsaSettings->create(false, deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_)) { LogError("Error creating settings for nullptr HSA device"); return false; } @@ -526,7 +526,6 @@ bool Device::init() { std::string str(isaName); - unsigned gfxipVersionNum = (unsigned)-1; if (str.find("amdgcn-") == 0) { // New way. std::vector tokens; @@ -548,38 +547,35 @@ bool Device::init() { } std::string gfxipVersionStr = tokens[4].substr(tokens[4].find("gfx") + 3); - gfxipVersionNum = std::atoi(gfxipVersionStr.c_str()); + + std::string steppingStr(&gfxipVersionStr.back()); + roc_device->deviceInfo_.gfxipStepping_ = std::stoi(steppingStr, nullptr, 16); + gfxipVersionStr.pop_back(); + + std::string minorStr(&gfxipVersionStr.back()); + roc_device->deviceInfo_.gfxipMinor_ = std::stoi(minorStr); + gfxipVersionStr.pop_back(); + + roc_device->deviceInfo_.gfxipMajor_ = std::stoi(gfxipVersionStr); + } else { - // FIXME(kzhuravl): Old way. Remove. - std::vector tokens; - size_t end, pos = 0; - do { - end = str.find_first_of(':', pos); - tokens.push_back(str.substr(pos, end - pos)); - pos = end + 1; - } while (end != std::string::npos); - - if (tokens.size() != 5 || tokens[0] != "AMD" || tokens[1] != "AMDGPU") { - LogError("Not an AMD:AMDGPU ISA name"); - continue; - } - - uint major = atoi(tokens[2].c_str()); - uint minor = atoi(tokens[3].c_str()); - uint stepping = atoi(tokens[4].c_str()); - if (minor >= 10 && stepping >= 10) { - LogError("Invalid ISA string"); - continue; - } - gfxipVersionNum = major * 100 + minor * 10 + stepping; + ShouldNotReachHere(); } - assert(gfxipVersionNum != (unsigned)-1); - - roc_device->deviceInfo_.gfxipVersion_ = gfxipVersionNum; // TODO: set sramEccEnabled flag based on target string suffix // when ROCr resumes reporting sram-ecc support - bool sramEccEnabled = (gfxipVersionNum == 906 || gfxipVersionNum == 908) ? true : false; + bool sramEccEnabled = false; + if ((roc_device->deviceInfo_.gfxipMajor_ == 9) && (roc_device->deviceInfo_.gfxipMinor_ == 0)) { + switch (roc_device->deviceInfo_.gfxipStepping_) { + case 6: + case 8: + sramEccEnabled = true; + break; + default: + break; + } + } + if (!roc_device->create(sramEccEnabled)) { LogError("Error creating new instance of Device."); continue; @@ -646,7 +642,7 @@ bool Device::create(bool sramEccEnabled) { roc::Settings* hsaSettings = static_cast(settings_); if ((hsaSettings == nullptr) || !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), - deviceInfo_.gfxipVersion_, coop_groups)) { + deviceInfo_.gfxipMajor_, deviceInfo_.gfxipMinor_, coop_groups)) { return false; } @@ -1026,12 +1022,10 @@ bool Device::populateOCLDeviceConstants() { roc::Settings* hsa_settings = static_cast(settings_); - int gfxipMajor = deviceInfo_.gfxipVersion_ / 100; - int gfxipMinor = deviceInfo_.gfxipVersion_ / 10 % 10; - int gfxipStepping = deviceInfo_.gfxipVersion_ % 10; - std::ostringstream oss; - oss << "gfx" << gfxipMajor << gfxipMinor << gfxipStepping; + + oss << "gfx" << deviceInfo_.gfxipMajor_ << deviceInfo_.gfxipMinor_ << std::hex << deviceInfo_.gfxipStepping_; + if (settings().useLightning_ && hsa_settings->enableXNACK_) { oss << "+xnack"; } @@ -1088,7 +1082,7 @@ bool Device::populateOCLDeviceConstants() { } //TODO: add the assert statement for Raven - if (deviceInfo_.gfxipVersion_ != 902) { + if ((deviceInfo_.gfxipMajor_*100 + deviceInfo_.gfxipMinor_*10 + deviceInfo_.gfxipStepping_) != 902) { assert(info_.maxEngineClockFrequency_ > 0); } @@ -1270,7 +1264,7 @@ bool Device::populateOCLDeviceConstants() { strcpy(info_.driverVersion_, ss.str().c_str()); // Enable OpenCL 2.0 for Vega10+ - if (deviceInfo_.gfxipVersion_ >= 900) { + if (deviceInfo_.gfxipMajor_ >= 9) { info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"2.0" " "; } else { info_.version_ = "OpenCL " /*OPENCL_VERSION_STR*/"1.2" " "; @@ -1397,15 +1391,14 @@ bool Device::populateOCLDeviceConstants() { } if (amd::IS_HIP) { // Report atomics capability based on GFX IP, control on Hawaii - if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipVersion_ >= 800) { + if (info_.hostUnifiedMemory_ || deviceInfo_.gfxipMajor_ >= 8) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } else if (!settings().useLightning_) { // Report atomics capability based on GFX IP, control on Hawaii // and Vega10. - if (info_.hostUnifiedMemory_ || - ((deviceInfo_.gfxipVersion_ >= 800) && (deviceInfo_.gfxipVersion_ < 900))) { + if (info_.hostUnifiedMemory_ || (deviceInfo_.gfxipMajor_ == 8)) { info_.svmCapabilities_ |= CL_DEVICE_SVM_ATOMICS; } } @@ -1445,7 +1438,7 @@ bool Device::populateOCLDeviceConstants() { info_.globalMemChannelBankWidth_ = deviceInfo_.memChannelBankWidth_; info_.localMemSizePerCU_ = deviceInfo_.localMemSizePerCU_; info_.localMemBanks_ = deviceInfo_.localMemBanks_; - info_.gfxipVersion_ = deviceInfo_.gfxipVersion_; + info_.gfxipVersion_ = deviceInfo_.gfxipMajor_ * 100 + deviceInfo_.gfxipMinor_ * 10 + deviceInfo_.gfxipStepping_; info_.numAsyncQueues_ = kMaxAsyncQueues; info_.numRTQueues_ = info_.numAsyncQueues_; info_.numRTCUs_ = info_.maxComputeUnits_; diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp index 5d28b9f866..532af16652 100644 --- a/projects/clr/rocclr/device/rocm/rocmemory.cpp +++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp @@ -1043,7 +1043,7 @@ bool Image::createInteropImage() { } if (obj->getGLTarget() == GL_TEXTURE_CUBE_MAP) { - desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipVersion_ / 100); + desc.setFace(obj->getCubemapFace(), dev().deviceInfo().gfxipMajor_); } hsa_status_t err = diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp index 928c0f9fa4..43be33c413 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp @@ -88,7 +88,7 @@ Settings::Settings() { imageBufferWar_ = false; } -bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) { +bool Settings::create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups) { customHostAllocator_ = false; if (fullProfile) { @@ -131,7 +131,7 @@ bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) { if (useLightning_) { enableExtension(ClAmdAssemblyProgram); // enable subnormals for gfx900 and later - if (gfxipVersion >= 900) { + if (gfxipMajor >= 9) { singleFpDenorm_ = true; enableCoopGroups_ = coop_groups; enableCoopMultiDeviceGroups_ = coop_groups; @@ -141,10 +141,10 @@ bool Settings::create(bool fullProfile, int gfxipVersion, bool coop_groups) { enableExtension(ClAmdFp64); } - if (gfxipVersion >= 1000) { + if (gfxipMajor >= 10) { enableWave32Mode_ = true; enableWgpMode_ = GPU_ENABLE_WGP_MODE; - if (gfxipVersion >= 1010) { + if (gfxipMinor >= 10) { // GFX10.1 HW doesn't support custom pitch. Enable double copy workaround // TODO: This should be updated when ROCr support custom pitch imageBufferWar_ = GPU_IMAGE_BUFFER_WAR; diff --git a/projects/clr/rocclr/device/rocm/rocsettings.hpp b/projects/clr/rocclr/device/rocm/rocsettings.hpp index 8744d2ccb0..19fdccc777 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.hpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.hpp @@ -79,7 +79,7 @@ class Settings : public device::Settings { Settings(); //! Creates settings - bool create(bool fullProfile, int gfxipVersion, bool coop_groups = false); + bool create(bool fullProfile, int gfxipMajor, int gfxipMinor, bool coop_groups = false); private: //! Disable copy constructor