diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 2e0cf01cf3..6ea126a9e3 100755 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -483,8 +483,6 @@ struct Info : public amd::EmbeddedObject { uint32_t localMemSizePerCU_; //! Number of banks of local memory uint32_t localMemBanks_; - //! The core engine GFXIP version - uint32_t gfxipVersion_; //! The core engine major/minor/stepping uint32_t gfxipMajor_; uint32_t gfxipMinor_; diff --git a/rocclr/device/devprogram.cpp b/rocclr/device/devprogram.cpp index cc752c0e24..abbcd8f75c 100755 --- a/rocclr/device/devprogram.cpp +++ b/rocclr/device/devprogram.cpp @@ -1109,7 +1109,7 @@ bool Program::linkImplLC(amd::option::Options* options) { linkOptions.push_back("correctly_rounded_sqrt"); } if (options->oVariables->DenormsAreZero || AMD_GPU_FORCE_SINGLE_FP_DENORM == 0 || - (device().info().gfxipVersion_ < 900 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) { + (device().info().gfxipMajor_ < 9 && AMD_GPU_FORCE_SINGLE_FP_DENORM < 0)) { linkOptions.push_back("daz_opt"); } if (options->oVariables->FiniteMathOnly || options->oVariables->FastRelaxedMath) { diff --git a/rocclr/device/gpu/gpudevice.cpp b/rocclr/device/gpu/gpudevice.cpp index 028bdb9a6a..7bf80cbeef 100644 --- a/rocclr/device/gpu/gpudevice.cpp +++ b/rocclr/device/gpu/gpudevice.cpp @@ -593,7 +593,9 @@ void NullDevice::fillDeviceInfo(const CALdeviceattribs& calAttr, const gslMemInf info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_; info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_; info_.localMemBanks_ = hwInfo()->localMemBanks_; - info_.gfxipVersion_ = hwInfo()->gfxipVersion_; + info_.gfxipMajor_ = hwInfo()->gfxipVersion_ / 100; + info_.gfxipMinor_ = hwInfo()->gfxipVersion_ / 10 % 10; + info_.gfxipStepping_ = hwInfo()->gfxipVersion_ % 10; info_.numAsyncQueues_ = numComputeRings; @@ -1351,7 +1353,7 @@ gpu::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const { (owner.forceSysMemAlloc() || (owner.getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) ? Resource::Remote : Resource::Local; - + // Check if runtime can force a tiny buffer into USWC memory if ((size <= (GPU_MAX_REMOTE_MEM_SIZE * Ki)) && (type == Resource::Local) && (owner.getMemFlags() & CL_MEM_READ_ONLY)) { diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index 8e40f1d453..c2b247760f 100755 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -679,12 +679,13 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_; info_.localMemSizePerCU_ = palProp.gfxipProperties.shaderCore.ldsSizePerCu; info_.localMemBanks_ = hwInfo()->localMemBanks_; - info_.gfxipVersion_ = + + uint gfxipVersion = settings().useLightning_ ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_; - info_.gfxipMajor_ = info_.gfxipVersion_ / 100; - info_.gfxipMinor_ = info_.gfxipVersion_ / 10 % 10; - info_.gfxipStepping_ = info_.gfxipVersion_ % 10; + info_.gfxipMajor_ = gfxipVersion / 100; + info_.gfxipMinor_ = gfxipVersion / 10 % 10; + info_.gfxipStepping_ = gfxipVersion % 10; info_.timeStampFrequency_ = 1000000; info_.numAsyncQueues_ = numComputeRings; diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 29455531b0..058c98807d 100755 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -986,9 +986,6 @@ bool Device::populateOCLDeviceConstants() { info_.gfxipMinor_ = deviceInfo_.gfxipMinor_; info_.gfxipStepping_ = deviceInfo_.gfxipStepping_; - // TODO: gfxipVersion_ will be removed when Target ID feature is fully implemented - info_.gfxipVersion_ = info_.gfxipMajor_ * 100 + info_.gfxipMinor_ * 10 + info_.gfxipStepping_; - char device_name[64] = {0}; if (HSA_STATUS_SUCCESS == hsa_agent_get_info(_bkendDevice, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME,